In [59]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.metrics import r2_score

In [58]:
df = pd.read_csv('Data.csv')
df.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [29]:
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [30]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

## Multiple Linear Regression

In [37]:
from sklearn.linear_model import LinearRegression

mul_reg = LinearRegression()
mul_reg.fit(X_train, y_train)

y_pred_mul = mul_reg.predict(X_test)

np.set_printoptions(precision=2)
np.concatenate(
    (
        y_pred_mul.reshape(len(y_pred_mul), 1), 
        y_test.reshape(len(y_test), 1)
    ),
    1
)

array([[431.43, 431.23],
       [458.56, 460.01],
       [462.75, 461.14],
       ...,
       [469.52, 473.26],
       [442.42, 438.  ],
       [461.88, 463.28]])

In [62]:
r2_mul = r2_score(y_test, y_pred_mul)
round(r2_mul, 3)

0.933

## Polynomial Regression

In [38]:
from sklearn.preprocessing import PolynomialFeatures

poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)

lin_reg = LinearRegression()
lin_reg.fit(X_poly, y_train)

y_pred_poly = lin_reg.predict(poly_reg.transform(X_test))

np.set_printoptions(precision=2)
np.concatenate(
    (
        y_pred_poly.reshape(len(y_pred_poly), 1), 
        y_test.reshape(len(y_test), 1)
    ),
    1
)

array([[433.94, 431.23],
       [457.9 , 460.01],
       [460.52, 461.14],
       ...,
       [469.53, 473.26],
       [438.27, 438.  ],
       [461.67, 463.28]])

In [63]:
r2_poly = r2_score(y_test, y_pred_poly)
round(r2_poly, 3)

0.946

## Support Vector Regression

In [52]:
y_svr = y.reshape(len(y), 1)

X_train_svr, X_test_svr, y_train_svr, y_test_svr = train_test_split(X, y_svr, test_size=0.2, random_state=0)

from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
sc_y = StandardScaler()

X_train_svr = sc_X.fit_transform(X_train_svr)
y_train_svr = sc_y.fit_transform(y_train_svr)

from sklearn.svm import SVR

svr_reg = SVR(kernel='rbf')
svr_reg.fit(X_train_svr, y_train_svr)

y_pred_svr = sc_y.inverse_transform(svr_reg.predict(sc_X.transform(X_test)))

np.set_printoptions(precision=2)
np.concatenate(
    (
        y_pred_svr.reshape(len(y_pred_svr), 1), 
        y_test.reshape(len(y_test), 1)
    ),
    1
)

  y = column_or_1d(y, warn=True)


array([[434.05, 431.23],
       [457.94, 460.01],
       [461.03, 461.14],
       ...,
       [470.6 , 473.26],
       [439.42, 438.  ],
       [460.92, 463.28]])

In [64]:
r2_svr = r2_score(y_test, y_pred_svr)
round(r2_svr, 3)

0.948

## Decision Tree Regression

In [53]:
from sklearn.tree import DecisionTreeRegressor

dt_reg = DecisionTreeRegressor(random_state=0)
dt_reg.fit(X_train, y_train)

y_pred_dt = dt_reg.predict(X_test)

np.set_printoptions(precision=2)
np.concatenate(
    (
        y_pred_dt.reshape(len(y_pred_dt), 1), 
        y_test.reshape(len(y_test), 1)
    ),
    1
)

array([[431.28, 431.23],
       [462.81, 460.01],
       [460.06, 461.14],
       ...,
       [471.46, 473.26],
       [437.76, 438.  ],
       [462.55, 463.28]])

In [65]:
r2_dt = r2_score(y_test, y_pred_dt)
round(r2_dt, 3)

0.923

## Random Forest Regression

In [55]:
from sklearn.ensemble import RandomForestRegressor

rf_reg = RandomForestRegressor(n_estimators=10, random_state=0)
rf_reg.fit(X_train, y_train)

y_pred_rf = rf_reg.predict(X_test)

np.set_printoptions(precision=2)
np.concatenate(
    (
        y_pred_rf.reshape(len(y_pred_rf), 1), 
        y_test.reshape(len(y_test), 1)
    ),
    1
)

array([[433.78, 431.23],
       [457.99, 460.01],
       [463.14, 461.14],
       ...,
       [470.16, 473.26],
       [439.51, 438.  ],
       [460.32, 463.28]])

In [66]:
r2_rf = r2_score(y_test, y_pred_rf)
round(r2_rf, 3)

0.962

## Evaluating model performance

After carrying out all the regression methods on the data, we can evaluate the model performance by calculating the R-squared value. The model with the highest R-squared is the model that performs the best - in our case this is Random Forest Regression.