# Regression Model Selection

In [75]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [76]:
df=pd.read_csv('Data.csv')
df

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.40,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.50,1009.23,96.62,473.90
...,...,...,...,...,...
9563,16.65,49.69,1014.01,91.00,460.03
9564,13.19,39.18,1023.67,66.78,469.62
9565,31.32,74.33,1012.92,36.48,429.57
9566,24.48,69.45,1013.86,62.39,435.74


In [77]:
x=df.iloc[:,:-1].values
y=df.iloc[:,-1].values
Y = y.reshape(len(y),1)

In [78]:
y

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28])

## Splitting the model

In [79]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=0,test_size=0.2)

## Multiple Linear Regression

In [80]:
from sklearn.linear_model import LinearRegression
mlr=LinearRegression()
mlr.fit(x_train,y_train)

LinearRegression()

In [81]:
y_pred=mlr.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


In [82]:
from sklearn.metrics import r2_score
r2_score(y_test,y_pred)

0.9325315554761303

In [83]:
mlr.score(x_test,y_test)

0.9325315554761303

## Polynomial Regression 

In [84]:
from sklearn.preprocessing import PolynomialFeatures
poly_reg=PolynomialFeatures(degree=4)
x_poly=poly_reg.fit_transform(x_train)
pr=LinearRegression()
pr.fit(x_poly,y_train)

LinearRegression()

In [85]:
y_pred=pr.predict(poly_reg.transform(x_test))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[433.94 431.23]
 [457.9  460.01]
 [460.52 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.67 463.28]]


In [86]:
r2_score(y_test,y_pred)

0.9458193628375003

In [87]:
x_test_poly=poly_reg.fit_transform(x_test)
pr.score(x_test_poly,y_test)

0.9458193628375003

## Support Vector Regression

In [88]:
from sklearn.svm import SVR
sv=SVR(kernel='rbf')
sv.fit(x_train,y_train)

SVR()

In [89]:
y_pred=sv.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[445.3  431.23]
 [455.02 460.01]
 [457.85 461.14]
 ...
 [457.13 473.26]
 [450.72 438.  ]
 [455.28 463.28]]


In [90]:
r2_score(y_test,y_pred)

0.38182110451293594

In [91]:
sv.score(x_test,y_test)

0.38182110451293594

## Decision Tree

In [92]:
from sklearn.tree import DecisionTreeRegressor

In [93]:
dt=DecisionTreeRegressor(random_state=0)
dt.fit(x_train,y_train)

DecisionTreeRegressor(random_state=0)

In [94]:
y_pred=dt.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


In [95]:
dt.score(x_test,y_test)

0.922905874177941

In [96]:
r2_score(y_test,y_pred)

0.922905874177941

## Random Forest

In [97]:
from sklearn.ensemble import RandomForestRegressor

In [99]:
rf=RandomForestRegressor(n_estimators=10,random_state=0)
rf.fit(x_train,y_train)

RandomForestRegressor(n_estimators=10, random_state=0)

In [100]:
y_pred=rf.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


In [101]:
r2_score(y_test,y_pred)

0.9615908334363876

In [102]:
rf.score(x_test,y_test)

0.9615908334363876

#### So it can be observed that Random Forest gives the most accurate results followed by Polynomial Regression