In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.pipeline import Pipeline                
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split

In [3]:
# Read the housing table from disk, using the CSV's first column as the row
# index, then preview the first five rows.
# NOTE(review): the file name suggests the features were already scaled and
# one-hot encoded in an earlier notebook -- confirm against that notebook.
houses_trans = pd.read_csv(
    filepath_or_buffer='trans_DF_S4_Part2.csv',
    index_col=0,
)
houses_trans.head(5)

Unnamed: 0,crime_rate,resid_area,air_qual,room_num,age,teachers,poor_prop,parks,dist,airport_NO,airport_YES,waterbody_Lake,waterbody_Lake and River,waterbody_None,waterbody_River,price
0,-0.401149,-1.226095,-1.199016,2.231987,-1.278274,1.753696,-1.266398,-1.272867,0.623366,1.0,0.0,0.0,0.0,1.0,0.0,48.5
1,0.702723,0.995482,1.587329,0.176596,1.008537,-0.808919,0.940407,2.139962,-0.804815,0.0,1.0,0.0,1.0,0.0,0.0,17.1
2,-0.389296,-1.028718,-0.391243,-1.008059,0.697674,0.868429,0.26193,-0.148523,-0.556639,1.0,0.0,0.0,0.0,0.0,1.0,23.1
3,-0.391244,-1.19584,-0.950404,0.487675,-0.399281,0.216127,-0.867459,-0.740051,-0.13638,1.0,0.0,0.0,0.0,0.0,1.0,28.4
4,-0.403599,-1.035921,-1.251491,-0.559197,-0.763742,-1.228256,0.277382,-1.483282,1.653529,0.0,1.0,0.0,0.0,1.0,0.0,18.9


In [4]:
# Work on an independent (deep) copy so `houses_trans` stays untouched.
df = houses_trans.copy(deep=True)

In [5]:
# Feature matrix: every column except the regression target.
X = df.drop(columns='price')

In [6]:
# Regression target: the `price` column as a Series.
y = df.loc[:, 'price']

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [8]:
# Training split: the row counts of features and target must match.
(X_train.shape, y_train.shape)

((404, 15), (404,))

In [9]:
# Test split: the row counts of features and target must match.
(X_test.shape, y_test.shape)

((102, 15), (102,))

## Training models

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [11]:
from sklearn.metrics import mean_squared_error,r2_score

### Various algorithms for regression tasks

- Linear Regression, Polynomial Regression, Ridge Regression, Lasso Regression, Elastic Net Regression
- Support Vector Regression (SVR)
- Decision Tree Regression
- Random Forest Regression
- Gradient Boosting Regression

Of these, the cells below train Linear Regression, SVR, Decision Tree and Random Forest.

In [12]:
# Candidate regressors to compare, otherwise left at their default
# hyper-parameters.
# The decision tree and the random forest are randomised estimators; without a
# seed their scores change on every run. They are seeded with the same value
# (123) that is passed to train_test_split so a Restart & Run All reproduces
# the reported metrics.
models = [
    LinearRegression(),
    SVR(),
    DecisionTreeRegressor(random_state=123),
    RandomForestRegressor(random_state=123),
]

In [13]:
import warnings
warnings.filterwarnings('ignore')

In [14]:
def _print_split_report(banner, y_true, y_pred):
    """Print a banner followed by MSE, RMSE and R2 for one data split.

    Parameters
    ----------
    banner : str
        Heading line printed before the metrics.
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predictions aligned with ``y_true``.
    """
    # MSE is computed once and reused for the RMSE line.
    mse = mean_squared_error(y_true, y_pred)

    print(banner)
    print("Mean Squared Error : ", round(mse, 2))
    print("RMSE : ", round(np.sqrt(mse), 2))
    print("R2 Square : ", r2_score(y_true, y_pred))


# Fit every candidate on the training split, then print the same three metrics
# for the training and the test split so the two can be compared side by side.
for model in models:
    # `.values` passes plain NumPy arrays to both fit and predict.
    model.fit(X_train.values, y_train)
    y_hat_train = model.predict(X_train.values)
    y_hat_test = model.predict(X_test.values)

    # "\33[1m" / "\33[0m" are the ANSI escape codes that switch bold on / off.
    print(f"******************* \33[1m{str(model)}\33[0m *******************\n")

    _print_split_report("\n\33[1mTraining Report :\33[0m \n", y_train, y_hat_train)
    _print_split_report("\n\33[1mTesting Report :\33[0m \n", y_test, y_hat_test)

    print()

******************* [1mLinearRegression()[0m *******************


[1mTraining Report :[0m 

Mean Squared Error :  23.88
RMSE :  4.89
R2 Square :  0.6904200500374265

[1mTesting Report :[0m 

Mean Squared Error :  23.42
RMSE :  4.84
R2 Square :  0.7885004549396641

******************* [1mSVR()[0m *******************


[1mTraining Report :[0m 

Mean Squared Error :  23.72
RMSE :  4.87
R2 Square :  0.6925090031027239

[1mTesting Report :[0m 

Mean Squared Error :  36.78
RMSE :  6.06
R2 Square :  0.6678101227671878

******************* [1mDecisionTreeRegressor()[0m *******************


[1mTraining Report :[0m 

Mean Squared Error :  0.0
RMSE :  0.0
R2 Square :  1.0

[1mTesting Report :[0m 

Mean Squared Error :  13.71
RMSE :  3.7
R2 Square :  0.8762180707584132

******************* [1mRandomForestRegressor()[0m *******************


[1mTraining Report :[0m 

Mean Squared Error :  1.41
RMSE :  1.19
R2 Square :  0.9816838246362783

[1mTesting Report :[0m 

Mean Squa