In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## STEP 1 : Import Transformed Dataset

In [2]:
# Locations of the pre-split train / test CSV files (relative to the notebook).
filepath_train = "S1_Part2_Train_DS.csv"
filepath_test = "S1_Part2_Test_DS.csv"

In [3]:
# Load both splits: the first row supplies the column names and the
# first column is used as the row index.
d_train = pd.read_csv(filepath_train, header=0, index_col=0)
d_test = pd.read_csv(filepath_test, header=0, index_col=0)

In [4]:
# Preview the first three rows of the training frame.
d_train.head(n=3)

Unnamed: 0,crime_rate,resid_area,air_qual,room_num,age,dist1,dist2,dist3,dist4,teachers,...,rainfall,parks,airport_NO,airport_YES,waterbody_Lake,waterbody_Lake and River,waterbody_None,waterbody_River,bus_ter_YES,price
0,17.8667,48.1,0.671,6.223,100.0,1.48,1.13,1.48,1.46,19.8,...,46.0,0.058067,1.0,0.0,0.0,1.0,0.0,0.0,1.0,10.2
1,0.79041,39.9,0.544,6.122,52.8,2.89,2.34,2.77,2.55,21.6,...,46.0,0.055734,0.0,1.0,0.0,0.0,0.0,1.0,1.0,22.1
2,1.22358,49.58,0.605,6.943,97.4,1.99,1.76,1.92,1.84,25.3,...,57.0,0.054814,0.0,1.0,0.0,0.0,1.0,0.0,1.0,41.3


In [5]:
# Preview the first three rows of the test frame.
d_test.head(n=3)

Unnamed: 0,crime_rate,resid_area,air_qual,room_num,age,dist1,dist2,dist3,dist4,teachers,...,rainfall,parks,airport_NO,airport_YES,waterbody_Lake,waterbody_Lake and River,waterbody_None,waterbody_River,bus_ter_YES,price
0,0.03768,31.52,0.404,7.274,38.3,7.53,7.22,7.46,7.03,27.4,...,43.0,0.041174,1.0,0.0,1.0,0.0,0.0,0.0,1.0,34.6
1,0.44178,36.2,0.504,6.552,21.4,3.56,3.28,3.42,3.23,22.6,...,47.0,0.044352,0.0,1.0,0.0,0.0,1.0,0.0,1.0,31.5
2,0.04527,41.93,0.573,6.12,76.7,2.44,2.11,2.46,2.14,19.0,...,20.0,0.059903,0.0,1.0,0.0,1.0,0.0,0.0,1.0,20.6


## STEP 2 : Separate Features and Target (Train / Test)

In [6]:
# Target is the `price` column; the features are every remaining column.
y_train = d_train["price"]
X_train = d_train.drop(columns=["price"])

y_test = d_test["price"]
X_test = d_test.drop(columns=["price"])

In [7]:
# Sanity check: feature matrix and target must have the same number of rows.
(X_train.shape, y_train.shape)

((404, 22), (404,))

In [8]:
# Sanity check: feature matrix and target must have the same number of rows.
(X_test.shape, y_test.shape)

((102, 22), (102,))

## STEP 3 : Training models

In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.neighbors import KNeighborsRegressor

In [10]:
from sklearn.metrics import mean_squared_error,r2_score

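### Various Algorithms for Regression Tasks:

- Linear Regression, Polynomial Regression, Ridge Regression, Lasso Regression, Elastic Net Regression
- Support Vector Regression (SVR)
- K-Nearest Neighbors Regression
- Decision Tree Regression
- Random Forest Regression
- Gradient Boosting Regression

Only a subset of these is trained below: Linear Regression, SVR, K-Nearest Neighbors, Decision Tree, and Random Forest.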

In [11]:
# Candidate regressors, each with library-default hyper-parameters.
# The tree-based estimators use randomness internally, so they are seeded
# to make the reported scores reproducible across Restart & Run All.
models = [
    LinearRegression(),
    SVR(),
    KNeighborsRegressor(),
    DecisionTreeRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
]

In [12]:
import warnings
warnings.filterwarnings('ignore')

In [13]:
def _print_report(title, y_true, y_pred):
    """Print MSE, RMSE and R2 for one data split under a bold ``title`` heading."""
    # Compute the MSE once and reuse it for the RMSE line.
    mse = mean_squared_error(y_true, y_pred)

    print(f"\33[1m{title} :\33[0m \n")
    print("Mean Squared Error : ", round(mse, 2))
    print("RMSE : ", round(np.sqrt(mse), 2))
    print("R2 Square : ", r2_score(y_true, y_pred))
    print()


def ModelTraining(ML_models, X_train, y_train, X_test, y_test):
    """Fit each model on the training data and print train / test metrics.

    For every estimator in ``ML_models`` the function fits it, predicts on
    both splits, and prints the mean squared error, its square root (RMSE)
    and the R2 score for the training and the testing data.

    Parameters
    ----------
    ML_models : iterable
        Estimator objects exposing ``fit(X, y)`` and ``predict(X)``.
        They are fitted in place.
    X_train, X_test : objects with a ``.values`` attribute
        Feature tables; only their ``.values`` are handed to the estimators.
    y_train, y_test : array-like
        Target values matching the rows of the corresponding feature table.

    Returns
    -------
    None
        Results are only printed.
    """
    # Extract the underlying arrays once instead of on every fit/predict call.
    train_features = X_train.values
    test_features = X_test.values

    for model in ML_models:
        model.fit(train_features, y_train)
        y_hat_train = model.predict(train_features)
        y_hat_test = model.predict(test_features)

        # "\33[1m" / "\33[0m" are ANSI escape codes that switch bold on / off.
        print(f"******************* \33[1m{model}\33[0m *******************\n")

        _print_report("Training Report", y_train, y_hat_train)
        _print_report("Testing Report", y_test, y_hat_test)

In [14]:
# Fit every candidate model and print its training / testing metrics.
ModelTraining(models, X_train, y_train, X_test, y_test)

******************* [1mLinearRegression()[0m *******************

[1mTraining Report :[0m 

Mean Squared Error :  22.69
RMSE :  4.76
R2 Square :  0.7198030471441095

[1mTesting Report :[0m 

Mean Squared Error :  26.25
RMSE :  5.12
R2 Square :  0.7279578448432433

******************* [1mSVR()[0m *******************

[1mTraining Report :[0m 

Mean Squared Error :  55.69
RMSE :  7.46
R2 Square :  0.3122522544529428

[1mTesting Report :[0m 

Mean Squared Error :  68.48
RMSE :  8.28
R2 Square :  0.2902806211526078

******************* [1mKNeighborsRegressor()[0m *******************

[1mTraining Report :[0m 

Mean Squared Error :  26.44
RMSE :  5.14
R2 Square :  0.6734930935951073

[1mTesting Report :[0m 

Mean Squared Error :  38.09
RMSE :  6.17
R2 Square :  0.6052584916720938

******************* [1mDecisionTreeRegressor()[0m *******************

[1mTraining Report :[0m 

Mean Squared Error :  0.0
RMSE :  0.0
R2 Square :  1.0

[1mTesting Report :[0m 

Mean Squared 

In [15]:
# Summary statistics of the test-set target (`price`); presumably shown to
# put the error figures reported above into context -- confirm intent.
y_test.describe()

count    102.000000
mean      22.129412
std        9.871397
min        6.300000
25%       15.575000
50%       20.900000
75%       24.650000
max       50.000000
Name: price, dtype: float64