In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## STEP 1 : Import Transformed Dataset

In [2]:
# Locations of the train and test CSVs. Judging by the file names they were
# produced by an earlier outlier-trimming pipeline step (assumption - confirm).
filepath_train, filepath_test = (
    "S2a_Part3_test_pipeline_outlier_trimming_Train_DS.csv",
    "S2a_Part3_test_pipeline_outlier_trimming_Test_DS.csv",
)

In [3]:
# Load both splits with identical options: the first CSV column becomes the
# row index and the first row supplies the column names.
d_train, d_test = (
    pd.read_csv(csv_path, index_col=0, header=0)
    for csv_path in (filepath_train, filepath_test)
)

In [4]:
# Preview the first three rows of the training frame.
d_train.head(3)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,population,median_income,rooms_per_household,bedrooms_per_room,population_per_household,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN,median_house_value
0,-1.554442,1.636779,-1.240082,-0.970473,-1.064046,-0.249954,1.262319,-0.07829,0.031965,0.0,1.0,0.0,0.0,0.0,108300.0
1,-0.623047,-0.148644,-1.398615,0.278105,0.334906,-0.339735,-0.199874,0.040216,-0.029367,0.0,0.0,0.0,0.0,1.0,183200.0
2,-1.360194,2.267753,-0.84375,0.633399,0.262335,-0.272571,0.113653,-0.597804,-0.054939,0.0,1.0,0.0,0.0,0.0,118400.0


In [5]:
# Preview the first three rows of the test frame.
d_test.head(3)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,population,median_income,rooms_per_household,bedrooms_per_room,population_per_household,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN,median_house_value
0,0.736691,-0.8544,0.583043,-0.704691,-0.862947,-0.086419,0.340263,-0.815927,-0.050302,1.0,0.0,0.0,0.0,0.0,245300.0
1,-1.340271,1.019826,1.851305,-0.466451,-0.235167,-1.380994,-0.540981,1.14067,-0.015714,0.0,0.0,0.0,1.0,0.0,104200.0
2,0.228657,-0.125275,-1.240082,0.863376,0.646173,0.505882,0.528296,-0.955074,0.012981,0.0,1.0,0.0,0.0,0.0,146200.0


## STEP 2 : Separate Features and Target (Train / Test Sets Are Already Split)

In [6]:
# Separate the regression target from the predictors for each split.
# `drop` returns a new frame, so d_train / d_test themselves are not modified.
y_train = d_train['median_house_value']
X_train = d_train.drop(columns='median_house_value')

y_test = d_test['median_house_value']
X_test = d_test.drop(columns='median_house_value')

In [7]:
# Display the training feature-matrix and target shapes; their row counts should match.
X_train.shape , y_train.shape

((14510, 14), (14510,))

In [8]:
# Display the test feature-matrix and target shapes; their row counts should match.
X_test.shape , y_test.shape

((4128, 14), (4128,))

## STEP 3 : Training models

In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.neighbors import KNeighborsRegressor

In [10]:
from sklearn.metrics import mean_squared_error,r2_score

### Various Algorithms for Regression Task : 

- Linear Regression, Polynomial Regression, Ridge Regression, Lasso Regression, Elastic Net Regression,
- Support Vector Regression (SVR),
- K-Nearest Neighbors Regression,
- Decision Tree Regression,
- Random Forest Regression, and
- Gradient Boosting Regression.

In [11]:
# Candidate regressors, all with default hyper-parameters. The tree-based
# estimators use random numbers while fitting, so their seed is pinned to keep
# the reported metrics identical across "Restart & Run All" executions.
models = [
    LinearRegression(),
    SVR(),
    KNeighborsRegressor(),
    DecisionTreeRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
]

In [12]:
import warnings
warnings.filterwarnings('ignore')

In [13]:
def _print_report(title, y_true, y_pred):
    """Print MSE, RMSE and R2 of ``y_pred`` against ``y_true`` under a bold ``title`` heading."""
    print(f"\33[1m{title} Report :\33[0m \n")
    # The MSE is computed once and reused for the RMSE.
    mse = mean_squared_error(y_true, y_pred)
    print("Mean Squared Error : ", round(mse, 2))
    print("RMSE : ", round(np.sqrt(mse), 2))
    print("R2 Square : ", r2_score(y_true, y_pred))
    print()


def ModelTraining(ML_models , X_train ,y_train ,X_test,y_test):
    """Fit each model on the training data and print train/test regression metrics.

    For every estimator in ``ML_models`` the function fits it on
    ``(X_train, y_train)``, predicts on both the training and the test
    features, and prints the mean squared error, its square root (RMSE) and
    the R2 score for each split.

    Parameters
    ----------
    ML_models : iterable
        Estimator objects exposing ``fit(X, y)`` and ``predict(X)``. They are
        fitted in place, so the caller's objects are trained afterwards.
    X_train, X_test : DataFrame or array-like
        Feature matrices. They are converted with ``np.asarray`` so the
        estimators receive plain arrays without column names.
    y_train, y_test : array-like
        Target values aligned row-by-row with the matching feature matrix.

    Returns
    -------
    None
        The results are only printed.
    """
    # For a DataFrame, np.asarray yields the same array as ``.values``; unlike
    # ``.values`` it also accepts inputs that are already NumPy arrays or lists.
    # The conversion does not depend on the model, so it is done once up front.
    X_train_arr = np.asarray(X_train)
    X_test_arr = np.asarray(X_test)

    for model in ML_models:
        model.fit(X_train_arr, y_train)
        y_hat_train = model.predict(X_train_arr)
        y_hat_test = model.predict(X_test_arr)

        # \33[1m ... \33[0m are ANSI escape codes that switch bold text on and off.
        print(f"******************* \33[1m{str(model)}\33[0m *******************\n")

        _print_report("Training", y_train, y_hat_train)
        _print_report("Testing", y_test, y_hat_test)

In [14]:
# Fit every candidate model and print its training and testing metrics.
ModelTraining(
    ML_models=models,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

******************* [1mLinearRegression()[0m *******************

[1mTraining Report :[0m 

Mean Squared Error :  4382867583.48
RMSE :  66203.23
R2 Square :  0.6139893542227199

[1mTesting Report :[0m 

Mean Squared Error :  4953043472.14
RMSE :  70377.86
R2 Square :  0.6323991718505035

******************* [1mSVR()[0m *******************

[1mTraining Report :[0m 

Mean Squared Error :  11805192208.62
RMSE :  108651.7
R2 Square :  -0.03971424670724266

[1mTesting Report :[0m 

Mean Squared Error :  14407669136.4
RMSE :  120031.95
R2 Square :  -0.06929630963977584

******************* [1mKNeighborsRegressor()[0m *******************

[1mTraining Report :[0m 

Mean Squared Error :  2317736288.87
RMSE :  48142.87
R2 Square :  0.7958708848560669

[1mTesting Report :[0m 

Mean Squared Error :  4002381573.26
RMSE :  63264.38
R2 Square :  0.7029545996966398

******************* [1mDecisionTreeRegressor()[0m *******************

[1mTraining Report :[0m 

Mean Squared Error

In [18]:
# Summary statistics of the test target; presumably shown to put the RMSE
# figures above into context against the target's scale.
y_test.describe()

count      4128.000000
mean     205656.130814
std      116091.504411
min       22500.000000
25%      118175.000000
50%      177750.000000
75%      262500.000000
max      500001.000000
Name: median_house_value, dtype: float64