In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.pipeline import Pipeline                
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split

## STEP 1 : Import Transforrmed Dataset

In [3]:
filepath_train = r"S2_Part3_DP_Trimming2_test_train_dataset.csv"
filepath_test = r"S2_Part3_DP_Trimming2_test_test_dataset.csv"

In [4]:
d_train = pd.read_csv(filepath_train  ,index_col= 0 , header= 0)
d_test = pd.read_csv(filepath_test  ,index_col= 0 , header= 0)

In [5]:
d_train.shape

(14051, 15)

In [6]:
d_train.head(2)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,population,median_income,rooms_per_household,bedrooms_per_room,population_per_household,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN,median_house_value
0,-1.533576,1.587011,-1.374316,-1.507353,-1.687685,-0.167906,1.297679,-0.105796,0.484355,0.0,1.0,0.0,0.0,0.0,108300.0
1,-0.601758,-0.175406,-1.536809,1.115272,1.202695,-0.281535,-0.171424,0.008622,-0.174307,0.0,0.0,0.0,0.0,1.0,183200.0


In [7]:
d_test.head(2)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,population,median_income,rooms_per_household,bedrooms_per_room,population_per_household,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN,median_house_value
0,0.758598,-0.872068,0.494354,-0.949081,-1.272193,0.039068,0.371265,-0.817992,-0.399138,1.0,0.0,0.0,0.0,0.0,245300.0
1,-1.319308,0.978008,1.794299,-0.448661,0.024865,-1.599376,-0.514142,1.07112,-0.02769,0.0,0.0,0.0,1.0,0.0,104200.0


In [8]:
d_test.shape

(4128, 15)

## STEP 2 :  Train - Test Split

In [9]:
X_train = d_train.drop(labels= ['median_house_value'] , axis = 1)
y_train = d_train['median_house_value']

X_test = d_test.drop(labels= ['median_house_value'] , axis = 1)
y_test = d_test['median_house_value']

In [10]:
X_train.shape , y_train.shape

((14051, 14), (14051,))

In [11]:
X_test.shape , y_test.shape

((4128, 14), (4128,))

## STEP 3 : Training models

In [12]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [13]:
from sklearn.metrics import mean_squared_error,r2_score

### Various Algorithms for Regression Task : 

- Linear Regression, Polynomial Regression, Ridge Regression, Lasso Regression, Elastic Net Regression.
- Support Vector Regression (SVR),
- Decision Tree Regression, 
- Random Forest Regression, and 
- Gradient Boosting Regression.

In [14]:
models = [LinearRegression(), SVR(),DecisionTreeRegressor(), RandomForestRegressor()]

In [15]:
import warnings
warnings.filterwarnings('ignore')

In [16]:
for model in models :
    
    model.fit(X_train.values, y_train)
    y_hat_train = model.predict(X_train.values)
    y_hat_test = model.predict(X_test.values)
    
    print(f"******************* \33[1m{str(model)}\33[0m *******************\n")
  
    print("\Regression Report : \n")
    print("Mean Squared Error : ",round(mean_squared_error(y_test,y_hat_test),2))
    print("RMSE : ",round(np.sqrt(mean_squared_error(y_test,y_hat_test)),2))
    print("R2 Square : ",r2_score(y_test,y_hat_test))
    
  
    print()

******************* [1mLinearRegression()[0m *******************

\Regression Report : 

Mean Squared Error :  5807860708.89
RMSE :  76209.32
R2 Square :  0.5689570627887581

******************* [1mSVR()[0m *******************

\Regression Report : 

Mean Squared Error :  14392698771.18
RMSE :  119969.57
R2 Square :  -0.06818525162365807

******************* [1mDecisionTreeRegressor()[0m *******************

\Regression Report : 

Mean Squared Error :  4977272811.0
RMSE :  70549.79
R2 Square :  0.6306009390910603

******************* [1mRandomForestRegressor()[0m *******************

\Regression Report : 

Mean Squared Error :  2618150827.44
RMSE :  51167.87
R2 Square :  0.8056882767533079

