In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.pipeline import Pipeline                
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split

## STEP 1 : Import Transforrmed Dataset

In [3]:
houses_trans = pd.read_csv('S2_Part3_test_Trimming_DataPreprocessing2.csv' ,index_col = 0)
houses_trans.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,population,median_income,bedrooms_per_room,population_per_household,ocean_proximity_<1H OCEAN,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN,median_house_value
0,0.615915,-0.762698,0.086541,0.749785,1.801964,-0.617102,0.730645,0.207138,1.0,0.0,0.0,0.0,0.0,254200.0
1,0.765404,-0.721071,0.7373,-1.260907,-1.335955,1.541872,-0.923528,0.03361,0.0,1.0,0.0,0.0,0.0,396700.0
2,0.630864,-0.822826,-0.076149,0.373635,0.176938,0.797921,0.05384,-0.351104,1.0,0.0,0.0,0.0,0.0,244000.0
3,0.765404,-0.836702,-0.320184,-0.558435,-0.383733,1.014086,-0.626033,0.425248,1.0,0.0,0.0,0.0,0.0,244900.0
4,-0.928805,1.351044,-0.970943,-0.554527,-0.142921,-0.87014,-0.104804,0.527326,0.0,1.0,0.0,0.0,0.0,89400.0


In [4]:
df = houses_trans.copy()

## STEP 2 : Features and Target Split

In [8]:
X = df.drop('median_house_value' ,axis = 1)

In [9]:
y = df['median_house_value']

## STEP 3 :  Train - Test Split

In [10]:
X_train ,X_test ,y_train ,y_test = train_test_split(X ,y ,test_size= 0.2 , random_state= 123)

In [11]:
X_train.shape ,y_train.shape

((13886, 14), (13886,))

In [12]:
X_test.shape , y_test.shape

((3472, 14), (3472,))

## STEP 4 : Training models

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [14]:
from sklearn.metrics import mean_squared_error,r2_score

### Various Algorithms for Regression Task : 

- Linear Regression, Polynomial Regression, Ridge Regression, Lasso Regression, Elastic Net Regression.
- Support Vector Regression (SVR),
- Decision Tree Regression, 
- Random Forest Regression, and 
- Gradient Boosting Regression.

In [15]:
models = [LinearRegression(), SVR(),DecisionTreeRegressor(), RandomForestRegressor()]

In [16]:
import warnings
warnings.filterwarnings('ignore')

In [17]:
for model in models :
    
    model.fit(X_train.values, y_train)
    y_hat_train = model.predict(X_train.values)
    y_hat_test = model.predict(X_test.values)
    
    print(f"******************* \33[1m{str(model)}\33[0m *******************\n")
  
    print("\Regression Report : \n")
    print("Mean Squared Error : ",round(mean_squared_error(y_test,y_hat_test),2))
    print("RMSE : ",round(np.sqrt(mean_squared_error(y_test,y_hat_test)),2))
    print("R2 Square : ",r2_score(y_test,y_hat_test))
    
  
    print()

******************* [1mLinearRegression()[0m *******************

\Regression Report : 

Mean Squared Error :  4255902381.59
RMSE :  65237.28
R2 Square :  0.6302514810804443

******************* [1mSVR()[0m *******************

\Regression Report : 

Mean Squared Error :  11757967091.25
RMSE :  108434.16
R2 Square :  -0.02152035636422478

******************* [1mDecisionTreeRegressor()[0m *******************

\Regression Report : 

Mean Squared Error :  4976860866.92
RMSE :  70546.87
R2 Square :  0.5676153329150726

******************* [1mRandomForestRegressor()[0m *******************

\Regression Report : 

Mean Squared Error :  2385601395.65
RMSE :  48842.62
R2 Square :  0.7927413498513676

