In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.pipeline import Pipeline                
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split

## STEP 1 : Import Transforrmed Dataset

In [3]:
houses_trans = pd.read_csv('S4_Part3_Trimming_DataPreprocessing2.csv' ,index_col = 0)
houses_trans.head()

Unnamed: 0,crime_rate,resid_area,air_qual,room_num,age,teachers,poor_prop,dist,airport_NO,airport_YES,waterbody_Lake,waterbody_Lake and River,waterbody_None,waterbody_River,price
0,-0.730598,-0.59728,-0.782594,-0.6269,-0.243555,-0.080221,0.431855,0.074131,1.0,0.0,0.0,1.0,0.0,0.0,20.7
1,1.93435,1.040462,1.325589,-2.629425,1.138512,-0.831225,1.662088,-1.185532,0.0,1.0,0.0,0.0,0.0,1.0,10.5
2,1.198306,1.040462,1.950898,-0.299989,0.542608,-0.831225,0.139448,-0.540276,1.0,0.0,0.0,1.0,0.0,0.0,22.7
3,-1.12494,-0.857496,-0.327012,-0.442124,-1.212797,-0.831225,-0.468419,1.137132,0.0,1.0,0.0,0.0,1.0,0.0,21.1
4,1.146823,1.040462,1.441718,-0.53846,0.704148,-0.831225,0.991267,-0.613542,1.0,0.0,0.0,0.0,0.0,1.0,12.7


In [4]:
df = houses_trans.copy()

## STEP 2 : Features and Target Split

In [5]:
X = df.drop('price' ,axis = 1)

In [6]:
y = df['price']

## STEP 3 :  Train - Test Split

In [7]:
X_train ,X_test ,y_train ,y_test = train_test_split(X ,y ,test_size= 0.2 , random_state= 123)

In [8]:
X_train.shape ,y_train.shape

((385, 14), (385,))

In [9]:
X_test.shape , y_test.shape

((97, 14), (97,))

## STEP 4 : Training models

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [11]:
from sklearn.metrics import mean_squared_error,r2_score

### Various Algorithms for Regression Task : 

- Linear Regression, Polynomial Regression, Ridge Regression, Lasso Regression, Elastic Net Regression.
- Support Vector Regression (SVR),
- Decision Tree Regression, 
- Random Forest Regression, and 
- Gradient Boosting Regression.

In [12]:
models = [LinearRegression(), SVR(),DecisionTreeRegressor(), RandomForestRegressor()]

In [13]:
import warnings
warnings.filterwarnings('ignore')

In [14]:
for model in models :
    
    model.fit(X_train.values, y_train)
    y_hat_train = model.predict(X_train.values)
    y_hat_test = model.predict(X_test.values)
    
    print(f"******************* \33[1m{str(model)}\33[0m *******************\n")
  
    print("\Regression Report : \n")
    print("Mean Squared Error : ",round(mean_squared_error(y_test,y_hat_test),2))
    print("RMSE : ",round(np.sqrt(mean_squared_error(y_test,y_hat_test)),2))
    print("R2 Square : ",r2_score(y_test,y_hat_test))
    
  
    print()

******************* [1mLinearRegression()[0m *******************

\Regression Report : 

Mean Squared Error :  24.59
RMSE :  4.96
R2 Square :  0.7767268491055086

******************* [1mSVR()[0m *******************

\Regression Report : 

Mean Squared Error :  42.33
RMSE :  6.51
R2 Square :  0.6156287696532654

******************* [1mDecisionTreeRegressor()[0m *******************

\Regression Report : 

Mean Squared Error :  10.95
RMSE :  3.31
R2 Square :  0.9005742985960872

******************* [1mRandomForestRegressor()[0m *******************

\Regression Report : 

Mean Squared Error :  13.12
RMSE :  3.62
R2 Square :  0.8808256170199379



**Without Outlier Removal**<br>
    ******************* LinearRegression() *******************

    \Regression Report : 

    Mean Squared Error :  23.37
    RMSE :  4.83
    R2 Square :  0.7889297842627835

    ******************* SVR() *******************

    \Regression Report : 

    Mean Squared Error :  83.98
    RMSE :  9.16
    R2 Square :  0.24157755953772664

    ******************* DecisionTreeRegressor() *******************

    \Regression Report : 

    Mean Squared Error :  13.64
    RMSE :  3.69
    R2 Square :  0.8768555514569143

    ******************* RandomForestRegressor() *******************

    \Regression Report : 

    Mean Squared Error :  8.7
    RMSE :  2.95
    R2 Square :  0.9213876567955999

In [15]:
y_test.describe()

count    97.000000
mean     23.705155
std      10.548572
min       8.200000
25%      17.800000
50%      20.600000
75%      27.000000
max      50.000000
Name: price, dtype: float64