In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [78]:
import warnings
warnings.filterwarnings('ignore')

In [79]:
df = pd.read_csv('Housing.csv')

In [80]:
## List of Variables to map
varlist = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
 
##Defining the map function

def binary_map(x):
    return x.map({'yes' : 1, 'no' : 0})

##Applying the function to the housing list

df[varlist] = df[varlist].apply(binary_map)

In [81]:
## Get the dummy variables for the featur 'furnishingstatus' and store it in a new variable - 'status'
status = pd.get_dummies(df['furnishingstatus'])

In [82]:
## Let's drop the first column from status df using 'drop_first = True'
status = pd.get_dummies(df['furnishingstatus'], drop_first = True)

In [83]:
df = pd.concat([df, status], axis = 1)

In [84]:
# Drop 'furnishingstatus' as we habe created the dummies for it
df.drop(['furnishingstatus'], axis = 1, inplace = True)

In [85]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,semi-furnished,unfurnished
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,0
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,0


In [86]:
y  = df.pop('price')
X= df

In [87]:
from sklearn.model_selection import train_test_split

In [88]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.7, test_size = 0.3, random_state = 100)

In [89]:
num_vars = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']

In [90]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [91]:
scalar = MinMaxScaler()

In [92]:
X_train[num_vars] = scalar.fit_transform(X_train[num_vars])  ## scaling X_train

In [93]:
X_test[num_vars] = scalar.transform(X_test[num_vars]) ## scaling test

In [94]:
y_train = scalar.fit_transform(y_train.values.reshape(-1, 1))

In [95]:
y_test = scalar.transform(y_test.values.reshape(-1, 1))

In [96]:
from sklearn.linear_model import LinearRegression

In [97]:
lr = LinearRegression()

In [98]:
lr.fit(X_train,y_train)

LinearRegression()

In [99]:
y_pred = lr.predict(X_test)

In [100]:
y_pred

array([[0.20724668],
       [0.42521187],
       [0.34390078],
       [0.30965517],
       [0.26679159],
       [0.20081042],
       [0.54605698],
       [0.34459482],
       [0.15132084],
       [0.33824567],
       [0.10885184],
       [0.38778083],
       [0.24973636],
       [0.48168266],
       [0.34081042],
       [0.4416681 ],
       [0.55569757],
       [0.22883672],
       [0.37058649],
       [0.27048717],
       [0.21622618],
       [0.08541382],
       [0.23984756],
       [0.16672718],
       [0.08343775],
       [0.143553  ],
       [0.29819593],
       [0.41218154],
       [0.4093423 ],
       [0.28367798],
       [0.12030014],
       [0.24278338],
       [0.10898889],
       [0.08625354],
       [0.27681818],
       [0.24897191],
       [0.24859295],
       [0.13302812],
       [0.13846168],
       [0.21930575],
       [0.11751055],
       [0.36890977],
       [0.37424519],
       [0.10598081],
       [0.07806882],
       [0.2499371 ],
       [0.23687591],
       [0.289

In [101]:
y_actual = y_test

In [102]:
y_actual

array([[0.22969697],
       [0.48484848],
       [0.3030303 ],
       [0.24242424],
       [0.22787879],
       [0.25454545],
       [0.58787879],
       [0.3030303 ],
       [0.08484848],
       [0.36363636],
       [0.16666667],
       [0.42424242],
       [0.05454545],
       [0.3630303 ],
       [0.33333333],
       [0.27878788],
       [0.90606061],
       [0.22424242],
       [0.53030303],
       [0.36363636],
       [0.12060606],
       [0.19393939],
       [0.23575758],
       [0.0969697 ],
       [0.14848485],
       [0.07818182],
       [0.23636364],
       [0.38484848],
       [0.18787879],
       [0.63636364],
       [0.07878788],
       [0.15151515],
       [0.13939394],
       [0.01212121],
       [0.26666667],
       [0.21212121],
       [0.21212121],
       [0.09090909],
       [0.16666667],
       [0.19545455],
       [0.28484848],
       [0.32727273],
       [0.33333333],
       [0.03030303],
       [0.07878788],
       [0.25454545],
       [0.21212121],
       [0.072

In [117]:
train_score_lr = lr.score(X_train, y_train)
test_score_lr = lr.score(X_test, y_test)

In [118]:
print("The Train score for Linear Regresion Model is : {}".format(train_score_lr))
print("The test score for LinearRegression Model is : {}".format(test_score_lr))

The Train score for Linear Regresion Model is : 0.6814893088451202
The test score for LinearRegression Model is : 0.672958274345992


# Alpha

# Ridge

In [119]:
from sklearn.linear_model import Ridge

In [120]:
rid = Ridge(alpha = 0.0001)

In [121]:
rid.fit(X_train,y_train)

Ridge(alpha=0.0001)

In [122]:
train_score_rid = rid.score(X_train, y_train)
test_score_rid = rid.score(X_test, y_test)

In [123]:
print("The Train score for Ridge Model is : {}".format(train_score_rid))
print("The test score for Ridge Model is : {}".format(test_score_rid))

The Train score for Ridge Model is : 0.6814893088383123
The test score for Ridge Model is : 0.6729583913175085


# Lasso

In [124]:
from sklearn.linear_model import Lasso

In [125]:
las = Ridge(alpha = 0.0001)

In [126]:
las.fit(X_train,y_train)

Ridge(alpha=0.0001)

In [127]:
train_score_las = las.score(X_train, y_train)
test_score_las = las.score(X_test, y_test)

In [128]:
print("The Train score for Lasso Model is : {}".format(train_score_las))
print("The test score for Lasso Model is : {}".format(test_score_las))

The Train score for Lasso Model is : 0.6814893088383123
The test score for Lasso Model is : 0.6729583913175085
