In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb


In [None]:
# Load the housing dataset into a DataFrame.
# NOTE(review): the path is absolute and looks like a Google Colab upload
# location - confirm it exists in the environment where this notebook is run.
house_df=pd.read_csv("/content/Housing.csv")

# Data Overview

In [None]:
# Preview the first rows of the raw data.
house_df.head()

In [None]:
# (number of rows, number of columns)
house_df.shape

In [None]:
# Column dtypes and non-null counts.
house_df.info()

In [None]:
# Count missing values per column.
house_df.isnull().sum()

In [None]:
# Summary statistics for the numeric columns.
house_df.describe()

In [None]:
# Pairwise relationships between the columns of the raw frame.
sb.pairplot(house_df)
plt.show()

In [None]:
# Distribution of the target and of each numeric feature, one figure per
# column. A single loop replaces six copy-pasted stanzas, and the unused
# plot1..plot6 bindings are dropped. Titles are the capitalised column names
# ('Price', 'Area', ...), exactly as before.
hist_columns = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']

for column in hist_columns:
    sb.histplot(house_df[column])
    plt.title(column.capitalize())
    plt.show()

In [None]:
# Price distribution split by each categorical feature, laid out on a
# 2x3 grid (one panel per feature, filled row by row).
boxplot_features = [
    'mainroad', 'guestroom', 'basement',
    'hotwaterheating', 'airconditioning', 'furnishingstatus',
]

plt.figure(figsize=(20, 12))
for panel_index, feature in enumerate(boxplot_features, start=1):
    plt.subplot(2, 3, panel_index)
    sb.boxplot(x=feature, y='price', data=house_df)
plt.show()

In [None]:
# Line plot of price against furnishing status, one line per
# air-conditioning value, with point markers and solid lines.
sb.relplot(
    data=house_df,
    x="furnishingstatus",
    y="price",
    hue="airconditioning",
    kind="line",
    markers=True,
    dashes=False,
)

In [None]:
# Columns holding 'yes'/'no' answers that are converted to 1/0 below.
categorical_list = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]

def binary_map(x):
    """Encode a 'yes'/'no' Series as 1/0.

    Values that are already 1 or 0 are passed through unchanged, so running
    the encoding cell a second time does not turn the column into NaN.
    Any other value still maps to NaN, as before.

    Parameters
    ----------
    x : pandas.Series
        Column containing 'yes'/'no' (or already-encoded 1/0) values.

    Returns
    -------
    pandas.Series
        Series with 1 for 'yes' and 0 for 'no'.
    """
    # The 1 -> 1 and 0 -> 0 entries are what make the mapping idempotent.
    return x.map({'yes': 1, "no": 0, 1: 1, 0: 0})
# Encode every yes/no column, one column at a time.
for column in categorical_list:
    house_df[column] = binary_map(house_df[column])

In [None]:
# Check the frame after the yes/no columns were mapped to 1/0.
house_df.head()

In [None]:
# One-hot encode 'furnishingstatus' with every level kept. This version is
# only previewed; three_list is reassigned with drop_first=True further down.
three_list = pd.get_dummies(house_df['furnishingstatus'])

In [None]:
# Preview the dummy columns.
three_list.head()

In [None]:
# Re-encode with drop_first=True: the first level is dropped, so it becomes
# the implicit baseline and the remaining dummies are not redundant.
three_list = pd.get_dummies(house_df['furnishingstatus'], drop_first = True)

In [None]:
# Append the dummy columns to the main frame (column-wise).
# NOTE: re-running this cell appends the same columns a second time.
house_df=pd.concat([house_df, three_list], axis = 1)

In [None]:
# Check the frame with the dummy columns attached.
house_df.head()

In [None]:
# The original text column is no longer needed once the dummies are attached.
house_df = house_df.drop(columns=['furnishingstatus'])

In [None]:
# Preview the frame after 'furnishingstatus' was dropped. The call
# parentheses are required: a bare `house_df.head` only shows the bound
# method's repr instead of the first rows.
house_df.head()

In [None]:
# Scaler used below to rescale the numeric feature columns.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [None]:
# Numeric feature columns to rescale; 'price' (the target) is left as is.
number_vars = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']

# NOTE(review): the scaler is fitted on every row, before the train/test
# split that happens later in the notebook, so held-out rows influence the
# fitted min/max. Consider fitting on the training rows only.
scaled_values = scaler.fit_transform(house_df[number_vars])
house_df[number_vars] = scaled_values

In [None]:
# Display the frame after scaling.
house_df

In [None]:
# Separate the target column from the feature matrix.
target_column = "price"
y = house_df[target_column]
X = house_df.drop(columns=[target_column])

In [None]:
# Display the feature matrix.
X

In [None]:
# Display the target series.
y

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

from sklearn.linear_model import LinearRegression
# Hold out 20% of the rows for testing; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.linear_model import LinearRegression

# Create an ordinary least-squares linear regressor with default settings
model= LinearRegression()

# Fit the model on the training split only
model.fit(X_train, y_train)

In [None]:
# Linear-model predictions on the training rows (in-sample fit).
y_pred = model.predict(X_train)

In [None]:
from sklearn import metrics

# Training-set scores for the current y_pred; the MSE is computed once and
# reused for the RMSE.
train_mse = metrics.mean_squared_error(y_train, y_pred)
print('R^2:', metrics.r2_score(y_train, y_pred))
print('MAE:', metrics.mean_absolute_error(y_train, y_pred))
print('MSE:', train_mse)
print('RMSE:', np.sqrt(train_mse))

In [None]:
# Actual vs predicted prices on the training rows.
fig, ax = plt.subplots()
ax.scatter(y_train, y_pred)
ax.set_xlabel("Prices")
ax.set_ylabel("Predicted prices")
ax.set_title("Prices vs Predicted prices")
plt.show()

In [None]:
# Residuals (actual minus predicted) against the predicted value.
train_residuals = y_train - y_pred
fig, ax = plt.subplots()
ax.scatter(y_pred, train_residuals)
ax.set_title("Predicted vs residuals")
ax.set_xlabel("Predicted")
ax.set_ylabel("Residuals")
plt.show()

In [None]:
# Distribution of the training residuals.
# sb.distplot is deprecated and scheduled for removal from seaborn;
# histplot with kde=True is the supported replacement. It plots counts,
# which also matches the "Frequency" axis label.
sb.histplot(y_train - y_pred, kde=True)
plt.title("Histogram of Residuals")
plt.xlabel("Residuals")
plt.ylabel("Frequency")
plt.show()

In [None]:
# Score the linear model on the held-out test rows.
y_test_pred = model.predict(X_test)
test_mse = metrics.mean_squared_error(y_test, y_test_pred)
print('R^2:', metrics.r2_score(y_test, y_test_pred))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))


In [None]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

# Ridge regression with default regularisation strength.
ridge = Ridge()
ridge.fit(X_train, y_train)
# Predict on the held-out rows: the following cells score and plot
# `predictions` against y_test, so predicting on X_train would produce an
# array of the wrong length and make the metric calls raise.
predictions = ridge.predict(X_test)

In [None]:
# Test-set scores for the current `predictions`; MSE is computed once and
# reused for the RMSE.
test_mse = metrics.mean_squared_error(y_test, predictions)
print('R^2:', metrics.r2_score(y_test, predictions))
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))

In [None]:
# Actual vs predicted prices for the current `predictions`.
fig, ax = plt.subplots()
ax.scatter(y_test, predictions)
ax.set_xlabel("Prices")
ax.set_ylabel("Predicted prices")
ax.set_title("Prices vs Predicted prices")
plt.show()

In [None]:
# Distribution of the test residuals for the current `predictions`.
# sb.distplot is deprecated and scheduled for removal from seaborn;
# histplot with kde=True is the supported replacement. It plots counts,
# which also matches the "Frequency" axis label.
sb.histplot(y_test - predictions, kde=True)
plt.title("Histogram of Residuals")
plt.xlabel("Residuals")
plt.ylabel("Frequency")
plt.show()

In [None]:
# Lasso regression with default settings, evaluated on the held-out rows.
# fit() returns the estimator itself, so construction and fitting are chained.
lasso = Lasso().fit(X_train, y_train)
predictions = lasso.predict(X_test)

In [None]:
# Test-set scores for the current `predictions`; MSE is computed once and
# reused for the RMSE.
test_mse = metrics.mean_squared_error(y_test, predictions)
print('R^2:', metrics.r2_score(y_test, predictions))
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))

In [None]:
# Actual vs predicted prices for the current `predictions`.
fig, ax = plt.subplots()
ax.scatter(y_test, predictions)
ax.set_xlabel("Prices")
ax.set_ylabel("Predicted prices")
ax.set_title("Prices vs Predicted prices")
plt.show()

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forests are stochastic (bootstrap sampling and feature selection).
# Seeding the estimator makes the reported scores reproducible across runs;
# 42 matches the seed already used for the train/test split.
reg = RandomForestRegressor(random_state=42)
reg.fit(X_train, y_train)

In [None]:
# Predictions of the current `reg` model on the training rows (in-sample).
y_pred = reg.predict(X_train)

In [None]:
# Training-set scores for the current y_pred; MSE is computed once and
# reused for the RMSE.
train_mse = metrics.mean_squared_error(y_train, y_pred)
print('R^2:', metrics.r2_score(y_train, y_pred))
print('MAE:', metrics.mean_absolute_error(y_train, y_pred))
print('MSE:', train_mse)
print('RMSE:', np.sqrt(train_mse))

In [None]:
# Actual vs predicted prices on the training rows.
fig, ax = plt.subplots()
ax.scatter(y_train, y_pred)
ax.set_xlabel("Prices")
ax.set_ylabel("Predicted prices")
ax.set_title("Prices vs Predicted prices")
plt.show()

In [None]:
# Residuals (actual minus predicted) against the predicted value.
train_residuals = y_train - y_pred
fig, ax = plt.subplots()
ax.scatter(y_pred, train_residuals)
ax.set_title("Predicted vs residuals")
ax.set_xlabel("Predicted")
ax.set_ylabel("Residuals")
plt.show()

In [None]:
from sklearn import svm
from sklearn.svm import SVR

# Support vector regression with a large penalty parameter C, evaluated on
# the held-out rows. fit() returns the estimator, so the calls are chained.
# NOTE(review): C=100000 is a hand-picked value - confirm how it was chosen.
svr = SVR(C=100000).fit(X_train, y_train)
predictions = svr.predict(X_test)

In [None]:
# Test-set scores for the current `predictions`; MSE is computed once and
# reused for the RMSE.
test_mse = metrics.mean_squared_error(y_test, predictions)
print('R^2:', metrics.r2_score(y_test, predictions))
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))

In [None]:
# Actual vs predicted prices for the current `predictions`.
fig, ax = plt.subplots()
ax.scatter(y_test, predictions)
ax.set_xlabel("Prices")
ax.set_ylabel("Predicted prices")
ax.set_title("Prices vs Predicted prices")
plt.show()

In [None]:
from xgboost import XGBRegressor
# Gradient-boosted trees with default settings.
# NOTE: this rebinds `reg`, so the random-forest model fitted earlier under
# the same name is no longer reachable after this cell runs.
reg = XGBRegressor()
reg.fit(X_train, y_train)

In [None]:
# Predictions of the current `reg` model on the training rows (in-sample).
y_pred = reg.predict(X_train)

In [None]:
# Training-set scores for the current y_pred; MSE is computed once and
# reused for the RMSE.
train_mse = metrics.mean_squared_error(y_train, y_pred)
print('R^2:', metrics.r2_score(y_train, y_pred))
print('MAE:', metrics.mean_absolute_error(y_train, y_pred))
print('MSE:', train_mse)
print('RMSE:', np.sqrt(train_mse))

In [None]:
# Actual vs predicted prices on the training rows.
fig, ax = plt.subplots()
ax.scatter(y_train, y_pred)
ax.set_xlabel("Prices")
ax.set_ylabel("Predicted prices")
ax.set_title("Prices vs Predicted prices")
plt.show()

In [None]:
# Residuals (actual minus predicted) against the predicted value.
train_residuals = y_train - y_pred
fig, ax = plt.subplots()
ax.scatter(y_pred, train_residuals)
ax.set_title("Predicted vs residuals")
ax.set_xlabel("Predicted")
ax.set_ylabel("Residuals")
plt.show()

In [None]:
# Predictions of the current `reg` model on the held-out test rows.
y_test_pred = reg.predict(X_test)

In [None]:
# Test-set scores for the current y_test_pred; MSE is computed once and
# reused for the RMSE.
test_mse = metrics.mean_squared_error(y_test, y_test_pred)
print('R^2:', metrics.r2_score(y_test, y_test_pred))
print('MAE:', metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))