In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
%matplotlib inline

In [None]:
# Column labels for the housing file, which is read without a header row.
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'PRICE',
]
boston_pd = pd.read_csv(
    '../data/housing.csv',
    sep=r"\s+",          # fields are separated by runs of whitespace
    header=None,         # no header line, so the labels above are supplied
    names=column_names,
)
# Preview the first rows as the cell output.
boston_pd.head()

In [None]:
# Interactive histogram of the PRICE column.
fig = px.histogram(data_frame=boston_pd, x="PRICE")
fig.show()

In [None]:
# Configure the figure size, then draw the pairwise column correlations
# (rounded to one decimal so the annotations stay readable).
sns.set(rc={'figure.figsize': (10, 8)})
corr_mat = boston_pd.corr().round(1)
sns.heatmap(corr_mat, annot=True, cmap='bwr');  # ';' suppresses the Axes repr

In [None]:
# Side-by-side scatter plots with fitted regression lines:
# PRICE against RM (left) and PRICE against LSTAT (right).
#
# The style is passed to sns.set directly. sns.set re-applies a complete theme,
# so a separate sns.set_style call placed before it is redundant.
sns.set(style='darkgrid', rc={'figure.figsize': (12, 6)})
fig, ax = plt.subplots(ncols=2)
sns.regplot(x='RM', y='PRICE', data=boston_pd, ax=ax[0])
sns.regplot(x='LSTAT', y='PRICE', data=boston_pd, ax=ax[1]);  # ';' hides the Axes repr

In [None]:
# Target is PRICE; the features are every other column.
y = boston_pd['PRICE']
x = boston_pd.drop(columns='PRICE')

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=29,
)

# Fit a linear regression on the training portion only.
reg = LinearRegression()
reg.fit(x_train, y_train)

In [None]:
# Score the fitted model on both splits and compare test predictions with
# the actual prices.
pred_tr = reg.predict(x_train)
pred_test = reg.predict(x_test)

# Root-mean-squared error on each split, printed so the evaluation result is
# visible in the cell output instead of being computed and discarded.
rmse_tr = np.sqrt(mean_squared_error(y_train, pred_tr))
rmse_test = np.sqrt(mean_squared_error(y_test, pred_test))
print(f"Train RMSE: {rmse_tr:.3f}")
print(f"Test RMSE:  {rmse_test:.3f}")

plt.scatter(y_test, pred_test)
plt.xlabel("Actual House Prices")
plt.ylabel("Predicted Prices")
plt.title("Real vs Predicted")

# Perfect-prediction reference line (predicted == actual). Its end points come
# from the plotted values rather than a fixed range, so it spans every point.
lo = min(y_test.min(), pred_test.min())
hi = max(y_test.max(), pred_test.max())
plt.plot([lo, hi], [lo, hi], 'r')
plt.show()

In [None]:
# Same target as before, but LSTAT is left out of the feature matrix as well.
y = boston_pd['PRICE']
x = boston_pd.drop(columns=['PRICE', 'LSTAT'])

# 80/20 train/test split with a fixed seed so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    random_state=29,
    test_size=0.2,
)

# Fresh linear model fitted on the reduced feature set.
reg = LinearRegression()
reg.fit(x_train, y_train)

In [None]:
# Score the model currently bound to `reg` on both splits and compare its test
# predictions with the actual prices.
pred_tr = reg.predict(x_train)
pred_test = reg.predict(x_test)

# Root-mean-squared error on each split, printed so the evaluation result is
# visible in the cell output instead of being computed and discarded.
rmse_tr = np.sqrt(mean_squared_error(y_train, pred_tr))
rmse_test = np.sqrt(mean_squared_error(y_test, pred_test))
print(f"Train RMSE: {rmse_tr:.3f}")
print(f"Test RMSE:  {rmse_test:.3f}")

plt.scatter(y_test, pred_test)
plt.xlabel("Actual House Prices")
plt.ylabel("Predicted Prices")
plt.title("Real vs Predicted")

# Perfect-prediction reference line (predicted == actual). Its end points come
# from the plotted values rather than a fixed range, so it spans every point.
lo = min(y_test.min(), pred_test.min())
hi = max(y_test.max(), pred_test.max())
plt.plot([lo, hi], [lo, hi], 'r')
plt.show()