# End-to-End ML Linear Regression Implementation

# Boston House Price Prediction

In [None]:
#Importing the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

: 

In [None]:
#Loading the dataset
try:
    from sklearn.datasets import load_boston
except ImportError:
    # load_boston was removed in scikit-learn 1.2, so the import above fails
    # on current releases. Define a drop-in replacement that rebuilds the same
    # arrays from the original StatLib file, following the recipe given in the
    # scikit-learn removal notice.
    from sklearn.utils import Bunch

    def load_boston():
        """Return the Boston housing data as a Bunch.

        Fallback used only when scikit-learn no longer ships load_boston.
        Downloads the raw file (network access required) and exposes the
        attributes this notebook reads: data, target, feature_names, DESCR.
        """
        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        # After a 22-line preamble, each record is split over two physical
        # lines: 11 values on the first, 2 features + the target on the second.
        raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]
        feature_names = np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ])
        descr = (
            "Boston house prices dataset (506 samples, 13 features), "
            "rebuilt from " + data_url + ". "
            "The full description is not available in this fallback loader."
        )
        return Bunch(
            data=data,
            target=target,
            feature_names=feature_names,
            DESCR=descr,
            filename=data_url,
        )

: 

In [None]:
# Load the dataset object; its fields are inspected in the cells below
boston=load_boston()

: 

In [None]:
# List the fields available on the loaded dataset object
boston.keys()

: 

In [None]:
## Let's check the description of the dataset
print(boston.DESCR)

: 

In [None]:
# Target values; these become the 'Price' column further down
print(boston.target)

: 

# Data Preparation

In [None]:
# Wrap the feature matrix in a DataFrame, one column per feature name
dataset=pd.DataFrame(boston.data,columns=boston.feature_names)

: 

In [None]:
# Preview the first rows of the feature table
dataset.head()

: 

In [None]:
# Add the target values as a 'Price' column
dataset['Price']=boston.target

: 

In [None]:
# Preview again to confirm the 'Price' column is present
dataset.head()

: 

In [None]:
# Column dtypes and non-null counts
dataset.info()

: 

In [None]:
#Summary statistics for every column
dataset.describe()

: 

In [None]:
#Count missing values per column
dataset.isnull().sum()

: 

# Exploratory Data Analysis (EDA)

In [None]:
#Correlation
dataset.corr()

: 

In [None]:
sns.heatmap(dataset.corr())

: 

In [None]:
#Plotting scatterplots w.r.t to price and CRIM
plt.scatter(dataset['CRIM'], dataset['Price'])
plt.xlabel('CRIM')
plt.ylabel('Price')

: 

In [None]:
plt.scatter(dataset['RM'], dataset['Price'])
plt.xlabel('RM')
plt.ylabel('Price')

: 

In [None]:
#Let's try to plot the same scatter plot using regplot()
sns.regplot(x='RM', y='Price', data=dataset)

: 

In [None]:
#Plotting regplot() w.r.t price and LSTAT
sns.regplot(x='LSTAT', y='Price', data=dataset)

: 

In [None]:
#Plotting regplot() w.r.t price and PTRATIO
sns.regplot(x='PTRATIO', y='Price', data=dataset)

: 

In [None]:
#Let's try to plot those features which are not at all correlated 
sns.regplot(x='CHAS', y='Price', data=dataset)

: 

In [None]:
#DEpendent and Independent features
X=dataset.iloc[:,:-1]
y=dataset.iloc[:,-1]

: 

In [None]:
X.head()

: 

In [None]:
y

: 

# Train test split

In [None]:
#Train test split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

: 

In [None]:
X_train

: 

In [None]:
X_test

: 

# Standardization

In [None]:
from sklearn.preprocessing import StandardScaler
# Scaler instance; it is fitted on the training split in the next cell
scaler=StandardScaler()

: 

In [None]:
# Fit the scaler on the training features only and keep the scaled copy under a new name
X_train_scaler=scaler.fit_transform(X_train)

: 

In [None]:
# Scale the test features with the statistics learned from the training split.
# NOTE(review): this rebinds X_test to its scaled version, so running this cell
# a second time scales already-scaled data. Re-run the train/test split cell
# first if this cell has to be executed again.
X_test=scaler.transform(X_test)

: 

In [None]:
# Scaled training features
X_train_scaler

: 

In [None]:
# Scaled test features (X_test was overwritten above)
X_test

: 

# Model Training 

In [None]:
from sklearn.linear_model import LinearRegression
# Linear regression estimator with default settings
regression=LinearRegression()

: 

In [None]:
# Train on the scaled training features
regression.fit(X_train_scaler,y_train)

: 

In [None]:
#Show the fitted coefficients (the intercept is printed in the next cell)
regression.coef_

: 

In [None]:
# Fitted intercept
print(regression.intercept_)

: 

In [None]:
# Parameters the estimator was configured with
regression.get_params()

: 

In [None]:
#Prediction with test data (X_test already holds the scaled test features at this point)
reg_pred=regression.predict(X_test)

: 

In [None]:
# Predicted values for the test split
reg_pred

: 

In [None]:
#Plotting a scatter plot w.r.t prediction
plt.scatter(y_test,reg_pred)

: 

In [None]:
#Residuals
residuals=y_test - reg_pred
residuals

: 

In [None]:
#Plot the residuals
sns.displot(residuals, kind="kde")

: 

In [None]:
#Plot a scatter plot w.r.t reg_pred and residuals
plt.scatter(reg_pred,residuals)

: 

In [None]:
#Performance metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

: 

In [None]:
print(mean_absolute_error(y_test,reg_pred))
print(mean_squared_error(y_test,reg_pred))
print(np.sqrt(mean_squared_error(y_test,reg_pred)))

: 

In [None]:
score=r2_score(y_test,reg_pred)
print(score)

: 

In [None]:
#display adjusted R-squared
1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)

: 

# New Data Prediction

In [None]:
boston.data[0].reshape(1,-1)

: 

In [None]:
##transformation of new data
scaler.transform(boston.data[0].reshape(1,-1))

: 

In [None]:
regression.predict(scaler.transform(boston.data[0].reshape(1,-1)))

: 

# Pickling the Model File for Deployment

In [None]:
import pickle

: 

In [None]:
pickle.dump(regression, open('regmodel.pkl','wb'))

: 

In [None]:
pickled_model=pickle.load(open('regmodel.pkl','rb'))

: 

In [None]:
## Prediction
pickled_model.predict(scaler.transform(boston.data[0].reshape(1,-1)))

: 

: 