In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Let's Load the Boston House Pricing Dataset

In [None]:
from sklearn.datasets import load_boston

In [None]:
# Load the Boston housing data; the returned object exposes .data, .target,
# .feature_names and .DESCR, which the following cells read.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs scikit-learn < 1.2 or a replacement data source — confirm the pinned version.
boston=load_boston()

## Let's check the description of the dataset

In [None]:
# Print the dataset's bundled description text.
print(boston.DESCR)

In [None]:
# Print the raw feature values.
print(boston.data)

In [None]:
# Print the target values (used below as the 'Price' column).
print(boston.target)

In [None]:
# Print the feature names; they become the DataFrame column labels below.
print(boston.feature_names)

 ## Preparing The Dataset

In [None]:
# Wrap the raw feature values in a DataFrame labelled with the feature names.
dataset = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
)

In [None]:
# Display the feature frame (the target column has not been added yet).
dataset

In [None]:
# Preview the first rows of the feature frame.
dataset.head()

In [None]:
# Preview the last rows of the feature frame.
dataset.tail()

In [None]:
# Append the target as a 'Price' column; it becomes the last column, which the
# positional feature/target split further down relies on.
dataset["Price"] = boston.target

In [None]:
# Preview the first rows again, now including the 'Price' column.
dataset.head()

In [None]:
# Summarize column dtypes and non-null counts.
dataset.info()

## Summarizing the statistics of the data

In [None]:
# Descriptive statistics for every column.
dataset.describe()

### Check for missing values

In [None]:
# Count missing entries per column.
dataset.isna().sum()

## Exploratory Data Analysis
### Correlation

In [None]:
# Pairwise correlation matrix of all columns, including 'Price'.
dataset.corr()

In [None]:
import seaborn as sns

# Grid of pairwise scatter plots across all columns.
sns.pairplot(data=dataset)

In [None]:
# Price against the CRIM column.
plt.scatter(x=dataset["CRIM"], y=dataset["Price"])
plt.xlabel("Crime Rate")
plt.ylabel("Price")

In [None]:
# Price against the RM column.
plt.scatter(x=dataset["RM"], y=dataset["Price"])
plt.xlabel("AVG Num of Room")
plt.ylabel("Price")

In [None]:
import seaborn as sns

# Scatter of Price against RM with a fitted regression line.
sns.regplot(data=dataset, x="RM", y="Price")

In [None]:
# Scatter of Price against LSTAT with a fitted regression line.
sns.regplot(data=dataset, x="LSTAT", y="Price")

In [None]:
# Scatter of Price against CHAS with a fitted regression line.
sns.regplot(data=dataset, x="CHAS", y="Price")

In [None]:
# Scatter of Price against PTRATIO with a fitted regression line.
sns.regplot(data=dataset, x="PTRATIO", y="Price")

 ## Preparation for creating our Model

### Independent and Dependent Features

In [None]:
# Independent features: every column except the last one.
x = dataset.iloc[:, :-1]
# Dependent variable: the last column ('Price', appended to the frame earlier).
y = dataset.iloc[:, -1]

In [None]:
# Preview the feature matrix.
x.head()

In [None]:
# Display the target series.
y

### Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 35% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.35,
    random_state=45,
)

In [None]:
# Display the training features (not yet scaled at this point).
X_train

In [None]:
# Display the test features (not yet scaled at this point).
X_test

 ## Very Important Step 

### Standardize the dataset

In [None]:
from sklearn.preprocessing import StandardScaler

# Scaler instance; it is fitted on the training features in the next cell.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training features only, then scale them.
# Caution: X_train is overwritten, so re-running this cell alone would refit the
# scaler on already-scaled data; re-run from the train/test split cell instead.
X_train = scaler.fit_transform(X_train)

In [None]:
# Scale the test features with the statistics learned from the training split
# (transform only, no refit).
X_test = scaler.transform(X_test)

In [None]:
import pickle

# Persist the fitted scaler so the same scaling can be applied at prediction time.
# The context manager flushes and closes the file deterministically; the original
# left the handle returned by open() unclosed.
with open('scaling.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
# Display the scaled training features.
X_train

In [None]:
# Display the scaled test features.
X_test

##  Model Training

In [None]:
from sklearn.linear_model import LinearRegression


In [None]:
# Linear regression estimator with default settings.
regression = LinearRegression()

In [None]:
# Fit the model on the scaled training features and training targets.
regression.fit(X_train, y_train)

## Print the Coefficients and Intercept

In [None]:
# Learned coefficients, one per (scaled) input feature.
print(regression.coef_)

In [None]:
# Learned intercept term.
print(regression.intercept_)

In [None]:
## Show the estimator's configuration parameters (its constructor settings,
## not the learned coefficients)
regression.get_params()

In [None]:
# Predict targets for the scaled test features.
reg_pred = regression.predict(X_test)

In [None]:
# Display the predicted values for the test split.
reg_pred

### Assumptions

In [None]:
# Actual (x-axis) versus predicted (y-axis) values on the test split.
plt.scatter(x=y_test, y=reg_pred)

In [None]:
# Residual = actual value minus predicted value, per test observation.
residuals = y_test - reg_pred

In [None]:
# Display the residuals.
residuals

In [None]:
# Kernel density estimate of the residual distribution.
sns.displot(data=residuals, kind="kde")

In [None]:
# Residuals plotted against predictions; the points should be scattered
# evenly, with no visible pattern.
plt.scatter(x=reg_pred, y=residuals)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Test-split error metrics, printed in this order: MAE, MSE, RMSE.
print(mean_absolute_error(y_test, reg_pred))
print(mean_squared_error(y_test, reg_pred))
print(np.sqrt(mean_squared_error(y_test, reg_pred)))

# R-squared and adjusted R-squared

## Formula

$$R^2 = 1 - \frac{SSR}{SST}$$

- $R^2$ = coefficient of determination
- $SSR$ = sum of squares of residuals
- $SST$ = total sum of squares

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination on the test split.
score = r2_score(y_true=y_test, y_pred=reg_pred)
print(score)

## Adjusted R²

$$R^2_{adj} = 1 - \frac{(1 - R^2)(n - 1)}{n - k - 1}$$

where:

- $R^2$: the R² of the model
- $n$: the number of observations
- $k$: the number of predictor variables

In [None]:
# Adjusted R-squared on the test split, with n = len(y_test) observations
# and k = X_test.shape[1] predictors.
1 - (1 - score) * (len(y_test) - 1) / (len(y_test) - X_test.shape[1] - 1)

# New Data Prediction


In [None]:
# Take the first observation and reshape it into a single-row 2-D array.
# Only the last expression of a cell is displayed, so this value is not shown.
boston.data[0].reshape(1,-1)

## Transformation of new data
# Scale that row with the scaler fitted on the training split.
scaler.transform(boston.data[0].reshape(1,-1))

In [None]:
# Scale the first observation (as a single-row array), then predict its price.
regression.predict(
    scaler.transform(boston.data[0].reshape(1, -1))
)

# Pickling The Model File for Deployment

In [None]:
import pickle

# Serialize the trained regressor for deployment. The context manager guarantees
# the file is flushed and closed before it is read back; the original left both
# file handles unclosed.
with open('regmodel_opt.pkl', 'wb') as model_file:
    pickle.dump(regression, model_file)

# Reload the artifact to check that it round-trips. Only unpickle files you
# created yourself: pickle.load can execute arbitrary code from untrusted input.
with open('regmodel_opt.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

## Prediction

In [None]:
# Predict with the reloaded model on the same scaled first observation.
pickled_model.predict(
    scaler.transform(boston.data[0].reshape(1, -1))
)