In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn import metrics
from sklearn.datasets import load_boston
%matplotlib inline

##### Exploration of basic data information

In [None]:
# Load the Boston housing data and fold the target into the feature matrix so
# that the predictors and the target end up side by side in one DataFrame.
dataset = load_boston()

# The target is turned into a single column so it can be stacked next to the
# feature columns.
target_column = dataset["target"].reshape(-1, 1)
dataset["target"] = target_column
dataset["data"] = np.concatenate([dataset["data"], target_column], axis=1)

# The appended target column is labelled "MEDV".
dataset["feature_names"] = np.append(dataset["feature_names"], "MEDV")

df = pd.DataFrame(dataset["data"], columns=dataset["feature_names"])
df

##### Exploratory Data Analysis

In [None]:
# Global seaborn appearance used by the figures in this notebook:
# the "GnBu_d" colour palette on a white-grid background.
sns.set_palette(palette="GnBu_d")
sns.set_style(style="whitegrid")
# sns.set(font_scale=2.5)  # left disabled: would enlarge all plot fonts

##### Training and Testing

In [None]:
# Split the frame by column position: every column except the last one is a
# predictor, and the last column is the regression target.
col = df.columns
x, y = col[:-1], col[-1]
X_data, Y_data = df[x], df[y]

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state makes the
# split identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
    X_data,
    Y_data,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Predicted vs. actual target values on the test split, one panel per
# polynomial degree (1 through 4).
f, axes = plt.subplots(1, 4, figsize=(48, 12), sharex=True)
for deg in range(4):
    polynomial_features = PolynomialFeatures(degree=(deg + 1))
    x_poly_train = polynomial_features.fit_transform(x_train)
    # Reuse the transformer fitted on the training split; a preprocessing step
    # must never be refit on the test data.
    x_poly_test = polynomial_features.transform(x_test)
    model = LinearRegression()
    model.fit(x_poly_train, y_train)
    prediction = model.predict(x_poly_test)

    # Two-column array: actual target values next to the model's predictions.
    Data = np.concatenate(
        (np.array(y_test).reshape(-1, 1), prediction.reshape(-1, 1)),
        axis=1,
    )
    d = pd.DataFrame(Data, columns=["y_test", "prediction"])
    ax = sns.scatterplot(
        x="y_test",
        y="prediction",
        data=d,
        ax=axes[deg],
        label="polynomial = {}".format(deg + 1),
        s=100,
    )

In [None]:
# Distribution of the test-set residuals (actual - predicted) for polynomial
# degrees 1 through 4, saving the figure after each degree is drawn.
# NOTE(review): no new figure is created inside the loop, so each saved PNG
# also contains the curves of the lower degrees -- confirm this overlay is
# intended.
for i in range(4):
    polynomial_features = PolynomialFeatures(degree=(i + 1))
    x_poly_train = polynomial_features.fit_transform(x_train)
    # Reuse the transformer fitted on the training split; a preprocessing step
    # must never be refit on the test data.
    x_poly_test = polynomial_features.transform(x_test)
    model = LinearRegression()
    model.fit(x_poly_train, y_train)
    prediction = model.predict(x_poly_test)
    sns.distplot(y_test - prediction, bins=50, label="polynominal = {}".format(i + 1))
    plt.legend()
    plt.savefig("polynominal = {}.png".format(i + 1))

In [None]:
# Test-set error metrics of the polynomial regression for degrees 1 through 4.
MAE = []
MSE = []
# NOTE: despite its name, RMAE stores sqrt(MSE), i.e. the root mean squared
# error. The name (and the "RMAE" column / file name) is kept so that existing
# references and output files stay the same.
RMAE = []
for deg in range(4):
    polynomial_features = PolynomialFeatures(degree=(deg + 1))
    x_poly_train = polynomial_features.fit_transform(x_train)
    # Reuse the transformer fitted on the training split; a preprocessing step
    # must never be refit on the test data.
    x_poly_test = polynomial_features.transform(x_test)
    model = LinearRegression()
    model.fit(x_poly_train, y_train)
    prediction = model.predict(x_poly_test)

    # Compute the MSE once and derive its square root from the same value.
    mse_value = metrics.mean_squared_error(y_test, prediction)
    MAE.append(metrics.mean_absolute_error(y_test, prediction))
    MSE.append(mse_value)
    RMAE.append(np.sqrt(mse_value))

# Column vectors so each metric can be placed next to the degree column.
MAE = np.array(MAE).reshape(-1, 1)
MSE = np.array(MSE).reshape(-1, 1)
RMAE = np.array(RMAE).reshape(-1, 1)
polynomial = np.arange(1, 5).reshape(-1, 1)
MAE_df = pd.DataFrame(np.concatenate((polynomial, MAE), axis=1), columns=["polynomial", "MAE"])
MSE_df = pd.DataFrame(np.concatenate((polynomial, MSE), axis=1), columns=["polynomial", "MSE"])
RMAE_df = pd.DataFrame(np.concatenate((polynomial, RMAE), axis=1), columns=["polynomial", "RMAE"])

# Each metric is plotted from its own frame: RMAE_df has no "MAE" or "MSE"
# column, so passing it for those plots cannot work.
# NOTE(review): the three curves share one set of axes, so MAE.png and MSE.png
# also contain the curves drawn before them -- confirm this overlay is intended.
sns.lineplot(x="polynomial", y="RMAE", data=RMAE_df, label="RMAE")
plt.savefig("RMAE.png")
sns.lineplot(x="polynomial", y="MAE", data=MAE_df, label="MAE")
plt.savefig("MAE.png")
sns.lineplot(x="polynomial", y="MSE", data=MSE_df, label="MSE")
plt.savefig("MSE.png")

##### Fitted weight (coefficient) of each feature

In [None]:
# Fit a degree-1 (plain linear) model and tabulate the learned weight of each
# original feature.
col = df.columns
x = col[:-1]
X_data = df[x]
y = col[-1]
Y_data = df[y]
x_train, x_test, y_train, y_test = train_test_split(X_data, Y_data, test_size=0.3, random_state=1)
polynomial_features = PolynomialFeatures(degree=1)
x_poly_train = polynomial_features.fit_transform(x_train)
# Reuse the transformer fitted on the training split; a preprocessing step
# must never be refit on the test data.
x_poly_test = polynomial_features.transform(x_test)
model = LinearRegression()
model.fit(x_poly_train, y_train)
# PolynomialFeatures (include_bias=True by default) puts the constant bias
# column first, so coef_[0] belongs to that column and coef_[1:] lines up with
# X_data.columns. Slicing with [:-1] would shift every weight onto the wrong
# feature and drop the weight of the last feature.
coeffecients = pd.DataFrame(model.coef_[1:], X_data.columns)
coeffecients.columns = ["Coefficient"]
coeffecients