# Importing Necessary Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
%matplotlib inline
# Suppress every warning for the rest of the session: no category or module
# filter is given, so deprecation and convergence warnings are hidden too.
warnings.filterwarnings("ignore")

# Importing Dataset into Dataframe

In [None]:
# Load the housing dataset from a CSV file in the working directory.
data = pd.read_csv("./HousingData.csv")

In [None]:
# Display the freshly loaded DataFrame as the cell output.
data

# Data Analysis

In [None]:
# (rows, columns) of the dataset.
data.shape

In [None]:
# Summary statistics for the numeric columns.
data.describe()

In [None]:
# Print column dtypes and non-null counts.
data.info()

In [None]:
# Number of missing values per column, before any imputation.
data.isnull().sum()

# Data Preprocessing

In [None]:
# Replace every missing value with 0, modifying `data` in place.
# NOTE(review): 0 is used as the fill value for every column regardless of its
# scale; confirm this is intended rather than median/mean imputation.
data.fillna(0, inplace=True)

In [None]:
# Re-count missing values per column to confirm the fill left none behind.
data.isnull().sum()

In [None]:
# Display the DataFrame after missing values were filled.
data

# Exploratory Data Analysis

## Create a Boxplot to Check for the Presence of Outliers

In [None]:
# Draw one boxplot per column on a 2 x 7 grid (14 panels in total, so `data`
# must not have more than 14 columns at this point).
fig, ax = plt.subplots(nrows=2, ncols=7, figsize=(15, 10))
ax = ax.flatten()

for index, col in enumerate(data.columns):
    sns.boxplot(y=col, data=data, ax=ax[index], color="#A259FF")
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)

## Create a Distplot to Check the Distribution of Data

In [None]:
# Draw one distribution plot per column on a 2 x 7 grid (14 panels).
# NOTE(review): seaborn has deprecated `distplot`; `histplot`/`displot` are the
# documented replacements if the installed seaborn version provides them.
fig, ax = plt.subplots(nrows=2, ncols=7, figsize=(15, 10))
ax = ax.flatten()

for index, col in enumerate(data.columns):
    sns.distplot(data[col], ax=ax[index], color="#A259FF")
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)

## Min-Max Normalization

In [None]:
# Rescale the selected columns to the [0, 1] range: (x - min) / (max - min).
cols = ["CRIM", "ZN", "TAX", "B"]

# Use pandas' vectorised, NaN-aware reductions instead of Python's builtin
# min()/max(), which iterate element by element and give order-dependent
# results if a NaN is present.
col_min = data[cols].min()
col_max = data[cols].max()

# The per-column Series broadcast across rows, so all columns are scaled in
# one expression.
data[cols] = (data[cols] - col_min) / (col_max - col_min)

In [None]:
# Re-draw the per-column distribution plots after min-max normalisation
# (2 x 7 grid, 14 panels).
# NOTE(review): seaborn has deprecated `distplot`; `histplot`/`displot` are the
# documented replacements if the installed seaborn version provides them.
fig, ax = plt.subplots(nrows=2, ncols=7, figsize=(15, 10))
ax = ax.flatten()

for index, col in enumerate(data.columns):
    sns.distplot(data[col], ax=ax[index], color="#A259FF")
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)

## Standardization

In [None]:
from sklearn import preprocessing
scalar = preprocessing.StandardScaler()

# Fit the scaler on the selected columns and transform them in one step;
# the result is a plain NumPy array.
scaled_cols = scalar.fit_transform(data[cols])

# Rebuild a DataFrame on `data`'s own index. Assigning these columns back into
# `data` aligns on index labels, so a fresh RangeIndex would silently misalign
# rows if `data` ever carried a non-default index.
scaled_cols = pd.DataFrame(scaled_cols, columns=cols, index=data.index)
scaled_cols.head()

In [None]:
# Overwrite the original columns in `data` with their standardised versions.
data[cols] = scaled_cols[cols]

In [None]:
# Re-draw the per-column distribution plots after standardisation
# (2 x 7 grid, 14 panels).
# NOTE(review): seaborn has deprecated `distplot`; `histplot`/`displot` are the
# documented replacements if the installed seaborn version provides them.
fig, ax = plt.subplots(nrows=2, ncols=7, figsize=(15, 10))
ax = ax.flatten()

for index, col in enumerate(data.columns):
    sns.distplot(data[col], ax=ax[index], color="#A259FF")
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)

## Correlation Matrix

In [None]:
# Open a wide figure, then render the pairwise column correlations as an
# annotated heatmap.
plt.figure(figsize=(20, 10))
corr = data.corr()
sns.heatmap(corr, cmap='coolwarm', annot=True)

In [None]:
# Side-by-side regression plots of the target MEDV against LSTAT (left) and
# RM (right).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
ax = ax.flatten()
sns.regplot(x=data['LSTAT'], y=data['MEDV'], ax=ax[0], color="#A259FF")
sns.regplot(x=data['RM'], y=data['MEDV'], ax=ax[1], color="#A259FF")

## Input Split

In [None]:
# Features: every column except the target (MEDV) and RAD.
# `columns=` already selects the axis, so no separate `axis` argument is needed.
X = data.drop(columns=["MEDV", "RAD"])
# Target: median home value.
y = data["MEDV"]

In [None]:
# Reserve one prediction column per model at positions 14-18 of `data`; the
# later train(...) calls fill them by these positions.
model_columns = [
    "LinearRegression",
    "DecisionTreeRegressor",
    "RandomForestRegressor",
    "ExtraTreesRegressor",
    "XGBRegressor",
]
for position, name in enumerate(model_columns, start=14):
    # Use a float placeholder: the columns will hold float predictions, and
    # writing floats into an integer column is an incompatible-dtype set that
    # recent pandas versions deprecate.
    data.insert(position, name, value=0.0)

In [None]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error
def train(model, X, y, colnum):
    """Fit ``model``, store its predictions in ``data`` and print error metrics.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X: Feature DataFrame. Its column labels must also exist in the
            notebook-global ``data`` frame, which is re-read to score every row.
        y: Target values aligned with ``X``.
        colnum: Positional index of the column in the global ``data`` frame
            that is overwritten with the model's prediction for every row.

    Side effects:
        Replaces column ``colnum`` of the global ``data`` frame and prints the
        hold-out MSE and the 5-fold cross-validated MSE. Returns ``None``.
    """
    # Hold out a test split with a fixed seed and fit on the remainder.
    x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=42)
    model.fit(x_train, y_train)

    # predict the held-out test set
    pred = model.predict(x_test)

    # Score every row of `data` in a single vectorised call. Selecting the
    # features by X's column labels replaces the hard-coded row count (506)
    # and positional feature indices, and keeps the feature names the model
    # was fitted with. Assigning by label replaces the whole column, so its
    # dtype simply follows the predictions.
    data[data.columns[colnum]] = model.predict(data[X.columns])

    # perform cross-validation (scores are negated MSE, hence the abs)
    cv_score = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=5)
    cv_score = np.abs(np.mean(cv_score))

    print("Model Report")
    print("MSE:",mean_squared_error(y_test, pred))
    print('CV Score:', cv_score)

## Linear Regression Model

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression; its per-row predictions go into column 14 of `data`.
model = LinearRegression()
train(model, X, y, colnum=14)

# Bar chart of the fitted coefficients, sorted ascending.
coef = pd.Series(model.coef_, index=X.columns).sort_values()
coef.plot.bar(title='Model Coefficients')

## DecisionTreeRegressor

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Seed the estimator so the reported scores and importances are reproducible
# between runs, in line with the seeded train/test split used by train().
model = DecisionTreeRegressor(random_state=42)
# Per-row predictions go into column 15 of `data`.
train(model, X, y, 15)

# Bar chart of feature importances, most important first.
coef = pd.Series(model.feature_importances_, X.columns).sort_values(ascending=False)
coef.plot(kind='bar', title='Feature Importance')

## RandomForestRegressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Seed the estimator so the reported scores and importances are reproducible
# between runs, in line with the seeded train/test split used by train().
model = RandomForestRegressor(random_state=42)
# Per-row predictions go into column 16 of `data`.
train(model, X, y, 16)

# Bar chart of feature importances, most important first.
coef = pd.Series(model.feature_importances_, X.columns).sort_values(ascending=False)
coef.plot(kind='bar', title='Feature Importance')

## ExtraTreesRegressor

In [None]:
from sklearn.ensemble import ExtraTreesRegressor

# Seed the estimator so the reported scores and importances are reproducible
# between runs, in line with the seeded train/test split used by train().
model = ExtraTreesRegressor(random_state=42)
# Per-row predictions go into column 17 of `data`.
train(model, X, y, 17)

# Bar chart of feature importances, most important first.
coef = pd.Series(model.feature_importances_, X.columns).sort_values(ascending=False)
coef.plot(kind='bar', title='Feature Importance')

## XGBRegressor

In [None]:
import xgboost as xgb

# Fit a gradient-boosted regressor; per-row predictions go into column 18 of
# `data`.
model = xgb.XGBRegressor()
train(model, X, y, colnum=18)

# Bar chart of feature importances, most important first.
coef = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
coef.plot.bar(title='Feature Importance')

In [None]:
# Display the final DataFrame, including the per-model prediction columns.
data