# Regression using Machine Learning

### In this lab, we will apply multiple regression algorithms to the same dataset, compare the output produced by each algorithm, and choose the most appropriate one for our use.

Initialise and import packages/libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

The dataset used for the regression models is house sale-price (SalePrice) data together with the corresponding details of each house.

In [None]:
# Read the training and test CSV files into DataFrames.
train, test = (pd.read_csv(csv_path) for csv_path in ('hp_train.csv', 'hp_test.csv'))

In [None]:
# Preview the first five rows to check that the file loaded as expected.
train.head(n=5)

We are not performing any pre-processing of the data, as we already did that in Classification, and since the dataset doesn't have any missing values or encoding to be performed, we train the model directly.

### Training the Regression model

1. Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Linear regression estimator created with scikit-learn's default settings.
lm = LinearRegression()

In [None]:
# Column 0 is used as the target and every remaining column as a feature
# (presumably column 0 is SalePrice — confirm against the CSV header).
y_train, X_train = train.iloc[:, 0], train.iloc[:, 1:]
y_test, X_test = test.iloc[:, 0], test.iloc[:, 1:]

In [None]:
# Fit the linear model on the training split, then predict the test split.
lm.fit(X_train, y_train)
y_pred_lm = lm.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Mean squared error of the linear model on the test split.
mean_squared_error(y_true=y_test, y_pred=y_pred_lm)

In [None]:
# Root mean squared error: same units as the target.
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred_lm))

In [None]:
# Mean absolute error of the linear model on the test split.
mean_absolute_error(y_true=y_test, y_pred=y_pred_lm)

In [None]:
# Coefficient of determination (R^2) of the linear model on the test split.
r2_score(y_true=y_test, y_pred=y_pred_lm)

In [None]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

In [None]:
# Regularised linear models, all with alpha = 1.0; the elastic net uses
# an l1_ratio of 0.5.
rd, la = Ridge(alpha=1.0), Lasso(alpha=1.0)
el = ElasticNet(l1_ratio=0.5, alpha=1.0)

In [None]:
# Train each regularised model on the training split and predict the test
# split, in the order ridge, lasso, elastic net.
y_pred_rd, y_pred_la, y_pred_el = (
    model.fit(X_train, y_train).predict(X_test) for model in (rd, la, el)
)

In [None]:
# Test-set R^2 for the ridge model.
r2_score(y_true=y_test, y_pred=y_pred_rd)

In [None]:
# Test-set R^2 for the lasso model.
r2_score(y_true=y_test, y_pred=y_pred_la)

In [None]:
# Test-set R^2 for the elastic-net model.
r2_score(y_true=y_test, y_pred=y_pred_el)

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# Mean R^2 over 10-fold cross-validation of the linear model on the training split.
cross_val_score(lm, X_train, y_train, scoring="r2", cv=10).mean()

In [None]:
# Mean R^2 over 10-fold cross-validation of the ridge model on the training split.
cross_val_score(rd, X_train, y_train, scoring="r2", cv=10).mean()

In [None]:
# Mean R^2 over 10-fold cross-validation of the lasso model on the training split.
cross_val_score(la, X_train, y_train, scoring="r2", cv=10).mean()

In [None]:
# Mean R^2 over 10-fold cross-validation of the elastic-net model on the training split.
cross_val_score(el, X_train, y_train, scoring="r2", cv=10).mean()

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Expand the training features with all degree-2 terms (squares and pairwise
# products), omitting the constant bias column.
pf = PolynomialFeatures(include_bias=False, degree=2)
X_train_pf = pf.fit(X_train).transform(X_train)

In [None]:
# Per-fold R^2 scores (10 folds) of the linear model on the polynomial features.
cross_val_score(estimator=lm, X=X_train_pf, y=y_train, scoring="r2", cv=10)

In [None]:
from sklearn.preprocessing import scale

In [None]:
# Standardise features using statistics learned from the training split only.
# Scaling the test split with its own mean/std would put train and test on
# different scales and let test-set statistics influence the evaluation.
from sklearn.preprocessing import StandardScaler

train_scaler = StandardScaler().fit(X_train)
X_train_sc = train_scaler.transform(X_train)
X_test_sc = train_scaler.transform(X_test)

In [None]:
# Refit the ridge model on the standardised features and predict the
# standardised test split.
rd.fit(X_train_sc, y_train)
y_pred_rd_sc = rd.predict(X_test_sc)

In [None]:
# Test-set R^2 for the ridge model trained on standardised features.
r2_score(y_true=y_test, y_pred=y_pred_rd_sc)

2. Decision Tree Regressor

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Shallow regression tree (depth 2). The fixed seed pins how ties between
# equally good splits are broken, so a Restart & Run All reproduces the score.
dt = DecisionTreeRegressor(max_depth=2, random_state=42)
y_pred_dt = dt.fit(X_train, y_train).predict(X_test)
r2_score(y_test, y_pred_dt)

3. Gradient Boosting Regressor

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient-boosted ensemble of depth-2 trees. The fixed seed makes the fitted
# trees, and therefore the reported R^2, reproducible across reruns.
gt = GradientBoostingRegressor(max_depth=2, random_state=42)
y_pred_gt = gt.fit(X_train, y_train).predict(X_test)
r2_score(y_test, y_pred_gt)

4. AdaBoost Regressor

In [None]:
from sklearn.ensemble import AdaBoostRegressor

# AdaBoost with default settings. Boosting resamples the training data at each
# round, so a fixed seed is needed for the score to be reproducible.
ad = AdaBoostRegressor(random_state=42)
y_pred_ad = ad.fit(X_train, y_train).predict(X_test)
r2_score(y_test, y_pred_ad)

5. Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with default settings. Bootstrap sampling and per-split feature
# sampling are random, so the seed is fixed to make results repeatable.
rfc = RandomForestRegressor(random_state=42)

In [None]:
# Fit the random forest on the training split, predict the test split and
# report the test-set R^2.
rfc.fit(X_train, y_train)
y_pred_rf = rfc.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred_rf)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Candidate tree counts and per-split feature counts to try.
param_grid = [dict(n_estimators=[10, 15], max_features=[2, 4])]

In [None]:
# Exhaustive search over param_grid for the random forest, scored by negated
# mean squared error with 5-fold cross-validation.
gs = GridSearchCV(
    estimator=rfc,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)

In [None]:
# Run the grid search on the training split.
gs.fit(X=X_train, y=y_train)

In [None]:
# Parameter combination that scored best in the grid search.
gs.best_params_

In [None]:
# Rebuild the forest from whatever the grid search actually selected instead of
# hard-coding values that can go stale when the data or the grid changes.
# The seed keeps the rebuilt model reproducible.
rfc = RandomForestRegressor(**gs.best_params_, random_state=42)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
from sklearn.decomposition import PCA

In [None]:
# PCA configured to keep two principal components.
pca = PCA(n_components=2)

In [None]:
# Fit the 2-component PCA on the training features and display the projection.
# NOTE(review): the input is the unscaled X_train and the result is not stored —
# presumably exploratory; confirm.
pca.fit_transform(X_train)

In [None]:
# Fraction of the total variance captured by each retained component.
pca.explained_variance_ratio_

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Create the (not yet fitted) feature standardiser.
sc = StandardScaler()

In [None]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to the test split. Refitting on the test split would
# standardise it with its own mean/std and leak test statistics.
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

In [None]:
# Learn the principal axes from the standardised training split, then project
# the test split onto those same axes. Refitting PCA on the test split would
# produce different components, so the model's inputs would not be comparable.
X_train_pca = pca.fit_transform(X_train_sc)
X_test_pca = pca.transform(X_test_sc)

In [None]:
# Refit the linear model on the PCA-reduced features and predict the reduced
# test split (this overwrites the earlier y_pred_lm).
lm.fit(X_train_pca, y_train)
y_pred_lm = lm.predict(X_test_pca)

In [None]:
# Test-set R^2 for the current y_pred_lm predictions.
r2_score(y_true=y_test, y_pred=y_pred_lm)

In [None]:
# A fractional n_components asks PCA to keep as many components as are needed
# to explain at least that share (here 95%) of the variance.
pca1 = PCA(n_components=0.95)

In [None]:
# Fit the variance-threshold PCA on the training features and display the projection.
# NOTE(review): the input is the unscaled X_train rather than X_train_sc, and the
# result is not stored — confirm this is intentional.
pca1.fit_transform(X_train)

In [None]:
# Variance share of each component that pca1 retained.
pca1.explained_variance_ratio_

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Elbow-method sweep: fit k-means for k = 1..8 on the standardised training
# features and record each fit's inertia (within-cluster sum of squares).
clu_er = []
for i in range(1, 9):
    # A fixed seed and an explicit n_init keep the inertia curve identical
    # across reruns and across scikit-learn versions with different defaults.
    cluster = KMeans(n_clusters=i, n_init=10, random_state=42)
    cluster.fit(X_train_sc)
    clu_er.append(cluster.inertia_)

In [None]:
# Tabulate the number of clusters against the inertia recorded for it.
cl = pd.DataFrame({"Cluster": range(1, 9)}).assign(Cluster_Inertia=clu_er)

In [None]:
# Display the cluster-count / inertia table.
cl

In [None]:
# Elbow plot: inertia against the number of clusters. The title and axis labels
# let the figure stand alone; the trailing semicolon suppresses the repr output.
fig, ax = plt.subplots()
ax.plot(cl['Cluster'], cl['Cluster_Inertia'], marker='o')
ax.set(title='K-means elbow curve', xlabel='Number of clusters (k)', ylabel='Inertia');