# Assignment-6,Regression

Dataset-Housing sales prices prediction dataset

1)	Read dataset and perform necessary data cleaning

2)	Split the data into test/train using 80:20 proportion

3)	Perform pre-processing and EDA on the train data

4)	Use feature selection techniques like correlation analysis / embedded method to identify the important features

5)	Build multivariate linear regression model on train dataset using the important features identified in above step

6)	Capture the training performance of the model using RMSE score

7)	Print out the coefficient of the features and explain how to interpret the model using top 2 features (features with highest positive / negative coefficient)

8)	Build residual plot on train dataset-check if there is any pattern as far as errors are concerned

9)	Apply pre-processing and feature transformation on test data (as done on train dataset)

10)	Capture the performance of the model on test data set using RMSE score

11)	Based on train and test performance, comment on model’s bias and variance prediction error

12)	Can you try and improve RMSE score by iterative process we discussed in previous sessions?
	 (e.g. Scaling/Hot encoding/Feature engineering/Regularization)
     
Note-Make sure whatever feature scaling you do on train dataset, use same scaler to apply transformation on the test dataset.

In [None]:
!pip install feature_engine

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as snb
import scipy.stats as stats
import statsmodels.api as sm

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


from feature_engine.missing_data_imputers import MeanMedianImputer, CategoricalVariableImputer, AddMissingIndicator
from feature_engine.categorical_encoders import OneHotCategoricalEncoder, RareLabelCategoricalEncoder,OrdinalCategoricalEncoder
from feature_engine.outlier_removers import OutlierTrimmer
from feature_engine import variable_transformers
from feature_engine.discretisers import EqualWidthDiscretiser

In [None]:
# Load the housing train and test CSV files into data frames
train, test = map(pd.read_csv, ("../input/usa-housing-dataset/housing_train.csv",
                                "../input/usa-housing-dataset/housing_test.csv"))
train.head(1)

In [None]:
# Discard the "Id" column from both frames
train, test = (frame.drop(columns=["Id"]) for frame in (train, test))
train.head(1)

# **Defining the functions for various usage**

In [None]:
# Function for plot and check Data Distribution

def diagnostic_plots(df, variable):
    """Show a histogram, a normal Q-Q plot and a boxplot for each column.

    For every requested column the column name and its sample skewness are
    printed and a three-panel figure is displayed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns to inspect.
    variable : str or iterable of str
        Column name, or several column names, to plot.
    """
    # A bare string would otherwise be iterated character by character.
    columns = [variable] if isinstance(variable, str) else variable
    for column in columns:
        plt.figure(figsize=(16, 4))

        # histogram
        plt.subplot(1, 3, 1)
        snb.distplot(df[column], bins=30)
        plt.title('Histogram')

        # Q-Q plot against the normal distribution
        plt.subplot(1, 3, 2)
        stats.probplot(df[column], dist="norm", plot=plt)
        # Label the axis with the plotted column instead of a fixed name.
        plt.ylabel('{} quantiles'.format(column))

        # boxplot
        plt.subplot(1, 3, 3)
        snb.boxplot(y=df[column])
        plt.title('Boxplot')

        print("\n***********{}**********\n".format(column))
        print("**Skewness: ", df[column].skew())
        plt.show()

In [None]:
# Function to plot relationship between Categorical Variable and Target

def explore_relation_catTotar(dataX, y, col_category, target_name="SalePrice"):
    """Plot the mean target value per category for each given column.

    Parameters
    ----------
    dataX : pandas.DataFrame
        Feature frame containing the categorical columns.
    y : array-like
        Target values, assigned to a temporary "target" column of a copy of
        ``dataX`` (the caller's frame is not modified).
    col_category : iterable of str
        Columns to group by; one figure is shown per column.
    target_name : str, default "SalePrice"
        Name of the target used in the plot title and y-axis label.
    """
    temp = dataX.copy()
    temp["target"] = y
    for column in col_category:
        # Start a fresh figure so consecutive plots do not share axes.
        plt.figure()
        ax = temp.groupby([column])["target"].mean().plot()
        ax.set_title('Relationship between {} and {}'.format(column, target_name))
        ax.set_ylabel('Mean {}'.format(target_name))
        plt.show()

In [None]:
# Basic Functions for Outlier Detection - Skewed Distribution & Gaussian Distribution

#Skewed Distribution

def skewed_outlier(df, variable, fold=3):
    """Return ``(upper, lower)`` outlier limits based on the inter-quartile range.

    The limits are ``Q3 + fold * IQR`` and ``Q1 - fold * IQR`` of
    ``df[variable]``.

    Parameters
    ----------
    df : pandas.DataFrame
    variable : str
        Name of the column to analyse.
    fold : float, default 3
        Multiple of the IQR added beyond the quartiles.
    """
    first_quartile = df[variable].quantile(0.25)
    third_quartile = df[variable].quantile(0.75)
    IQR = third_quartile - first_quartile

    lower_boundary = first_quartile - (IQR * fold)
    upper_boundary = third_quartile + (IQR * fold)

    return upper_boundary, lower_boundary


def _outlier_percentages(df, variables, boundary_func, fold):
    """Map every column that has outliers to its share of outlying rows in %."""
    n_rows = len(df)
    if n_rows == 0:
        # No rows means no outliers; this also avoids a division by zero.
        return {}

    percentages = {}
    for column in variables:
        upper, lower = boundary_func(df, column, fold)
        above = int((df[column] > upper).sum())
        below = int((df[column] < lower).sum())
        per_total = ((above + below) / n_rows) * 100
        # Columns without any outlier are left out of the result.
        if per_total != 0:
            percentages[column] = per_total
    return percentages


def calculating_outlier_skewed(df, variables, fold=3):
    """Return ``{column: percentage of rows outside the IQR limits}``.

    Only columns with at least one outlier are included. An empty frame
    yields an empty dict.
    """
    return _outlier_percentages(df, variables, skewed_outlier, fold)


# Gaussian Distribution

def gaussian_outlier(df, variable, fold=3):
    """Return ``(upper, lower)`` outlier limits at ``mean +/- fold * std``.

    Parameters
    ----------
    df : pandas.DataFrame
    variable : str
        Name of the column to analyse.
    fold : float, default 3
        Number of standard deviations away from the mean.
    """
    center = df[variable].mean()
    spread = df[variable].std()

    upper_boundary = center + fold * spread
    lower_boundary = center - fold * spread

    return upper_boundary, lower_boundary


def calculating_outlier_gaussian(df, variables, fold=3):
    """Return ``{column: percentage of rows outside mean +/- fold * std}``.

    Only columns with at least one outlier are included. An empty frame
    yields an empty dict.
    """
    return _outlier_percentages(df, variables, gaussian_outlier, fold)

# **Data Exploration & Data Cleaning**

Handling Date/Year Columns

In [None]:
# Derive age / renovation / garage features from the raw year columns.
# The same derivation runs on both frames so train and test cannot drift apart.
for frame in (train, test):
    # Age of the house, in years, at the time it was sold
    frame['SoldAfterYears'] = frame['YrSold'] - frame['YearBuilt']

    # Age of the house, in months, at the time it was sold
    frame['SoldAfterMonths'] = (frame['SoldAfterYears'] * 12) + frame['MoSold']

    # Years between the last remodel and the sale; this is algebraically
    # SoldAfterYears - (YearRemodAdd - YearBuilt)
    frame['RenovationCalculation'] = frame['YrSold'] - frame['YearRemodAdd']

    # Renovation flag
    # 0 - No renovation (remodel year equals build year)
    # 1 - Renovation done
    frame['Renovation_Done'] = (frame['YearRemodAdd'] != frame['YearBuilt']).astype(int)

    # Garage status from the gap between garage year and build year.
    # Conditions are used instead of enumerating year differences, so gaps
    # outside a fixed range cannot be left behind as raw numbers in an
    # otherwise string-valued column.
    # NOTE(review): a garage year earlier than the build year is labelled
    # 'No_Garag', exactly like a missing garage year - confirm this is intended.
    garage_gap = frame['GarageYrBlt'] - frame['YearBuilt']
    frame['Garage_status'] = pd.Series(
        np.select([garage_gap == 0, garage_gap > 0],
                  ['Built-in', 'Built-Later'],
                  default='No_Garag'),
        index=frame.index,
        dtype=object,
    )

In [None]:
# Wrong year values make some results negative. Floor them at 0 in both frames
# instead of enumerating the individual bad values separately for train and test.
train['RenovationCalculation'] = train['RenovationCalculation'].clip(lower=0)
test['RenovationCalculation'] = test['RenovationCalculation'].clip(lower=0)

In [None]:
# The useful information has been extracted, so the date/year columns can go
date_related_cols = ['YearBuilt','YearRemodAdd','YrSold','GarageYrBlt','SoldAfterMonths']
train = train.drop(date_related_cols, axis=1)
test = test.drop(date_related_cols, axis=1)

# Exploring the dataset

In [None]:
# Partition the columns by dtype: object columns are categorical, all others numerical
col_number, col_category = [], []
for column_name, column_dtype in train.dtypes.items():
    if column_dtype == 'O':
        col_category.append(column_name)
    else:
        col_number.append(column_name)

In [None]:
# Numerical columns with fewer than 20 distinct values are collected as
# discrete; their values are printed for inspection
discrete = []

for var in col_number:
    distinct_values = train[var].unique()
    if len(distinct_values) >= 20:
        continue
    print(var, ' values: ', distinct_values)
    discrete.append(var)
print('\nThere are {} discrete variables'.format(len(discrete)))

There are 15 discrete variables in total; most of them are ordinal variables and a few are nominal.

Of those 15 variables, the ones listed below correlate well with the target, so we keep them numerical; the rest are converted to string/categorical.

'OverallQual','FullBath','TotRmsAbvGrd','GarageCars'

In [None]:
# These discrete variables stay numerical, so take them out of the list of
# columns that will be converted to strings.
cols_to_remove = ['OverallQual','FullBath','TotRmsAbvGrd','GarageCars']
for i in cols_to_remove:
    discrete.remove(i)

In [None]:
#Convert rest of discrete variables into Categorical.
def _as_labels(column):
    """Return ``column`` as string labels, keeping missing values missing.

    A plain ``astype(str)`` turns a missing value into the literal label 'nan'
    and renders a float such as 1.0 as '1.0'. A column that is integer in one
    frame and float in the other would then get different labels for the same
    value, so whole-number values are cast to int before being stringified.
    """
    present = column.dropna()
    if pd.api.types.is_numeric_dtype(present) and (present % 1 == 0).all():
        present = present.astype(int)
    # Re-aligning to the original index leaves NaN where values were missing,
    # so they can still be imputed later.
    return present.astype(str).reindex(column.index)


for frame in (train, test):
    for name in discrete:
        frame[name] = _as_labels(frame[name])

The columns below have very low correlation with the target, so we remove them.

In [None]:
# Pairwise correlation matrix of the candidate columns together with SalePrice
train[['1stFlrSF','BsmtFinSF2','LowQualFinSF','3SsnPorch','MiscVal','EnclosedPorch','ScreenPorch','SalePrice']].corr()

In [None]:
# Remove the same set of columns from both frames
low_corr_cols = ['1stFlrSF','BsmtFinSF2','LowQualFinSF','3SsnPorch','MiscVal','EnclosedPorch','ScreenPorch']
train = train.drop(low_corr_cols, axis=1)
test = test.drop(low_corr_cols, axis=1)

#  Analysing and Removing Outlier

In [None]:
# Recompute the numerical / categorical column lists from the current dtypes
is_object_col = (train.dtypes == 'O').values
col_number = list(train.columns[~is_object_col])
col_category = list(train.columns[is_object_col])

In [None]:
# Collect the numerical columns that have fewer than 20 distinct values
discrete1 = []

for var in col_number:
    levels = train[var].unique()
    if len(levels) >= 20:
        continue
    print(var, ' values: ', levels)
    discrete1.append(var)
print()
print('There are {} discrete variables'.format(len(discrete1)))

Columns will not be considered for outlier removal = ['OverallQual', 'FullBath', 'TotRmsAbvGrd', 'GarageCars']

In [None]:
# Keep only the numerical columns that were not collected in discrete1

col_number = [name for name in col_number if name not in discrete1]

In [None]:
# Columns whose skewness lies strictly between -0.75 and 0.75 are treated as
# gaussian; every other column is treated as skewed
skewnewss = dict(train[col_number].skew())
gaussian_features = [name for name, skew in skewnewss.items() if -0.75 < skew < 0.75]
skewed_features = [name for name in skewnewss if name not in gaussian_features]

In [None]:
# Percentage of outlying rows per column, computed with the helper functions
# defined earlier in this file; columns without outliers are not included.
gaussian_outlier_percentage = calculating_outlier_gaussian(train,gaussian_features)
skewed_outlier_percentage = calculating_outlier_skewed(train,skewed_features)

In [None]:
# Names of the columns that contain outliers, as plain lists
skewed_outlier_cols_list = list(skewed_outlier_percentage)
gaussian_outlier_cols_list = list(gaussian_outlier_percentage)

Columns 'LotFrontage' and 'MasVnrArea' have missing data. We will handle their outliers later.

In [None]:
# Columns that still contain missing values are left out of outlier trimming.
# The two known columns are always deferred; any other listed column with
# missing values is deferred as well. Filtering (rather than list.remove)
# avoids a ValueError when a column is not in a list, and also covers the
# gaussian list.
deferred_cols = {'LotFrontage', 'MasVnrArea'}
deferred_cols.update(
    name for name in skewed_outlier_cols_list + gaussian_outlier_cols_list
    if train[name].isnull().any()
)
skewed_outlier_cols_list = [name for name in skewed_outlier_cols_list
                            if name not in deferred_cols]
gaussian_outlier_cols_list = [name for name in gaussian_outlier_cols_list
                              if name not in deferred_cols]

In [None]:
# Shape before removing outliers, for comparison with the shape after trimming
train.shape

In [None]:
# Two-stage trimming pipeline: gaussian limits first, then skewed (IQR) limits
gaussian_trimmer = OutlierTrimmer(distribution='gaussian', tail='both',
                                  fold=3, variables=gaussian_outlier_cols_list)
skewed_trimmer = OutlierTrimmer(distribution='skewed', tail='both',
                                fold=3, variables=skewed_outlier_cols_list)

outlier_trimmer_pipe = Pipeline(steps=[
    ('Gaussian_Outliers', gaussian_trimmer),
    ("Skewed_Outlier", skewed_trimmer),
])

# Learn the limits on train and drop the rows that fall outside them
train = outlier_trimmer_pipe.fit_transform(train)

In [None]:
# Shape after the outlier trimming pipeline has been applied
train.shape

We have removed 67 rows containing outliers from the dataset. This has also helped to increase the correlation of a few columns with the target, e.g. 'LotArea'.

# Splitting the data into train and test

In [None]:
# Every column except the target is a feature
use_cols = [name for name in train.columns if name != "SalePrice"]

# Hold out 20% of the rows, as required by the 80:20 split in the assignment
X_train, X_test, y_train, y_test = train_test_split(train[use_cols], train["SalePrice"],
                                                    test_size=0.20, random_state=2)

X_train.shape, X_test.shape

# Missing Data Imputation

In [None]:
# Columns of X_train whose share of missing values is non-zero
xtrain_null_share = X_train.isnull().mean()
missing_data_var_Xtrain = list(xtrain_null_share[xtrain_null_share != 0].index)

In [None]:
#Variable "GarageCars" from test set is not matching X_train
# GarageCars was excluded from the string conversion of the discrete columns,
# so replacing string labels such as 'nan' or '5.0' never matches anything.
# Apply the same mapping to the numeric values instead: missing -> 0, values
# above 4 -> 4. pd.to_numeric also accepts string labels like '2.0', so this
# works whichever dtype the column has.
# NOTE(review): 4 is presumably the largest GarageCars value in X_train - confirm.
test['GarageCars'] = (pd.to_numeric(test['GarageCars'], errors='coerce')
                      .fillna(0)
                      .clip(upper=4)
                      .astype(int))

In [None]:
# Split the X_train columns that have missing values by dtype
missing_number, missing_category = [], []
for column_name in missing_data_var_Xtrain:
    if X_train[column_name].dtype == 'O':
        missing_category.append(column_name)
    else:
        missing_number.append(column_name)

In [None]:
# Display the numerical columns of X_train that have missing values
missing_number

In [None]:
# Flag missing LotFrontage values, then fill the numerical gaps with the median
lotfrontage_flagger = AddMissingIndicator(variables=["LotFrontage"])
median_filler = MeanMedianImputer(imputation_method="median",
                                  variables=['LotFrontage',"MasVnrArea"])
num_impute_pipeline = Pipeline(steps=[
    ("Missing Indicator", lotfrontage_flagger),
    ("Median_imputation", median_filler),
])

# Fit on the training split only, then apply to all three frames
num_impute_pipeline.fit(X_train)
X_train, X_test, test = (num_impute_pipeline.transform(part)
                         for part in (X_train, X_test, test))

Categorical variables which actually has missing data -

'MasVnrType' & 'Electrical'

Categorical variables which actually has missing Labels-

'Alley','BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinType2','FireplaceQu','GarageType','GarageFinish','GarageQual','GarageCond','PoolQC','Fence', 'MiscFeature'

In [None]:
# Columns imputed with their most frequent category
cat_missing_data = ['MasVnrType','Electrical']
# Columns whose missing entries are imputed with an explicit "missing" label
cat_missing_labels = ['Alley','BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinType2',
                  'FireplaceQu','GarageType','GarageFinish','GarageQual','GarageCond','PoolQC',
                  'Fence', 'MiscFeature']

In [None]:
# Pipeline imputing the categorical gaps: most frequent category for one group
# of columns, an explicit missing label for the other
frequent_filler = CategoricalVariableImputer(imputation_method='frequent',
                                             variables=cat_missing_data)
label_filler = CategoricalVariableImputer(imputation_method='missing',
                                          variables=cat_missing_labels)
category_missing_pipe = Pipeline(steps=[
    ('Frequent_category', frequent_filler),
    ('Missing_Labels', label_filler),
])


# Fit on the training split only, then impute all three frames
category_missing_pipe.fit(X_train)
X_train, X_test, test = (category_missing_pipe.transform(part)
                         for part in (X_train, X_test, test))

In [None]:
# Check whether the test set still has missing data, and show the share in %
test_null_share = test.isnull().mean()
missing_data_var_test = list(test_null_share[test_null_share != 0].index)
test[missing_data_var_test].isnull().mean()*100

In [None]:
# Split the test columns that still have missing values by dtype
test_missing_number, test_missing_category = [], []
for column_name in missing_data_var_test:
    if test[column_name].dtype == 'O':
        test_missing_category.append(column_name)
    else:
        test_missing_number.append(column_name)

In [None]:
# Fraction of missing values in each remaining numerical test column
test[test_missing_number].isnull().mean()

In [None]:
# Median values are learned from X_train and used to fill the numerical test gaps
test_imputer_remaining = MeanMedianImputer(variables=test_missing_number,
                                           imputation_method="median")

test = test_imputer_remaining.fit(X_train).transform(test)

In [None]:
# Most frequent categories are learned from X_train and used to fill the
# categorical test gaps
test_imputer_cat = CategoricalVariableImputer(variables=test_missing_category,
                                              imputation_method='frequent')

test = test_imputer_cat.fit(X_train).transform(test)

# **Collecting Continuous Variables for further transformation**

In [None]:
# Numerical vs categorical columns of the imputed training split
col_category = [name for name in X_train.columns if X_train[name].dtype == 'O']
col_number = [name for name in X_train.columns if name not in col_category]

In [None]:
# Summary statistics of the numerical columns, one row per column
X_train[col_number].describe().transpose()

In [None]:
# Candidate columns per transformation type
log_transform = ['LotFrontage','LotArea','GrLivArea'] #does not have 0
box_cox_transform = ['MasVnrArea','BsmtFinSF1','BsmtUnfSF','TotalBsmtSF',
                     '2ndFlrSF','GarageArea','WoodDeckSF','OpenPorchSF']
# Columns kept out of scaling + PCA
avoid = ['OverallQual','FullBath','TotRmsAbvGrd','GarageCars','LotFrontage_na']

# Every other numerical column goes through scaling + PCA
scalar_PCA = [name for name in col_number if name not in avoid]

**Categorical Data Encoding**

Variables 'PoolArea' and 'MSSubClass' have low cardinality and distinct values. We will use a discretiser on them.

In [None]:
# Cast the two columns to integers in all three frames
for frame in (X_train, X_test, test):
    frame[['PoolArea','MSSubClass']] = frame[['PoolArea','MSSubClass']].astype(int)

In [None]:
# Equal-width binning: 2 bins for PoolArea, 8 bins for MSSubClass
pool_area_binner = EqualWidthDiscretiser(bins=2, variables=['PoolArea'])
subclass_binner = EqualWidthDiscretiser(bins=8, variables=['MSSubClass'])
disc_pipe = Pipeline(steps=[
    ('PoolArea_disc', pool_area_binner),
    ('MSSubClass_disc', subclass_binner),
])

# Bin edges are learned on the training split and reused for the other frames
disc_pipe.fit(X_train)
X_train, X_test, test = (disc_pipe.transform(part)
                         for part in (X_train, X_test, test))

In [None]:
# Plot the mean target value per MSSubClass bin
explore_relation_catTotar(X_train,y_train,['MSSubClass'])

'MSSubClass' does not follow a monotonic relationship with the target, so we convert it back to categorical and then perform target encoding on it.

In [None]:
# Turn MSSubClass back into string labels in all three frames
for frame in (X_train, X_test, test):
    frame[['MSSubClass']] = frame[['MSSubClass']].astype(str)

In [None]:
# Work on copies so the frames prepared so far stay untouched
X_train1, X_test1, test1 = X_train.copy(), X_test.copy(), test.copy()

In [None]:
# Numerical vs categorical columns of the copied training frame
col_number, col_category = [], []
for column_name in X_train1.columns:
    target_list = col_category if X_train1[column_name].dtype == 'O' else col_number
    target_list.append(column_name)

In [None]:
# Rare-label encoder for all categorical columns, configured with tol=0.05
rare_encoder = RareLabelCategoricalEncoder(variables=col_category,
                                           tol=0.05,
                                           n_categories=1)

# Fit on the training frame, then apply to all three frames
rare_encoder.fit(X_train1)
X_train1, X_test1, test1 = (rare_encoder.transform(part)
                            for part in (X_train1, X_test1, test1))

In [None]:
# Columns with at least 5 distinct labels go to `ordered`; the rest to `one_hot`
ordered = [name for name in col_category
           if X_train1[name].nunique(dropna=False) >= 5]
one_hot = [name for name in col_category if name not in ordered]

**Target Encoding**

In [None]:
# Ordinal encoder using the "ordered" method for the columns in `ordered`;
# it is fitted on the training frame together with the target.
target_encoding = OrdinalCategoricalEncoder(encoding_method="ordered",
                                           variables=ordered)

target_encoding.fit(X_train1,y_train)

In [None]:
# Apply the fitted ordinal encoder to all three frames
X_train1, X_test1, test1 = (target_encoding.transform(part)
                            for part in (X_train1, X_test1, test1))

**One-Hot Encoding**

In [None]:
# One-hot encoder for the columns listed in `one_hot`, dropping the last
# category of each variable (drop_last=True).
# NOTE: `one_hot` is rebound here from the list of column names to the encoder,
# so re-running this statement without rebuilding the list first would pass the
# encoder itself as `variables`.
one_hot = OneHotCategoricalEncoder(top_categories=None,
                                  variables=one_hot,
                                  drop_last=True)

one_hot.fit(X_train1)

In [None]:
# Apply the fitted one-hot encoder to all three frames
X_train1, X_test1, test1 = (one_hot.transform(part)
                            for part in (X_train1, X_test1, test1))

In [None]:
# Shapes of the three frames after encoding
X_train1.shape, X_test1.shape,test1.shape

#  **Variable Transformation**

In [None]:
# Work on copies so the encoded frames stay untouched
X_train2, X_test2, test2 = X_train1.copy(), X_test1.copy(), test1.copy()

In [None]:
# Set aside the columns listed in scalar_PCA. Each *_scalar_pca frame is taken
# from its own "2" copy, so all three frames are handled the same way.
X_train2_scalar_pca = X_train2[scalar_PCA].copy()
X_test2_scalar_pca = X_test2[scalar_PCA].copy()
test2_scalar_pca = test2[scalar_PCA].copy()


# Remove those columns from the frames that keep the remaining features
X_train2 = X_train2.drop(columns=scalar_PCA)
X_test2 = X_test2.drop(columns=scalar_PCA)
test2 = test2.drop(columns=scalar_PCA)

In [None]:
# Standardise using statistics learned on the training frame only
scalar = StandardScaler()

X_train2_scalar_pca = scalar.fit_transform(X_train2_scalar_pca)
X_test2_scalar_pca, test2_scalar_pca = (scalar.transform(part)
                                        for part in (X_test2_scalar_pca, test2_scalar_pca))

In [None]:
# Fit a PCA with 11 components on the scaled training columns
pca = PCA(n_components=11)
pca.fit(X_train2_scalar_pca)

In [None]:
# Project all three scaled frames onto the fitted principal components
X_train2_scalar_pca, X_test2_scalar_pca, test2_scalar_pca = (
    pca.transform(part)
    for part in (X_train2_scalar_pca, X_test2_scalar_pca, test2_scalar_pca)
)

In [None]:
# Cumulative share of variance explained by the first k components
np.cumsum(pca.explained_variance_ratio_)

In [None]:
# Share of variance explained by each individual component
pca.explained_variance_ratio_

In [None]:
# One column name per principal component: col0, col1, ...
cols = ['col' + str(position) for position in range(len(pca.explained_variance_ratio_))]

# Wrap the component arrays in data frames with those column names
X_train_pca, X_test_pca, test_pca = (
    pd.DataFrame(data=components, columns=cols)
    for components in (X_train2_scalar_pca, X_test2_scalar_pca, test2_scalar_pca)
)

In [None]:
# Preview the principal-component frame
X_train_pca.head()

In [None]:
# Renumber the rows 0..n-1 so they line up with the PCA frame
X_train2.reset_index(drop=True, inplace=True)
X_train2.head()

In [None]:
# Join the remaining features with the principal components row by row
X_train_model = pd.merge(X_train2, X_train_pca, left_index=True, right_index=True)
X_train_model.head()

In [None]:
# Same alignment and join for the validation split
X_test2.reset_index(drop=True, inplace=True)
X_test_model = pd.merge(X_test2, X_test_pca, left_index=True, right_index=True)
X_test_model.head()

In [None]:
# Same alignment and join for the external test frame
test2.reset_index(drop=True, inplace=True)
test_model = pd.merge(test2, test_pca, left_index=True, right_index=True)
test_model.head()

In [None]:
# Shapes of the three frames that are fed to the models
X_train_model.shape, X_test_model.shape,test_model.shape

#  Model Building

In [None]:
# Natural log of the target for both splits; the models are fitted on this scale
y_train_log, y_test_log = np.log(y_train), np.log(y_test)

**Linear Regression**

In [None]:
# Ordinary least squares fit on the training features and the log target
lr = LinearRegression()
lr.fit(X_train_model,y_train_log)

In [None]:
# Predictions (on the log scale) for the training and validation splits
y_predict_train, y_predict_test = (lr.predict(features)
                                   for features in (X_train_model, X_test_model))

In [None]:
# RMSE is the square root of the mean squared error on the log target
train_rmse = mean_squared_error(y_train_log, y_predict_train) ** 0.5
test_rmse = mean_squared_error(y_test_log, y_predict_test) ** 0.5

print("\nTraining RMSE and R-Square: ")
print("Root Mean Square Error :", train_rmse)
print("R-square :", lr.score(X_train_model, y_train_log))

print("\nTesting RMSE and R-Square: ")
print("Root Mean Square Error :", test_rmse)
print("R-square :", lr.score(X_test_model, y_test_log))

In [None]:
# get importance
importance = lr.coef_
# summarize feature importance: label every coefficient with its column name
# (the model was fitted on X_train_model, so coefficients follow its column
# order) and list them from the largest to the smallest, so the most positive
# features are at the top and the most negative at the bottom
coef_by_feature = sorted(zip(X_train_model.columns, importance),
                         key=lambda pair: pair[1], reverse=True)
for feature_name, v in coef_by_feature:
    print('Feature: %s, Score: %.5f' % (feature_name, v))

**Lasso**

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import Lasso

In [None]:
model = Lasso(max_iter=20000)

#define model evaluation method
cv = RepeatedKFold(n_splits=10,n_repeats=3, random_state=1)

#define Grid
grid = {'alpha': [0.001,0.01,0.1,0.0012]}

# Pass the repeated k-fold splitter defined above, so the search is evaluated
# with it rather than with an unrelated fixed number of folds
search = GridSearchCV(model, grid, scoring='neg_mean_squared_error', cv=cv)

results=search.fit(X_train_model,y_train_log)

print('Config: %s' % results.best_params_)

In [None]:
# Predict both splits with the fitted grid search object
y_predict_train = search.predict(X_train_model)
y_predict_test = search.predict(X_test_model)

In [None]:
# R-square is taken from the refitted best estimator: the search object itself
# was configured with scoring='neg_mean_squared_error', so its own score()
# would not return R-square.
print("\nTraining RMSE and R-Square: ")
print("Root Mean Square Error :",mean_squared_error(y_train_log,y_predict_train)**0.5)
print("R-square :",search.best_estimator_.score(X_train_model,y_train_log))


print("\nTesting RMSE and R-Square: ")
print("Root Mean Square Error :",mean_squared_error(y_test_log,y_predict_test)**0.5)
print("R-square :",search.best_estimator_.score(X_test_model,y_test_log))

**XGBRegressor**

In [None]:
import xgboost as xgb
from xgboost.sklearn import XGBRegressor

In [None]:
# Base regressor whose hyper-parameters are searched
xgb1 = XGBRegressor()

# Search space: learning rate and tree depth, with a fixed objective
param = {
    'objective': ['reg:squarederror'],
    'learning_rate': [0.1, 0.11, 0.12, 0.13, 0.15, 0.2],
    'max_depth': [1, 2, 3, 4, 5],
}

# Exhaustive search with 3-fold cross-validation
xgb_grid = GridSearchCV(estimator=xgb1, param_grid=param, cv=3)

xgb_grid.fit(X_train_model, y_train_log)

In [None]:
# Best cross-validated score and the parameter combination that achieved it
print(xgb_grid.best_score_)
print(xgb_grid.best_params_)

In [None]:
# Predict both splits with the fitted grid search object
y_predict_train = xgb_grid.predict(X_train_model)
y_predict_test =  xgb_grid.predict(X_test_model)

In [None]:
# Report R-square next to RMSE, as the printed headers announce
print("\nTraining RMSE and R-Square: ")
print("Root Mean Square Error :",mean_squared_error(y_train_log,y_predict_train)**0.5)
print("R-square :",xgb_grid.best_estimator_.score(X_train_model,y_train_log))


print("\nTesting RMSE and R-Square: ")
print("Root Mean Square Error :",mean_squared_error(y_test_log,y_predict_test)**0.5)
print("R-square :",xgb_grid.best_estimator_.score(X_test_model,y_test_log))