# Linear Regression

In [None]:
# Helper that computes the mean absolute percentage error (MAPE)
def mape(y_actual, y_predicted):
    """Return the mean absolute percentage error (MAPE) as a fraction.

    Observations whose actual value is exactly zero are skipped, because
    the percentage error is undefined for them.

    Args:
        y_actual: Sized iterable of true values (list, NumPy array,
            pandas Series, ...).
        y_predicted: Sized iterable of predictions, in the same order and
            of the same length as ``y_actual``.

    Returns:
        The mean of ``|actual - predicted| / |actual|`` over the non-zero
        actual values (``0.1`` means 10 %).

    Raises:
        ValueError: If the two inputs have different lengths.
        ZeroDivisionError: If ``y_actual`` contains no non-zero value.
    """
    if len(y_actual) != len(y_predicted):
        raise ValueError(
            "y_actual and y_predicted must have the same length "
            "(got {} and {})".format(len(y_actual), len(y_predicted))
        )

    total = 0
    count = 0
    # Pair values by position rather than indexing with ``[i]``: on a pandas
    # Series ``[i]`` is a label lookup, which fails or misaligns when the
    # index is not 0..n-1.
    for actual, predicted in zip(y_actual, y_predicted):
        if actual != 0:
            # Divide by the magnitude so negative actuals still contribute
            # a non-negative error.
            total += abs(actual - predicted) / abs(actual)
            count += 1

    if count == 0:
        raise ZeroDivisionError(
            "mape is undefined: y_actual contains no non-zero values"
        )
    return total / count

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression model on the training split.
LR = LinearRegression()
LR.fit(X_train, y_train)

# Predict on both splits so train and test fit can be compared.
y_pred_train_LR = LR.predict(X_train)
y_pred_test_LR = LR.predict(X_test)

print(f"R squared Score on Train data is {r2_score(y_train, y_pred_train_LR):.3f}")
print(f"R squared Score on Test data is {r2_score(y_test, y_pred_test_LR):.3f}")

# Test-set error metrics, collected as [MAPE, MAE, MSE, RMSE].
LR_mse = mean_squared_error(y_test, y_pred_test_LR)
LR_rmse = LR_mse ** 0.5
LR_mae = mean_absolute_error(y_test, y_pred_test_LR)
LR_mape = mape(y_test, y_pred_test_LR)
LR_metrics = [LR_mape, LR_mae, LR_mse, LR_rmse]

# Decision Trees

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
# Sweep max_depth from 2 to 19 and record the MAPE of each tree.
# NOTE(review): the depth is selected using the test split, so the DT_*
# metrics below are optimistically biased; consider a validation split or
# cross-validation for the selection step.
table = []

for i in range(2, 20):
    # The criterion is left at its default (squared error): the "mse"
    # spelling was removed in scikit-learn 1.2, while the default is the
    # same loss in every release.
    DR = DecisionTreeRegressor(max_depth=i, random_state=12)
    DR.fit(X_train, y_train)
    table.append([i, mape(y_test, DR.predict(X_test))])

d = pd.DataFrame(table, columns=['max_depth', 'mape'])

# idxmin() returns the first row holding the minimum, so a tie between
# depths resolves to the shallowest tree instead of failing when a
# multi-row Series is passed to int().
depth = int(d.loc[d['mape'].idxmin(), 'max_depth'])

# Refit with the selected depth and collect [MAPE, MAE, MSE, RMSE] on the
# test split.
DR = DecisionTreeRegressor(max_depth=depth, random_state=12).fit(X_train, y_train)
y_pred_DT = DR.predict(X_test)
DT_mape = mape(y_test, y_pred_DT)
DT_mae = mean_absolute_error(y_test, y_pred_DT)
DT_mse = mean_squared_error(y_test, y_pred_DT)
DT_rmse = DT_mse ** 0.5
DT_metrics = [DT_mape, DT_mae, DT_mse, DT_rmse]

# Gradient Boosted Regression

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
# Sweep n_estimators from 2 to 19 and record the MAPE of each model.
# NOTE(review): despite the XGB_* names this cell uses scikit-learn's
# GradientBoostingRegressor, and n_estimators is selected on the test split,
# which biases the reported metrics optimistically.
XGB_Perf = []
for i in range(2, 20):
    gbr1 = GradientBoostingRegressor(n_estimators=i, random_state=123)
    gbr1.fit(X_train, y_train)
    gbmape = mape(y_test, gbr1.predict(X_test))
    XGB_Perf.append([i, gbmape])

x = pd.DataFrame(XGB_Perf, columns=['n_estimators', 'MAPE'])
x.head()

# idxmin() returns the first row holding the minimum, so a tie resolves to
# the smallest n_estimators instead of failing when a multi-row Series is
# passed to int().
best_estimators = int(x.loc[x['MAPE'].idxmin(), 'n_estimators'])

# Refit with the selected size and collect [MAPE, MAE, MSE, RMSE] on the
# test split.
gbr1 = GradientBoostingRegressor(n_estimators=best_estimators, random_state=123).fit(X_train, y_train)
y_pred_XGB = gbr1.predict(X_test)
XGB_mape = mape(y_test, y_pred_XGB)
XGB_mae = mean_absolute_error(y_test, y_pred_XGB)
XGB_mse = mean_squared_error(y_test, y_pred_XGB)
XGB_rmse = XGB_mse ** 0.5
XGB_metrics = [XGB_mape, XGB_mae, XGB_mse, XGB_rmse]

# Random Forest Regression

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Random forest of 25 trees, each limited to depth 30.
# The criterion is left at its default (squared error): the 'mse' spelling
# was removed in scikit-learn 1.2, while the default is the same loss in
# every release.
# NOTE(review): no random_state is set, so results vary between runs.
RFR = RandomForestRegressor(n_estimators=25, max_depth=30)
RFR.fit(X_train, y_train)

# Test-set error metrics, collected as [MAPE, MAE, MSE, RMSE].
y_pred_RFR = RFR.predict(X_test)
RFR_mape = mape(y_test, y_pred_RFR)
RFR_mae = mean_absolute_error(y_test, y_pred_RFR)
RFR_mse = mean_squared_error(y_test, y_pred_RFR)
RFR_rmse = RFR_mse ** 0.5
RFR_metrics = [RFR_mape, RFR_mae, RFR_mse, RFR_rmse]
RFR_metrics

# Randomized Search CV with Elastic Net

In [None]:
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor

# Candidate hyper-parameter values sampled by the randomized search.
params_grid = dict(
    l1_ratio=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    alpha=[0.01, 0.1, 1, 10, 20, 30, 100],
)

EN = ElasticNet(random_state=0)

# Try 10 sampled parameter combinations with 3-fold cross-validation;
# refit=True so the search object can predict directly afterwards.
RSCV = RandomizedSearchCV(
    estimator=EN,
    param_distributions=params_grid,
    n_iter=10,
    cv=3,
    refit=True,
    random_state=0,
)
RSCV.fit(X_train, y_train)

# Test-set error metrics, collected as [MAPE, MAE, MSE, RMSE].
y_pred_EN = RSCV.predict(X_test)
RSCV_mse = mean_squared_error(y_test, y_pred_EN)
RSCV_rmse = RSCV_mse ** 0.5
RSCV_mae = mean_absolute_error(y_test, y_pred_EN)
RSCV_mape = mape(y_test, y_pred_EN)
RSCV_metrics = [RSCV_mape, RSCV_mae, RSCV_mse, RSCV_rmse]
print(RSCV.best_estimator_)
RSCV_metrics

# AdaBoost Regressor

In [None]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
# Depth-20 tree used as the weak learner for boosting.
# The criterion is left at its default (squared error): the 'mse' spelling
# was removed in scikit-learn 1.2.
DR = DecisionTreeRegressor(max_depth=20, random_state=12).fit(X_train, y_train)

# The base learner is passed positionally because its keyword was renamed
# from base_estimator to estimator in scikit-learn 1.2 (old name removed in
# 1.4); the first positional slot is the base learner under both names.
ADBReg = AdaBoostRegressor(DR, n_estimators=20, learning_rate=0.1)
ADBReg.fit(X_train, y_train)

# Predict with the AdaBoost model itself. The previous code never fitted
# ADBReg and predicted with RSCV, so these metrics duplicated the Elastic
# Net results.
y_pred_ADB = ADBReg.predict(X_test)
ADB_mape = mape(y_test, y_pred_ADB)
ADB_mae = mean_absolute_error(y_test, y_pred_ADB)
ADB_mse = mean_squared_error(y_test, y_pred_ADB)
ADB_rmse = ADB_mse ** 0.5
ADB_metrics = [ADB_mape, ADB_mae, ADB_mse, ADB_rmse]
ADB_metrics

# Stacking

In [None]:
from mlxtend.regressor import StackingRegressor

# Base regressors for the stack.
model1 = LinearRegression()
# The criterion is left at its default (squared error): the 'mse' spelling
# was removed in scikit-learn 1.2.
model2 = DecisionTreeRegressor(max_depth=13, random_state=12)
model3 = RandomForestRegressor(n_estimators=100)
# The base learner is passed positionally because its keyword was renamed
# from base_estimator to estimator in scikit-learn 1.2 (old name removed in
# 1.4). model4 is its own instance, so ADBReg is no longer rebound here.
model4 = AdaBoostRegressor(DR, n_estimators=20, learning_rate=0.1)

# A linear regression combines the base regressors' predictions.
SR = StackingRegressor(regressors=[model1, model2, model3, model4], meta_regressor=model1)
SR.fit(X_train, y_train)

y_pred_Stack = SR.predict(X_test)

# Test-set error metrics, collected as [MAPE, MAE, MSE, RMSE].
Stack_mape = mape(y_test, y_pred_Stack)
Stack_mae = mean_absolute_error(y_test, y_pred_Stack)
Stack_mse = mean_squared_error(y_test, y_pred_Stack)
Stack_rmse = Stack_mse ** 0.5
Stack_metrics = [Stack_mape, Stack_mae, Stack_mse, Stack_rmse]
Stack_metrics