# In [None]:
#importing libraries

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.metrics import mean_absolute_error as mae, mean_squared_error as mse
from sklearn.linear_model import LinearRegression,RidgeCV,LassoCV,ElasticNetCV
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.preprocessing import PolynomialFeatures

# Read the training and testing sheets from the same workbook, in that order.
x_train, x_test = (
    pd.read_excel('Data.xlsx', sheet_name=sheet_name)
    for sheet_name in ('Train_Data', 'Test_Data')
)

# Keep the test-sheet player column aside; it labels the rows of the result.
players = x_test['PLAYER']

def preprocessing(x_train, x_test):
    """Turn the raw train/test frames into float32 matrices plus labels.

    Cleaning applied to both frames:
      * every cell equal to '-' is replaced with 0;
      * '*' is removed from string values of the 'HS' column
        (e.g. '100*' becomes '100').

    The first column of each frame is dropped (it is not used as a
    feature). The last column of ``x_train`` is split off as the labels;
    ``x_test`` is expected to carry the same feature columns without a
    label column. The two frames may have different numbers of rows.

    Args:
        x_train: DataFrame with an identifier column first, feature
            columns (including 'HS') in the middle and the target last.
        x_test: DataFrame with an identifier column first followed by the
            feature columns (including 'HS').

    Returns:
        Tuple ``(x_train_matrix, x_test_matrix, labels)`` of float32
        arrays with shapes (rows_train, n_features),
        (rows_test, n_features) and (rows_train,).

    Raises:
        KeyError: if a frame has no 'HS' column.
        ValueError: if a remaining cell cannot be converted to a number.
    """

    def _strip_star(value):
        # Cells that are already numeric pass through untouched.
        return value.replace('*', '') if isinstance(value, str) else value

    def _to_float64(frame):
        # DataFrame.replace returns a new frame, so the caller's data is
        # left unmodified by the column assignment below.
        cleaned = frame.replace('-', 0)
        cleaned['HS'] = cleaned['HS'].apply(_strip_star)
        # Drop the leading identifier column and convert everything else
        # (numeric strings included) to float64 in one step. Each frame
        # is converted using its own shape, so train and test no longer
        # need the same number of rows.
        return cleaned.iloc[:, 1:].to_numpy().astype('float64')

    train_values = _to_float64(x_train)
    test_values = _to_float64(x_test)

    # Separate the ground truth (last column) from the training features.
    labels = train_values[:, -1].astype('float32')
    x_train_matrix = train_values[:, :-1].astype('float32')
    x_test_matrix = test_values.astype('float32')

    return x_train_matrix, x_test_matrix, labels

#Model 1

def adaboost_method(x_train_matrix, x_test_matrix, labels, *,
                    train_size=70, random_state=None):
    """Fit AdaBoost over random forests and predict the test matrix.

    The first ``train_size`` rows of the training data are used for
    fitting; the remaining rows form a validation set whose MAE, MSE and
    RMSE are printed.

    Args:
        x_train_matrix: 2-D array of training features.
        x_test_matrix: 2-D array of test features to score.
        labels: 1-D array of training targets aligned with
            ``x_train_matrix``.
        train_size: number of leading rows used for fitting (default 70,
            the split this script has always used).
        random_state: optional seed forwarded to both the random forest
            and AdaBoost; ``None`` keeps the runs unseeded.

    Returns:
        1-D array of rounded predictions for ``x_test_matrix``.
    """
    # Split the original data into training and validation parts.
    x_train_data = x_train_matrix[:train_size]
    x_validation_data = x_train_matrix[train_size:]
    y_train_data = labels[:train_size]
    y_validation_data = labels[train_size:]

    # AdaBoost with a random forest as the base learner. The estimator is
    # passed positionally because its keyword was renamed from
    # `base_estimator` to `estimator` across scikit-learn releases.
    # AdaBoost clones the base learner, so it is not fitted separately.
    adb_regressor = AdaBoostRegressor(
        RandomForestRegressor(random_state=random_state),
        n_estimators=100,
        random_state=random_state,
    )
    adb_regressor.fit(x_train_data, y_train_data)

    print("---- AdaBoost Regression - Model Evaluation ----")
    if len(y_validation_data) > 0:
        y_pred_adb = adb_regressor.predict(x_validation_data)
        validation_mse = mse(y_validation_data, y_pred_adb)
        print("Mean Absolute Error (MAE): {}".format(mae(y_validation_data, y_pred_adb)))
        # Report the actual MSE so it is consistent with the RMSE below.
        print("Mean Squared Error (MSE): {}".format(validation_mse))
        print("Root Mean Squared Error (RMSE): {}".format(np.sqrt(validation_mse)))
    else:
        # Nothing is held out when the data has at most `train_size` rows.
        print("No validation rows available; skipping evaluation.")
    print()

    scores_ada_boost = np.round(adb_regressor.predict(x_test_matrix))

    return scores_ada_boost

#Model 2

def poly_regression_method(x_train_matrix, x_test_matrix, labels):
    """Fit a degree-3 polynomial linear regression and predict the test set.

    The printed MSE and R2 describe how well the model reproduces the
    training labels (an in-sample fit, so they are optimistic); no
    ground truth is available here for the test rows.

    Args:
        x_train_matrix: 2-D array of training features.
        x_test_matrix: 2-D array of test features with the same number
            of columns as ``x_train_matrix``.
        labels: 1-D array of training targets aligned with
            ``x_train_matrix``.

    Returns:
        1-D array of (unrounded) predictions for ``x_test_matrix``.
    """
    # Expand the training features into polynomial terms and fit.
    poly = PolynomialFeatures(degree=3)
    x_poly = poly.fit_transform(x_train_matrix)
    model_2 = LinearRegression()
    model_2.fit(x_poly, labels)

    # Reuse the already-fitted expansion on the test data instead of
    # refitting it there.
    scores_poly = model_2.predict(poly.transform(x_test_matrix))

    # Compare training predictions with the training ground truth; the
    # test predictions have no labels to be scored against.
    train_predictions = model_2.predict(x_poly)
    print('---- Poly Linear Regression - Model Evaluation ----')
    print('\nMSE value in PolyLinReg:', mean_squared_error(labels, train_predictions))
    print('R2 score :', r2_score(labels, train_predictions))

    return scores_poly

# Clean the raw sheets once and share the matrices between both models.
x_train_matrix, x_test_matrix, labels = preprocessing(x_train, x_test)

# Score the test set with each model independently.
scores_m1 = adaboost_method(x_train_matrix, x_test_matrix, labels)
scores_m2 = poly_regression_method(x_train_matrix, x_test_matrix, labels)

# Clamp negative polynomial-regression predictions to zero.
negative_mask = scores_m2 < 0
scores_m2[negative_mask] = 0

# Blend the two models: 85% AdaBoost, 15% polynomial regression.
weighted_m1 = 0.85 * scores_m1
weighted_m2 = 0.15 * scores_m2
final_scores = np.round(weighted_m1 + weighted_m2)

# Write the blended predictions, one row per player, to the result workbook.
result_columns = {'PLAYER': players, '2020_Runs': np.round(final_scores)}
result_sht = pd.DataFrame(result_columns)
result_sht.to_excel('Result_Sheet.xlsx', index=False)