# 1) OFFICIAL VERSION USED for the Best Score

In [None]:
#####1) OFFICIAL VERSION USED for the Best Score######
import numpy as np
import pandas as pd

#Libraries for feature selection
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2


# Locations of the competition files inside the Kaggle input directory.
path_train = "/kaggle/input/statistical-learning-sapienza-spring-2020/train/train.csv"
path_test = "/kaggle/input/statistical-learning-sapienza-spring-2020/test/test.csv"
train = pd.read_csv(path_train)
test = pd.read_csv(path_test)

# Keep the identifiers aside (the test ones are written to the submission
# file), then remove the column: it is not used as a predictor.
train_ID = train['id']
test_ID = test['id']
train = train.drop(columns="id")
test = test.drop(columns="id")

# Row counts, used later to split the stacked frame back into train / test.
ntrain, ntest = len(train), len(test)

# Regression target as a NumPy array.
y_train = train['tempo'].values

# Stack train on top of test with a fresh 0..n-1 index, then remove the
# target column from both the stacked frame and the training frame.
all_data = pd.concat([train, test], ignore_index=True)
all_data = all_data.drop(columns='tempo')
train = train.drop(columns='tempo')

## Feature Selection

In [None]:
### FEATURE SELECTION ### (Which actually improved our score)
# Rank every training feature by its chi-squared score against the
# integer-truncated target and keep the highest-scoring columns.

# Number of top-ranked features kept for modelling.
N_BEST_FEATURES = 209

# The scoring below is run against integer labels, so the target is
# truncated with int() exactly as before.
X = np.array(train)
y = np.array([int(value) for value in y_train])

# Rescale every feature to [0, 1]; chi2 expects non-negative inputs.
scaler = MinMaxScaler()
scaler = scaler.fit(X)
X = scaler.transform(X)
X = pd.DataFrame(X, columns=['f' + str(i) for i in range(X.shape[1])])

# Only the per-feature scores are read below, never the selector's own
# selection, so k='all' yields the same scores while avoiding an invalid-k
# failure when the data has fewer columns than a hard-coded k.
bestfeatures = SelectKBest(score_func=chi2, k='all')
fit = bestfeatures.fit(X, y)

# One row per feature: its placeholder name and its chi-squared score.
featureScores = pd.DataFrame({'Specs': X.columns, 'Score': fit.scores_})

# Row labels of the best-scoring features, best first. The labels are the
# positional indices of the features in the training matrix.
best_columns = featureScores.nlargest(N_BEST_FEATURES, 'Score')
candidate_columns = list(best_columns.index.values)

# Translate the positions into the real column names and cut the stacked frame.
colname = all_data.columns[candidate_columns]
df1 = all_data[colname]

# Split the reduced frame back into its train and test parts.
train_cut = df1[:ntrain]
test_cut = df1[ntrain:]

## XGBRegression with the best score

In [None]:
### XGBRegression ###
import xgboost as xgb
from xgboost.sklearn import XGBRegressor
from sklearn.model_selection import GridSearchCV


# Hyper-parameter grid explored by the exhaustive search.
xgb1 = XGBRegressor()
parameters = dict(
    objective=['reg:squarederror'],
    learning_rate=[0.01, 0.03, 0.05],  # so called `eta` value
    max_depth=[7, 9],
    min_child_weight=[3, 5],
    gamma=[0.2, 0.3, 0.4],
    colsample_bytree=[0.8, 0.9, 1],
    n_estimators=[500],
)

# 2-fold grid search over every combination, using all available cores.
xgb_grid = GridSearchCV(
    estimator=xgb1,
    param_grid=parameters,
    cv=2,
    n_jobs=-1,
    verbose=True,
)
xgb_grid.fit(train_cut, y_train)

print(xgb_grid.best_score_)
print(xgb_grid.best_params_)

# Final model, built from hyper-parameter values written out by hand.
my_model = XGBRegressor(
    n_estimators=500,
    learning_rate=0.03,
    max_depth=7,
    min_child_weight=3,
    gamma=0.2,
    colsample_bytree=0.9,
)
my_model.fit(train_cut, y_train)

# Predict the reduced test set and write the submission file.
predictions = my_model.predict(test_cut)
sub = pd.DataFrame({'id': test_ID, 'target': predictions})
sub.to_csv('submission4.csv', index=False)

## Golden Song Contest

In [None]:
#####1.2) Golden Song Part#####
# Predict every row of the golden-song file with the fitted model and
# average the predictions into a single submitted value.
GS_df = pd.read_csv('Desktop/StatLearning/dataset/test_golden_song/test_golden_song.csv')

# The submission contains one row only, so its id is fixed to 0; the ids
# stored in the file are dropped because they are not predictors.
GS_ID = [0]
GS_df.drop("id", axis=1, inplace=True)

# Keep only the columns retained by the feature selection.
test_GS = GS_df[colname]

predictions_GS = my_model.predict(test_GS)

# Average of all the predictions. The original line referenced the undefined
# name `prediction_GS`, which raised a NameError.
predictions_GS_final = [predictions_GS.mean()]

# Storing in the csv file
sub_GS = pd.DataFrame()
sub_GS['id'] = GS_ID
sub_GS['target'] = predictions_GS_final
sub_GS.to_csv('goldensong_group13.csv', index=False)

# 2) Previous XGBRegression with lower score

In [None]:
### 2) XGBRegression Previous version with old hyperparameters###
import xgboost as xgb
from xgboost.sklearn import XGBRegressor
from sklearn.model_selection import GridSearchCV

# Various hyper-parameters to tune
xgb1 = XGBRegressor()
parameters = {'nthread':[4], #when use hyperthread, xgboost may become slower
              # 'reg:squarederror' is the current name of the squared-error
              # objective; 'reg:linear' is its deprecated alias. This also
              # matches the grid used for the best-score model.
              'objective':['reg:squarederror'],
              'learning_rate': [0.01, .03, 0.05, .07], #so called `eta` value
              'max_depth': [5, 6, 7],
              'min_child_weight': [4],
              'subsample': [0.7],
              'colsample_bytree': [0.7],
              'n_estimators': [100, 500, 1000]}

# 2-fold grid search over every combination, run on 5 parallel jobs.
xgb_grid = GridSearchCV(xgb1,
                        parameters,
                        cv = 2,
                        n_jobs = 5,
                        verbose=True)

xgb_grid.fit(train_cut, y_train)

print(xgb_grid.best_score_)
print(xgb_grid.best_params_)


#Best old hyperparameters (written out by hand)
my_model = XGBRegressor(colsample_bytree= 0.7, 
                        learning_rate =  0.03, 
                        max_depth =  6, 
                        n_estimators = 1000
                        )

# verbose=False is passed so that fit does not print progress updates
my_model.fit(train_cut, y_train, verbose=False)


# make predictions
predictions = my_model.predict(test_cut)


# Storing in the csv file
sub = pd.DataFrame()
sub['id'] = test_ID
sub['target'] = predictions
sub.to_csv('submission3.csv',index=False)

# 3) Where we started

In [None]:
#### 3) We actually started from here, searching for the best model among the following ones

from sklearn.model_selection import train_test_split
import math
import sklearn.metrics as sklm
from sklearn.preprocessing import RobustScaler

# Hold out 30% of the training rows so that an RMSE can be computed on data
# the models were not fitted on (fixed seed for a repeatable split).
x_train, x_test, y_tra, y_test = train_test_split(
    train_cut,
    y_train,
    test_size=0.3,
    random_state=0,
)

# The scaler is fitted on the training part only; the hold-out part and the
# competition test set are then transformed with those same statistics.
scaler = RobustScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
X_test = scaler.transform(test_cut)

# Ridge
# Import the GridSearchCV class itself. The original statement
# `import sklearn.model_selection as GridSearchCV` rebound that name to the
# whole module, which breaks any later call to GridSearchCV(...).
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
import sklearn.model_selection as ms

# Search the regularisation strength alpha over 1..100 with 15-fold CV,
# scored by negative mean squared error.
ridge = Ridge()
parameters = {'alpha': list(range(1, 101))}

ridge_reg = ms.GridSearchCV(ridge, param_grid=parameters, scoring='neg_mean_squared_error', cv=15)
ridge_reg.fit(x_train, y_tra)

# Report the alpha that was actually selected. The message used to hard-code
# "Alpha=11" while the refit below hard-coded alpha=100.
best_alpha = ridge_reg.best_params_['alpha']
print("The best value of Alpha is: ", ridge_reg.best_params_)
print("The best score achieved with Alpha=" + str(best_alpha) + " is: ", math.sqrt(-ridge_reg.best_score_))

# Cross-validated RMSE of the best candidate (the score is a negated MSE).
ridge_pred = math.sqrt(-ridge_reg.best_score_)

# Refit a plain Ridge with the tuned alpha on the training split.
ridge_mod = Ridge(alpha=best_alpha)
ridge_mod.fit(x_train, y_tra)
y_pred_train = ridge_mod.predict(x_train)
y_pred_test = ridge_mod.predict(x_test)

# Computing RMSE for Ridge on the training and hold-out splits
print('Root Mean Square Error train = ' + str(math.sqrt(sklm.mean_squared_error(y_tra, y_pred_train))))
print('Root Mean Square Error test = ' + str(math.sqrt(sklm.mean_squared_error(y_test, y_pred_test))))


# Lasso
from sklearn.linear_model import Lasso


# Lasso with a fixed regularisation strength, fitted on the training split.
lasso_mod = Lasso(alpha=100).fit(x_train, y_tra)

# Predictions on the training and hold-out splits.
y_lasso_train = lasso_mod.predict(x_train)
y_lasso_test = lasso_mod.predict(x_test)

# RMSE for Lasso on both splits.
lasso_rmse_train = math.sqrt(sklm.mean_squared_error(y_tra, y_lasso_train))
lasso_rmse_test = math.sqrt(sklm.mean_squared_error(y_test, y_lasso_test))
print(f'Root Mean Square Error train = {lasso_rmse_train}')
print(f'Root Mean Square Error test = {lasso_rmse_test}')


# ENET
from sklearn.linear_model import ElasticNetCV

# Tuning hyperparameters: candidate regularisation strengths and L1/L2 mixes.
alphas = [10, 1, 0.1, 0.01, 0.001, 0.002, 0.003, 0.004, 0.005, 0.00054255]
l1ratio = [0.1, 0.3, 0.5, 0.9, 0.95, 0.99, 1]

# max_iter is an iteration count and must be an integer. The original passed
# the float 1e7, which scikit-learn's parameter validation rejects.
elastic_cv = ElasticNetCV(cv=5, max_iter=int(1e7), alphas=alphas, l1_ratio=l1ratio)

elasticmod = elastic_cv.fit(x_train, y_tra.ravel())
ela_pred = elasticmod.predict(x_test)
print('Root Mean Square Error test = ' + str(math.sqrt(sklm.mean_squared_error(y_test, ela_pred))))
print(elastic_cv.alpha_)
print(elastic_cv.l1_ratio_)

# Using the best hyperparameters (values written out by hand).
# `alphas` is a collection of candidate values, so the single candidate is
# wrapped in a list. A bare scalar is either rejected or read as "number of
# alphas to generate", depending on the scikit-learn version.
elastic_cv = ElasticNetCV(cv=5, max_iter=int(1e7), alphas=[10], l1_ratio=0.1)
elasticmod = elastic_cv.fit(x_train, y_tra.ravel())
ela_pred = elasticmod.predict(x_test)

# RMSE for ENET on the hold-out split
print('Root Mean Square Error test = ' + str(math.sqrt(sklm.mean_squared_error(y_test, ela_pred))))
print(elastic_cv.alpha_)

# XGBRegressor evaluated on the hold-out split.
# NOTE(review): this assignment rebinds `xgb`, the alias that the earlier
# `import xgboost as xgb` gave to the xgboost module.
xgb = XGBRegressor(
    n_estimators=1000,
    max_depth=6,
    learning_rate=0.03,
    colsample_bytree=0.7,
)
xgmod = xgb.fit(x_train, y_tra)
xg_pred = xgmod.predict(x_test)

# RMSE for XGBRegressor
xg_rmse = math.sqrt(sklm.mean_squared_error(y_test, xg_pred))
print(f'Root Mean Square Error test = {xg_rmse}')

#We also tried to mix them
from sklearn.ensemble import VotingRegressor

# Ensemble that combines the four regressors built above.
vote_mod = VotingRegressor(
    estimators=[
        ('Ridge', ridge_mod),
        ('Lasso', lasso_mod),
        ('Elastic', elastic_cv),
        ('XGBRegressor', xgb),
    ]
)
vote = vote_mod.fit(x_train, y_tra.ravel())
vote_pred = vote.predict(x_test)

# RMSE for the mix on the hold-out split.
vote_rmse = math.sqrt(sklm.mean_squared_error(y_test, vote_pred))
print(f'Root Mean Square Error test = {vote_rmse}')