# XG Boost

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('Train_data.csv')

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV, validation_curve
from sklearn.preprocessing import StandardScaler, QuantileTransformer, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

In [5]:
col_transform = [col for col in df.columns if col not in ['Target']]

In [6]:
X = df[col_transform]
y = df['Target']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [8]:
transformer = ColumnTransformer(transformers = [('scaling',RobustScaler(), col_transform)])

In [9]:
from xgboost import XGBClassifier

In [10]:
xgb = XGBClassifier(n_jobs=-1)

In [11]:
pipeline_xgb = Pipeline(steps = [('transformer',transformer),('xgb',XGBClassifier())])

In [12]:
cv_score_xgb = cross_val_score(pipeline_xgb,X_train,y_train,cv =5)

In [13]:
cv_score_xgb.mean()

0.6892811178525464

In [14]:
params_xgb = {'xgb__n_estimators':[100, 300, 500, 750, 800, 1200] ,
              'xgb__max_depth': [5, 10, 15, 20, 25,30],'xgb__learning_rate':[0.01, 0.1, 1, 0.001, 0.5]}

In [15]:
grid_xgb = GridSearchCV(pipeline_xgb,param_grid=params_xgb,cv=5,verbose=1,n_jobs=4)

In [16]:
grid_xgb.fit(X_train,y_train)

Fitting 5 folds for each of 180 candidates, totalling 900 fits


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('transformer',
                                        ColumnTransformer(transformers=[('scaling',
                                                                         RobustScaler(),
                                                                         ['GP',
                                                                          'MIN',
                                                                          'PTS',
                                                                          'FGM',
                                                                          'FGA',
                                                                          'FG%',
                                                                          '3P '
                                                                          'Made',
                                                                          '3PA',
                         

In [17]:
grid_xgb.best_params_

{'xgb__learning_rate': 0.1, 'xgb__max_depth': 5, 'xgb__n_estimators': 1200}

In [27]:
pipeline_xgb_best = Pipeline(steps = [('transformer',transformer),('xgb',XGBClassifier(n_estimators = 1200,max_depth = 5, learning_rate = 0.1))])

In [28]:
cv_score_xgb_best = cross_val_score(pipeline_xgb_best,X,y,cv =5)

In [29]:
cv_score_xgb_best.mean()

0.7193582887700535

In [30]:
X_train = X
y_train = y

In [31]:
pipeline_xgb_best.fit(X_train,y_train)

Pipeline(steps=[('transformer',
                 ColumnTransformer(transformers=[('scaling', RobustScaler(),
                                                  ['GP', 'MIN', 'PTS', 'FGM',
                                                   'FGA', 'FG%', '3P Made',
                                                   '3PA', '3P%', 'FTM', 'FTA',
                                                   'FT%', 'OREB', 'DREB', 'REB',
                                                   'AST', 'STL', 'BLK',
                                                   'TOV'])])),
                ('xgb',
                 XGBClassifier(base_score=None, booster=None, callbacks=None,
                               colsample_bylevel=None, colsample_bynode=None,
                               colsample_bytree=None,
                               early...
                               feature_types=None, gamma=None, gpu_id=None,
                               grow_policy=None, importance_type=None,
                     

In [32]:
predictions_xgb = pipeline_xgb_best.predict(X_test)

In [33]:
predictions = pd.DataFrame(predictions_xgb, columns = ['prediction'])

In [34]:
from sklearn.metrics import classification_report

In [35]:
print(classification_report(y_test, predictions_xgb))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       180
           1       0.99      0.99      0.99       184

    accuracy                           0.99       364
   macro avg       0.99      0.99      0.99       364
weighted avg       0.99      0.99      0.99       364



# Random Forest

In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split

In [37]:
df = pd.read_csv('Train_data.csv')

In [38]:
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV, validation_curve
from sklearn.preprocessing import StandardScaler, QuantileTransformer, MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

In [39]:
col_transform = [col for col in df.columns if col not in ['Target']]

In [40]:
X = df[col_transform]
y = df['Target']

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [42]:
transformer = ColumnTransformer(transformers = [('scaling',RobustScaler(), col_transform)])

In [43]:
rf = RandomForestClassifier(n_jobs = -1)

In [46]:
pipeline_rf = Pipeline(steps = [('transformer',transformer),('rf',RandomForestClassifier())])

In [47]:
cv_score_rf = cross_val_score(pipeline_rf,X_train,y_train,cv =5)

In [48]:
cv_score_rf.mean()

0.7015444015444017

In [53]:
params_rf = {'rf__n_estimators':[100, 300, 500, 750, 800, 1200] ,
              'rf__max_depth': [5, 10, 15, 20, 25,30]}

In [54]:
grid_rf = GridSearchCV(pipeline_rf,param_grid=params_rf,cv=5,verbose=3,n_jobs=-1)

In [55]:
grid_rf.fit(X_train,y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('transformer',
                                        ColumnTransformer(transformers=[('scaling',
                                                                         RobustScaler(),
                                                                         ['GP',
                                                                          'MIN',
                                                                          'PTS',
                                                                          'FGM',
                                                                          'FGA',
                                                                          'FG%',
                                                                          '3P '
                                                                          'Made',
                                                                          '3PA',
                         

In [59]:
grid_rf.best_params_

{'rf__max_depth': 20, 'rf__n_estimators': 300}

In [60]:
pipeline_rf_best = Pipeline(steps = [('transformer',transformer),('rf',RandomForestClassifier(n_estimators = 300,max_depth = 20))])

In [61]:
cv_score_rf_best = cross_val_score(pipeline_rf_best,X,y,cv =5)

In [62]:
cv_score_rf_best.mean()

0.7374989716166188

In [63]:
pipeline_rf_best.fit(X_train,y_train)

Pipeline(steps=[('transformer',
                 ColumnTransformer(transformers=[('scaling', RobustScaler(),
                                                  ['GP', 'MIN', 'PTS', 'FGM',
                                                   'FGA', 'FG%', '3P Made',
                                                   '3PA', '3P%', 'FTM', 'FTA',
                                                   'FT%', 'OREB', 'DREB', 'REB',
                                                   'AST', 'STL', 'BLK',
                                                   'TOV'])])),
                ('rf', RandomForestClassifier(max_depth=20, n_estimators=300))])

In [64]:
predictions_rf = pipeline_rf_best.predict(X_test)

In [65]:
predictions = pd.DataFrame(predictions_rf, columns = ['prediction'])

In [66]:
print(classification_report(y_test, predictions_rf))

              precision    recall  f1-score   support

           0       0.75      0.75      0.75       180
           1       0.75      0.75      0.75       184

    accuracy                           0.75       364
   macro avg       0.75      0.75      0.75       364
weighted avg       0.75      0.75      0.75       364



# Decision Tree

In [67]:
from sklearn.tree import DecisionTreeClassifier

In [68]:
df = pd.read_csv('Train_data.csv')

In [69]:
col_transform = [col for col in df.columns if col not in ['Target']]

In [70]:
X = df[col_transform]
y = df['Target']

In [71]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [72]:
transformer = ColumnTransformer(transformers = [('scaling',RobustScaler(), col_transform)])

In [74]:
dt = DecisionTreeClassifier()

In [75]:
pipeline_dt = Pipeline(steps = [('transformer',transformer),('dt',DecisionTreeClassifier())])

In [76]:
cv_score_dt = cross_val_score(pipeline_dt,X_train,y_train,cv =5)

In [77]:
cv_score_dt.mean()

0.6119231476374333

In [83]:
params_dt = {'dt__criterion':['gini','entropy'] ,'dt__max_depth': [5, 10, 15, 20, 25,30]}

In [84]:
grid_dt = GridSearchCV(pipeline_dt,param_grid=params_dt,cv=5,verbose=3,n_jobs=-1)

In [85]:
grid_dt.fit(X_train,y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('transformer',
                                        ColumnTransformer(transformers=[('scaling',
                                                                         RobustScaler(),
                                                                         ['GP',
                                                                          'MIN',
                                                                          'PTS',
                                                                          'FGM',
                                                                          'FGA',
                                                                          'FG%',
                                                                          '3P '
                                                                          'Made',
                                                                          '3PA',
                         

In [90]:
grid_dt.best_params_

{'dt__criterion': 'entropy', 'dt__max_depth': 25}

In [94]:
pipeline_dt_best = Pipeline(steps = [('transformer',transformer),('dt',DecisionTreeClassifier(criterion = 'entropy', max_depth= 25))])

In [95]:
cv_score_dt_best = cross_val_score(pipeline_dt_best,X,y,cv =5)

In [96]:
cv_score_dt_best.mean()

0.6594241053064582

In [97]:
pipeline_dt_best.fit(X_train,y_train)

Pipeline(steps=[('transformer',
                 ColumnTransformer(transformers=[('scaling', RobustScaler(),
                                                  ['GP', 'MIN', 'PTS', 'FGM',
                                                   'FGA', 'FG%', '3P Made',
                                                   '3PA', '3P%', 'FTM', 'FTA',
                                                   'FT%', 'OREB', 'DREB', 'REB',
                                                   'AST', 'STL', 'BLK',
                                                   'TOV'])])),
                ('dt',
                 DecisionTreeClassifier(criterion='entropy', max_depth=25))])

In [98]:
predictions_dt = pipeline_dt_best.predict(X_test)

In [99]:
predictions = pd.DataFrame(predictions_dt, columns = ['prediction'])

In [100]:
print(classification_report(y_test, predictions_dt))

              precision    recall  f1-score   support

           0       0.64      0.69      0.66       180
           1       0.67      0.61      0.64       184

    accuracy                           0.65       364
   macro avg       0.66      0.65      0.65       364
weighted avg       0.66      0.65      0.65       364



# Support Vector Classifier

In [101]:
from sklearn.svm import SVC

In [102]:
df = pd.read_csv('Train_data.csv')

In [103]:
col_transform = [col for col in df.columns if col not in ['Target']]

In [104]:
X = df[col_transform]
y = df['Target']

In [105]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [106]:
transformer = ColumnTransformer(transformers = [('scaling',RobustScaler(), col_transform)])

In [107]:
svc = SVC()

In [108]:
pipeline_svc = Pipeline(steps = [('transformer',transformer),('svc',SVC())])

In [109]:
cv_score_svc = cross_val_score(pipeline_svc,X_train,y_train,cv =5)

In [110]:
cv_score_svc.mean()

0.6879757308328737

In [116]:
params_svc = {'svc__C':[0.1, 0.5, 1, 1.5] ,'svc__kernel' : ['linear', 'poly', 'rbf']}

In [117]:
grid_svc = GridSearchCV(pipeline_svc,param_grid=params_svc,cv=5,verbose=3,n_jobs=-1)

In [118]:
grid_svc.fit(X_train,y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('transformer',
                                        ColumnTransformer(transformers=[('scaling',
                                                                         RobustScaler(),
                                                                         ['GP',
                                                                          'MIN',
                                                                          'PTS',
                                                                          'FGM',
                                                                          'FGA',
                                                                          'FG%',
                                                                          '3P '
                                                                          'Made',
                                                                          '3PA',
                         

In [119]:
grid_svc.best_params_

{'svc__C': 1.5, 'svc__kernel': 'linear'}

In [120]:
pipeline_svc_best = Pipeline(steps = [('transformer',transformer),('svc',SVC(C = 1.5, kernel= 'linear'))])

In [121]:
cv_score_svc_best = cross_val_score(pipeline_svc_best,X,y,cv =5)

In [122]:
cv_score_svc_best.mean()

0.691201151789387

In [123]:
pipeline_svc_best.fit(X_train,y_train)

Pipeline(steps=[('transformer',
                 ColumnTransformer(transformers=[('scaling', RobustScaler(),
                                                  ['GP', 'MIN', 'PTS', 'FGM',
                                                   'FGA', 'FG%', '3P Made',
                                                   '3PA', '3P%', 'FTM', 'FTA',
                                                   'FT%', 'OREB', 'DREB', 'REB',
                                                   'AST', 'STL', 'BLK',
                                                   'TOV'])])),
                ('svc', SVC(C=1.5, kernel='linear'))])

In [124]:
predictions_svc = pipeline_svc_best.predict(X_test)

In [125]:
predictions = pd.DataFrame(predictions_svc, columns = ['prediction'])

In [126]:
print(classification_report(y_test, predictions_svc))

              precision    recall  f1-score   support

           0       0.69      0.70      0.69       180
           1       0.70      0.69      0.70       184

    accuracy                           0.70       364
   macro avg       0.70      0.70      0.70       364
weighted avg       0.70      0.70      0.70       364

