In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import sys
sys.path.append('./modules')

In [3]:
from Classicator import Classicator

In [4]:
import pandas as pd
import numpy as np


In [5]:
from xgboost.sklearn import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV


In [6]:
PATH_DATA = './data/'
PATH_SUBMISSION = './submissions/'
PATH_MODEL = './models/'

In [7]:
test_df = pd.read_csv(PATH_DATA + 'test.csv')

In [8]:
gender_submission = pd.read_csv(PATH_DATA + 'gender_submission.csv')

In [9]:
def make_submission(y_pred, gender_submission, test_df, name):
    gender_submission.Survived = y_pred
    gender_submission.PassengerId = test_df.PassengerId
    gender_submission.to_csv(PATH_SUBMISSION + name, index=False)

# KNN blanks

In [10]:
train_knn_df = pd.read_csv(PATH_DATA  +'train_knn_tree.csv')
test_knn_df = pd.read_csv(PATH_DATA  +'test_knn_tree.csv')
target_name = 'Survived'

In [11]:
classificator_knn_blanks = Classicator(train_knn_df, test_knn_df, target_name)

## Simple Classificator

In [12]:
classificator_knn_blanks.set_class_classicator(XGBClassifier)
classificator_knn_blanks.make_simple_classificator()

XGBClassifier
              precision    recall  f1-score   support

           0       0.85      0.84      0.85       134
           1       0.77      0.78      0.77        89

    accuracy                           0.82       223
   macro avg       0.81      0.81      0.81       223
weighted avg       0.82      0.82      0.82       223



## Search best classificator

In [13]:
parameters = {
    'n_estimators':np.arange(10,100,10),
    'booster':['gbtree',],
    'objective':['binary:logistic',],
    "eval_metric":["error",],
    'eta':np.arange(0.1,0.6,0.1),
    'gamma': [0,],
    'max_depth': np.arange(3, 10,1),
    'min_child_weight' : np.arange(1,10,1),
    'max_delta_step':[0,],
    'subsample': [1,],
    'colsample_bytree':[1,],
    'silent':[1,],
    'seed':[42,],
    'base_score':[0.5,]
}


parametrs_class_searher = {
    'class_searcher': GridSearchCV,
    'cv':5,
    'scoring':'accuracy',
    'n_jobs': 5
}
classificator_knn_blanks.set_parametr_search(parameters)
classificator_knn_blanks.set_class_searcher(**parametrs_class_searher)

In [14]:
classificator_knn_blanks.searh_best_classificator()

Best estimator

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eta=0.1,
              eval_metric='error', gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints=None, learning_rate=0.100000001,
              max_delta_step=0, max_depth=4, min_child_weight=8, missing=nan,
              monotone_constraints=None, n_estimators=60, n_jobs=0,
              num_parallel_tree=1, objective='binary:logistic', random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=42, silent=1,
              subsample=1, tree_method=None, validate_parameters=False, ...)

Best parametrs

{'base_score': 0.5, 'booster': 'gbtree', 'colsample_bytree': 1, 'eta': 0.1, 'eval_metric': 'error', 'gamma': 0, 'max_delta_step': 0, 'max_depth': 4, 'min_child_weight': 8, 'n_estimators': 60, 'objective': 'binary:logistic', 'seed': 42, 'silent': 1, 'subsample': 1}

XGBClassifier
              precisi

## Best classificator

In [15]:
classificator_knn_blanks.make_best_classificator()

XGBClassifier
              precision    recall  f1-score   support

           0       0.84      0.90      0.87       134
           1       0.84      0.74      0.79        89

    accuracy                           0.84       223
   macro avg       0.84      0.82      0.83       223
weighted avg       0.84      0.84      0.84       223



## Final classificator

In [16]:
classificator_knn_blanks.make_final_classificator()

XGBClassifier
              precision    recall  f1-score   support

           0       0.89      0.95      0.92       134
           1       0.91      0.83      0.87        89

    accuracy                           0.90       223
   macro avg       0.90      0.89      0.90       223
weighted avg       0.90      0.90      0.90       223



## Submission

In [17]:
y_pred = classificator_knn_blanks.make_predict_with_final_classificator()
make_submission(y_pred, gender_submission, test_df, 'submission_xgb_knn.csv')

## Save model

In [18]:
classificator_knn_blanks.save_final_model(PATH_MODEL, 'xgb_knn.joblib')

# IMP Blanks

In [19]:
train_imp_df = pd.read_csv(PATH_DATA  +'train_imp_tree.csv')
test_imp_df = pd.read_csv(PATH_DATA  +'test_imp_tree.csv')
target_name = 'Survived'

In [20]:
classificator_imp_blanks = Classicator(train_imp_df, test_imp_df, target_name)

## Simple Classificator

In [21]:
classificator_imp_blanks.set_class_classicator(XGBClassifier)
classificator_imp_blanks.make_simple_classificator()

XGBClassifier
              precision    recall  f1-score   support

           0       0.85      0.82      0.84       134
           1       0.74      0.79      0.77        89

    accuracy                           0.81       223
   macro avg       0.80      0.80      0.80       223
weighted avg       0.81      0.81      0.81       223



## Search best classificator

In [22]:
parameters = {
    'n_estimators':np.arange(10,100,10),
    'booster':['gbtree',],
    'objective':['binary:logistic',],
    "eval_metric":["error",],
    'eta':np.arange(0.1,0.6,0.1),
    'gamma': [0,],
    'max_depth': np.arange(3, 10,1),
    'min_child_weight' : np.arange(1,10,1),
    'max_delta_step':[0,],
    'subsample': [1,],
    'colsample_bytree':[1,],
    'silent':[1,],
    'seed':[42,],
    'base_score':[0.5,]
}

parametrs_class_searher = {
    'class_searcher': GridSearchCV,
    'cv':5,
    'scoring':'accuracy',
    'n_jobs': 5
}
classificator_imp_blanks.set_parametr_search(parameters)
classificator_imp_blanks.set_class_searcher(**parametrs_class_searher)

In [23]:
classificator_imp_blanks.searh_best_classificator()

Best estimator

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eta=0.30000000000000004,
              eval_metric='error', gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints=None, learning_rate=0.300000012,
              max_delta_step=0, max_depth=3, min_child_weight=6, missing=nan,
              monotone_constraints=None, n_estimators=80, n_jobs=0,
              num_parallel_tree=1, objective='binary:logistic', random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=42, silent=1,
              subsample=1, tree_method=None, validate_parameters=False, ...)

Best parametrs

{'base_score': 0.5, 'booster': 'gbtree', 'colsample_bytree': 1, 'eta': 0.30000000000000004, 'eval_metric': 'error', 'gamma': 0, 'max_delta_step': 0, 'max_depth': 3, 'min_child_weight': 6, 'n_estimators': 80, 'objective': 'binary:logistic', 'seed': 42, 'silent': 1, 'subsample': 1}

XGB

## Best classificator

In [24]:
classificator_imp_blanks.make_best_classificator()

XGBClassifier
              precision    recall  f1-score   support

           0       0.87      0.88      0.87       134
           1       0.82      0.80      0.81        89

    accuracy                           0.85       223
   macro avg       0.84      0.84      0.84       223
weighted avg       0.85      0.85      0.85       223



## Final classificator

In [25]:
classificator_imp_blanks.make_final_classificator()

XGBClassifier
              precision    recall  f1-score   support

           0       0.93      0.94      0.93       134
           1       0.91      0.89      0.90        89

    accuracy                           0.92       223
   macro avg       0.92      0.91      0.92       223
weighted avg       0.92      0.92      0.92       223



## Submission

In [26]:
y_pred = classificator_imp_blanks.make_predict_with_final_classificator()
make_submission(y_pred, gender_submission, test_df, 'submission_xgb_imp.csv')

## Save model

In [27]:
classificator_imp_blanks.save_final_model(PATH_MODEL, 'xgb_imp.joblib')