In [6]:
import warnings
warnings.filterwarnings("ignore")

In [7]:
import sys
sys.path.append('./modules')

In [8]:
from Classicator import Classicator

In [9]:
import pandas as pd
import numpy as np


In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV


In [11]:
PATH_DATA = './data/'
PATH_SUBMISSION = './submissions/'
PATH_MODEL = './models/'

In [12]:
test_df = pd.read_csv(PATH_DATA + 'test.csv')

In [13]:
gender_submission = pd.read_csv(PATH_DATA + 'gender_submission.csv')

In [14]:
def make_submission(y_pred, gender_submission, test_df, name):
    gender_submission.Survived = y_pred
    gender_submission.PassengerId = test_df.PassengerId
    gender_submission.to_csv(PATH_SUBMISSION + name, index=False)

# KNN blanks

In [15]:
train_knn_df = pd.read_csv(PATH_DATA  +'train_knn_tree.csv')
test_knn_df = pd.read_csv(PATH_DATA  +'test_knn_tree.csv')
target_name = 'Survived'

In [16]:
classificator_knn_blanks = Classicator(train_knn_df, test_knn_df, target_name)

## Simple Classificator

In [17]:
classificator_knn_blanks.set_class_classicator(RandomForestClassifier)
classificator_knn_blanks.make_simple_classificator()

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.85      0.87      0.86       134
           1       0.79      0.78      0.78        89

    accuracy                           0.83       223
   macro avg       0.82      0.82      0.82       223
weighted avg       0.83      0.83      0.83       223



## Search best classificator

In [18]:
parameters = {
    'n_estimators':np.arange(100,1001,100),
    'max_depth':np.arange(2,10,2),
    'min_samples_leaf':np.arange(1,10,1)
}
parametrs_class_searher = {
    'class_searcher': GridSearchCV,
    'cv':5,
    'scoring':'precision',
    'n_jobs': 5
}
classificator_knn_blanks.set_parametr_search(parameters)
classificator_knn_blanks.set_class_searcher(**parametrs_class_searher)

In [19]:
classificator_knn_blanks.searh_best_classificator()

Best estimator

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=2, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=5, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

Best parametrs

{'max_depth': 2, 'min_samples_leaf': 5, 'n_estimators': 100}

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.81      0.88      0.85       134
           1       0.79      0.70      0.74        89

    accuracy                           0.81       223
   macro avg       0.80      0.79      0.79       223
weighted avg       0.81      0.81      0.80       223



## Best classificator

In [20]:
classificator_knn_blanks.make_best_classificator()

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.78      0.90      0.83       134
           1       0.81      0.61      0.69        89

    accuracy                           0.78       223
   macro avg       0.79      0.75      0.76       223
weighted avg       0.79      0.78      0.78       223



## Final classificator

In [21]:
classificator_knn_blanks.make_final_classificator()

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.81      0.91      0.86       134
           1       0.83      0.67      0.75        89

    accuracy                           0.82       223
   macro avg       0.82      0.79      0.80       223
weighted avg       0.82      0.82      0.81       223



## Submission

In [26]:
y_pred = classificator_knn_blanks.make_predict_with_final_classificator()
make_submission(y_pred, gender_submission, test_df, 'submission_rf_knn.csv')

## Save model

In [27]:
classificator_knn_blanks.save_final_model(PATH_MODEL, 'rf_knn.joblib')

# IMP Blanks

In [28]:
train_imp_df = pd.read_csv(PATH_DATA  +'train_imp_tree.csv')
test_imp_df = pd.read_csv(PATH_DATA  +'test_imp_tree.csv')
target_name = 'Survived'

In [29]:
classificator_imp_blanks = Classicator(train_imp_df, test_imp_df, target_name)

## Simple Classificator

In [30]:
classificator_imp_blanks.set_class_classicator(RandomForestClassifier)
classificator_imp_blanks.make_simple_classificator()

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.85      0.83      0.84       134
           1       0.75      0.78      0.76        89

    accuracy                           0.81       223
   macro avg       0.80      0.80      0.80       223
weighted avg       0.81      0.81      0.81       223



## Search best classificator

In [31]:
parameters = {
    'n_estimators':np.arange(100,1001,100),
    'max_depth':np.arange(2,10,2),
    'min_samples_leaf':np.arange(1,10,1)
}
parametrs_class_searher = {
    'class_searcher': GridSearchCV,
    'cv':5,
    'scoring':'precision',
    'n_jobs': 5
}
classificator_imp_blanks.set_parametr_search(parameters)
classificator_imp_blanks.set_class_searcher(**parametrs_class_searher)

In [32]:
classificator_imp_blanks.searh_best_classificator()

Best estimator

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=2, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=3, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

Best parametrs

{'max_depth': 2, 'min_samples_leaf': 3, 'n_estimators': 500}

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.80      0.94      0.86       134
           1       0.88      0.64      0.74        89

    accuracy                           0.82       223
   macro avg       0.84      0.79      0.80       223
weighted avg       0.83      0.82      0.81       223



## Best classificator

In [33]:
classificator_imp_blanks.make_best_classificator()

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.76      0.95      0.84       134
           1       0.87      0.54      0.67        89

    accuracy                           0.78       223
   macro avg       0.81      0.74      0.75       223
weighted avg       0.80      0.78      0.77       223



## Final classificator

In [34]:
classificator_imp_blanks.make_final_classificator()

RandomForestClassifier
              precision    recall  f1-score   support

           0       0.80      0.93      0.86       134
           1       0.86      0.64      0.74        89

    accuracy                           0.82       223
   macro avg       0.83      0.79      0.80       223
weighted avg       0.82      0.82      0.81       223



## Submission

In [35]:
y_pred = classificator_imp_blanks.make_predict_with_final_classificator()
make_submission(y_pred, gender_submission, test_df, 'submission_rf_imp.csv')

## Save model

In [36]:
classificator_imp_blanks.save_final_model(PATH_MODEL, 'rf_imp.joblib')