In [1]:
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so warnings raised by later cells
# (including any emitted during model fitting) will not be displayed.
warnings.filterwarnings("ignore")

In [2]:
import sys
# Put the local ./modules directory on the import search path
# (presumably where the project's Classicator module lives — confirm).
sys.path.append('./modules')

In [3]:
from Classicator import Classicator

In [4]:
import pandas as pd
import numpy as np


In [5]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV


In [6]:
# Directory prefixes, relative to the notebook's working directory.
# They are joined to file names with plain string concatenation, so the
# trailing slash is required.
PATH_DATA = './data/'  # input CSV files
PATH_SUBMISSION = './submissions/'  # submission CSVs written by make_submission
PATH_MODEL = './models/'  # target directory passed to save_final_model

In [7]:
# Test-set CSV; in this notebook it is only used as the source of the
# PassengerId column when a submission file is written.
test_df = pd.read_csv(PATH_DATA + 'test.csv')

In [8]:
# Sample submission file; it serves as the template frame that
# make_submission fills in and writes out.
gender_submission = pd.read_csv(PATH_DATA + 'gender_submission.csv')

In [9]:
def make_submission(y_pred, gender_submission, test_df, name):
    """Write a submission CSV named ``name`` into ``PATH_SUBMISSION``.

    Parameters
    ----------
    y_pred : array-like
        Predicted ``Survived`` values, one per row of ``gender_submission``.
    gender_submission : pandas.DataFrame
        Template frame whose columns define the layout of the written file.
        It is copied, so the caller's frame is left untouched.
    test_df : pandas.DataFrame
        Frame providing the ``PassengerId`` column.
    name : str
        File name appended to ``PATH_SUBMISSION``.
    """
    # Work on a copy: the same template frame is reused for every submission,
    # so overwriting it in place would leave one model's predictions in it.
    submission = gender_submission.copy()
    # Bracket assignment always targets a column; attribute-style assignment
    # silently sets a plain Python attribute when the column does not exist.
    submission['Survived'] = y_pred
    submission['PassengerId'] = test_df['PassengerId']
    submission.to_csv(PATH_SUBMISSION + name, index=False)

# KNN blanks

In [10]:
# Name of the label column handed to Classicator.
target_name = 'Survived'
# Train/test CSVs of the "KNN blanks" data variant, read in that order.
train_knn_df, test_knn_df = (
    pd.read_csv(PATH_DATA + csv_name)
    for csv_name in ('train_knn_tree.csv', 'test_knn_tree.csv')
)

In [11]:
# Wrap the KNN-blanks train frame, test frame and target column name in the
# project's Classicator helper; all later modelling steps go through it.
classificator_knn_blanks = Classicator(train_knn_df, test_knn_df, target_name)

## Simple Classifier

In [12]:
# Choose KNeighborsClassifier as the estimator class, then build the baseline
# model. The call prints a classification report (see the cell output).
# NOTE(review): presumably the baseline uses default hyper-parameters — confirm
# in Classicator.
classificator_knn_blanks.set_class_classicator(KNeighborsClassifier)
classificator_knn_blanks.make_simple_classificator()

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.82      0.87      0.84       134
           1       0.78      0.71      0.74        89

    accuracy                           0.80       223
   macro avg       0.80      0.79      0.79       223
weighted avg       0.80      0.80      0.80       223



## Search for the best classifier

In [13]:
# Hyper-parameter grid for KNeighborsClassifier: k = 3..29 crossed with the
# distance metrics below.
# 'wminkowski' is intentionally not listed: it needs a weight vector `w` in
# metric_params, which this grid never supplies, and the metric has been
# removed from recent scikit-learn releases. Those candidates could only fail,
# wasting fits while the global warnings filter hid the failures.
parameters = {
    'n_neighbors': np.arange(3, 30),
    'metric': ['cosine', 'euclidean', 'manhattan', 'chebyshev'],
}
# Search settings handed to Classicator.set_class_searcher: the searcher class
# plus keyword options (5 folds, candidates ranked by precision, 5 workers).
# NOTE(review): presumably forwarded to GridSearchCV unchanged — confirm.
parametrs_class_searher = {
    'class_searcher': GridSearchCV,
    'cv': 5,
    'scoring': 'precision',
    'n_jobs': 5,
}
classificator_knn_blanks.set_parametr_search(parameters)
classificator_knn_blanks.set_class_searcher(**parametrs_class_searher)

In [14]:
# Run the hyper-parameter search configured in the previous cell. The output
# reports the best estimator, its parameters and a classification report.
classificator_knn_blanks.searh_best_classificator()

Best estimator

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='manhattan',
                     metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                     weights='uniform')

Best parametrs

{'metric': 'manhattan', 'n_neighbors': 6}

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.86      0.95      0.90       134
           1       0.91      0.76      0.83        89

    accuracy                           0.87       223
   macro avg       0.88      0.86      0.86       223
weighted avg       0.88      0.87      0.87       223



## Best classifier

In [15]:
# Build the "best" classifier; the call prints a classification report.
# NOTE(review): the scores printed here differ from the report printed by the
# search cell — how this model is fitted and evaluated is defined in
# Classicator, confirm there.
classificator_knn_blanks.make_best_classificator()

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.80      0.93      0.86       134
           1       0.85      0.65      0.74        89

    accuracy                           0.82       223
   macro avg       0.83      0.79      0.80       223
weighted avg       0.82      0.82      0.81       223



## Final classifier

In [16]:
# Build the final classifier; the call prints a classification report.
# NOTE(review): the printed scores are identical to the report from the search
# cell — confirm in Classicator which data the final model is fitted and
# evaluated on.
classificator_knn_blanks.make_final_classificator()

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.86      0.95      0.90       134
           1       0.91      0.76      0.83        89

    accuracy                           0.87       223
   macro avg       0.88      0.86      0.86       223
weighted avg       0.88      0.87      0.87       223



## Submission

In [17]:
# Get predictions from the final classifier (presumably for the test frame
# given to the Classicator constructor — confirm) and write them to a
# submission CSV under PATH_SUBMISSION.
y_pred = classificator_knn_blanks.make_predict_with_final_classificator()
make_submission(y_pred, gender_submission, test_df, 'submission_knn_with_knn_blanks.csv')

## Save model

In [18]:
# Persist the final model; directory and file name are passed separately and
# the actual serialization is implemented inside Classicator.
classificator_knn_blanks.save_final_model(PATH_MODEL, 'knn_with_knn_blanks.joblib')

# IMP Blanks

In [19]:
# Name of the label column handed to Classicator.
target_name = 'Survived'
# Train/test CSVs of the "IMP blanks" data variant, read in that order.
train_imp_df, test_imp_df = (
    pd.read_csv(PATH_DATA + csv_name)
    for csv_name in ('train_imp_tree.csv', 'test_imp_tree.csv')
)

In [20]:
# Wrap the IMP-blanks train frame, test frame and target column name in the
# project's Classicator helper; all later modelling steps go through it.
classificator_imp_blanks = Classicator(train_imp_df, test_imp_df, target_name)

## Simple Classifier

In [21]:
# Choose KNeighborsClassifier as the estimator class, then build the baseline
# model. The call prints a classification report (see the cell output).
# NOTE(review): presumably the baseline uses default hyper-parameters — confirm
# in Classicator.
classificator_imp_blanks.set_class_classicator(KNeighborsClassifier)
classificator_imp_blanks.make_simple_classificator()

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.82      0.84      0.83       134
           1       0.75      0.73      0.74        89

    accuracy                           0.79       223
   macro avg       0.79      0.78      0.78       223
weighted avg       0.79      0.79      0.79       223



## Search for the best classifier

In [22]:
# Hyper-parameter grid for KNeighborsClassifier: k = 3..29 crossed with the
# distance metrics below.
# 'wminkowski' is intentionally not listed: it needs a weight vector `w` in
# metric_params, which this grid never supplies, and the metric has been
# removed from recent scikit-learn releases. Those candidates could only fail,
# wasting fits while the global warnings filter hid the failures.
parameters = {
    'n_neighbors': np.arange(3, 30),
    'metric': ['cosine', 'euclidean', 'manhattan', 'chebyshev'],
}
# Search settings handed to Classicator.set_class_searcher: the searcher class
# plus keyword options (5 folds, candidates ranked by precision, 5 workers).
# NOTE(review): presumably forwarded to GridSearchCV unchanged — confirm.
parametrs_class_searher = {
    'class_searcher': GridSearchCV,
    'cv': 5,
    'scoring': 'precision',
    'n_jobs': 5,
}
classificator_imp_blanks.set_parametr_search(parameters)
classificator_imp_blanks.set_class_searcher(**parametrs_class_searher)

In [23]:
# Run the hyper-parameter search configured in the previous cell. The output
# reports the best estimator, its parameters and a classification report.
classificator_imp_blanks.searh_best_classificator()

Best estimator

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='manhattan',
                     metric_params=None, n_jobs=None, n_neighbors=14, p=2,
                     weights='uniform')

Best parametrs

{'metric': 'manhattan', 'n_neighbors': 14}

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.84      0.92      0.88       134
           1       0.86      0.73      0.79        89

    accuracy                           0.84       223
   macro avg       0.85      0.82      0.83       223
weighted avg       0.84      0.84      0.84       223



## Best classifier

In [24]:
# Build the "best" classifier; the call prints a classification report.
# NOTE(review): the scores printed here differ from the report printed by the
# search cell — how this model is fitted and evaluated is defined in
# Classicator, confirm there.
classificator_imp_blanks.make_best_classificator()

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.81      0.93      0.86       134
           1       0.86      0.67      0.75        89

    accuracy                           0.83       223
   macro avg       0.83      0.80      0.81       223
weighted avg       0.83      0.83      0.82       223



## Final classifier

In [25]:
# Build the final classifier; the call prints a classification report.
# NOTE(review): the printed scores are identical to the report from the search
# cell — confirm in Classicator which data the final model is fitted and
# evaluated on.
classificator_imp_blanks.make_final_classificator()

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.84      0.92      0.88       134
           1       0.86      0.73      0.79        89

    accuracy                           0.84       223
   macro avg       0.85      0.82      0.83       223
weighted avg       0.84      0.84      0.84       223



## Submission

In [26]:
# Get predictions from the final classifier (presumably for the test frame
# given to the Classicator constructor — confirm) and write them to a
# submission CSV under PATH_SUBMISSION.
y_pred = classificator_imp_blanks.make_predict_with_final_classificator()
make_submission(y_pred, gender_submission, test_df, 'submission_knn_with_imp_blanks.csv')

## Save model

In [27]:
# Persist the final model; directory and file name are passed separately and
# the actual serialization is implemented inside Classicator.
classificator_imp_blanks.save_final_model(PATH_MODEL, 'knn_with_imp_blanks.joblib')