In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import sys
sys.path.append('./modules')

In [3]:
from Classicator import Classicator

In [4]:
import pandas as pd
import numpy as np


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV


In [6]:
PATH_DATA = './data/'
PATH_SUBMISSION = './submissions/'
PATH_MODEL = './models/'

In [7]:
test_df = pd.read_csv(PATH_DATA + 'test.csv')

In [8]:
gender_submission = pd.read_csv(PATH_DATA + 'gender_submission.csv')

In [9]:
def make_submission(y_pred, gender_submission, test_df, name):
    gender_submission.Survived = y_pred
    gender_submission.PassengerId = test_df.PassengerId
    gender_submission.to_csv(PATH_SUBMISSION + name, index=False)

# KNN blanks

In [10]:
train_knn_df = pd.read_csv(PATH_DATA  +'train_knn_liner.csv')
test_knn_df = pd.read_csv(PATH_DATA  +'test_knn_liner.csv')
target_name = 'Survived'

In [11]:
classificator_knn_blanks = Classicator(train_knn_df, test_knn_df, target_name)

## Simple Classificator

In [12]:
classificator_knn_blanks.set_class_classicator(LogisticRegression)
classificator_knn_blanks.make_simple_classificator()

LogisticRegression
              precision    recall  f1-score   support

           0       0.85      0.86      0.86       134
           1       0.78      0.78      0.78        89

    accuracy                           0.83       223
   macro avg       0.82      0.82      0.82       223
weighted avg       0.82      0.83      0.82       223



## Search best classificator

In [13]:
parameters = {
    'penalty':['l1', 'l2', 'none'],
    'C':np.arange(0,2,0.05),
}
parametrs_class_searher = {
    'class_searcher': GridSearchCV,
    'cv':5,
    'scoring':'precision',
    'n_jobs': 5
}
classificator_knn_blanks.set_parametr_search(parameters)
classificator_knn_blanks.set_class_searcher(**parametrs_class_searher)

In [14]:
classificator_knn_blanks.searh_best_classificator()

Best estimator

LogisticRegression(C=1.1500000000000001, class_weight=None, dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

Best parametrs

{'C': 1.1500000000000001, 'penalty': 'l2'}

LogisticRegression
              precision    recall  f1-score   support

           0       0.88      0.89      0.88       134
           1       0.83      0.81      0.82        89

    accuracy                           0.86       223
   macro avg       0.85      0.85      0.85       223
weighted avg       0.86      0.86      0.86       223



## Best classificator

In [15]:
classificator_knn_blanks.make_best_classificator()

LogisticRegression
              precision    recall  f1-score   support

           0       0.85      0.85      0.85       134
           1       0.78      0.78      0.78        89

    accuracy                           0.82       223
   macro avg       0.81      0.81      0.81       223
weighted avg       0.82      0.82      0.82       223



## Final classificator

In [16]:
classificator_knn_blanks.make_final_classificator()

LogisticRegression
              precision    recall  f1-score   support

           0       0.88      0.89      0.88       134
           1       0.83      0.81      0.82        89

    accuracy                           0.86       223
   macro avg       0.85      0.85      0.85       223
weighted avg       0.86      0.86      0.86       223



## Submission

In [17]:
y_pred = classificator_knn_blanks.make_predict_with_final_classificator()
make_submission(y_pred, gender_submission, test_df, 'submission_log_loss_knn.csv')

## Save model

In [18]:
classificator_knn_blanks.save_final_model(PATH_MODEL, 'log_loss_knn.joblib')

# IMP Blanks

In [19]:
train_imp_df = pd.read_csv(PATH_DATA  +'train_imp_liner.csv')
test_imp_df = pd.read_csv(PATH_DATA  +'test_imp_liner.csv')
target_name = 'Survived'

In [20]:
classificator_imp_blanks = Classicator(train_imp_df, test_imp_df, target_name)

## Simple Classificator

In [21]:
classificator_imp_blanks.set_class_classicator(LogisticRegression)
classificator_imp_blanks.make_simple_classificator()

LogisticRegression
              precision    recall  f1-score   support

           0       0.86      0.84      0.85       134
           1       0.77      0.79      0.78        89

    accuracy                           0.82       223
   macro avg       0.81      0.81      0.81       223
weighted avg       0.82      0.82      0.82       223



## Search best classificator

In [22]:
parameters = {
    'penalty':['l1', 'l2', 'none'],
    'C':np.arange(0,2,0.05),
}
parametrs_class_searher = {
    'class_searcher': GridSearchCV,
    'cv':5,
    'scoring':'precision',
    'n_jobs': 5
}
classificator_imp_blanks.set_parametr_search(parameters)
classificator_imp_blanks.set_class_searcher(**parametrs_class_searher)

In [23]:
classificator_imp_blanks.searh_best_classificator()

Best estimator

LogisticRegression(C=0.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='none',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

Best parametrs

{'C': 0.0, 'penalty': 'none'}

LogisticRegression
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       134
           1       0.96      0.97      0.96        89

    accuracy                           0.97       223
   macro avg       0.97      0.97      0.97       223
weighted avg       0.97      0.97      0.97       223



## Best classificator

In [24]:
classificator_imp_blanks.make_best_classificator()

LogisticRegression
              precision    recall  f1-score   support

           0       0.79      0.80      0.80       134
           1       0.69      0.69      0.69        89

    accuracy                           0.75       223
   macro avg       0.74      0.74      0.74       223
weighted avg       0.75      0.75      0.75       223



## Final classificator

In [25]:
classificator_imp_blanks.make_final_classificator()

LogisticRegression
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       134
           1       0.96      0.97      0.96        89

    accuracy                           0.97       223
   macro avg       0.97      0.97      0.97       223
weighted avg       0.97      0.97      0.97       223



## Submission

In [26]:
y_pred = classificator_imp_blanks.make_predict_with_final_classificator()
make_submission(y_pred, gender_submission, test_df, 'submission_log_loss_imp.csv')

## Save model

In [27]:
classificator_imp_blanks.save_final_model(PATH_MODEL, 'log_loss_imp.joblib')