## Importing libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

---

## Importing data

In [None]:
# Locations of the preprocessed CSV files, relative to the working directory.
x_train_path = os.path.join('data', 'preprocessed_x_train.csv')
x_test_path = os.path.join('data', 'preprocessed_x_test.csv')
y_train_path = os.path.join('data', 'preprocessed_y_train.csv')

# Read all three files; the first CSV column becomes the DataFrame index.
x_train, x_test, y_train = (
    pd.read_csv(csv_path, index_col=[0])
    for csv_path in (x_train_path, x_test_path, y_train_path)
)

In [None]:
# Preview the first five rows of the training labels.
y_train.head(n=5)

In [None]:
# Preview the first five rows of the training features.
x_train.head(n=5)

In [None]:
# Preview the first five rows of the test features.
x_test.head(n=5)

---

## Importing models and evaluation utilities

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression

---

## 1) LogisticRegression

In [None]:
# Unfitted logistic-regression estimator built with scikit-learn's default
# hyperparameters. It is fitted on the full training set in the final cell;
# the cross-validation in `evaluate` creates its own fresh instances instead.
logisticRegression = LogisticRegression()

## Model Evaluation

In [None]:
def evaluate(model, x_train, y_train, n_splits, random_state=None):
    """Estimate a classifier's ROC AUC with shuffled K-fold cross-validation.

    The rows are shuffled once and split into ``n_splits`` consecutive folds.
    For every fold a fresh ``model()`` is fitted on the remaining rows and
    scored on the held-out rows. The mean AUC over all folds is printed and
    returned.

    Parameters
    ----------
    model : callable
        Zero-argument factory (e.g. an estimator class) returning an unfitted
        estimator that exposes ``fit`` and at least ``predict``.
    x_train : pandas.DataFrame
        Feature rows.
    y_train : pandas.DataFrame or pandas.Series
        Labels, looked up by the index labels of ``x_train``. Label ``1`` is
        treated as the positive class.
    n_splits : int
        Number of folds handed to ``KFold``.
    random_state : int, optional
        Seed for the shuffle. ``None`` (the default) keeps drawing from
        NumPy's global random state, as the function always did.

    Returns
    -------
    float
        Mean ROC AUC over the folds.
    """
    indexes = np.array(x_train.index)
    # A dedicated RandomState makes the folds reproducible when a seed is given;
    # without one, fall back to the global generator for backward compatibility.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    rng.shuffle(indexes)
    x_shuffled = x_train.loc[indexes]
    y_shuffled = y_train.loc[indexes]
    kFold = KFold(n_splits)
    kFold_auc = []

    for train_indexes, eval_indexes in kFold.split(x_shuffled):
        kFold_x_train = x_shuffled.iloc[train_indexes]
        kFold_y_train = y_shuffled.iloc[train_indexes]
        kFold_x_eval = x_shuffled.iloc[eval_indexes]
        kFold_y_eval = y_shuffled.iloc[eval_indexes]

        model_instance = model()
        model_instance.fit(kFold_x_train, kFold_y_train.values.ravel())

        # A ROC curve needs continuous scores: hard 0/1 predictions collapse it
        # to a single operating point and misstate the AUC.
        if hasattr(model_instance, 'predict_proba'):
            positive_column = list(model_instance.classes_).index(1)
            kFold_y_score = model_instance.predict_proba(kFold_x_eval)[:, positive_column]
        elif hasattr(model_instance, 'decision_function'):
            # NOTE(review): assumes a binary problem whose larger label is 1, so
            # that higher scores mean "more likely positive" - confirm encoding.
            kFold_y_score = model_instance.decision_function(kFold_x_eval)
        else:
            # Last resort for estimators that expose no score at all.
            kFold_y_score = model_instance.predict(kFold_x_eval)

        fpr, tpr, thresholds = roc_curve(
            kFold_y_eval.values.ravel(), kFold_y_score, pos_label=1
        )
        kFold_auc.append(auc(fpr, tpr))

    mean_auc = sum(kFold_auc) / len(kFold_auc)
    print(f'{model_instance} KFold auc: {mean_auc}')
    return mean_auc

In [None]:
# 10-fold cross-validated AUC for a default logistic regression.
evaluate(
    model=LogisticRegression,
    x_train=x_train,
    y_train=y_train,
    n_splits=10,
)

In [None]:
# Fit the final classifier on the complete training set.
model = logisticRegression
model.fit(x_train, y_train.values.ravel())

# Predict the test rows and label them with an 'Id' index shifted up by one
# relative to the test-set index.
predicted_labels = model.predict(x_test)
submission_index = x_test.index.rename('Id') + 1
y_pred = pd.DataFrame(predicted_labels, index=submission_index, columns=['Choice'])

# Write the predictions, including the 'Id' index column, to disk.
y_pred_path = os.path.join('data', 'predictions.csv')
y_pred.to_csv(y_pred_path, index=True)