### Load Packages

In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

### Load the Data

In [2]:
# Both CSV paths are relative to the notebook's working directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../../train.csv', '../../test.csv')
)

### Prepare the training and testing data

In [3]:
# Split off the label and drop the non-feature columns
# (ID_code is presumably a row identifier - confirm against the data dictionary).
y_train = train['target']
X_train = train.drop(columns=['ID_code', 'target'])
X_test = test.drop(columns=['ID_code'])

# Fit the scaler on the training features only, then apply that same
# transform to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

### Set up the Metric

This challenge is scored on ROC-AUC (the area under the ROC curve).

In [4]:
# One shuffled, seeded 4-fold stratified splitter and one scorer name,
# shared by every evaluation below so the reported scores are comparable.
kf = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)
mt = 'roc_auc'

### Define the functions for scoring and grid search

In [5]:
def performance(model):
    """Cross-validate `model` on the training data and print a score summary.

    Uses the notebook-level `X_train`, `y_train`, splitter `kf` and scorer
    name `mt`. Prints the mean and the standard deviation of the per-fold
    scores; returns nothing.
    """
    fold_scores = cross_val_score(
        model,
        X_train,
        y_train,
        scoring=mt,
        cv=kf,
        n_jobs=1,
    )
    summary = (
        ('Score Mean: {:.4f}', fold_scores.mean()),
        ('Score Std: {:.4f}', fold_scores.std()),
    )
    for template, value in summary:
        print(template.format(value))

In [6]:
def grid_search(model, params):
    """Grid-search `params` for `model` and return the best fitted estimator.

    Uses the notebook-level `X_train`, `y_train`, splitter `kf` and scorer
    name `mt`, running the search with 4 parallel jobs. Prints the best
    cross-validated score and the parameter combination that achieved it.
    """
    searcher = GridSearchCV(
        model,
        params,
        scoring=mt,
        cv=kf,
        verbose=True,
        n_jobs=4,
    )
    searcher.fit(X_train, y_train)

    print('Best Score: \n{}'.format(searcher.best_score_))
    print('Best Parameters: \n{}'.format(searcher.best_params_))
    return searcher.best_estimator_

### Get the baseline of different models

In [7]:
# Baseline: logistic regression constructed with no arguments (library defaults),
# evaluated with the shared cross-validation helper.
performance(LogisticRegression())

Score Mean: 0.8596
Score Std: 0.0022


In [8]:
# Baseline: Gaussian naive Bayes constructed with no arguments (library defaults),
# evaluated with the shared cross-validation helper.
performance(GaussianNB())

Score Mean: 0.8885
Score Std: 0.0027
