In [1]:
from sklearn.model_selection import StratifiedKFold

In [2]:
#libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.model_selection import ShuffleSplit, KFold
from sklearn.model_selection import cross_val_score

import os
df_train_features = pd.read_csv('./train.csv', index_col='match_id_hash')
df_train_targets = pd.read_csv('./train_targets.csv', index_col='match_id_hash')

In [3]:
X = df_train_features
y = df_train_targets['radiant_win']

In [4]:
X=X.fillna(0).replace([np.inf, -np.inf], 0)

In [5]:
x_train, x_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3,random_state=17)

In [6]:
#logistic Regression
C = 1
penalty = 'l2'
max_iter = 100
solver = 'liblinear'
random_state = 17
n_jobs = -1
verbose = 1

clf_lr = LogisticRegression(C=C,
                            penalty=penalty,
                            max_iter=max_iter, 
                            random_state=random_state,
                            verbose=verbose,
                            n_jobs=n_jobs,
                           solver=solver)

In [7]:
clf_lr.fit(x_train, y_train)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


[LibLinear]



LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=-1, penalty='l2', random_state=17,
                   solver='liblinear', tol=0.0001, verbose=1, warm_start=False)

In [8]:
%%time
y_pred = clf_lr.predict(x_valid)
print('Log Regression validation roc_auc score {} '.format(roc_auc_score(y_pred, y_valid)))

Log Regression validation roc_auc score 0.7348164353153849 
Wall time: 546 ms


In [9]:
cv = StratifiedKFold(n_splits=5, random_state=17)

In [10]:
%%time
C = 1
penalty = 'l2'
max_iter = 50
solver = 'liblinear'
random_state = 17
n_jobs = -1
verbose = 1

logit = LogisticRegression(C=C,
                            penalty=penalty,
                            max_iter=max_iter, 
                            random_state=random_state,
                            verbose=verbose,
                            n_jobs=n_jobs,
                           solver=solver)

Wall time: 0 ns


In [11]:
c_values = np.logspace(-2, 2, 20)

logit_grid_searcher = GridSearchCV(estimator=logit, param_grid={'C': c_values},
                                  scoring='roc_auc', n_jobs=-1, cv=cv, verbose=1, iid = False)

In [13]:
logit.fit(X,y)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


[LibLinear]



LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=50,
                   multi_class='warn', n_jobs=-1, penalty='l2', random_state=17,
                   solver='liblinear', tol=0.0001, verbose=1, warm_start=False)

In [0]:
#logit_grid_searcher.best_score_, logit_grid_searcher.best_params_

In [0]:
#best_logit = logit_grid_searcher.best_estimator_
#best_logit.fit(X,y)

In [17]:
df_test_features = pd.read_csv('test.csv',index_col='match_id_hash')
df_test_features=df_test_features.fillna(0).replace([np.inf, -np.inf], 0)

In [18]:
prediction_test = logit.predict_proba(df_test_features.values)[:,1]

In [19]:
import datetime
submission_filename = 'submission_logit{}.csv'.format(
    datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

sub=pd.DataFrame({'radiant_win_prob':prediction_test},index=df_test_features.index)
sub.to_csv(submission_filename)

print('Submission saved to {}'.format(submission_filename))
sub.head()

Submission saved to submission_logit2019-11-19_02-11-58.csv


Unnamed: 0_level_0,radiant_win_prob
match_id_hash,Unnamed: 1_level_1
30cc2d778dca82f2edb568ce9b585caa,0.556209
70e5ba30f367cea48793b9003fab9d38,0.960785
4d9ef74d3a2025d79e9423105fd73d41,0.96364
2bb79e0c1eaac1608e5a09c8e0c6a555,0.670027
bec17f099b01d67edc82dfb5ce735a43,0.257597
