In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

import numpy as np

In [2]:
# Read the training efficiency statistics from disk and preview the first rows.
data = pd.read_csv(filepath_or_buffer='Data/efficiency_stats_train.csv')
data.head(n=5)

Unnamed: 0,LABEL,HOME,OPASS,ORUSH,OINT,DPASS,DRUSH,TPEN
0,W,0,3.75,4.142857,0.25,3.454545,4.242424,0.626866
1,W,0,0.0,5.352113,0.0,6.350877,6.142857,0.935897
2,L,1,10.615385,1.261905,0.153846,10.125,3.716981,0.454545
3,L,1,8.761905,4.053571,0.0,19.0,3.678571,0.974026
4,W,0,10.285714,5.207792,0.0,3.421053,1.296296,0.511905


In [3]:
# Separate the target column from the remaining feature columns.
y = data["LABEL"]
X = data.drop(columns=["LABEL"])
# Sanity check: the row counts of features and target must agree.
print(X.shape, y.shape)

(13236, 7) (13236,)


In [4]:
# Hold out a test split; the split is seeded for repeatability and stratified
# on the label so both partitions keep the same class proportions.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X,
    y,
    random_state=42,
    stratify=y,
)

In [7]:
# Learn the standardization statistics from the training split only, then
# apply that same fitted transform to both the training and the test features.
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [8]:
# Create a logistic-regression classifier using the library's default settings.
classifier = LogisticRegression()
# Bare expression: displays the estimator and its parameters as the cell output.
classifier

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [9]:
# Fit the classifier on the standardized training features and their labels.
# Any later scoring or prediction must therefore use features transformed by X_scaler.
classifier.fit(X_train_scaled, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [10]:
# Accuracy on the split used for fitting versus the held-out split,
# both evaluated on standardized features.
train_score = classifier.score(X_train_scaled, y_train)
test_score = classifier.score(X_test_scaled, y_test)
print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

Training Data Score: 0.8443638561498942
Testing Data Score: 0.8359020852221215


In [11]:
# Predict labels for the held-out split.
# The classifier was fitted on standardized features, so prediction must use the
# standardized test features (X_test_scaled), not the raw X_test; feeding raw
# values would evaluate the model on a different feature scale than it was
# trained on and disagree with the test score reported earlier.
predictions = classifier.predict(X_test_scaled)
print(f"First 10 Predictions:   {predictions[:10]}")
# .iloc makes the positional (first ten rows) selection explicit, since y_test
# keeps the shuffled original row labels as its index.
print(f"First 10 Actual labels: {y_test.iloc[:10].tolist()}")

First 10 Predictions:   ['W' 'L' 'W' 'L' 'L' 'L' 'L' 'L' 'W' 'L']
First 10 Actual labels: ['W', 'L', 'W', 'W', 'L', 'L', 'L', 'L', 'W', 'L']


In [12]:
# Side-by-side view of predicted and actual labels for the first ten test rows,
# re-indexed from zero so the shuffled original row labels are not shown.
(
    pd.DataFrame({"Prediction": predictions, "Actual": y_test})
    .head(10)
    .reset_index(drop=True)
)

Unnamed: 0,Actual,Prediction
0,W,W
1,L,L
2,W,W
3,W,L
4,L,L
5,L,L
6,L,L
7,L,L
8,W,W
9,L,L


In [13]:
# Efficiency coefficients: the fitted weights of the logistic model, one per
# feature column. The model was fitted on standardized features, so each weight
# applies to the scaled value of its feature, not the raw statistic.
classifier.coef_

array([[ 0.1322718 ,  1.4115213 ,  0.92459706, -0.63565543, -1.44196753,
        -1.05862955, -0.24280716]])

In [14]:
# Efficiency constant: the fitted intercept (bias) term of the logistic model.
classifier.intercept_

array([-0.03255638])