In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


In [2]:
# Read the dataset from its CSV file and preview the first rows.
csv_path = 'data/log_test_data.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,LABEL,OPASS,ORUSH,OINT,DPASS,DRUSH,TPEN
0,W,18.27,5.81,0.0,7.85,2.77,0.9
1,W,7.44,5.59,0.0,5.26,1.93,0.33
2,W,8.69,4.1,0.0,7.75,1.31,1.02
3,W,17.13,3.26,0.07,8.39,1.5,1.03
4,L,9.89,3.82,0.16,6.41,3.94,0.78


In [3]:
# Target vector: the LABEL column. Feature matrix: every remaining column.
y = data["LABEL"]
X = data.drop(columns="LABEL")
print(X.shape, y.shape)

(1633, 6) (1633,)


In [4]:
# Partition into train/test sets, stratified on the label, with a fixed
# seed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=1,
)

In [5]:
# Pin the solver explicitly instead of relying on the library default.
# The default solver is not the same across scikit-learn releases
# (liblinear before 0.22, lbfgs from 0.22 on), and the outputs recorded in
# this notebook were produced with solver='liblinear'. Naming it here keeps
# a fresh "Restart & Run All" consistent with those recorded results.
classifier = LogisticRegression(solver="liblinear")
classifier

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [7]:
# Train the model on the training partition only; the fitted estimator is
# the cell's displayed value.
classifier.fit(X=X_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [8]:
# Score the fitted model on both partitions so the two numbers can be
# compared side by side.
train_score = classifier.score(X_train, y_train)
test_score = classifier.score(X_test, y_test)
print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

Training Data Score: 0.8415032679738562
Testing Data Score: 0.8337408312958435


In [9]:
# Predict a label for every test row, then show the first ten predictions
# next to the first ten true labels.
predictions = classifier.predict(X_test)
first_predicted = predictions[:10]
first_actual = y_test.head(10).tolist()
print(f"First 10 Predictions:   {first_predicted}")
print(f"First 10 Actual labels: {first_actual}")

First 10 Predictions:   ['W' 'L' 'L' 'W' 'W' 'L' 'W' 'W' 'W' 'L']
First 10 Actual labels: ['L', 'L', 'L', 'W', 'W', 'L', 'W', 'W', 'W', 'L']


In [12]:
# Put predicted and true labels side by side. The index inherited from
# y_test is dropped so the preview is numbered from 0.
comparison = pd.DataFrame({"Prediction": predictions, "Actual": y_test})
comparison.reset_index(drop=True).head()

Unnamed: 0,Actual,Prediction
0,L,W
1,L,L
2,L,L
3,W,W
4,W,W


In [14]:
# Inspect the fitted coefficients (the recorded output holds one row of six
# values, matching the six feature columns of X).
classifier.coef_

array([[ 0.55588655,  0.75352965, -1.82296051, -0.58632846, -0.71272833,
        -1.00049768]])

In [15]:
# Inspect the fitted intercept term of the model.
classifier.intercept_

array([0.85454865])