## Display Features

In [1]:
import os
import sqlite3
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# from xml.etree.ElementTree import fromstring, ElementTree
# import xml.etree.ElementTree as ET

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [2]:
%matplotlib inline
# Raise pandas' display limits so wide/long frames and long cell values
# are rendered without truncation.
pd.set_option("display.max_rows", 300000)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", 500)

# Matches

In [3]:
# Location of the matches CSV. The default is the original local path, so the
# notebook behaves exactly as before on the authoring machine; set the
# MATCHES_CSV environment variable to run it anywhere else without editing code.
matches_csv_path = os.environ.get(
    "MATCHES_CSV",
    "C:\\Users\\Space\\Documents\\py\\Projects\\TuringCollege\\Football\\DataSets\\matches8.csv",
)

# The first CSV column is used as the DataFrame index.
matches = pd.read_csv(matches_csv_path, index_col=0)

In [5]:
# Feature matrix: every column from 'HomePlayer1' through
# 'AwayDefenceTeamWidth' (label-based slice, both endpoints included).
X = matches.loc[:, slice('HomePlayer1', 'AwayDefenceTeamWidth')]

# Target vector: the 'Result' column.
y = matches.loc[:, 'Result']

# Logistic Regression (Multiclass "OVR")

In [39]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
    shuffle=True,
)

# One-vs-rest logistic regression with the library's default solver, trained
# on the raw (unscaled) features.
# NOTE(review): recent scikit-learn releases deprecate `multi_class` - confirm
# against the installed version.
log_model = LogisticRegression(max_iter=5000, multi_class="ovr").fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_predict = log_model.predict(X_test)

In [40]:
 # Per-class precision / recall / F1 for the held-out split.
 print(classification_report(y_true=y_test, y_pred=y_predict))

              precision    recall  f1-score   support

          -1       0.48      0.51      0.50      1050
           0       0.19      0.01      0.02       933
           1       0.55      0.82      0.66      1666

    accuracy                           0.52      3649
   macro avg       0.41      0.45      0.39      3649
weighted avg       0.44      0.52      0.45      3649



# Simple Logistic Regression Liblinear Solver

In [28]:
# Same 80/20 split as the other experiments (seed 101) so the reports are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=101, shuffle=True
)

# One-vs-rest logistic regression using the liblinear solver on raw features.
log_model = LogisticRegression(
    multi_class="ovr",
    solver="liblinear",
    max_iter=5000,
)
log_model.fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_predict = log_model.predict(X_test)

In [29]:
 # Summarise liblinear performance on the held-out split.
 print(classification_report(y_true=y_test, y_pred=y_predict))

              precision    recall  f1-score   support

          -1       0.49      0.52      0.50      1050
           0       0.16      0.00      0.01       933
           1       0.55      0.83      0.66      1666

    accuracy                           0.53      3649
   macro avg       0.40      0.45      0.39      3649
weighted avg       0.43      0.53      0.45      3649



## Standardized Features (Liblinear Solver)

In [30]:
# Same 80/20 split as the other experiments (seed 101).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=101, shuffle=True
)

# Fit the scaler on the training rows only, then apply the same transform to
# both splits so no statistics from the test rows reach the model.
scaler = StandardScaler().fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

# One-vs-rest logistic regression (liblinear) on the standardized features.
log_model = LogisticRegression(
    multi_class="ovr",
    solver="liblinear",
    max_iter=5000,
).fit(scaled_X_train, y_train)

# Predicted class labels for the standardized held-out rows.
y_predict = log_model.predict(scaled_X_test)

In [31]:
# Summarise liblinear performance on standardized features.
print(classification_report(y_true=y_test, y_pred=y_predict))

              precision    recall  f1-score   support

          -1       0.49      0.52      0.50      1050
           0       0.15      0.00      0.01       933
           1       0.55      0.82      0.66      1666

    accuracy                           0.53      3649
   macro avg       0.40      0.45      0.39      3649
weighted avg       0.43      0.53      0.45      3649



# Simple Logistic Regression Saga Solver

In [26]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=0.2, random_state=101)

# One-vs-rest logistic regression using the saga solver on raw features.
# saga shuffles the training data internally, so the estimator is seeded too;
# without random_state the fitted coefficients (and therefore the report
# below) can differ from one run to the next.
log_model = LogisticRegression(
    solver='saga',
    multi_class="ovr",
    max_iter=5000,
    random_state=101,
)
log_model.fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_predict = log_model.predict(X_test)

In [27]:
 # Summarise saga performance on the held-out split.
 print(classification_report(y_true=y_test, y_pred=y_predict))

              precision    recall  f1-score   support

          -1       0.50      0.52      0.51      1051
           0       0.14      0.01      0.01       921
           1       0.56      0.84      0.67      1677

    accuracy                           0.54      3649
   macro avg       0.40      0.46      0.40      3649
weighted avg       0.44      0.54      0.46      3649



## Standardized Features (Saga Solver)

In [45]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=0.2, random_state=101)

# Fit the scaler on the training rows only and reuse it for the test rows,
# so no statistics from the test rows reach the model.
scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

# One-vs-rest logistic regression (saga) on the standardized features.
# saga shuffles the training data internally, so the estimator is seeded too;
# without random_state the fit is not exactly reproducible between runs.
log_model = LogisticRegression(
    solver='saga',
    multi_class="ovr",
    max_iter=5000,
    random_state=101,
)
log_model.fit(scaled_X_train, y_train)

# Predicted class labels for the standardized held-out rows.
y_predict = log_model.predict(scaled_X_test)

In [46]:
# Summarise saga performance on standardized features.
print(classification_report(y_true=y_test, y_pred=y_predict))

              precision    recall  f1-score   support

          -1       0.49      0.52      0.50      1050
           0       0.15      0.00      0.01       933
           1       0.55      0.83      0.66      1666

    accuracy                           0.53      3649
   macro avg       0.40      0.45      0.39      3649
weighted avg       0.43      0.53      0.45      3649



# Simple Logistic Regression newton-cg Solver

In [41]:
# Same 80/20 split as the other experiments (seed 101) so the reports are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=101, shuffle=True
)

# One-vs-rest logistic regression using the newton-cg solver on raw features.
log_model = LogisticRegression(
    multi_class="ovr",
    solver='newton-cg',
    max_iter=5000,
).fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_predict = log_model.predict(X_test)

In [42]:
 # Summarise newton-cg performance on the held-out split.
 print(classification_report(y_true=y_test, y_pred=y_predict))

              precision    recall  f1-score   support

          -1       0.49      0.52      0.50      1050
           0       0.15      0.00      0.01       933
           1       0.55      0.82      0.66      1666

    accuracy                           0.53      3649
   macro avg       0.40      0.45      0.39      3649
weighted avg       0.43      0.53      0.45      3649

