In [2]:
import pandas as pd
import numpy as np
import matplotlib as plot
import os 

from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn import metrics


In [35]:
# Set Directory
# os.chdir("") raises FileNotFoundError, so the working directory is only changed
# when a path has actually been configured. Relative paths used in this notebook
# ("master_set.txt", "Games/...") are resolved against the working directory.
DATA_DIR = ""  # leave empty to stay in the current working directory
if DATA_DIR:
    os.chdir(DATA_DIR)

# Read Data For Testing/Training
# The space-separated master file is loaded and the columns listed below are removed
# so they are not passed to the model.
# NOTE(review): the displayed frame still contains an "Unnamed: 0" column, which is
# therefore part of the training features - confirm whether that is intended.
df = pd.read_table("master_set.txt", sep=" ").drop(
    columns=["Result", "Opponent", "Team", "diff_Score", "Power"]
)

Unnamed: 0,diff_Turnovers,diff_Passing,diff_Possession,diff_Rushing,Result_num
1,1,102,2.8,-111,0
2,-1,-83,-10.0,162,1
3,-1,21,13.54,99,1
4,-2,51,-14.16,-223,0
5,-4,90,13.7,96,1


In [4]:
# Function that takes Season Averages and takes difference. (Order Sensitive)
# You need the conference for each team. Team names must be written as expressed in their file.
def team_diffs(conference1, team1, conference2, team2, season="22", base_dir="Games"):
    """Return team1's season-average stats minus team2's.

    Each team's averages are read from the space-separated file
    ``<base_dir>/<conference>/<team>_<season>_season_diffs_mean.txt``, resolved
    relative to the current working directory unless ``base_dir`` is absolute.

    Parameters
    ----------
    conference1, team1 : str
        Conference folder and team file prefix of the first team (the minuend).
    conference2, team2 : str
        Conference folder and team file prefix of the second team (the subtrahend).
    season : str or int, default "22"
        Season tag embedded in the file name.
    base_dir : str, default "Games"
        Folder that contains the per-conference sub-folders.

    Returns
    -------
    pandas.DataFrame
        ``team1_avg.subtract(team2_avg)``. The result is order sensitive:
        swapping the two teams flips the sign of every value.

    Raises
    ------
    FileNotFoundError
        If either team's file does not exist.
    """
    def read_season_means(conference, team):
        # Same path layout for both teams; only conference and team differ.
        path = f"{base_dir}/{conference}/{team}_{season}_season_diffs_mean.txt"
        return pd.read_table(path, sep=" ")

    team1_avg = read_season_means(conference1, team1)  # Read Team 1 Data
    team2_avg = read_season_means(conference2, team2)  # Read Team 2 Data
    return team1_avg.subtract(team2_avg)  # Subtract Team Averages


In [39]:
# Hold out 25% of the rows for testing. The target is the coded game result
# (Result_num); every other remaining column is used as a predictor.
# random_state is fixed so the split is the same on every run.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    df.drop(columns=["Result_num"]),  # predictors
    df["Result_num"],  # target
    test_size=0.25,
    random_state=0,
)

In [40]:
# Logistic-regression classifier with the solver's iteration limit set to 1000.
MAX_ITER = 1000
logr = LogisticRegression(max_iter=MAX_ITER)

In [41]:
# Train the classifier on the training split; the object returned by fit() is kept as fit1.
fit1 = logr.fit(X=x_train, y=y_train)

In [42]:
# Predict the coded result for every row of the held-out test split.
pred = fit1.predict(X=x_test)

In [43]:
# Confusion matrix comparing the actual test results with the model's predictions.
# The author judged this performance to be as expected for a logistic regression.
metrics.confusion_matrix(y_true=y_test, y_pred=pred)

array([[125,  17],
       [ 21, 161]], dtype=int64)

In [47]:
# Single test matchup: Georgia vs. Ohio State.
# diff_Score is removed because the model was fitted without that column.
game_pred = (
    team_diffs("SEC", "Georgia", "Big10", "Ohio_State")
    .drop(columns="diff_Score")
)

In [88]:
# Predicted class for Georgia vs. Ohio State (the author reads 1 as a Georgia win).
uga_osu_pred = fit1.predict(game_pred)
uga_osu_pred[0]

1

In [82]:
# Matchups to predict. Every frame holds team1's season averages minus team2's,
# with diff_Score removed because the model was fitted without that column.
def matchup_features(conference1, team1, conference2, team2):
    """Return the stat differences (team1 - team2) for one matchup, without diff_Score."""
    return team_diffs(conference1, team1, conference2, team2).drop(columns="diff_Score")


UGAvOSU = matchup_features("SEC", "Georgia", "Big10", "Ohio_State")
GSvBUFF = matchup_features("Sun Belt", "Georgia_Southern", "MAC", "Buffalo")
DUKEvUCF = matchup_features("ACC", "Duke", "AAC", "UCF")
MICHvTCU = matchup_features("Big10", "Michigan", "Big12", "TCU")
MTvSDSU = matchup_features("CUSA", "Middle_Tennessee", "MountainWest", "San_Diego_State")
MEMPvUTST = matchup_features("AAC", "Memphis", "MountainWest", "Utah_State")
CCUvECU = matchup_features("Sun Belt", "Coastal_Carolina", "AAC", "East_Carolina")
WISCvOKST = matchup_features("Big10", "Wisconsin", "Big12", "Oklahoma_State")
KANvARK = matchup_features("Big12", "Kansas", "SEC", "Arkansas")
OREvUNC = matchup_features("PAC12", "Oregon", "ACC", "North_Carolina")
TTvOLEM = matchup_features("Big12", "Texas_Tech", "SEC", "Ole_Miss")
OKLvFSU = matchup_features("Big12", "Oklahoma", "ACC", "Florida_State")
TEXvWASH = matchup_features("Big12", "Texas", "PAC12", "Washington")
TENNvCLEM = matchup_features("SEC", "Tennessee", "ACC", "Clemson")
BAMAvKSTATE = matchup_features("SEC", "Alabama", "Big12", "Kansas_State")
IOWAvKEN = matchup_features("Big10", "Iowa", "SEC", "Kentucky")
UGAvMICH = matchup_features("SEC", "Georgia", "Big10", "Michigan")



In [83]:
# Ordered collection of the matchup frames; the position in this list determines
# the order in which predictions are produced later.
games = [
    UGAvOSU,
    GSvBUFF,
    DUKEvUCF,
    MICHvTCU,
    MTvSDSU,
    MEMPvUTST,
    CCUvECU,
    WISCvOKST,
    KANvARK,
    OREvUNC,
    TTvOLEM,
    OKLvFSU,
    TEXvWASH,
    TENNvCLEM,
    BAMAvKSTATE,
    IOWAvKEN,
    UGAvMICH,
]

In [84]:
# Map each matchup's position in `games` (0, 1, 2, ...) to its frame so the
# prediction loop can look the games up by number.
game_dic = dict(enumerate(games))


In [85]:
# Print the model's prediction for every matchup, in the order the matchups were
# added to game_dic (dictionaries preserve insertion order).
for matchup_frame in game_dic.values():
    print(fit1.predict(matchup_frame))
    

[1]
[0]
[1]
[1]
[1]
[1]
[0]
[1]
[1]
[1]
[0]
[0]
[1]
[1]
[1]
[0]
[0]


In [87]:
# One-off prediction: Wake Forest vs. Missouri (diff_Score removed as for the other matchups).
wake_mizzou = team_diffs("ACC", "Wake_Forest", "SEC", "Missouri").drop(columns="diff_Score")
fit1.predict(wake_mizzou)

array([1], dtype=int64)