# Libraries

In [73]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [74]:
fixture = pd.read_csv('fixture.csv')
fixture

Unnamed: 0,year,round,date,time,region,venue,hteam,ateam,hscore,ascore,home_win,hdiff
0,2017,1,2017-03-26,14:50:00,SA,Adelaide Oval,Adelaide,Greater Western Sydney,147,91,1,56
1,2017,1,2017-03-23,19:20:00,VIC,M.C.G.,Carlton,Richmond,89,132,0,-43
2,2017,1,2017-03-24,19:50:00,VIC,M.C.G.,Collingwood,Western Bulldogs,86,100,0,-14
3,2017,1,2017-03-25,19:25:00,VIC,M.C.G.,Essendon,Hawthorn,116,91,1,25
4,2017,1,2017-03-26,16:40:00,WA,Subiaco,Fremantle,Geelong,73,115,0,-42
...,...,...,...,...,...,...,...,...,...,...,...,...
1408,2023,26,2023-09-15,19:50:00,VIC,M.C.G.,Melbourne,Carlton,71,73,0,-2
1409,2023,26,2023-09-16,19:10:00,SA,Adelaide Oval,Port Adelaide,Greater Western Sydney,70,93,0,-23
1410,2023,27,2023-09-23,17:15:00,QLD,Gabba,Brisbane Lions,Carlton,79,63,1,16
1411,2023,27,2023-09-22,19:50:00,VIC,M.C.G.,Collingwood,Greater Western Sydney,58,57,1,1


# Modeling

## Logistic Regression Model

In [75]:
fixture[fixture['year'] == 2023]

Unnamed: 0,year,round,date,time,region,venue,hteam,ateam,hscore,ascore,home_win,hdiff
1197,2023,1,2023-03-17,19:40:00,VIC,M.C.G.,Geelong,Collingwood,103,125,0,-22
1198,2023,1,2023-03-18,19:00:00,QLD,Carrara,Gold Coast,Sydney,61,110,0,-49
1199,2023,1,2023-03-19,13:10:00,NSW,Sydney Showground,Greater Western Sydney,Adelaide,106,90,1,16
1200,2023,1,2023-03-19,15:20:00,VIC,M.C.G.,Hawthorn,Essendon,65,124,0,-59
1201,2023,1,2023-03-18,19:25:00,VIC,M.C.G.,Melbourne,Western Bulldogs,115,65,1,50
...,...,...,...,...,...,...,...,...,...,...,...,...
1408,2023,26,2023-09-15,19:50:00,VIC,M.C.G.,Melbourne,Carlton,71,73,0,-2
1409,2023,26,2023-09-16,19:10:00,SA,Adelaide Oval,Port Adelaide,Greater Western Sydney,70,93,0,-23
1410,2023,27,2023-09-23,17:15:00,QLD,Gabba,Brisbane Lions,Carlton,79,63,1,16
1411,2023,27,2023-09-22,19:50:00,VIC,M.C.G.,Collingwood,Greater Western Sydney,58,57,1,1


In [76]:
# convert to datetime dtype and extract month and day
fixture['date'] = pd.to_datetime(fixture['date'])
fixture['month'] = fixture['date'].dt.month
fixture['day'] = fixture['date'].dt.day_name()
fixture

Unnamed: 0,year,round,date,time,region,venue,hteam,ateam,hscore,ascore,home_win,hdiff,month,day
0,2017,1,2017-03-26,14:50:00,SA,Adelaide Oval,Adelaide,Greater Western Sydney,147,91,1,56,3,Sunday
1,2017,1,2017-03-23,19:20:00,VIC,M.C.G.,Carlton,Richmond,89,132,0,-43,3,Thursday
2,2017,1,2017-03-24,19:50:00,VIC,M.C.G.,Collingwood,Western Bulldogs,86,100,0,-14,3,Friday
3,2017,1,2017-03-25,19:25:00,VIC,M.C.G.,Essendon,Hawthorn,116,91,1,25,3,Saturday
4,2017,1,2017-03-26,16:40:00,WA,Subiaco,Fremantle,Geelong,73,115,0,-42,3,Sunday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1408,2023,26,2023-09-15,19:50:00,VIC,M.C.G.,Melbourne,Carlton,71,73,0,-2,9,Friday
1409,2023,26,2023-09-16,19:10:00,SA,Adelaide Oval,Port Adelaide,Greater Western Sydney,70,93,0,-23,9,Saturday
1410,2023,27,2023-09-23,17:15:00,QLD,Gabba,Brisbane Lions,Carlton,79,63,1,16,9,Saturday
1411,2023,27,2023-09-22,19:50:00,VIC,M.C.G.,Collingwood,Greater Western Sydney,58,57,1,1,9,Friday


In [84]:
# Removing colons and converting time to integer
fixture['time_int'] = fixture['time'].str.replace(':', '').astype(int) / 100
fixture['time_int'] = fixture['time_int'].astype(int)
fixture

Unnamed: 0,year,round,date,time,region,venue,hteam,ateam,hscore,ascore,home_win,hdiff,month,day,time2,time_int
0,2017,1,2017-03-26,14:50:00,SA,Adelaide Oval,Adelaide,Greater Western Sydney,147,91,1,56,3,Sunday,1450.0,1450
1,2017,1,2017-03-23,19:20:00,VIC,M.C.G.,Carlton,Richmond,89,132,0,-43,3,Thursday,1920.0,1920
2,2017,1,2017-03-24,19:50:00,VIC,M.C.G.,Collingwood,Western Bulldogs,86,100,0,-14,3,Friday,1950.0,1950
3,2017,1,2017-03-25,19:25:00,VIC,M.C.G.,Essendon,Hawthorn,116,91,1,25,3,Saturday,1925.0,1925
4,2017,1,2017-03-26,16:40:00,WA,Subiaco,Fremantle,Geelong,73,115,0,-42,3,Sunday,1640.0,1640
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1408,2023,26,2023-09-15,19:50:00,VIC,M.C.G.,Melbourne,Carlton,71,73,0,-2,9,Friday,1950.0,1950
1409,2023,26,2023-09-16,19:10:00,SA,Adelaide Oval,Port Adelaide,Greater Western Sydney,70,93,0,-23,9,Saturday,1910.0,1910
1410,2023,27,2023-09-23,17:15:00,QLD,Gabba,Brisbane Lions,Carlton,79,63,1,16,9,Saturday,1715.0,1715
1411,2023,27,2023-09-22,19:50:00,VIC,M.C.G.,Collingwood,Greater Western Sydney,58,57,1,1,9,Friday,1950.0,1950


In [52]:
# Define your features and target variable
X = fixture.drop(['home_win', 'ascore', 'hscore', 'hdiff', 'date'], axis=1)
y = fixture['home_win']

In [53]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [54]:
X_test

Unnamed: 0,year,round,time,region,venue,hteam,ateam,month,day
1130,2022,17,19:00:00,SA,Adelaide Oval,Port Adelaide,Greater Western Sydney,7,Saturday
1131,2022,17,19:25:00,VIC,Docklands,St Kilda,Fremantle,7,Saturday
1132,2022,17,19:50:00,NSW,S.C.G.,Sydney,Western Bulldogs,7,Friday
1133,2022,17,14:40:00,WA,Perth Stadium,West Coast,Carlton,7,Sunday
1134,2022,18,13:15:00,SA,Adelaide Oval,Adelaide,Collingwood,7,Saturday
...,...,...,...,...,...,...,...,...,...
1408,2023,26,19:50:00,VIC,M.C.G.,Melbourne,Carlton,9,Friday
1409,2023,26,19:10:00,SA,Adelaide Oval,Port Adelaide,Greater Western Sydney,9,Saturday
1410,2023,27,17:15:00,QLD,Gabba,Brisbane Lions,Carlton,9,Saturday
1411,2023,27,19:50:00,VIC,M.C.G.,Collingwood,Greater Western Sydney,9,Friday


In [37]:
# Create and fit the model
model = LogisticRegression()
model.fit(X_train, y_train)

ValueError: could not convert string to float: '14:50:00'