# Libraries

In [21]:
library(fitzRoy)
library(dplyr)
library(jsonlite)
library(lubridate)
library(purrr)


Attaching package: ‘purrr’


The following object is masked from ‘package:jsonlite’:

    flatten




# Functions

In [2]:
preprocess_fixture <- function(fixture) {
    # create new column for each state. 
    fixture_clean <- fixture %>% mutate(region = case_when(
        venue %in% c('Adelaide Hills', 'Adelaide Oval', 'Norwood Oval', 'Football Park') ~ "SA",
        venue %in% c('M.C.G.', 'Docklands', 'Eureka Stadium', 'Kardinia Park', 'Marvel Stadium', 
                     'GMHBA Stadium', 'Mars Stadium') ~ "VIC",
        venue %in% c('Carrara', 'Gabba', "Cazaly's Stadium", "Riverway Stadium") ~ "QLD",
        venue %in% c('S.C.G.', 'Sydney Showground', 'Stadium Australia', 'Blacktown') ~ "NSW",
        venue %in% c('Marrara Oval', 'Traeger Park') ~ 'NT',
        venue %in% c('Bellerive Oval', 'York Park', 'University of Tasmania Stadium') ~ "TAS",
        venue %in% c('Manuka Oval', 'UNSW Canberra Oval') ~ 'ACT',
        venue %in% c('Perth Stadium', 'Optus Stadium', 'Subiaco') ~ 'WA',
        venue %in% c('Jiangwan Stadium', 'Adelaide Arena at Jiangwan Stadium') ~ 'CHN',
        TRUE ~ NA_character_  # set NA for all other observations
    ))
    
    
    fixture_clean$date <- as.Date(fixture_clean$localtime)
    fixture_clean$time <- format(ymd_hms(fixture_clean$localtime), "%H:%M:%S")
    fixture_clean$home_win <- ifelse(fixture_clean$hscore > fixture_clean$ascore, 1, 0) 
    fixture_clean$hdiff <- fixture_clean$hscore - fixture_clean$ascore
    
    # select specific rows
    fixture_clean <- select(fixture_clean, year, round, date, time, region, venue, hteam, ateam, hscore, ascore, 
                            is_grand_final, is_final, home_win, hdiff)
    
    return(fixture_clean)
}


# function to check if a column contains NA values.
check_na_column <- function(df, col_name) {
    return(sum(is.na(df[[col_name]])))
}

# Data Cleaning

## Fixture Data

Start with the cruncher data as it provides a good baseline to work with. 

In [4]:
# cruncher gives some good clean baseline data to work with  
cruncher <- fromJSON("https://thecruncherau.vercel.app/afl/data.json")

ERROR: Error in fromJSON("https://thecruncherau.vercel.app/afl/data.json"): could not find function "fromJSON"


Create a new column to represent a home win.

In [40]:
fixture_12 <- fetch_fixture_squiggle(2012)
fixture_13 <- fetch_fixture_squiggle(2013)
fixture_14 <- fetch_fixture_squiggle(2014)
fixture_15 <- fetch_fixture_squiggle(2015)
fixture_16 <- fetch_fixture_squiggle(2016)
fixture_17 <- fetch_fixture_squiggle(2017)
fixture_18 <- fetch_fixture_squiggle(2018)
fixture_19 <- fetch_fixture_squiggle(2019)
fixture_20 <- fetch_fixture_squiggle(2020)
fixture_21 <- fetch_fixture_squiggle(2021)
fixture_22 <- fetch_fixture_squiggle(2022)
fixture_23 <- fetch_fixture_squiggle(2023)

fixture = rbind(fixture_12, fixture_13, fixture_14, fixture_15, fixture_16, fixture_17, 
                fixture_18, fixture_19, fixture_20, fixture_21, fixture_22, fixture_23)

[36mℹ[39m No round specified - returning results for all rounds in [34m[34m2012[34m[39m

[36mℹ[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2012[39m

[32m✔[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2012[39m ... done



[36mℹ[39m No round specified - returning results for all rounds in [34m[34m2013[34m[39m

[36mℹ[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2013[39m

[32m✔[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2013[39m ... done



[36mℹ[39m No round specified - returning results for all rounds in [34m[34m2014[34m[39m

[36mℹ[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2014[39m

[32m✔[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2014[39m ... done



[36mℹ[39m No round specified - returning results for all rounds in [34m[34m2015[34m[39m

[36mℹ[39m Getting data from [32mhttps://api.squiggle.com

Combine all the rows of each of the dataframes to get data for years 2017 to 2023.

Local time used instead of date as it has the time based on the location of the game. Will split local time into two variables, date and time. Will using venue will create a column for State/region. 

In [49]:
colnames(fixture)

In [50]:
head(fixture)

round,venue,hscore,complete,is_final,winner,roundname,ascore,unixtime,hteam,⋯,ateamid,hbehinds,hgoals,is_grand_final,updated,year,agoals,abehinds,winnerteamid,date
<int>,<fct>,<int>,<int>,<int>,<chr>,<chr>,<int>,<int>,<chr>,⋯,<int>,<int>,<int>,<int>,<chr>,<int>,<int>,<int>,<int>,<chr>
1,Subiaco,105,100,0,Fremantle,Round 1,101,1333183500,Fremantle,⋯,7,9,16,0,2018-06-26 12:17:05,2012,15,11,6,2012-03-31 19:45:00
1,Carrara,68,100,0,Adelaide,Round 1,137,1333172700,Gold Coast,⋯,1,8,10,0,2018-06-26 12:17:05,2012,19,23,1,2012-03-31 16:45:00
1,Stadium Australia,37,100,0,Sydney,Round 1,100,1332577200,Greater Western Sydney,⋯,16,7,5,0,2018-06-26 12:17:05,2012,14,16,16,2012-03-24 19:20:00
1,M.C.G.,137,100,0,Hawthorn,Round 1,115,1333097400,Hawthorn,⋯,4,17,20,0,2018-06-26 12:17:05,2012,16,19,10,2012-03-30 19:50:00
1,M.C.G.,78,100,0,Brisbane Lions,Round 1,119,1333161900,Melbourne,⋯,2,12,11,0,2018-06-26 12:17:05,2012,17,17,2,2012-03-31 13:45:00
1,Docklands,102,100,0,Essendon,Round 1,104,1333183500,North Melbourne,⋯,5,12,15,0,2018-06-26 12:17:05,2012,14,20,5,2012-03-31 19:45:00


In [51]:
fixture$venue <- as.factor(fixture$venue)  # convert to factor datatype
summary(fixture$venue)

# check if region has NA values, to see if new/old stadiums in use
check_na_column(fixture, 'region')

In [41]:
player_stats_23 <- fetch_player_stats(season=2023, round=1)

[36mℹ[39m Fetching match ids

[32m✔[39m Fetching match ids ... done



[36mℹ[39m Finding player stats for [34m9[39m matches.

[32m✔[39m Finding player stats for [34m9[39m matches. ... done





In [42]:
player_stats_23

providerId,utcStartTime,status,compSeason.shortName,round.name,round.roundNumber,venue.name,home.team.name,home.team.club.name,away.team.name,⋯,extendedStats.centreBounceAttendances,extendedStats.kickins,extendedStats.kickinsPlayon,player.playerId,player.captain,player.playerJumperNumber,player.givenName,player.surname,teamStatus,team.name
<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<lgl>,<int>,<chr>,<chr>,<chr>,<chr>
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,1,1,CD_I1000223,FALSE,7,Liam,Baker,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I1002245,FALSE,21,Noah,Balta,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,11,0,0,CD_I993993,FALSE,29,Shai,Bolton,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I295203,FALSE,35,Nathan,Broad,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,6,0,0,CD_I270896,FALSE,9,Trent,Cotchin,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,2,2,CD_I280819,FALSE,2,Dylan,Grimes,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I293813,FALSE,19,Tom,Lynch,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I1008478,FALSE,31,Rhyan,Mansell,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I993771,FALSE,50,Marlion,Pickett,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,16,0,0,CD_I998172,FALSE,14,Tim,Taranto,home,Richmond


In [40]:
player_details <- fetch_player_details(season = 2022, source = "afltables")

→ For the afltables source, details are returned for all seasons. Ignoring `current` argument

[36mℹ[39m Fetching player details for all teams

[36mℹ[39m Fetching player details for Adelaide

[32m✔[39m Fetching player details for Adelaide ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching player details for Brisbane Lions

[32m✔[39m Fetching player details for Brisbane Lions ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching player details for Brisbane Bears

[32m✔[39m Fetching player details for Brisbane Bears ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching player details for Carlton

[32m✔[39m Fetching player details for Carlton ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching player details for Collingwood

[32m✔[39m Fetching player details for Collingwood ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching 

Player,Team,Cap,#,HT,WT,Games,Wins,Draws,Losses,Goals,Seasons,Debut,Last,date_accessed
<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<date>
Grantley Fielke,Adelaide,1,30,178cm,78kg,24,11,0,13,6,1991-1992,29y 4d,30y 45d,2023-12-21
Darel Hart,Adelaide,2,3,178cm,80kg,39,20,0,19,38,1991-1992,27y 78d,28y 239d,2023-12-21
Eddie Hocking,Adelaide,3,8,168cm,68kg,11,5,0,6,4,1991,21y 75d,21y 222d,2023-12-21
Rod Jameson,Adelaide,4,35,185cm,87kg,153,78,1,74,113,1991-1999,20y 265d,29y 60d,2023-12-21
Andrew Jarman,Adelaide,5,2,177cm,91kg,110,51,1,58,92,1991-1996,25y 67d,30y 230d,2023-12-21
John Klug,Adelaide,6,20,188cm,98kg,26,11,0,15,34,1991-1992,25y 175d,26y 259d,2023-12-21
Scott Lee,Adelaide,7,13,178cm,86kg,86,43,1,42,18,1991-1995,27y 290d,32y 55d,2023-12-21
Bruce Lindner,Adelaide,8,9,185cm,87kg,19,15,0,4,27,1991-1992,29y 275d,31y 70d,2023-12-21
Bruce Lindsay,Adelaide,9,32,178cm,77kg,6,2,0,4,,1991,29y 243d,29y 286d,2023-12-21
David Marshall,Adelaide,10,16,177cm,77kg,26,11,0,15,14,1991-1992,30y 265d,32y 19d,2023-12-21


In [31]:
player_details_22

Player,Team,Cap,#,HT,WT,Games,Wins,Draws,Losses,Goals,Seasons,Debut,Last,date_accessed
<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<date>
Grantley Fielke,Adelaide,1,30,178cm,78kg,24,11,0,13,6,1991-1992,29y 4d,30y 45d,2023-12-21
Darel Hart,Adelaide,2,3,178cm,80kg,39,20,0,19,38,1991-1992,27y 78d,28y 239d,2023-12-21
Eddie Hocking,Adelaide,3,8,168cm,68kg,11,5,0,6,4,1991,21y 75d,21y 222d,2023-12-21
Rod Jameson,Adelaide,4,35,185cm,87kg,153,78,1,74,113,1991-1999,20y 265d,29y 60d,2023-12-21
Andrew Jarman,Adelaide,5,2,177cm,91kg,110,51,1,58,92,1991-1996,25y 67d,30y 230d,2023-12-21
John Klug,Adelaide,6,20,188cm,98kg,26,11,0,15,34,1991-1992,25y 175d,26y 259d,2023-12-21
Scott Lee,Adelaide,7,13,178cm,86kg,86,43,1,42,18,1991-1995,27y 290d,32y 55d,2023-12-21
Bruce Lindner,Adelaide,8,9,185cm,87kg,19,15,0,4,27,1991-1992,29y 275d,31y 70d,2023-12-21
Bruce Lindsay,Adelaide,9,32,178cm,77kg,6,2,0,4,,1991,29y 243d,29y 286d,2023-12-21
David Marshall,Adelaide,10,16,177cm,77kg,26,11,0,15,14,1991-1992,30y 265d,32y 19d,2023-12-21


In [27]:
player_details_22

data_accessed
<date>


In [91]:
freo_details_23

No,surname,first_name,Games,Age,Date of Birth,Height,Weight,Origin,Position_1,Position_2
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
11,Aish,James,163,28yr 2mth,8 Oct 1995,183cm,83kg,Norwood,Midfield,
24,Amiss,Jye,25,20yr 4mth,31 Jul 2003,196cm,86kg,East Perth,Forward,
41,Banfield,Bailey,75,25yr 9mth,26 Feb 1998,190cm,88kg,Claremont,Forward,
8,Brayshaw,Andrew,123,24yr 1mth,8 Nov 1999,185cm,88kg,Sandringham Dragons,Midfield,
17,Brodie,Will,54,25yr 3mth,23 Aug 1998,190cm,89kg,Murray Bushrangers,Midfield,
5,Chapman,Heath,26,21yr 10mth,31 Jan 2002,193cm,87kg,West Perth,Defender,
6,Clark,Jordan,79,23yr 2mth,16 Oct 2000,185cm,83kg,Claremont,Defender,
19,Corbett,Josh,41,27yr 7mth,23 Apr 1996,190cm,88kg,Werribee,Forward,
36,Cox,Brennan,102,25yr 4mth,13 Aug 1998,195cm,97kg,Woodville-West Torrens,Defender,
4,Darcy,Sean,98,25yr 6mth,12 Jun 1998,203cm,111kg,Geelong Falcons,Ruck,


In [94]:
colnames(player_stats_23)

In [None]:
player_stats_22

# CSV Output
Write to a csv for further python analysis

In [39]:
write.csv(fixture, file='fixture.csv', row.names=FALSE)