# Libraries

In [1]:
library(fitzRoy)
library(dplyr)
library(jsonlite)
library(lubridate)
library(purrr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


“running command 'timedatectl' had status 1”

Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union



Attaching package: ‘purrr’


The following object is masked from ‘package:jsonlite’:

    flatten




# Functions

In [2]:
preprocess_fixture <- function(fixture) {
    # create new column for each state. 
    fixture_clean <- fixture %>% mutate(region = case_when(
        venue %in% c('Adelaide Hills', 'Adelaide Oval', 'Norwood Oval', 'Football Park') ~ "SA",
        venue %in% c('M.C.G.', 'Docklands', 'Eureka Stadium', 'Kardinia Park', 'Marvel Stadium', 
                     'GMHBA Stadium', 'Mars Stadium') ~ "VIC",
        venue %in% c('Carrara', 'Gabba', "Cazaly's Stadium", "Riverway Stadium") ~ "QLD",
        venue %in% c('S.C.G.', 'Sydney Showground', 'Stadium Australia', 'Blacktown') ~ "NSW",
        venue %in% c('Marrara Oval', 'Traeger Park') ~ 'NT',
        venue %in% c('Bellerive Oval', 'York Park', 'University of Tasmania Stadium') ~ "TAS",
        venue %in% c('Manuka Oval', 'UNSW Canberra Oval') ~ 'ACT',
        venue %in% c('Perth Stadium', 'Optus Stadium', 'Subiaco') ~ 'WA',
        venue %in% c('Jiangwan Stadium', 'Adelaide Arena at Jiangwan Stadium') ~ 'CHN',
        TRUE ~ NA_character_  # set NA for all other observations
    ))
    
    
    fixture_clean$date <- as.Date(fixture_clean$localtime)
    fixture_clean$time <- format(ymd_hms(fixture_clean$localtime), "%H:%M:%S")
    fixture_clean$home_win <- ifelse(fixture_clean$hscore > fixture_clean$ascore, 1, 0) 
    fixture_clean$hdiff <- fixture_clean$hscore - fixture_clean$ascore
    
    # select specific rows
    fixture_clean <- select(fixture_clean, year, round, date, time, region, venue, hteam, ateam, hscore, ascore, 
                            is_grand_final, is_final, home_win, hdiff)
    
    return(fixture_clean)
}


# function to check if a column contains NA values.
check_na_column <- function(df, col_name) {
    return(sum(is.na(df[[col_name]])))
}

# Data Cleaning

## Fixture Data

Start with the cruncher data as it provides a good baseline to work with. 

In [3]:
# cruncher gives some good clean baseline data to work with  
cruncher <- fromJSON("https://thecruncherau.vercel.app/afl/data.json")

Create a new column to represent a home win.

In [4]:
fixture_12 <- fetch_fixture_squiggle(2012)
fixture_13 <- fetch_fixture_squiggle(2013)
fixture_14 <- fetch_fixture_squiggle(2014)
fixture_15 <- fetch_fixture_squiggle(2015)
fixture_16 <- fetch_fixture_squiggle(2016)
fixture_17 <- fetch_fixture_squiggle(2017)
fixture_18 <- fetch_fixture_squiggle(2018)
fixture_19 <- fetch_fixture_squiggle(2019)
fixture_20 <- fetch_fixture_squiggle(2020)
fixture_21 <- fetch_fixture_squiggle(2021)
fixture_22 <- fetch_fixture_squiggle(2022)
fixture_23 <- fetch_fixture_squiggle(2023)

fixture = rbind(fixture_12, fixture_13, fixture_14, fixture_15, fixture_16, fixture_17, 
                fixture_18, fixture_19, fixture_20, fixture_21, fixture_22, fixture_23)

[36mℹ[39m No round specified - returning results for all rounds in [34m[34m2012[34m[39m

[36mℹ[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2012[39m

[32m✔[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2012[39m ... done



[36mℹ[39m No round specified - returning results for all rounds in [34m[34m2013[34m[39m

[36mℹ[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2013[39m

[32m✔[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2013[39m ... done



[36mℹ[39m No round specified - returning results for all rounds in [34m[34m2014[34m[39m

[36mℹ[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2014[39m

[32m✔[39m Getting data from [32mhttps://api.squiggle.com.au/?q=games;year=2014[39m ... done



[36mℹ[39m No round specified - returning results for all rounds in [34m[34m2015[34m[39m

[36mℹ[39m Getting data from [32mhttps://api.squiggle.com

Combine all the rows of each of the dataframes to get data for years 2017 to 2023.

Local time used instead of date as it has the time based on the location of the game. Will split local time into two variables, date and time. Will using venue will create a column for State/region. 

In [5]:
colnames(fixture)

In [6]:
head(fixture)

venue,hbehinds,ascore,hgoals,year,round,complete,tz,ateamid,unixtime,⋯,updated,roundname,hteamid,winner,hteam,id,timestr,abehinds,localtime,hscore
<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<chr>,<int>,<int>,⋯,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<int>,<chr>,<int>
Subiaco,9,101,16,2012,1,100,+11:00,7,1333183500,⋯,2018-06-26 12:17:05,Round 1,6,Fremantle,Fremantle,1463,Full Time,11,2012-03-31 16:45:00,105
Carrara,8,137,10,2012,1,100,+11:00,1,1333172700,⋯,2018-06-26 12:17:05,Round 1,8,Adelaide,Gold Coast,1461,Full Time,23,2012-03-31 15:45:00,68
Stadium Australia,7,100,5,2012,1,100,+11:00,16,1332577200,⋯,2018-06-26 12:17:05,Round 1,9,Sydney,Greater Western Sydney,1456,Full Time,16,2012-03-24 19:20:00,37
M.C.G.,17,115,20,2012,1,100,+11:00,4,1333097400,⋯,2018-06-26 12:17:05,Round 1,10,Hawthorn,Hawthorn,1459,Full Time,19,2012-03-30 19:50:00,137
M.C.G.,12,119,11,2012,1,100,+11:00,2,1333161900,⋯,2018-06-26 12:17:05,Round 1,11,Brisbane Lions,Melbourne,1460,Full Time,17,2012-03-31 13:45:00,78
Docklands,12,104,15,2012,1,100,+11:00,5,1333183500,⋯,2018-06-26 12:17:05,Round 1,12,Essendon,North Melbourne,1462,Full Time,20,2012-03-31 19:45:00,102


In [7]:
fixture$venue <- as.factor(fixture$venue)  # convert to factor datatype
summary(fixture$venue)

# check if region has NA values, to see if new/old stadiums in use
check_na_column(fixture, 'region')

In [8]:
player_stats_23 <- fetch_player_stats(season=2023, round=1)

[36mℹ[39m Fetching match ids

[32m✔[39m Fetching match ids ... done



[36mℹ[39m Finding player stats for [34m9[39m matches.

[32m✔[39m Finding player stats for [34m9[39m matches. ... done





In [9]:
player_stats_23

providerId,utcStartTime,status,compSeason.shortName,round.name,round.roundNumber,venue.name,home.team.name,home.team.club.name,away.team.name,⋯,extendedStats.centreBounceAttendances,extendedStats.kickins,extendedStats.kickinsPlayon,player.playerId,player.captain,player.playerJumperNumber,player.givenName,player.surname,teamStatus,team.name
<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<lgl>,<int>,<chr>,<chr>,<chr>,<chr>
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,1,1,CD_I1000223,FALSE,7,Liam,Baker,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I1002245,FALSE,21,Noah,Balta,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,11,0,0,CD_I993993,FALSE,29,Shai,Bolton,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I295203,FALSE,35,Nathan,Broad,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,6,0,0,CD_I270896,FALSE,9,Trent,Cotchin,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,2,2,CD_I280819,FALSE,2,Dylan,Grimes,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I293813,FALSE,19,Tom,Lynch,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I1008478,FALSE,31,Rhyan,Mansell,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,0,0,0,CD_I993771,FALSE,50,Marlion,Pickett,home,Richmond
CD_M20230140101,2023-03-16T08:20:00.000+0000,CONCLUDED,Premiership,Round 1,1,MCG,Richmond,Richmond,Carlton,⋯,16,0,0,CD_I998172,FALSE,14,Tim,Taranto,home,Richmond


In [10]:
player_details <- fetch_player_details(season = 2022, source = "afltables")

→ For the afltables source, details are returned for all seasons. Ignoring `current` argument

[36mℹ[39m Fetching player details for all teams

[36mℹ[39m Fetching player details for Adelaide

[32m✔[39m Fetching player details for Adelaide ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching player details for Brisbane Lions

[32m✔[39m Fetching player details for Brisbane Lions ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching player details for Brisbane Bears

[32m✔[39m Fetching player details for Brisbane Bears ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching player details for Carlton

[32m✔[39m Fetching player details for Carlton ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching player details for Collingwood

[32m✔[39m Fetching player details for Collingwood ... done



[36mℹ[39m Fetching player details for all teams
[36mℹ[39m Fetching 

In [11]:
player_details_22

ERROR: Error in eval(expr, envir, enclos): object 'player_details_22' not found


In [None]:
player_details_22

In [None]:
freo_details_23

In [None]:
colnames(player_stats_23)

In [None]:
player_stats_22

# CSV Output
Write to a csv for further python analysis

In [None]:
write.csv(fixture, file='fixture.csv', row.names=FALSE)