## Data cleaning (2009) notebook

In [26]:
options(tidyverse.quiet = TRUE)
library(tidyverse)
library(janitor)
library(lubridate)
source("src/utils.R")

In [27]:
# read in downloaded data from triathlon.org

In [28]:
# Tongyeong
# Load the raw export and pass it through clean_df() (not defined in this
# notebook; presumably provided by src/utils.R).
ty_df <- read_csv("data/original_data/2009_tongyeong.csv", col_types = cols())
ty_df <- clean_df(ty_df)
# NOTE(review): here the literal prog_id (4520) selects the women's rows and
# the men's table is every other prog_id, whereas most other races in this
# notebook match the men's id instead -- confirm 4520 is the women's program.
mens_ty <- ty_df %>%
  filter(prog_id != 4520)
womens_ty <- ty_df %>%
  filter(prog_id == 4520)

# Madrid: load the raw export, run it through clean_df(), then split by prog_id.
md_df <- read_csv(
  "data/original_data/2009_madrid.csv",
  col_types = cols()
) %>%
  clean_df()
# prog_id 4538 -> men's table; rows with any other prog_id -> women's table.
mens_md <- filter(md_df, prog_id == 4538)
womens_md <- filter(md_df, prog_id != 4538)

# Washington: load the raw export, run it through clean_df(), then split by
# prog_id.
ws_df <- read_csv(
  "data/original_data/2009_washington.csv",
  col_types = cols()
) %>%
  clean_df()
# prog_id 4535 -> men's table; rows with any other prog_id -> women's table.
mens_ws <- filter(ws_df, prog_id == 4535)
womens_ws <- filter(ws_df, prog_id != 4535)

# Kitzbuehel: load the raw export, run it through clean_df(), then split by
# prog_id.
kz_df <- read_csv(
  "data/original_data/2009_kitz.csv",
  col_types = cols()
) %>%
  clean_df()
# prog_id 4544 -> men's table; rows with any other prog_id -> women's table.
mens_kz <- filter(kz_df, prog_id == 4544)
womens_kz <- filter(kz_df, prog_id != 4544)

# Hamburg: load the raw export, run it through clean_df(), then split by
# prog_id.
hb_df <- read_csv(
  "data/original_data/2009_hamburg.csv",
  col_types = cols()
) %>%
  clean_df()
# prog_id 4547 -> men's table; rows with any other prog_id -> women's table.
mens_hb <- filter(hb_df, prog_id == 4547)
womens_hb <- filter(hb_df, prog_id != 4547)

# London: load the raw export, run it through clean_df(), then split by prog_id.
ld_df <- read_csv(
  "data/original_data/2009_london.csv",
  col_types = cols()
) %>%
  clean_df()
# prog_id 4541 -> men's table; rows with any other prog_id -> women's table.
mens_ld <- filter(ld_df, prog_id == 4541)
womens_ld <- filter(ld_df, prog_id != 4541)

# Yokohama: load the raw export, run it through clean_df(), then split by
# prog_id.
yk_df <- read_csv(
  "data/original_data/2009_yokohama.csv",
  col_types = cols()
) %>%
  clean_df()
# prog_id 4532 -> men's table; rows with any other prog_id -> women's table.
mens_yk <- filter(yk_df, prog_id == 4532)
womens_yk <- filter(yk_df, prog_id != 4532)


# Gold Coast: load the raw export, run it through clean_df(), then split by
# prog_id.
gc_df <- read_csv(
  "data/original_data/2009_gold-coast.csv",
  col_types = cols()
) %>%
  clean_df()
# Both tables are matched on an explicit id here: 4819 for the men's rows and
# 4818 for the women's rows; rows with any other prog_id land in neither table.
mens_gc <- filter(gc_df, prog_id == 4819)
womens_gc <- filter(gc_df, prog_id == 4818)


## Write clean 2009 race files to CSV

__Men's races__

In [20]:
# Write every cleaned men's race table to its own CSV. Each list entry is
# keyed by its output path; iwalk() hands write_csv() the table first and the
# path (the entry's name) second.
iwalk(
  list(
    "data/2009_races/races/Tongyeong_men.csv" = mens_ty,
    "data/2009_races/races/Madrid_men.csv" = mens_md,
    "data/2009_races/races/Washington_men.csv" = mens_ws,
    "data/2009_races/races/Kitz_men.csv" = mens_kz,
    "data/2009_races/races/Hamburg_men.csv" = mens_hb,
    "data/2009_races/races/London_men.csv" = mens_ld,
    "data/2009_races/races/Yokohama_men.csv" = mens_yk,
    "data/2009_races/races/Gold-coast_men.csv" = mens_gc
  ),
  write_csv
)

__Women's races__

In [18]:
# Write every cleaned women's race table to its own CSV. Each list entry is
# keyed by its output path; iwalk() hands write_csv() the table first and the
# path (the entry's name) second.
iwalk(
  list(
    "data/2009_races/races/Tongyeong_women.csv" = womens_ty,
    "data/2009_races/races/Madrid_women.csv" = womens_md,
    "data/2009_races/races/Washington_women.csv" = womens_ws,
    "data/2009_races/races/Kitz_women.csv" = womens_kz,
    "data/2009_races/races/Hamburg_women.csv" = womens_hb,
    "data/2009_races/races/London_women.csv" = womens_ld,
    "data/2009_races/races/Yokohama_women.csv" = womens_yk,
    "data/2009_races/races/Gold-coast_women.csv" = womens_gc
  ),
  write_csv
)

### Create one vs one race results
In order to use the `glicko` rankings system, each $n$-person race needs to be transformed into a dataframe containing the outcomes of the individual one-on-one races between each competitor $i$ and the other $n-1$ competitors. <br>
The `period` of each race also needs to be included in this one-vs-one dataframe, because competitors who go longer without racing will have greater uncertainty around their rankings.

#### One vs one race results for men's 2009 races
Using the `results_df_wrapper` function from `utils.R`

In [21]:
# Build the one-vs-one table for each men's race via results_df_wrapper().
# The second argument runs 1-8 across the races; presumably it is the glicko
# rating period for that race -- TODO confirm against the helper's definition.
ty_mens_ovo <- mens_ty %>% results_df_wrapper(1)
md_mens_ovo <- mens_md %>% results_df_wrapper(2)
ws_mens_ovo <- mens_ws %>% results_df_wrapper(3)
kz_mens_ovo <- mens_kz %>% results_df_wrapper(4)
hb_mens_ovo <- mens_hb %>% results_df_wrapper(5)
ld_mens_ovo <- mens_ld %>% results_df_wrapper(6)
yk_mens_ovo <- mens_yk %>% results_df_wrapper(7)
gc_mens_ovo <- mens_gc %>% results_df_wrapper(8)

In [25]:
# Write every men's one-vs-one table to its own CSV. Each list entry is keyed
# by its output path; iwalk() hands write_csv() the table first and the path
# (the entry's name) second.
iwalk(
  list(
    "data/2009_races/ovo_races/Tongyeong_men.csv" = ty_mens_ovo,
    "data/2009_races/ovo_races/Madrid_men.csv" = md_mens_ovo,
    "data/2009_races/ovo_races/Washington_men.csv" = ws_mens_ovo,
    "data/2009_races/ovo_races/Kitz_men.csv" = kz_mens_ovo,
    "data/2009_races/ovo_races/Hamburg_men.csv" = hb_mens_ovo,
    "data/2009_races/ovo_races/London_men.csv" = ld_mens_ovo,
    "data/2009_races/ovo_races/Yokohama_men.csv" = yk_mens_ovo,
    "data/2009_races/ovo_races/Gold-coast_men.csv" = gc_mens_ovo
  ),
  write_csv
)