### 2010 Data extraction (API) notebook 

In [1]:
import requests
import pandas as pd
import os
from collections import defaultdict

In order to access the race results from the 2010 WTS season, the ITU triathlon API needed to be queried and parsed appropriately. The API has both a `results` and an `events` endpoint. 
- In order to get a results sheet from the `results` API, you need the specific `event ID` and `program ID` for each race.

- This takes multiple separate calls: first to find the event IDs, then the program IDs, and finally to match them together to query for race results.

#### API call for event IDs of all 2010 WTS races

In [6]:
# Read the API key from the environment so it is never stored in the notebook.
password = os.environ['ITU_API']

# Events endpoint, filtered to category 351 and the 2010 calendar year.
# Each returned record carries the event title and event ID used by later cells.
# NOTE(review): category_id=351 is presumably the WTS category — confirm.
url = "https://api.triathlon.org/v1/events?category_id=351&start_date=2010-01-01&end_date=2010-12-31"

headers = {'apikey': password}

# The timeout stops the cell hanging forever on a stalled connection, and
# raise_for_status() reports HTTP errors (e.g. a rejected API key) here
# instead of as a confusing KeyError on the JSON payload below.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

races_2010 = response.json()['data']

__Each race with associated event ID__

In [7]:
# Map every race title to a list whose first entry is that race's event ID.
season_2010 = defaultdict(list)
for event_record in races_2010:
    title = event_record['event_title']
    season_2010[title].append(event_record['event_id'])

# Display the mapping as the cell output.
season_2010

defaultdict(list,
            {'2010 Dextro Energy Triathlon - ITU World Championship Series Sydney': [34488],
             '2010 Dextro Energy Triathlon - ITU World Championship Series Seoul': [34489],
             '2010 Dextro Energy Triathlon - ITU World Championship Series Madrid': [34490],
             '2010 Dextro Energy Triathlon - ITU World Championship Series Hamburg': [34491],
             '2010 Dextro Energy Triathlon - ITU World Championship Series London': [34492],
             '2010 Dextro Energy Triathlon - ITU World Championship Series Kitzbuehel': [34493],
             '2010 Dextro Energy Triathlon - ITU Triathlon World Championship Grand Final Budapest': [4990]})

__Finding the program IDs and linking them back to the event IDs above__

In [8]:
# Statistics endpoint grouped by event name, program ID and program name.
# Each returned row pairs an event name with one of its program IDs.
url = "https://api.triathlon.org/v1/statistics/results?analysis=count_unique&target_property=event.name&group_by=event.name|program.id|program.name"
headers = {'apikey': password}
# Bound the wait and fail loudly on HTTP errors rather than on a missing JSON key.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
all_events = response.json()['data']['result']

__Each race with associated `event ID` AND `program ID`__

In [9]:
# Attach each program ID to its 2010 race; rows for other events are skipped.
# The membership test keeps this cell idempotent: re-running it no longer
# appends the same program IDs again, which would duplicate the result
# downloads and shift the list positions that later cells rely on.
for event in all_events:
    event_name = event['event.name']
    if event_name not in season_2010:
        continue
    ids = season_2010[event_name]
    # ids[0] is the event ID; the program IDs collected so far start at index 1.
    if event['program.id'] not in ids[1:]:
        ids.append(event['program.id'])
season_2010

defaultdict(list,
            {'2010 Dextro Energy Triathlon - ITU World Championship Series Sydney': [34488,
              5542,
              5543],
             '2010 Dextro Energy Triathlon - ITU World Championship Series Seoul': [34489,
              5589,
              5590],
             '2010 Dextro Energy Triathlon - ITU World Championship Series Madrid': [34490,
              5611,
              5612],
             '2010 Dextro Energy Triathlon - ITU World Championship Series Hamburg': [34491,
              5687,
              5688],
             '2010 Dextro Energy Triathlon - ITU World Championship Series London': [34492,
              5693,
              5984],
             '2010 Dextro Energy Triathlon - ITU World Championship Series Kitzbuehel': [34493,
              5702,
              5703],
             '2010 Dextro Energy Triathlon - ITU Triathlon World Championship Grand Final Budapest': [4990,
              5859,
              5860]})

#### With the event ID and program IDs for each race, query and parse the results API

In [10]:
# Download the results of every program of every race in season_2010 and
# collect one DataFrame per program, in iteration order, in race_list.
race_list = []
for event_ids in season_2010.values():
    # The first entry is the event ID; the rest are that event's program IDs.
    event_id = event_ids[0]
    for program_id in event_ids[1:]:
        url = f"https://api.triathlon.org/v1/events/{event_id}/programs/{program_id}/results"
        # Bound the wait and stop on HTTP errors instead of parsing an error payload.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        race_result = response.json()['data']['results']

        # One record (row) per athlete; key order fixes the column order.
        records = []
        for athlete in race_result:
            # assumes every result carries at least five splits, ordered
            # swim, T1, bike, T2, run — TODO confirm against the API docs
            splits = athlete['splits']
            records.append({
                'program_id': program_id,
                'athlete_id': athlete['athlete_id'],
                'athlete_first': athlete['athlete_first'],
                'athlete_last': athlete['athlete_last'],
                'nationality': athlete['athlete_noc'],
                'start_number': athlete['start_num'],
                'swim': splits[0],
                't1': splits[1],
                'bike': splits[2],
                't2': splits[3],
                'run': splits[4],
                'position': athlete['position'],
                'total_time': athlete['total_time'],
            })

        race_list.append(pd.DataFrame(records))

__Write the men's 2010 races to file__

In [15]:
# Position in race_list of the results written out as each city's men's file.
# NOTE(review): the odd positions are treated as the men's races, which relies
# on the order in which program IDs were collected — confirm.
men_race_index = {
    "Sydney": 1,
    "Seoul": 3,
    "Madrid": 5,
    "Hamburg": 7,
    "London": 9,
    "Kitz": 11,
    "Budapest": 13,
}

# Create the output folder first so the export also works on a fresh checkout.
output_dir = "2010_races/races"
os.makedirs(output_dir, exist_ok=True)

for city, index in men_race_index.items():
    race_list[index].to_csv(f"{output_dir}/{city}_men.csv", index=False)