##### Student Name: Dara Leonard
##### Student ID: 19202478

#### Task 1: Identify one or more suitable web APIs

Two web APIs have been identified to investigate if the weather in a certain location has an impact on the top time in a segment on Strava.

The Strava API is openly available and allows for requests to be made for public data and data specific to an individual athlete. The access token is refreshed daily.

World Weather Online offers an API for historical weather that can be queried by latitude and longitude. This API is no longer freely available, but there is a free 60-day trial upon signing up.

In [100]:
import json
import os

import pandas as pd
import requests
from stravalib import Client

# Credentials are read from the environment when available, so the Strava
# access token (which is refreshed daily) can be rotated without editing this
# file. The literals are kept only as backward-compatible fallbacks.
# NOTE(review): secrets should not live in source control -- consider revoking
# these values and dropping the fallbacks.
strava_client = Client(
    access_token=os.environ.get(
        "STRAVA_ACCESS_TOKEN", "804334a3f703b8e6a78321e68ba45bb0ccd9d0c7"
    )
)
weather_api_key = os.environ.get("WWO_API_KEY", "4a88876129264886896114557202902")

def build_past_weather_request(location, year, month, day):
    """Return the past-weather request URL for one location and date.

    ``location`` is inserted verbatim as the ``q`` query value and must
    already be a string. The date parts are converted with ``str`` and joined
    as ``year-month-day`` (no zero padding is applied). The API key is taken
    from the module-level ``weather_api_key``.
    """
    date_stamp = "-".join([str(year), str(month), str(day)])
    url_parts = [
        "http://api.worldweatheronline.com/premium/v1/past-weather.ashx?key=",
        weather_api_key,
        "&q=",
        location,
        "&format=json&date=",
        date_stamp,
    ]
    return "".join(url_parts)

def build_average_weather_request(location):
    """Return the weather request URL that asks for monthly climate averages.

    ``location`` is inserted verbatim as the ``q`` query value and must
    already be a string. The request carries ``mca=yes`` and uses the
    module-level ``weather_api_key``.
    """
    url_parts = [
        "http://api.worldweatheronline.com/premium/v1/weather.ashx?key=",
        weather_api_key,
        "&q=",
        location,
        "&format=json&mca=yes",
    ]
    return "".join(url_parts)

#### Task 2: Collect data from your chosen API(s)

In [101]:
# Accumulators appended to by execute_weather_request and
# execute_historical_average_climate_request respectively.
json_weather_data = []
historical_average_weather_data = []

# Bounding box passed to the Strava segment explorer, given as its
# south-west and north-east corners (latitude, longitude).
sw_lat, sw_lng = 53.263329, -9.070642
ne_lat, ne_lng = 53.283310, -9.044810

def execute_strava_requests():
    """Return the detailed segment for every running segment in the bounding box.

    The segment explorer is queried once with the module-level bounding box,
    then ``get_segment`` is called for each result, in the order returned.
    """
    bounding_box = [sw_lat, sw_lng, ne_lat, ne_lng]
    explored = strava_client.explore_segments(bounds=bounding_box, activity_type='running')
    detailed_segments = []
    for summary in explored:
        detailed_segments.append(strava_client.get_segment(summary.id))
    return detailed_segments
 
def execute_weather_request(link, timeout=30):
    """Fetch a past-weather response and store its ``weather`` payload.

    The value found under ``data -> weather`` in the decoded JSON response is
    appended to the module-level ``json_weather_data`` list.

    Parameters
    ----------
    link : str
        Fully built request URL.
    timeout : float, optional
        Seconds to wait for the server before giving up. ``requests`` waits
        indefinitely when no timeout is given, so a finite default is used.

    Raises
    ------
    KeyError
        If the decoded response has no ``data`` or ``weather`` entry. Callers
        in this file catch this to retry with a fallback location, so HTTP
        status codes are deliberately not turned into exceptions here.
    """
    response = requests.get(link, timeout=timeout)
    json_res = response.json()['data']['weather']
    json_weather_data.append(json_res)

def execute_historical_average_climate_request(link, timeout=30):
    """Fetch a weather response and store its ``ClimateAverages`` payload.

    The value found under ``data -> ClimateAverages`` in the decoded JSON
    response is appended to the module-level
    ``historical_average_weather_data`` list.

    Parameters
    ----------
    link : str
        Fully built request URL.
    timeout : float, optional
        Seconds to wait for the server before giving up. ``requests`` waits
        indefinitely when no timeout is given, so a finite default is used.

    Raises
    ------
    KeyError
        If the decoded response has no ``data`` or ``ClimateAverages`` entry.
        Callers in this file catch this to retry with a fallback location, so
        HTTP status codes are deliberately not turned into exceptions here.
    """
    response = requests.get(link, timeout=timeout)
    json_res = response.json()['data']["ClimateAverages"]
    historical_average_weather_data.append(json_res)

def execute_api_requests(segment_info):
    """Build the segment table and collect weather data for every segment.

    For each segment the leaderboard is fetched and its first entry is
    recorded as the best effort alongside the segment attributes. Two weather
    look-ups are then made for the segment's start point: the weather on the
    date of that best effort, and the climate averages. Their results are
    appended to the module-level weather lists by the ``execute_*`` helpers.

    If a weather response lacks the expected keys (``KeyError``), the request
    is retried once with the south-west corner of the bounding box. A
    ``KeyError`` from the retry propagates to the caller.

    Parameters
    ----------
    segment_info : iterable
        Detailed segment objects, as returned by ``execute_strava_requests``.

    Returns
    -------
    pandas.DataFrame
        One row per segment, indexed by position in ``segment_info``.

    Raises
    ------
    IndexError
        If a segment's leaderboard has no entries.
    """
    segment_info_columns = ["id", "name", "activity_type", "distance", "start_latitude", "start_longitude", "end_latitude",
           "end_longitude", "city", "state", "country", "hazardous", "map_id", "map_polyline", "effort_count",
           "athlete_count", "star_count", "best_time", "best_time_date"]
    segment_info_df = pd.DataFrame(columns=segment_info_columns)

    # The weather API expects a "latitude,longitude" pair. Adding the two
    # numbers together (as was done previously) yields a single meaningless
    # value rather than a coordinate.
    fallback_location = f"{sw_lat},{sw_lng}"

    for i, segment in enumerate(segment_info):
        segment_leader_board = strava_client.get_segment_leaderboard(segment.id)
        best_entry = segment_leader_board.entries[0]
        best_time_date = best_entry.start_date

        segment_info_df.loc[i] = {
            "id": segment.id,
            "name": segment.name,
            "activity_type": segment.activity_type,
            "distance": segment.distance,
            "start_latitude": segment.start_latitude,
            "start_longitude": segment.start_longitude,
            "end_latitude": segment.end_latitude,
            "end_longitude": segment.end_longitude,
            "city": segment.city,
            "state": segment.state,
            "country": segment.country,
            "hazardous": segment.hazardous,
            "map_id": segment.map.id,
            "map_polyline": segment.map.polyline,
            "effort_count": segment.effort_count,
            "athlete_count": segment.athlete_count,
            "star_count": segment.star_count,
            "best_time": best_entry.elapsed_time,
            "best_time_date": best_time_date,
        }

        segment_location = f"{segment.start_latitude},{segment.start_longitude}"
        best_date_parts = (best_time_date.year, best_time_date.month, best_time_date.day)

        # Weather on the day the best effort was recorded.
        try:
            execute_weather_request(build_past_weather_request(segment_location, *best_date_parts))
        except KeyError:
            execute_weather_request(build_past_weather_request(fallback_location, *best_date_parts))

        # Climate averages for the same location.
        try:
            execute_historical_average_climate_request(build_average_weather_request(segment_location))
        except KeyError:
            execute_historical_average_climate_request(build_average_weather_request(fallback_location))
    return segment_info_df

# Fetch the detailed segments for the configured bounding box, then build the
# segment table; the second call also fills the module-level weather lists.
segment_info_final = execute_strava_requests()
segment_info_df_final = execute_api_requests(segment_info_final)

#### Task 3: Parse the collected data, and store it in an appropriate file format

In [102]:
def write_data_to_file(filename, data_to_write):
    """Serialise ``data_to_write`` as JSON into ``filename``.

    The file is opened for writing as UTF-8 (replacing any existing content).
    Output is indented by four spaces and non-ASCII characters are written
    as-is rather than escaped.
    """
    dump_options = {"ensure_ascii": False, "indent": 4}
    with open(filename, mode='w', encoding='utf-8') as handle:
        json.dump(data_to_write, handle, **dump_options)

# DataFrame.to_json() already returns a JSON *string*. Passing that string to
# json.dump would write it as one double-encoded string literal, so it is
# parsed back into Python objects first and the file holds a real JSON object.
write_data_to_file('strava-data.json', json.loads(segment_info_df_final.to_json()))
write_data_to_file('weather-data.json', json_weather_data)
write_data_to_file('average-weather-data.json', historical_average_weather_data)

#### Task 4: Load and represent the data using an appropriate data structure. Apply any pre-processing steps to clean/filter/combine the data

In [103]:
# Completion marker: emits "Done" when this cell runs.
print("Done")


Done
