# Sampling data for testing

In [None]:
import pandas as pd
import requests
import os

In [None]:
file_csv = '~/code/harlqeuinht/which_horse/raw_data/combined_flat2_csv.csv'

In [None]:
master_df = pd.read_csv(file_csv)

In [None]:
# Fix the seed so the 10-row sample (and every result derived from it) is
# reproducible after a kernel restart.
df = master_df.sample(n=10, random_state=42)

In [None]:
df

In [None]:
# Remove every column whose name contains a 6, 7 or 8, except the bet365 odds
# column (its name matches the pattern because of the '6' in 'bet365').
digit_named_columns = df.filter(regex='[678]').columns
columns_to_drop = digit_named_columns.drop('bet365_odds')
df = df.drop(columns=columns_to_drop)

Create a DataFrame which stores each meeting location's name together with its latitude and longitude. Note that this has been done on the sample dataset, not the full dataset.

# Locations API

In [None]:
def get_co_ordinates(df):
    """Geocode every unique racecourse in ``df`` with the Google Geocoding API.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``meeting_name`` column.

    Returns
    -------
    pandas.DataFrame
        One row per unique ``meeting_name`` (sorted), with the columns
        ``meeting_name``, ``location_names_cleaned``, ``lat`` and ``lng``.

    Raises
    ------
    KeyError
        If the ``KEY`` environment variable (the API key) is not set.
    ValueError
        If the API returns no match for a racecourse.
    """
    base_url = 'https://maps.googleapis.com/maps/api/geocode/json'
    # Read the key once instead of on every loop iteration.
    api_key = os.environ['KEY']

    # Create a frame with one row per unique racecourse name
    location_names = sorted(df['meeting_name'].unique())
    locations_df = pd.DataFrame(location_names, columns=['meeting_name'])

    # Clean racecourse names so they are recognisable by the geolocation API
    locations_df['location_names_cleaned'] = (
        locations_df['meeting_name']
        .replace({'BANGOR-ON-DEE': 'BANGOR', 'NEWMARKET (JULY)': 'NEWMARKET'})
        .str.replace(' ', '_', regex=False)
    )

    latitudes, longitudes = [], []
    for location in locations_df['location_names_cleaned']:
        # Let requests build and URL-encode the query string.
        response = requests.get(
            base_url,
            params={
                'address': f'{location} racecourse',
                'components': 'country:GB',
                'key': api_key,
            },
            timeout=10,
        )
        response.raise_for_status()
        results = response.json()

        matches = results.get('results') or []
        if not matches:
            # Fail with the offending name and API status rather than a bare
            # IndexError. The API key is deliberately left out of the message.
            raise ValueError(
                f"No geocoding result for {location!r} "
                f"(API status: {results.get('status')!r})"
            )

        coordinates = matches[0]['geometry']['location']
        latitudes.append(coordinates['lat'])
        longitudes.append(coordinates['lng'])

    # Store the returned latitude and longitude data in the respective columns
    locations_df['lat'] = latitudes
    locations_df['lng'] = longitudes
    return locations_df

# Geocode the racecourses present in the sample (one API request per unique meeting_name).
locations_df = get_co_ordinates(df)

In [None]:
def get_unique_races(df):
    """Return the distinct (date, meeting_name) pairs found in ``df``.

    The result keeps only the ``date`` and ``meeting_name`` columns and the
    original index labels of the first occurrence of each pair.
    """
    race_day_columns = df[['date', 'meeting_name']]
    return race_day_columns.drop_duplicates()

# `test_df` is never defined in this notebook, so the cell failed on a fresh
# kernel; use the sampled `df` that the rest of the notebook works on.
unique_race_days_df = get_unique_races(df)
unique_race_days_df

In [None]:
# Attach each racecourse's geocoding columns to its race days. The left join
# keeps every race day even when no location row matches.
unique_race_days_df = unique_race_days_df.merge(locations_df, how='left', on='meeting_name')
unique_race_days_df

In [None]:
def generate_endpoint(row):
    """Build the Open-Meteo archive URL for a single race day.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``lat``, ``lng`` and ``date``. ``date`` may be a
        ``YYYY-MM-DD`` string or any object exposing ``strftime``.

    Returns
    -------
    str
        URL requesting the daily mean temperature, precipitation sum and
        maximum wind speed (mph) for that single date and location.
    """
    base_url = 'https://archive-api.open-meteo.com/v1/archive'

    date = row['date']
    # Date-like objects would otherwise be rendered with their default string
    # form, which can carry a time component.
    if hasattr(date, 'strftime'):
        date = date.strftime('%Y-%m-%d')

    # Joining the parts avoids the stray '?&' and '&&' separators that the
    # hand-concatenated URL used to contain.
    query = '&'.join([
        f"latitude={row['lat']}",
        f"longitude={row['lng']}",
        f'start_date={date}',
        f'end_date={date}',
        'daily=temperature_2m_mean,precipitation_sum,wind_speed_10m_max',
        'wind_speed_unit=mph',
    ])
    return f'{base_url}?{query}'

# Build one Open-Meteo archive URL per race-day row and store it in a new 'endpoint' column.
unique_race_days_df['endpoint'] = unique_race_days_df.apply(generate_endpoint, axis=1)

In [None]:
unique_race_days_df.head()

In [None]:
def call_weather_api(row):
    """Fetch the daily weather values for one race day.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``endpoint``, the URL to request.

    Returns
    -------
    tuple
        ``(temperature_2m_mean, precipitation_sum, wind_speed_10m_max)`` for
        the first day in the response.

    Raises
    ------
    ValueError
        If the response carries no ``daily`` section (for example when the API
        rejects the request); the message includes the API's ``reason``.
    """
    # A timeout stops one stalled request from hanging the whole apply().
    response = requests.get(row['endpoint'], timeout=30)
    data = response.json()

    daily = data.get('daily')
    if daily is None:
        # Surface the API's explanation instead of an opaque KeyError: 'daily'.
        raise ValueError(
            f"Weather API returned no daily data for {row['endpoint']}: "
            f"{data.get('reason', data)}"
        )

    temp = daily['temperature_2m_mean'][0]
    precipitation = daily['precipitation_sum'][0]
    wind = daily['wind_speed_10m_max'][0]
    return temp, precipitation, wind

# Call the weather API once per race-day row and spread the returned
# (temperature, precipitation, wind) tuple over three new columns.
weather_columns = ['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max']
unique_race_days_df[weather_columns] = unique_race_days_df.apply(
    call_weather_api, axis=1, result_type='expand'
)

In [None]:
unique_race_days_df.head()

# ALTOGETHER NOW

In [None]:
def get_weather_final(df):
    """Return ``df`` with daily weather columns added to every row.

    Each unique racecourse is geocoded with the Google Geocoding API, each
    unique (date, meeting_name) pair is looked up in the Open-Meteo archive
    API, and the results are left-joined back onto ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date`` and ``meeting_name`` columns.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus ``temperature_2m_mean``, ``precipitation_sum`` and
        ``wind_speed_10m_max``. The intermediate ``endpoint``, ``lat``,
        ``lng`` and ``location_names_cleaned`` columns are dropped.

    Raises
    ------
    KeyError
        If the ``KEY`` environment variable (Google API key) is not set.
    ValueError
        If a racecourse cannot be geocoded or the weather API returns no
        ``daily`` data for a race day.
    """
    weather_columns = ['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max']

    def get_co_ordinates(df):
        """Geocode each unique ``meeting_name``; one output row per name."""
        base_url = 'https://maps.googleapis.com/maps/api/geocode/json'
        # Read the key once instead of on every loop iteration.
        api_key = os.environ['KEY']

        location_names = sorted(df['meeting_name'].unique())
        locations_df = pd.DataFrame(location_names, columns=['meeting_name'])
        # Clean racecourse names so the geolocation API recognises them.
        locations_df['location_names_cleaned'] = (
            locations_df['meeting_name']
            .replace({'BANGOR-ON-DEE': 'BANGOR', 'NEWMARKET (JULY)': 'NEWMARKET'})
            .str.replace(' ', '_', regex=False)
        )

        latitudes, longitudes = [], []
        for location in locations_df['location_names_cleaned']:
            # Let requests build and URL-encode the query string.
            response = requests.get(
                base_url,
                params={
                    'address': f'{location} racecourse',
                    'components': 'country:GB',
                    'key': api_key,
                },
                timeout=10,
            )
            response.raise_for_status()
            results = response.json()

            matches = results.get('results') or []
            if not matches:
                # Name the failing racecourse instead of raising a bare
                # IndexError. The API key is kept out of the message.
                raise ValueError(
                    f"No geocoding result for {location!r} "
                    f"(API status: {results.get('status')!r})"
                )

            coordinates = matches[0]['geometry']['location']
            latitudes.append(coordinates['lat'])
            longitudes.append(coordinates['lng'])

        locations_df['lat'] = latitudes
        locations_df['lng'] = longitudes
        return locations_df

    def get_unique_races(df):
        """Return the distinct (date, meeting_name) pairs in ``df``."""
        return df[['date', 'meeting_name']].drop_duplicates()

    def generate_endpoint(row):
        """Build the Open-Meteo archive URL for one race-day row."""
        base_url = 'https://archive-api.open-meteo.com/v1/archive'
        date = row['date']
        # Date-like objects would otherwise be rendered with their default
        # string form, which can carry a time component.
        if hasattr(date, 'strftime'):
            date = date.strftime('%Y-%m-%d')
        # Joining the parts avoids stray '?&' / '&&' separators.
        query = '&'.join([
            f"latitude={row['lat']}",
            f"longitude={row['lng']}",
            f'start_date={date}',
            f'end_date={date}',
            'daily=temperature_2m_mean,precipitation_sum,wind_speed_10m_max',
            'wind_speed_unit=mph',
        ])
        return f'{base_url}?{query}'

    def call_weather_api(row):
        """Request ``row['endpoint']`` and return (temp, precipitation, wind)."""
        response = requests.get(row['endpoint'], timeout=30)
        data = response.json()
        daily = data.get('daily')
        if daily is None:
            # Surface the API's explanation instead of KeyError: 'daily'.
            raise ValueError(
                f"Weather API returned no daily data for {row['endpoint']}: "
                f"{data.get('reason', data)}"
            )
        return (
            daily['temperature_2m_mean'][0],
            daily['precipitation_sum'][0],
            daily['wind_speed_10m_max'][0],
        )

    locations_df = get_co_ordinates(df)

    # One row per race day, enriched with the racecourse coordinates.
    unique_race_days_df = get_unique_races(df)
    unique_race_days_df = pd.merge(unique_race_days_df, locations_df, how='left', on='meeting_name')

    unique_race_days_df['endpoint'] = unique_race_days_df.apply(generate_endpoint, axis=1)
    unique_race_days_df[weather_columns] = unique_race_days_df.apply(
        call_weather_api, axis=1, result_type='expand'
    )

    # Broadcast the per-race-day weather back onto every original row.
    updated_df = pd.merge(df, unique_race_days_df, on=['date', 'meeting_name'], how='left')
    updated_df = updated_df.drop(columns=['endpoint', 'lat', 'lng', 'location_names_cleaned'])
    return updated_df

In [None]:
get_weather_final(df)

In [None]:
# Scratch check: query the Open-Meteo archive directly for one week at a
# fixed location and print the raw JSON response.
base_url = 'https://archive-api.open-meteo.com/v1/archive?'
longitude = '-1.597'
latitude = '52.279'
start_date = '2020-01-01'
end_date = '2020-01-07'
params = 'daily=temperature_2m_mean,precipitation_sum,rain_sum,wind_speed_10m_max,wind_direction_10m_dominant'
# base_url already ends with '?', so the first parameter must not start with '&'.
endpoint = f'{base_url}latitude={latitude}&longitude={longitude}&start_date={start_date}&end_date={end_date}&{params}'

# A timeout keeps a stalled request from hanging the kernel.
print(requests.get(endpoint, timeout=30).json())


In [None]:
endpoint

In [None]:
# get_co_ordinates() stores the coordinates under 'lat' / 'lng'. The
# 'latitude' / 'longitude' keys do not exist on locations_df and raised KeyError.
latitude = locations_df['lat']
longitude = locations_df['lng']

In [None]:
# This code works and returns a 200 code for current weather data
response = requests.get('http://api.weatherapi.com/v1/current.json?key=581fec608fba4a5699790722240703&dt=2020-01-01&q=London')
response

In [None]:
print(response.json())

In [None]:
# Confirm the API key is configured without echoing the secret itself into
# the saved notebook output.
'KEY' in os.environ

# Getting the weather data

In [2]:
import pandas as pd
import requests
import os
from get_weather_function import get_weather_data
from pipeline_cleaning import clean_data

# Load the merged raw data (path is relative, so it depends on the notebook's working directory).
file_csv = '../raw_data/merge_dfs.csv'
df = pd.read_csv(file_csv)
# clean_data receives a copy, so `df` itself is not the object passed in.
clean_df = clean_data(df.copy())

  df = pd.read_csv(file_csv)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['finish_position'].fillna(df['Place'], inplace=True)
 'F' 'PU' 'PU' 'PU' 'PU' 'PU' 'PU' 'F' 'PU' 'PU' 'BD' 'F' 'RR' 'PU' 'PU'
 'PU' 'PU' 'PU' 'PU' 'RO' 'PU' 'PU' 'UR' 'UR' 'PU' 'PU' 'BD' 'UR' 'PU'
 'PU' 'PU' 'PU' 'UR' 'PU' 'PU' 'UR' 'UR' 'UR' 'PU' 'PU' 'PU' 'BD' 'F' 'PU'
 'PU' 'UR' 'UR' 'PU' 'UR' 'PU' 'PU' 'PU' 'BD' 'BD' 'F' 'PU' 'PU' '4' 'UR'
 'RO' 'PU' 'PU' 'DSQ' 'PU' 'UR' 'PU' 'UR' 'PU' 'F' 'UR' 'DSQ' 'PU' 'PU'
 'RO' 'PU' 'PU' 'F' 'PU' 'PU' 'PU' 'RO' 'F' 'PU' 'RR' 'PU' 'PU' 'PU' 'PU'
 'PU' 'UR' 'PU' 'F' 'PU' 'PU' 'UR' 'F' 'PU' 'PU' 'PU' 'UR' 'UR' 'UR' 'PU'
 'UR' 'PU' 'PU' 'P

In [4]:
clean_df[['meeting_name', 'date']].drop_duplicates()

Unnamed: 0,meeting_name,date
54106,NEWCASTLE,2021-08-20
59709,BEVERLEY,2021-09-15
63559,REDCAR,2021-10-02
65634,NOTTINGHAM,2021-10-13
24837,REDCAR,2021-04-12
...,...,...
57647,STRATFORD,2021-09-04
2593,CARLISLE,2020-10-15
22310,HAYDOCK,2021-03-24
162840,NEWTON ABBOT,2023-07-07


In [5]:
weather_df = get_weather_data(clean_df)
weather_df

KeyError: 'daily'