In [1]:
import os
import json
import requests
from dotenv import load_dotenv
from tqdm import tqdm
from geopy.geocoders import ArcGIS
from datetime import datetime, timedelta
import pandas as pd

In [2]:
load_dotenv('.env')

weatherbit_key = os.getenv('WEATHERBIT_KEY')

In [3]:
folder_path = 't20s_male_json'
json_files = os.listdir(folder_path)

countries = ['England', 'Australia', 'Afghanistan', 'India', 'New Zealand', 'South Africa', 'West Indies', 'Bangladesh']
top_matches = []
weather_list = []

In [4]:
for match_file in tqdm(json_files):
    if '.json' in match_file:
        with open(os.path.join(folder_path, match_file), 'r') as f:
            content = f.read()
            data = json.loads(content)
            if set(data['info']['teams']).issubset(countries):
                top_matches.append(match_file) 
                weather = {}
                if 'city' in data['info'].keys():
                    weather['city'] = data['info']['city']
                if 'venue' in data['info'].keys():
                    weather['stadium'] = data['info']['venue']
                else:
                    print("Venue not found")
                if 'dates' in data['info'].keys():
                    weather['date'] = data['info']['dates'][0]
                weather_list.append(weather)


100%|██████████| 2604/2604 [00:01<00:00, 1531.74it/s]


In [5]:
def get_next_day(date_str):
    date_obj = datetime.strptime(date_str, '%Y-%m-%d')
    next_day = date_obj + timedelta(days=1)
    return next_day.strftime('%Y-%m-%d')

In [8]:
geolocator = ArcGIS(timeout=5)

weather_reports = []

url = f'https://api.weatherbit.io/v2.0/history/daily?key={weatherbit_key}'

for match_info in tqdm(weather_list):
    stadium = match_info['stadium']
    loc = geolocator.geocode(stadium)
    date = match_info['date']
    next_date = get_next_day(date)
    match = f'&lat={loc.latitude}&lon={loc.longitude}&start_date={date}&end_date={next_date}'
    weather_info = requests.get(url+match)
    weather_info = weather_info.json()
    weather_info['venue'] = stadium
    weather_info['date'] = date
    weather_reports.append(weather_info)

  0%|          | 0/468 [00:00<?, ?it/s]

100%|██████████| 468/468 [08:57<00:00,  1.15s/it]


In [9]:
flattened_data = []
for report in weather_reports:
    for data_entry in report['data']:
        combined_entry = {**report, **data_entry}
        combined_entry.pop('data', None)
        flattened_data.append(combined_entry)

df = pd.DataFrame(flattened_data)

In [10]:
df.to_csv('weather_reports.csv')

In [11]:
df.head()

Unnamed: 0,city_id,city_name,country_code,lat,lon,sources,state_code,station_id,timezone,venue,...,solar_rad,t_dhi,t_dni,t_ghi,t_solar_rad,temp,ts,wind_dir,wind_gust_spd,wind_spd
0,2186313,Napier,NZ,-39.502139,176.911751,"[933730-99999, imerg, era5, sat, radar, snodas]",F2,933730-99999,Pacific/Auckland,McLean Park,...,94.0,1463,11564,9194,2267.0,18.6,1483354800,192.0,13.2,2.3
1,6113406,Prince Rupert,CA,53.97517,-130.69113,"[imerg, era5, sat, radar, snodas]",BC,712200-99999,America/Vancouver,Bay Oval,...,10.0,351,3087,831,250.0,1.7,1483689600,125.0,14.4,6.5
2,6113406,Prince Rupert,CA,53.97517,-130.69113,"[imerg, era5, sat, radar, snodas]",BC,712200-99999,America/Vancouver,Bay Oval,...,18.0,357,3148,859,422.0,-1.1,1483862400,76.0,11.6,6.8
3,2143747,Whittlesea,AU,-37.498999,145.069916,"[948600-99999, ASN00086262, imerg, era5, sat, ...",VIC,948600-99999,Australia/Melbourne,Eden Park,...,305.0,1275,10230,7814,6836.6,15.9,1487250000,190.0,10.4,3.7
4,4722668,Richland Hills,US,32.812897,-97.204765,"[747390-13961, US1TXTN0070, US1TXTN0108, US1TX...",TX,747390-13961,America/Chicago,The Rose Bowl,...,307.0,1338,10646,8639,7372.0,29.2,1498021200,71.0,10.8,3.5


In [None]:
df = pd.read_csv('weather_reports.csv')
print(df.head(1))

   Unnamed: 0  lat   lon            valid_date  max_temp  min_temp  app_max_temp  app_min_temp  max_wind_spd  rh  clouds  weather_reports  
0           0  37.78   -122.4  2023-03-26T00:00:00    15.4     10.4        15.4          10.4           7.7  78      75   {'data': [...}  

