In [2]:
import pandas as pd
import numpy as np

In [3]:
import requests

def get_weather(latitude, longitude, date, time, variables, timeout=30):
    """Fetch hourly archive weather from Open-Meteo for one location and one UTC hour.

    Args:
        latitude: Value sent as the ``latitude`` query parameter.
        longitude: Value sent as the ``longitude`` query parameter.
        date: Day to query, "YYYY-MM-DD"; used as both start and end date.
        time: Hour to pick, "HH:MM". It is compared verbatim (as
            "<date>T<time>") with the timestamps in the response, so a value
            that is not one of them yields no match.
        variables: Names of the hourly variables to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping each requested variable to its value at the matching
        timestamp, or None when no timestamp in the response matches.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status returned by the server.
        KeyError: If the response body lacks the "hourly" section or one of
            the requested variables.
    """
    url = f"https://archive-api.open-meteo.com/v1/archive?latitude={latitude}&longitude={longitude}&hourly={','.join(variables)}&start_date={date}&end_date={date}&timezone=UTC"

    # Without a timeout a single stalled connection blocks the caller forever,
    # which matters when this function is applied to thousands of rows.
    response = requests.get(url, timeout=timeout)
    # Fail here, with the HTTP status in the message, instead of later with a
    # confusing KeyError / JSON error on an error payload.
    response.raise_for_status()
    hourly = response.json()["hourly"]

    datetime_str = f"{date}T{time}"
    try:
        position = hourly["time"].index(datetime_str)
    except ValueError:
        # The requested hour is not among the returned timestamps.
        return None

    # Extract the requested variables for the specific time
    return {var: hourly[var][position] for var in variables}

# Example usage: a single lookup for one location, one date and one hour.
latitude = -34.92
longitude = 138.6
date = "2017-01-01"
time = "06:00"  # matched as "<date>T<time>" against the hourly timestamps in the response
# NOTE: `variables` is also read as a global by fetch_weather_data() further down.
variables = ["temperature_2m", "relative_humidity_2m", "windspeed_10m", "apparent_temperature"]

# get_weather returns a dict of the requested variables, or None if the hour is not found.
weather_data = get_weather(latitude, longitude, date, time, variables)
print(weather_data)

{'temperature_2m': 20.1, 'relative_humidity_2m': 62, 'windspeed_10m': 28.7, 'apparent_temperature': 16.8}


In [4]:
# Load the match table; later cells read its Date, time, latitude and longitude columns.
matches = pd.read_csv('../data/all_years_nc_tc_elo_aggr_stats/matches.csv')

In [5]:
# Preview the four columns that drive the weather lookup.
matches[['Date', 'time', 'latitude', 'longitude']].head()

Unnamed: 0,Date,time,latitude,longitude
0,2017-12-31,07:25,-27.468968,153.023499
1,2017-12-31,09:20,-27.468968,153.023499
2,2018-01-01,08:35,18.521374,73.854507
3,2018-01-01,04:00,-27.468968,153.023499
4,2018-01-01,02:10,-27.468968,153.023499


In [6]:
from datetime import datetime, timedelta

def convert_utc_plus1_to_utc(date, time):
    """Shift a UTC+1 date/time pair back by one hour to express it in UTC.

    Args:
        date: Calendar date formatted as "YYYY-MM-DD".
        time: Clock time formatted as "HH:MM".

    Returns:
        A ``(date, time)`` tuple of strings in the same two formats. The date
        moves to the previous day when the input time is earlier than 01:00.
    """
    utc_moment = datetime.strptime(f"{date} {time}", "%Y-%m-%d %H:%M") - timedelta(hours=1)
    return tuple(utc_moment.strftime(pattern) for pattern in ("%Y-%m-%d", "%H:%M"))

def round_time_to_nearest_hour(time_str):
    """Round an "HH:MM" clock time to the nearest full hour.

    Minutes 0-29 round down and minutes 30-59 round up. Only a clock time is
    returned: inputs from "23:30" onward give "00:00", and nothing in the
    result indicates that this hour falls on the following day.

    Args:
        time_str: Clock time formatted as "HH:MM".

    Returns:
        The rounded time as an "HH:00" string.
    """
    parsed = datetime.strptime(time_str, "%H:%M")
    on_the_hour = parsed.replace(minute=0)
    rounded = on_the_hour + timedelta(hours=1) if parsed.minute >= 30 else on_the_hour
    return rounded.strftime("%H:00")

In [7]:
# Apply get_weather to each row
def fetch_weather_data(row):
    """Look up the weather for one row of the match table.

    The row's Date/time pair is converted from UTC+1 to UTC, rounded to the
    nearest full hour, and passed to get_weather together with the row's
    coordinates and the notebook-level ``variables`` list.

    Args:
        row: Mapping-like row providing "Date", "time", "latitude" and
            "longitude".

    Returns:
        Whatever get_weather returns for that location and hour.
    """
    date_utc, time_utc = convert_utc_plus1_to_utc(row["Date"], row["time"])
    rounded_time_utc = round_time_to_nearest_hour(time_utc)

    # round_time_to_nearest_hour() returns only a clock time, so 23:30-23:59
    # comes back as "00:00". That hour belongs to the following day; a rounded
    # hour smaller than the original hour is the sign of that wrap-around.
    # Without this step the lookup would hit midnight of the same day, almost
    # 24 hours too early.
    if int(rounded_time_utc[:2]) < int(time_utc[:2]):
        next_day = datetime.strptime(date_utc, "%Y-%m-%d") + timedelta(days=1)
        date_utc = next_day.strftime("%Y-%m-%d")

    return get_weather(row["latitude"], row["longitude"], date_utc, rounded_time_utc, variables)

# For now just 100 rows to see if it works
#matches = matches.head(100)
# 
# weather_data = matches.apply(fetch_weather_data, axis=1)
# weather_df = pd.DataFrame(weather_data.tolist())
# 
# # Merge the weather data back into the original DataFrame
# matches = pd.concat([matches, weather_df], axis=1)
# print(matches[['Date', 'time', 'latitude', 'longitude', 'temperature_2m', "relative_humidity_2m", "windspeed_10m", "apparent_temperature"]])

In [9]:
def process_partial_chunk(df, func, start_index, chunk_size, output_dir="../data/weather_chunks"):
    """Apply ``func`` to one positional slice of ``df`` and checkpoint the result.

    Args:
        df: DataFrame to take the rows from.
        func: Callable applied to each row (``axis=1``); expected to return a
            dict of column values, or None when it has nothing for the row.
        start_index: Position of the first row of the slice.
        chunk_size: Maximum number of rows in the slice.
        output_dir: Directory for the checkpoint file; created if missing.

    Returns:
        A DataFrame indexed like the slice, with one column per key returned
        by ``func``. Rows for which ``func`` returned None hold NaN. An empty
        slice yields an empty DataFrame and writes no file.
    """
    import os  # local import keeps this cell runnable on its own

    end_index = min(start_index + chunk_size, len(df))
    chunk = df.iloc[start_index:end_index]
    if chunk.empty:
        # apply() on an empty frame does not return a Series of results, so
        # there is nothing to save or expand.
        print(f"No rows to process from index {start_index}.")
        return pd.DataFrame(index=chunk.index)

    # Create the target directory *before* the per-row work, so a missing
    # folder cannot throw away a finished chunk at save time.
    os.makedirs(output_dir, exist_ok=True)
    print(f"Processing rows {start_index} to {end_index}...")

    # Apply the function to the chunk
    chunk_results = chunk.apply(func, axis=1)
    # The raw per-row results are saved unchanged, so the file layout stays
    # the same as for chunks written earlier.
    chunk_results.to_csv(os.path.join(output_dir, f"{start_index}_{end_index}.csv"))

    # A None among the records breaks the list-of-dicts expansion; an empty
    # record gives that row NaN in every column instead.
    records = [{} if record is None else record for record in chunk_results.tolist()]
    return pd.DataFrame(records, index=chunk.index)

### Chunk 1 (rows 0–4999): I have already run this on my machine

In [None]:
# First chunk: positions 0-4999 of `matches`; fetch_weather_data issues one HTTP request per row.
weather_data_chunk_1 = process_partial_chunk(matches, fetch_weather_data, start_index=0, chunk_size=5000)

Processing rows 0 to 5000...


### Chunk 2 (starting at row 5000): run this one on your machine

In [None]:
# Second chunk: up to 5000 rows of `matches` starting at position 5000; one HTTP request per row.
weather_data_chunk_2 = process_partial_chunk(matches, fetch_weather_data, start_index=5000, chunk_size=5000)