In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
import requests

def get_weather(latitude, longitude, date, time, variables, timeout=30):
    """Fetch hourly weather values for one location and one hour.

    Queries the Open-Meteo archive endpoint for a single day (``date`` is sent
    as both ``start_date`` and ``end_date``, with ``timezone=UTC``) and picks
    the hourly entry whose timestamp equals ``f"{date}T{time}"``.

    Parameters
    ----------
    latitude, longitude :
        Coordinates interpolated into the request URL.
    date : str
        Day to query, e.g. ``"2017-01-01"``.
    time : str
        Hour to select, e.g. ``"06:00"``; it must match the API's hourly
        timestamps exactly once prefixed with ``date`` and ``"T"``.
    variables : list of str
        Hourly variable names; they are comma-joined into the ``hourly``
        query parameter and used as keys of the returned dict.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot
        block a long batch run forever.

    Returns
    -------
    dict or None
        ``{variable: value}`` for the requested hour, or ``None`` when the
        request fails, the response has no ``"hourly"`` payload, or the
        requested timestamp is not among the returned hours.
    """
    url = f"https://archive-api.open-meteo.com/v1/archive?latitude={latitude}&longitude={longitude}&hourly={','.join(variables)}&start_date={date}&end_date={date}&timezone=UTC"

    # Network failures and non-JSON bodies are reported the same way as an
    # API-level error: log and return None, so one bad row does not abort
    # the caller's whole batch.
    try:
        response = requests.get(url, timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(url, exc)
        return None

    hourly = data.get("hourly") if isinstance(data, dict) else None
    if not hourly:
        print(url, data)
        return None

    datetime_str = f"{date}T{time}"
    try:
        position = hourly["time"].index(datetime_str)
    except ValueError:
        # The requested hour is not part of the returned series.
        return None

    # Extract the requested variables for that specific hour.
    return {var: hourly[var][position] for var in variables}

# Example usage: a single request for a known location and hour.
# The names stay at module level because `variables` is read as a global
# by fetch_weather_data further down the notebook.
latitude, longitude = -34.92, 138.6
date, time = "2017-01-01", "06:00"
variables = [
    "temperature_2m",
    "relative_humidity_2m",
    "windspeed_10m",
    "apparent_temperature",
]

weather_data = get_weather(latitude, longitude, date, time, variables)
print(weather_data)

{'temperature_2m': 20.1, 'relative_humidity_2m': 62, 'windspeed_10m': 28.7, 'apparent_temperature': 16.8}


In [3]:
# Load the match table; later cells read its Date, time, latitude and longitude columns.
matches = pd.read_csv('../data/all_years_nc_tc_elo_aggr_stats/matches.csv')

In [4]:
# Preview the four columns that are used to build each weather request.
matches.head()[['Date', 'time', 'latitude', 'longitude']]

Unnamed: 0,Date,time,latitude,longitude
0,2017-12-31,07:25,-27.468968,153.023499
1,2017-12-31,09:20,-27.468968,153.023499
2,2018-01-01,08:35,18.521374,73.854507
3,2018-01-01,04:00,-27.468968,153.023499
4,2018-01-01,02:10,-27.468968,153.023499


In [5]:
from datetime import datetime, timedelta

def convert_utc_plus1_to_utc(date, time):
    """Shift a UTC+1 wall-clock date/time back by one hour.

    Parameters
    ----------
    date : str
        Calendar date formatted as ``%Y-%m-%d``.
    time : str
        Time of day formatted as ``%H:%M``.

    Returns
    -------
    tuple of (str, str)
        The shifted date (``%Y-%m-%d``) and time (``%H:%M``). The date moves
        to the previous day when the input time is earlier than 01:00.
    """
    local_moment = datetime.strptime(f"{date} {time}", "%Y-%m-%d %H:%M")
    utc_moment = local_moment - timedelta(hours=1)
    utc_date, utc_time = utc_moment.strftime("%Y-%m-%d %H:%M").split(" ")
    return utc_date, utc_time

def round_time_to_nearest_hour(time_str):
    """Round an ``%H:%M`` time string to the nearest full hour.

    Minutes of 30 or more round up, anything below rounds down.

    Caveat: only a time of day is returned, so rounding up from 23:30 or
    later gives ``"00:00"`` with no indication that the day has advanced.

    Parameters
    ----------
    time_str : str
        Time of day formatted as ``%H:%M``.

    Returns
    -------
    str
        The rounded time formatted as ``HH:00``.
    """
    parsed = datetime.strptime(time_str, "%H:%M")
    bump = 1 if parsed.minute >= 30 else 0
    # Wrap around midnight the same way a clock does.
    rounded_hour = (parsed.hour + bump) % 24
    return f"{rounded_hour:02d}:00"

In [6]:
# Apply get_weather to each row
def fetch_weather_data(row):
    """Look up the weather for one match row.

    The row's ``Date``/``time`` are shifted from UTC+1 to UTC, rounded to the
    nearest full hour, and passed to ``get_weather`` together with the row's
    ``latitude``/``longitude`` and the module-level ``variables`` list.

    NOTE(review): this treats every match time as UTC+1 regardless of venue;
    confirm that matches the source data.

    Parameters
    ----------
    row : mapping-like
        Must provide ``"Date"`` (``%Y-%m-%d``), ``"time"`` (``%H:%M``),
        ``"latitude"`` and ``"longitude"``.

    Returns
    -------
    dict or None
        Whatever ``get_weather`` returns for the rounded UTC hour.
    """
    date_utc, time_utc = convert_utc_plus1_to_utc(row["Date"], row["time"])

    # Round on the full timestamp rather than on the time of day alone, so
    # that 23:30-23:59 UTC becomes 00:00 of the NEXT day instead of 00:00 of
    # the same day (which would be almost 24 hours too early).
    moment_utc = datetime.strptime(f"{date_utc} {time_utc}", "%Y-%m-%d %H:%M")
    rounded_utc = (moment_utc + timedelta(minutes=30)).replace(minute=0)

    return get_weather(
        row["latitude"],
        row["longitude"],
        rounded_utc.strftime("%Y-%m-%d"),
        rounded_utc.strftime("%H:00"),
        variables,
    )

# For now just 100 rows to see if it works
#matches = matches.head(100)
# 
# weather_data = matches.apply(fetch_weather_data, axis=1)
# weather_df = pd.DataFrame(weather_data.tolist())
# 
# # Merge the weather data back into the original DataFrame
# matches = pd.concat([matches, weather_df], axis=1)
# print(matches[['Date', 'time', 'latitude', 'longitude', 'temperature_2m', "relative_humidity_2m", "windspeed_10m", "apparent_temperature"]])

In [7]:
def process_partial_chunk(df, func, start_index, chunk_size):
    """Apply ``func`` to one positional slice of ``df`` and persist the raw results.

    Parameters
    ----------
    df : pandas.DataFrame
        Full table; rows ``[start_index, start_index + chunk_size)`` are
        processed, clamped to ``len(df)``.
    func : callable
        Called once per row (``axis=1``). Expected to return a dict of
        column -> value, or ``None`` when no data is available for the row.
    start_index : int
        Positional index of the first row of the chunk.
    chunk_size : int
        Maximum number of rows in the chunk.

    Returns
    -------
    pandas.DataFrame
        One row per processed input row, carrying the chunk's original index.
        Rows for which ``func`` did not return a dict are all-NaN.

    Side effects
    ------------
    Writes the raw per-row results to
    ``../data/weather_chunks/{start_index}_{end_index}.csv``, creating the
    directory if it does not exist, and prints progress messages.
    """
    end_index = min(start_index + chunk_size, len(df))
    chunk = df.iloc[start_index:end_index]
    print(f"Processing rows {start_index} to {end_index}...")

    if chunk.empty:
        # Nothing to fetch (start_index is at or past the end of df).
        return pd.DataFrame(index=chunk.index)

    # Apply the function to the chunk
    chunk_results = chunk.apply(func, axis=1)

    out_path = f"../data/weather_chunks/{start_index}_{end_index}.csv"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    chunk_results.to_csv(out_path)

    # A failed lookup yields None; turn it into an empty record so the frame
    # can still be built (as an all-NaN row) instead of raising or collapsing
    # into a single column of raw objects.
    records = [rec if isinstance(rec, dict) else {} for rec in chunk_results.tolist()]
    result_df = pd.DataFrame(records, index=chunk.index)
    print(f"results saved to {out_path}")
    return result_df

### To odpaliłem u siebie

In [16]:
# Chunk 1: positional rows [0, 500) of `matches`.
weather_data_chunk_1 = process_partial_chunk(matches, fetch_weather_data, start_index=0, chunk_size=500)

Processing rows 0 to 500...


In [17]:
# Chunk 2: positional rows [500, 1000) of `matches`.
weather_data_chunk_2 = process_partial_chunk(matches, fetch_weather_data, start_index=500, chunk_size=500)

Processing rows 500 to 1000...


In [19]:
# Chunk 3: positional rows [1000, 1500) of `matches`.
weather_data_chunk_3 = process_partial_chunk(matches, fetch_weather_data, start_index=1000, chunk_size=500)

Processing rows 1000 to 1500...
results saved to ../data/weather_chunks/{start_index}_{end_index}.csv


In [21]:
# Chunk 4: positional rows [1500, 2000) of `matches`.
weather_data_chunk_4 = process_partial_chunk(matches, fetch_weather_data, start_index=1500, chunk_size=500)

Processing rows 1500 to 2000...
results saved to ../data/weather_chunks/1500_2000.csv


In [22]:
# Chunk 5: positional rows [2000, 2500) of `matches`.
weather_data_chunk_5 = process_partial_chunk(matches, fetch_weather_data, start_index=2000, chunk_size=500)

Processing rows 2000 to 2500...
results saved to ../data/weather_chunks/2000_2500.csv


In [23]:
# Chunk 6: positional rows [2500, 3000) of `matches`.
weather_data_chunk_6 = process_partial_chunk(matches, fetch_weather_data, start_index=2500, chunk_size=500)

Processing rows 2500 to 3000...
results saved to ../data/weather_chunks/2500_3000.csv


In [24]:
# Chunk 7: positional rows [3000, 3500) of `matches`.
weather_data_chunk_7 = process_partial_chunk(matches, fetch_weather_data, start_index=3000, chunk_size=500)

Processing rows 3000 to 3500...
results saved to ../data/weather_chunks/3000_3500.csv


In [25]:
# Chunk 8: positional rows [3500, 4000) of `matches`.
weather_data_chunk_8 = process_partial_chunk(matches, fetch_weather_data, start_index=3500, chunk_size=500)

Processing rows 3500 to 4000...
results saved to ../data/weather_chunks/3500_4000.csv


In [43]:
# Chunk 9: positional rows [4000, 4500) of `matches`.
weather_data_chunk_9= process_partial_chunk(matches, fetch_weather_data, start_index=4000, chunk_size=500)

Processing rows 4000 to 4500...
results saved to ../data/weather_chunks/4000_4500.csv


In [44]:
# Chunk 10: positional rows [4500, 5000) of `matches`.
weather_data_chunk_10 = process_partial_chunk(matches, fetch_weather_data, start_index=4500, chunk_size=500)

Processing rows 4500 to 5000...
results saved to ../data/weather_chunks/4500_5000.csv


### To odpal u siebie

In [None]:
# Chunk 11: positional rows [5000, 5500) of `matches` (this cell has no recorded execution in the saved notebook).
weather_data_chunk_11 = process_partial_chunk(matches, fetch_weather_data, start_index=5000, chunk_size=500)

### I tak dalej

In [9]:
# Chunk 21: positional rows [10000, 10500) of `matches`.
weather_data_chunk_21 = process_partial_chunk(matches, fetch_weather_data, start_index=10000, chunk_size=500)

Processing rows 10000 to 10500...
results saved to ../data/weather_chunks/10000_10500.csv


In [10]:
# Chunk 22: positional rows [10500, 11000) of `matches`.
weather_data_chunk_22 = process_partial_chunk(matches, fetch_weather_data, start_index=10500, chunk_size=500)

Processing rows 10500 to 11000...
results saved to ../data/weather_chunks/10500_11000.csv


In [11]:
# Chunk 23: positional rows [11000, 11500) of `matches`.
weather_data_chunk_23 = process_partial_chunk(matches, fetch_weather_data, start_index=11000, chunk_size=500)

Processing rows 11000 to 11500...
results saved to ../data/weather_chunks/11000_11500.csv


In [14]:
# Chunk 24: positional rows [11500, 12000) of `matches`.
weather_data_chunk_24 = process_partial_chunk(matches, fetch_weather_data, start_index=11500, chunk_size=500)

Processing rows 11500 to 12000...
results saved to ../data/weather_chunks/11500_12000.csv


In [13]:
# Chunk 25: positional rows [12000, 12500) of `matches`.
weather_data_chunk_25 = process_partial_chunk(matches, fetch_weather_data, start_index=12000, chunk_size=500)

Processing rows 12000 to 12500...
results saved to ../data/weather_chunks/12000_12500.csv


In [15]:
# Chunk 26: positional rows [12500, 13000) of `matches`.
weather_data_chunk_26 = process_partial_chunk(matches, fetch_weather_data, start_index=12500, chunk_size=500)

Processing rows 12500 to 13000...
results saved to ../data/weather_chunks/12500_13000.csv


In [16]:
# Chunk 27: positional rows [13000, 13500) of `matches`.
weather_data_chunk_27 = process_partial_chunk(matches, fetch_weather_data, start_index=13000, chunk_size=500)

Processing rows 13000 to 13500...
results saved to ../data/weather_chunks/13000_13500.csv


In [17]:
# Chunk 28: positional rows [13500, 14000) of `matches`.
weather_data_chunk_28 = process_partial_chunk(matches, fetch_weather_data, start_index=13500, chunk_size=500)

Processing rows 13500 to 14000...
results saved to ../data/weather_chunks/13500_14000.csv


In [18]:
# Chunk 29 (last, partial): starts at row 14000; the end index is clamped to len(matches).
weather_data_chunk_29 = process_partial_chunk(matches, fetch_weather_data, start_index=14000, chunk_size=500)

Processing rows 14000 to 14111...
results saved to ../data/weather_chunks/14000_14111.csv
