In [16]:
%pip install openmeteo-requests numpy pandas matplotlib


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [17]:
import openmeteo_requests
from datetime import datetime
import pandas as pd

In [18]:
# Bangkok coordinates
LATITUDE = 13.7563
LONGTITUDE = 100.5018

START_DATE = "2021-08-01"
END_DATE = datetime.now().strftime("%Y-%m-%d")

RAIN_SUM_THRESHOLD = 1 # mm
PRECIPITATION_HOURS_THRESHOLD = 3 # hours

In [19]:
openmeteo = openmeteo_requests.Client()

url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": LATITUDE,
    "longitude": LONGTITUDE,
    "start_date": START_DATE,
    "end_date": END_DATE,
    "daily": ["rain_sum", "precipitation_hours", "precipitation_sum"],
    "timezone": "Asia/Bangkok",
}
responses = openmeteo.weather_api(url, params=params)

In [20]:
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

Coordinates 13.743409156799316°N 100.49586486816406°E
Elevation 4.0 m asl
Timezone b'Asia/Bangkok'b'GMT+7'
Timezone difference to GMT+0 25200 s


In [21]:
daily = response.Daily()
daily_rain_sum = daily.Variables(0).ValuesAsNumpy()
daily_precipitation_hours = daily.Variables(1).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(2).ValuesAsNumpy()

daily_data = {
    "date": pd.date_range(
        start=pd.to_datetime(daily.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily.Interval()),
        inclusive="left",
    )
}

daily_data["rain_sum"] = daily_rain_sum
daily_data["precipitation_hours"] = daily_precipitation_hours
daily_data["precipitation_sum"] = daily_precipitation_sum

rain_df = pd.DataFrame(data=daily_data)

In [22]:
rain_df.describe()

Unnamed: 0,rain_sum,precipitation_hours,precipitation_sum
count,1371.0,1373.0,1371.0
mean,4.810649,5.105608,4.810649
std,7.474699,5.496732,7.474699
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,1.2,4.0,1.2
75%,6.8,8.0,6.8
max,56.800003,24.0,56.800003


In [23]:
rain_df['is_rain'] = (rain_df['rain_sum'] >= RAIN_SUM_THRESHOLD) & (rain_df['precipitation_hours'] >= PRECIPITATION_HOURS_THRESHOLD)
rain_df['is_rain'].value_counts()

is_rain
True     689
False    684
Name: count, dtype: int64

In [24]:
rain_df['date'] = pd.to_datetime(rain_df["date"]).dt.date.astype(str)

In [25]:
rain_df.tail(5)

Unnamed: 0,date,rain_sum,precipitation_hours,precipitation_sum,is_rain
1368,2025-04-29,8.2,5.0,8.2,True
1369,2025-04-30,25.1,8.0,25.1,True
1370,2025-05-01,7.2,4.0,7.2,True
1371,2025-05-02,,2.0,,False
1372,2025-05-03,,0.0,,False


In [26]:
rain_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1373 entries, 0 to 1372
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   date                 1373 non-null   object 
 1   rain_sum             1371 non-null   float32
 2   precipitation_hours  1373 non-null   float32
 3   precipitation_sum    1371 non-null   float32
 4   is_rain              1373 non-null   bool   
dtypes: bool(1), float32(3), object(1)
memory usage: 28.3+ KB


In [27]:
rain_df.to_csv("./data/rain_data.csv", index=False)