In [17]:
%pip install openmeteo-requests numpy pandas matplotlib

Note: you may need to restart the kernel to use updated packages.


In [18]:
import openmeteo_requests
import numpy as np
import pandas as pd

In [19]:
# Bangkok coordinates
LATITUDE = 13.7563
LONGTITUDE = 100.5018

START_DATE = "2021-08-01"
END_DATE = "2025-01-31"

RAIN_SUM_THRESHOLD = 1 # mm
PRECIPITATION_HOURS_THRESHOLD = 3 # hours

In [20]:
openmeteo = openmeteo_requests.Client()

url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": LATITUDE,
    "longitude": LONGTITUDE,
    "start_date": START_DATE,
    "end_date": END_DATE,
    "daily": ["rain_sum", "precipitation_hours", "precipitation_sum"],
    "timezone": "Asia/Bangkok",
}
responses = openmeteo.weather_api(url, params=params)

In [21]:
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

Coordinates 13.743409156799316°N 100.49586486816406°E
Elevation 4.0 m asl
Timezone b'Asia/Bangkok'b'GMT+7'
Timezone difference to GMT+0 25200 s


In [22]:
daily = response.Daily()
daily_rain_sum = daily.Variables(0).ValuesAsNumpy()
daily_precipitation_hours = daily.Variables(1).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(2).ValuesAsNumpy()

daily_data = {
    "date": pd.date_range(
        start=pd.to_datetime(daily.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily.Interval()),
        inclusive="left",
    )
}

daily_data["rain_sum"] = daily_rain_sum
daily_data["precipitation_hours"] = daily_precipitation_hours
daily_data["precipitation_sum"] = daily_precipitation_sum

rain_df = pd.DataFrame(data=daily_data)

In [23]:
rain_df.describe()

Unnamed: 0,rain_sum,precipitation_hours,precipitation_sum
count,1280.0,1280.0,1280.0
mean,4.964766,5.274219,4.964766
std,7.602226,5.592687,7.602226
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,1.3,4.0,1.3
75%,7.1,8.0,7.1
max,56.800003,24.0,56.800003


In [24]:
rain_df['is_rain'] = (rain_df['rain_sum'] >= RAIN_SUM_THRESHOLD) & (rain_df['precipitation_hours'] >= PRECIPITATION_HOURS_THRESHOLD)
rain_df['is_rain'].value_counts()

is_rain
True     657
False    623
Name: count, dtype: int64

In [25]:
rain_df['date'] = pd.to_datetime(rain_df["date"]).dt.date.astype(str)

In [26]:
rain_df.head(5)

Unnamed: 0,date,rain_sum,precipitation_hours,precipitation_sum,is_rain
0,2021-07-31,3.2,6.0,3.2,True
1,2021-08-01,1.1,6.0,1.1,True
2,2021-08-02,5.9,4.0,5.9,True
3,2021-08-03,2.9,7.0,2.9,True
4,2021-08-04,4.9,8.0,4.9,True


In [27]:
rain_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1280 entries, 0 to 1279
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   date                 1280 non-null   object 
 1   rain_sum             1280 non-null   float32
 2   precipitation_hours  1280 non-null   float32
 3   precipitation_sum    1280 non-null   float32
 4   is_rain              1280 non-null   bool   
dtypes: bool(1), float32(3), object(1)
memory usage: 26.4+ KB


In [28]:
rain_df.to_csv("./data/rain_data.csv", index=False)