In [1]:
from pathlib import Path

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

In [None]:
# TODO: Set this to the code of the location to extract data for.
# The value is used as the dataset sub-folder name when the CSV path is built
# (../datasets/<location_code>/weather_data_hourly.csv); remember to update the
# latitude/longitude in the request parameters to match the same location.
location_code = "kuala-lumpur"

In [2]:
# Build the Open-Meteo API client on top of an HTTP session that caches
# responses and retries failed requests.
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after=-1,  # -1: cached responses never expire
)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [4]:
# Request definition for the Open-Meteo historical archive endpoint.
# Every weather variable that is needed must appear in `hourly_variables`, and
# its position matters: the cells below pick each series out of the response by
# index, so the order here has to stay in sync with them.
url = "https://archive-api.open-meteo.com/v1/archive"

hourly_variables = [
    "temperature_2m",
    "relative_humidity_2m",
    "dew_point_2m",
    "rain",
    "pressure_msl",
    "wind_speed_10m",
    "wind_direction_10m",
]

params = {
    # TODO: Change to specific malaysian location's latitude and longitude
    "latitude": 3.1412,
    "longitude": 101.6865,
    "start_date": "2010-01-01",
    "end_date": "2024-07-31",
    "hourly": hourly_variables,
    "timezone": "Asia/Singapore",
}

responses = openmeteo.weather_api(url, params=params)

In [8]:
# Inspect what the client returned. A notebook cell only renders its LAST
# expression, so the types are printed explicitly (as bare expressions they
# were evaluated and silently discarded). The first response object stays as
# the final expression so its repr is still displayed.
print(type(responses))
print(type(responses[0]))
responses[0]

<openmeteo_sdk.WeatherApiResponse.WeatherApiResponse at 0x2d42172ba30>

In [57]:
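# Reference only: units of the hourly variables requested above, as listed in
# the "hourly_units" section of the API's JSON output.
# "hourly_units": {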
#     "time": "iso8601",
#     "temperature_2m": "°C",
#     "relative_humidity_2m": "%",
#     "dew_point_2m": "°C",
#     "rain": "mm",
#     "pressure_msl": "hPa",
#     "wind_speed_10m": "km/h",
#     "wind_direction_10m": "°"
# }

In [13]:
# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]

# Summarise the metadata the API reports for this location, one line per item.
metadata_lines = (
    f"Coordinates {response.Latitude()}°N {response.Longitude()}°E",
    f"Elevation {response.Elevation()} m asl",
    f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}",
    f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s",
)
print(*metadata_lines, sep="\n")

Coordinates 3.1282951831817627°N 101.68547821044922°E
Elevation 47.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


In [14]:
# Process hourly data. Variables come back positionally, so the targets below
# must be listed in exactly the same order as the variables were requested.
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_relative_humidity_2m,
    hourly_dew_point_2m,
    hourly_rain,
    hourly_pressure_msl,
    hourly_wind_speed_10m,
    hourly_wind_direction_10m,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(7))

In [24]:
# Build the timestamp column: one UTC timestamp per step, from the first
# reported time up to (but excluding) the reported end time.
range_start = pd.to_datetime(hourly.Time(), unit="s", utc=True)
range_end = pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True)
step = pd.Timedelta(seconds=hourly.Interval())

hourly_data = {
    "date": pd.date_range(start=range_start, end=range_end, freq=step, inclusive="left"),
}

In [25]:
# Attach each extracted series under its variable name, then assemble the
# final table (column order follows insertion order).
hourly_data.update({
    "temperature_2m": hourly_temperature_2m,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "dew_point_2m": hourly_dew_point_2m,
    "rain": hourly_rain,
    "pressure_msl": hourly_pressure_msl,
    "wind_speed_10m": hourly_wind_speed_10m,
    "wind_direction_10m": hourly_wind_direction_10m,
})

hourly_dataframe = pd.DataFrame(hourly_data)

In [55]:
# Count the missing values in every column (all zeros means nothing is missing)
missing_per_column = hourly_dataframe.isnull().sum()
print(missing_per_column)

date                    0
temperature_2m          0
relative_humidity_2m    0
dew_point_2m            0
rain                    0
pressure_msl            0
wind_speed_10m          0
wind_direction_10m      0
dtype: int64


In [56]:
# Check for continuity of time series weather data (hourly).
# Comparing row counts alone is not enough: a duplicated timestamp can offset a
# missing one and the counts still match. So in addition to the count check,
# require every consecutive pair of rows to be exactly one hour apart, which
# also catches duplicates and out-of-order rows.
all_hour_datetime = pd.Series(data=pd.date_range(start=hourly_dataframe['date'].min(), end=hourly_dataframe['date'].max(), freq='h'))

has_expected_count = hourly_dataframe['date'].count() == all_hour_datetime.count()

# diff() yields NaT for the first row, hence the dropna() before comparing.
consecutive_steps = hourly_dataframe['date'].diff().dropna()
has_hourly_steps = bool((consecutive_steps == pd.Timedelta(hours=1)).all())

if has_expected_count and has_hourly_steps:
    print("Continuous")
else:
    print("NOT continuous")

Continuous


In [None]:
# Output CSV path; derived from location_code so each location is written to
# its own folder under ../datasets/.
data_location = f'../datasets/{location_code}/weather_data_hourly.csv'

In [None]:
# Convert data frame to CSV, which will be used in notebooks for data cleaning, feature selection, and model training.
# The destination is derived from location_code (see data_location). to_csv does
# not create missing directories, so make sure the location's folder exists
# first; this is a no-op when the folder is already there.
Path(data_location).parent.mkdir(parents=True, exist_ok=True)
hourly_dataframe.to_csv(data_location, index=False)

In [4]:
from datetime import datetime
import pytz

# Define the Singapore timezone. This must be the 'Asia/Singapore' zone:
# using 'Asia/Kolkata' here would report Indian Standard Time instead.
singapore_tz = pytz.timezone('Asia/Singapore')

# Get the current time in Singapore timezone
current_time_in_singapore = datetime.now(singapore_tz)

# Extract the current hour (0-23) of that local time
current_hour_in_singapore = current_time_in_singapore.hour

print(f"The current hour in Singapore is: {current_hour_in_singapore}")

The current hour in Singapore is: 15
