# Basic Import

In [None]:
import pandas as pd

# Load the raw weather readings and convert the 'datetime' column to
# pandas timestamps in one chained expression, so later cells can use the
# .dt accessor and time-based resampling on it.
weather_data = pd.read_csv('full_weather.csv').assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)

# Select a state, city, and place that exist in the dataset. List the available values using one of:
1. weather_data['state'].unique()
or 
2. weather_data['city'].unique()
or 
3. weather_data['place'].unique()

In [None]:
# Location to extract. Each value must match an entry in the corresponding
# column of weather_data exactly.
state = 'Kuala Lumpur'
city = 'Kuala Lumpur'
place = 'Bukit Bintang'

# Keep only the 2024 readings for the chosen location.
location_mask = (
    (weather_data['datetime'].dt.year == 2024)
    & (weather_data['state'] == state)
    & (weather_data['city'] == city)
    & (weather_data['place'] == place)
)

# Order the readings by time and index them by timestamp so they can be
# resampled (set_index replaces the old positional index directly).
data_filtered = (
    weather_data[location_mask]
    .sort_values('datetime')
    .set_index('datetime')
)

# Collapse the readings into one row per hour: measurements are averaged,
# precipitation columns are summed. Hours without any reading still get a
# row (NaN for the means, 0 for the sums).
# NOTE: "H" is the hourly alias; pandas >= 2.2 prefers the lowercase "h".
hourly_df = data_filtered.resample("H").agg({
    "temperature": "mean",
    "pressure": "mean",
    "dew_point": "mean",
    "humidity": "mean",
    "wind_speed": "mean",
    "gust": "mean",
    "wind_chill": "mean",
    "uv_index": "mean",
    "feels_like_temperature": "mean",
    "visibility": "mean",
    "solar_radiation": "mean",
    "pollutant_value": "mean",
    "precipitation_rate": "sum",
    "precipitation_total": "sum",
})

# Keep categorical columns. The frame is already filtered to exactly one
# place/city/state, so assign the labels directly; taking the first value
# per hour would leave NaN labels on hours that have no readings.
hourly_df["place"] = place
hourly_df["city"] = city
hourly_df["state"] = state

# Turn the datetime index back into a regular column so it is exported.
hourly_df = hourly_df.reset_index()

# Flag hours with any recorded precipitation (1 = rainy, 0 = dry),
# computed on the whole column at once instead of row by row.
hourly_df["is_rainy"] = (hourly_df["precipitation_total"] > 0).astype(int)

# The output file is named after the lower-cased state only.
hourly_df.to_csv(f'{state.lower()}.csv', index=False)

# Save the dataframe as a CSV file

In [None]:
# NOTE(review): data_2024_sorted and state are not defined in this cell;
# they must already exist in the notebook session before it is run.

# Parse the timestamps and index the frame by them so it can be resampled.
# The index is set in place, so re-running this cell without recreating
# data_2024_sorted will fail (the 'datetime' column is gone by then).
data_2024_sorted["datetime"] = pd.to_datetime(data_2024_sorted["datetime"])
data_2024_sorted.set_index("datetime", inplace=True)

# One row per hour: measurements are averaged, precipitation is summed.
# NOTE: "H" is the hourly alias; pandas >= 2.2 prefers the lowercase "h".
hourly_df = data_2024_sorted.resample("H").agg({
    "temperature": "mean",
    "pressure": "mean",
    "dew_point": "mean",
    "humidity": "mean",
    "wind_speed": "mean",
    "gust": "mean",
    "wind_chill": "mean",
    "uv_index": "mean",
    "feels_like_temperature": "mean",
    "visibility": "mean",
    "solar_radiation": "mean",
    "pollutant_value": "mean",
    "precipitation_rate": "sum",
    "precipitation_total": "sum",
})

# Carry the location labels over by taking the first value seen in each hour.
for label_column in ("place", "city", "state"):
    hourly_df[label_column] = data_2024_sorted[label_column].resample("H").first()

# hourly_df is still indexed by datetime here, so writing it directly with
# index=False would drop the timestamps from the file. Export a copy with
# the index restored as a column; hourly_df itself is left unchanged.
# NOTE(review): this writes to the same '<state>.csv' path as the export in
# the previous cell and will overwrite that file.
hourly_df.reset_index().to_csv(f'{state.lower()}.csv', index=False)