In [10]:
from datetime import datetime, timedelta
from pathlib import Path
import pandas as pd
import numpy as np


def generate_synthetic_seasonal_weather_data(start_date, end_date):
    date_format = "%Y-%m-%d"
    start_date = datetime.strptime(start_date, date_format)
    end_date = datetime.strptime(end_date, date_format)

    date_range = pd.date_range(start_date, end_date, freq="D")
    weather_data = []
    mean_temp_lst = [15, 25, 15, 5]
    std_temp = 5
    mean_humidity_lst = [50, 80, 60, 30]
    std_humidity = 10

    for date in date_range:
        # Extract month to determine the season
        month = date.month

        # Apply seasonal variations
        if 3 <= month <= 5:  # Spring
            season = "spring"
            temperature = max(0, np.random.normal(mean_temp_lst[0], std_temp))
            humidity = max(0, np.random.normal(mean_humidity_lst[0], std_humidity))
        elif 6 <= month <= 8:  # Summer
            season = "summer"
            temperature = max(0, np.random.normal(mean_temp_lst[1], std_temp))
            humidity = max(0, np.random.normal(mean_humidity_lst[1], std_humidity))
        elif 9 <= month <= 11:  # Fall
            season = "fall"
            temperature = max(0, np.random.normal(mean_temp_lst[2], std_temp))
            humidity = max(0, np.random.normal(mean_humidity_lst[2], std_humidity))
        else:  # Winter
            season = "winter"
            temperature = max(0, np.random.normal(mean_temp_lst[3], std_temp))
            humidity = max(0, np.random.normal(mean_humidity_lst[3], std_humidity))

        weather_data.append(
            {
                "date": date,
                "temperature": temperature,
                "humidity": humidity,
                "season": season,
            }
        )

    return pd.DataFrame(weather_data)

In [11]:
start_date = "2016-01-01"
end_date = "2017-12-31"

df_tokyo_weather = generate_synthetic_seasonal_weather_data(start_date, end_date)

In [12]:
df_tokyo_weather = df_tokyo_weather.assign(year=lambda df: df.date.dt.year)

In [13]:
df_tokyo_weather.groupby(["year", "season"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,date,temperature,humidity
year,season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,fall,2016-10-16 00:00:00.000000000,16.324803,59.234469
2016,spring,2016-04-15 12:00:00.000000000,15.344293,50.313667
2016,summer,2016-07-16 12:00:00.000000000,24.714545,79.511701
2016,winter,2016-05-18 16:21:05.934065920,5.407702,29.266415
2017,fall,2017-10-16 00:00:00.000000000,15.286498,62.447524
2017,spring,2017-04-15 12:00:00.000000000,15.911319,49.956591
2017,summer,2017-07-16 12:00:00.000000000,24.235123,80.011156
2017,winter,2017-05-20 05:20:00.000000000,5.132455,31.410075


In [14]:
df_tokyo_weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731 entries, 0 to 730
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         731 non-null    datetime64[ns]
 1   temperature  731 non-null    float64       
 2   humidity     731 non-null    float64       
 3   season       731 non-null    object        
 4   year         731 non-null    int32         
dtypes: datetime64[ns](1), float64(2), int32(1), object(1)
memory usage: 25.8+ KB


In [19]:
df_tokyo_weather.to_csv("/content/drive/MyDrive/quamet/Sales Forecasting/tokyo_weather.csv", index=False)

In [20]:
df_tokyo_weather.head()

Unnamed: 0,date,temperature,humidity,season,year
0,2016-01-01,10.810162,30.313094,winter,2016
1,2016-01-02,1.757606,39.377427,winter,2016
2,2016-01-03,8.885207,23.050902,winter,2016
3,2016-01-04,6.112133,35.310604,winter,2016
4,2016-01-05,0.505106,29.675393,winter,2016


In [21]:
df_tokyo_weather.groupby(["year", "season"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,date,temperature,humidity
year,season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,fall,2016-10-16 00:00:00.000000000,16.324803,59.234469
2016,spring,2016-04-15 12:00:00.000000000,15.344293,50.313667
2016,summer,2016-07-16 12:00:00.000000000,24.714545,79.511701
2016,winter,2016-05-18 16:21:05.934065920,5.407702,29.266415
2017,fall,2017-10-16 00:00:00.000000000,15.286498,62.447524
2017,spring,2017-04-15 12:00:00.000000000,15.911319,49.956591
2017,summer,2017-07-16 12:00:00.000000000,24.235123,80.011156
2017,winter,2017-05-20 05:20:00.000000000,5.132455,31.410075
