In [2]:
import pandas as pd
from prophet import Prophet

# Read the weather history CSV and swap its 'date' column for parsed datetimes
data = pd.read_csv(
    r'../data/part_1/historical_weather_without_outliers.csv'
).assign(date=lambda frame: pd.to_datetime(frame['date']))

# Function to train a Prophet model for a given city
def train_prophet_model(city_data):
    """Fit a default-configured Prophet model on one city's records.

    Args:
        city_data: DataFrame with a 'date' column and an 'avg_temp_c' column.
            They are renamed to 'ds' and 'y' before fitting; any other
            columns are passed through untouched.

    Returns:
        The Prophet instance after ``fit`` has been called on it.
    """
    # rename() hands back a new frame, so the caller's column names are untouched
    training_frame = city_data.rename(columns={'date': 'ds', 'avg_temp_c': 'y'})

    fitted = Prophet()
    fitted.fit(training_frame)
    return fitted

# Fit one Prophet model per city and keep it under that city's city_id
prophet_models = {
    city_id: train_prophet_model(city_rows)
    for city_id, city_rows in data.groupby('city_id')
}

# Function to forecast using the trained Prophet model
def forecast_prophet_model(model, steps):
    """Predict ``steps`` periods past the end of the model's history.

    Args:
        model: Object exposing ``make_future_dataframe(periods=...)`` and
            ``predict(frame)`` (a fitted Prophet model in this notebook).
        steps: Number of future periods to request.

    Returns:
        The 'yhat' column of the last ``steps`` rows of the prediction frame.
    """
    horizon_frame = model.make_future_dataframe(periods=steps)
    predictions = model.predict(horizon_frame)
    # Only the trailing `steps` rows are kept
    return predictions['yhat'].tail(steps)

# Forecast 7 periods past each city's history, store the result per city and
# echo the values as they are produced.
# NOTE(review): intended as the first week of 2019, which presumably relies on
# every city's history ending on 2018-12-31 -- confirm against the dataset.
prophet_forecasts = {}
for city_id, model in prophet_models.items():
    prophet_forecasts[city_id] = forecast_prophet_model(model, steps=7)
    print(
        f"Forecasted values for city {city_id}:",
        prophet_forecasts[city_id].values,
        "\n",
        sep="\n",
    )

# Flatten the per-city forecasts (in dictionary order) into submission rows
# numbered from 1, then write them out as CSV
all_temps = [temp for forecast in prophet_forecasts.values() for temp in forecast]
forecast_data = [
    {'submission_ID': submission_id, 'avg_temp_c': round(temp, 2)}
    for submission_id, temp in enumerate(all_temps, start=1)
]

forecast_df = pd.DataFrame(forecast_data)
forecast_df.to_csv(r'../data/part_1/forcasted_data/prophet.csv', index=False)

print("Forecast data has been saved to 'prophet.csv'")


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/6kxulper.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/voywi59s.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=69366', 'data', 'file=/tmp/tmpea5xgirb/6kxulper.json', 'init=/tmp/tmpea5xgirb/voywi59s.json', 'output', 'file=/tmp/tmpea5xgirb/prophet_modelnvm6hqp5/prophet_model-20240623084601.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
08:46:01 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
08:46:02 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea

Forecasted values for city C001:
[8.12534245 7.7859671  7.43027215 7.08387369 6.88686383 7.00036552
 7.06663429]


Forecasted values for city C002:
[14.62160551 14.51579733 14.34142385 14.16754365 14.07676506 13.93804321
 13.83855313]


Forecasted values for city C003:
[26.76534691 26.81774132 26.76870766 26.75219564 26.81666036 26.79944366
 26.74890758]


Forecasted values for city C004:
[-1.34358785 -1.42318059 -1.31901495 -1.18273041 -1.29588938 -1.41276121
 -1.21816351]


Forecasted values for city C005:
[24.63367947 24.67141308 24.56640921 24.85579874 24.73533224 24.6443898
 24.82038404]


Forecasted values for city C007:
[19.95925965 20.16326399 20.51458923 20.70686252 20.98839218 20.99685535
 20.58607381]


Forecasted values for city C008:
[3.45774003 3.54523664 3.37724449 3.31059143 3.45934818 3.21205362
 2.85192368]


Forecasted values for city C009:
[19.70425036 19.61883486 19.53278088 19.45923526 19.63583999 19.57534688
 19.34840988]


Forecasted values for city C010:
[19.23

In [4]:
# Function to train a Prophet model for a given city
def train_prophet_model(city_data):
    """Fit a Prophet model with explicit prior scales on one city's records.

    Args:
        city_data: DataFrame with a 'date' column and an 'avg_temp_c' column;
            they are renamed to 'ds' and 'y' before fitting.

    Returns:
        The Prophet instance after ``fit`` has been called on it.
    """
    # Prior scales chosen for this experiment, passed straight to Prophet
    hyperparameters = {
        'changepoint_prior_scale': 0.5,
        'seasonality_prior_scale': 10.0,
    }
    model = Prophet(**hyperparameters)

    # rename() returns a new frame; the caller's data is left as it was
    model.fit(city_data.rename(columns={'date': 'ds', 'avg_temp_c': 'y'}))
    return model

# Map each city_id to a Prophet model fitted on that city's rows
prophet_models = {
    city_key: train_prophet_model(city_frame)
    for city_key, city_frame in data.groupby('city_id')
}

# Function to forecast using the trained Prophet model
def forecast_prophet_model(model, steps):
    """Return the model's 'yhat' predictions for the next ``steps`` periods.

    Args:
        model: Object exposing ``make_future_dataframe(periods=...)`` and
            ``predict(frame)`` (a fitted Prophet model in this notebook).
        steps: Number of future periods to request.

    Returns:
        The 'yhat' column restricted to the last ``steps`` rows of the
        prediction frame.
    """
    predicted = model.predict(model.make_future_dataframe(periods=steps))
    trailing_rows = predicted.tail(steps)
    return trailing_rows['yhat']

# Produce a 7-period forecast per city, remember it by city_id and print it.
# NOTE(review): meant to cover the first week of 2019; that presumably holds
# only if each city's history ends on 2018-12-31 -- confirm.
prophet_forecasts = {}
for city_id, fitted_model in prophet_models.items():
    city_forecast = forecast_prophet_model(fitted_model, steps=7)
    prophet_forecasts[city_id] = city_forecast
    print(f"Forecasted values for city {city_id}:")
    print(city_forecast.values)
    print("\n")

# Turn every forecasted temperature into one submission row; IDs start at 1
# and keep counting across cities in dictionary order
forecast_data = []
for city_forecast in prophet_forecasts.values():
    for temp in city_forecast:
        next_id = len(forecast_data) + 1
        forecast_data.append({'submission_ID': next_id, 'avg_temp_c': round(temp, 2)})

# Persist the rows without the DataFrame index
forecast_df = pd.DataFrame(forecast_data)
forecast_df.to_csv(r'../data/part_1/forcasted_data/prophet_optimized.csv', index=False)

print("Forecast data has been saved to 'prophet_optimized.csv'")




INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/x_ifg7h9.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/bwy5wazh.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=28341', 'data', 'file=/tmp/tmpea5xgirb/x_ifg7h9.json', 'init=/tmp/tmpea5xgirb/bwy5wazh.json', 'output', 'file=/tmp/tmpea5xgirb/prophet_model058co8tw/prophet_model-20240623090328.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:03:28 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
09:03:29 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea

Forecasted values for city C001:
[8.57760713 8.24284428 7.89149926 7.54925324 7.35625224 7.47378643
 7.54394369]


Forecasted values for city C002:
[14.53801678 14.4228933  14.23880501 14.05708172 13.95859943 13.81236961
 13.71042404]


Forecasted values for city C003:
[26.90117439 26.95675329 26.91107869 26.89927327 26.96599461 26.95354708
 26.9068445 ]


Forecasted values for city C004:
[-1.2470981  -1.33920717 -1.2436918  -1.11571077 -1.23670941 -1.36084268
 -1.17286043]


Forecasted values for city C005:
[25.14812946 25.18266463 25.07501371 25.36153239 25.23794829 25.14359637
 25.31592499]


Forecasted values for city C007:
[19.24912523 19.44888758 19.79761304 19.98780441 20.26778825 20.27539122
 19.86458333]


Forecasted values for city C008:
[3.17488325 3.25662244 3.086278   3.01846065 3.16674108 2.92034917
 2.56244616]


Forecasted values for city C009:
[20.55162413 20.49503358 20.43217579 20.38076624 20.57830221 20.53705201
 20.32763431]


Forecasted values for city C010:
[19.5

In [5]:
# Function to train a Prophet model for a given city
def train_prophet_model(city_data, yearly_fourier_order=20):
    """Fit a Prophet model whose only seasonality is a custom yearly term.

    Prophet's built-in yearly, weekly and daily seasonalities are disabled and
    a single 'yearly' component (period 365.25 days) is registered with the
    requested Fourier order.

    Args:
        city_data: DataFrame with a 'date' column and an 'avg_temp_c' column;
            they are renamed to 'ds' and 'y' before fitting.
        yearly_fourier_order: Fourier order of the custom yearly seasonality.
            Defaults to 20, the order the earlier loop-based version actually
            ended up fitting with.

    Returns:
        The Prophet instance after ``fit`` has been called on it.
    """
    # Prepare the data for Prophet (rename() leaves the caller's frame intact)
    prophet_data = city_data.rename(columns={'date': 'ds', 'avg_temp_c': 'y'})

    # Built-in seasonalities are switched off so the yearly term below is the
    # only seasonal component and its Fourier order is under our control.
    model = Prophet(
        changepoint_prior_scale=0.5,
        seasonality_prior_scale=10.0,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
    )

    # Register the seasonality exactly once. Seasonalities are keyed by name,
    # so calling add_seasonality repeatedly with name='yearly' (as a loop over
    # [5, 10, 15, 20] used to do) merely overwrites the entry and leaves the
    # last order in effect -- it does not compare or combine the orders.
    # To tune the order, call this function once per candidate value and
    # evaluate each fitted model separately.
    model.add_seasonality(
        name='yearly',
        period=365.25,
        fourier_order=yearly_fourier_order,
    )

    # Fit the model
    model.fit(prophet_data)

    return model

# Train one model per city_id group and collect the fitted models in a dict
prophet_models = {
    group_key: train_prophet_model(group_rows)
    for group_key, group_rows in data.groupby('city_id')
}

# Function to forecast using the trained Prophet model
def forecast_prophet_model(model, steps):
    """Forecast ``steps`` periods beyond the model's history.

    Args:
        model: Object exposing ``make_future_dataframe(periods=...)`` and
            ``predict(frame)`` (a fitted Prophet model in this notebook).
        steps: Number of future periods to request.

    Returns:
        The 'yhat' values of the final ``steps`` rows of the prediction frame.
    """
    extended_dates = model.make_future_dataframe(periods=steps)
    yhat_column = model.predict(extended_dates).loc[:, 'yhat']
    # Keep only the trailing `steps` entries
    return yhat_column.tail(steps)

# Run a 7-period forecast for every trained model, keep it keyed by city_id,
# and print the raw values right after each prediction.
# NOTE(review): described as the first week of 2019; presumably every city's
# history ends on 2018-12-31 -- confirm before relying on the dates.
prophet_forecasts = {}
for city_id, city_model in prophet_models.items():
    predicted_temps = forecast_prophet_model(city_model, steps=7)
    prophet_forecasts[city_id] = predicted_temps
    print(f"Forecasted values for city {city_id}:")
    print(predicted_temps.values)
    print("\n")

# Build the submission rows: one per forecasted temperature, numbered from 1
# across all cities in dictionary order, rounded to two decimals
flattened_temps = [
    temp
    for city_forecast in prophet_forecasts.values()
    for temp in city_forecast
]
forecast_data = [
    {'submission_ID': row_number, 'avg_temp_c': round(temp, 2)}
    for row_number, temp in enumerate(flattened_temps, start=1)
]

# Write the rows to CSV without the DataFrame index
forecast_df = pd.DataFrame(forecast_data)
forecast_df.to_csv(r'../data/part_1/forcasted_data/prophet_more_optimized.csv', index=False)

print("Forecast data has been saved to 'prophet_more_optimized.csv'")



DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/2nnix4ga.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/re2jcici.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=36433', 'data', 'file=/tmp/tmpea5xgirb/2nnix4ga.json', 'init=/tmp/tmpea5xgirb/re2jcici.json', 'output', 'file=/tmp/tmpea5xgirb/prophet_model4x5ga44a/prophet_model-20240623091647.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:16:47 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
09:16:47 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/o8hbusaf.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/_50_ried.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/

Forecasted values for city C001:
[8.74172432 8.56091025 8.37321759 8.18769547 8.01229286 7.85292965
 7.71285186]


Forecasted values for city C002:
[14.4903555  14.31776599 14.1531128  13.9972288  13.85135718 13.71727276
 13.59732435]


Forecasted values for city C003:
[27.12436679 27.08876129 27.04257935 26.99028329 26.93686819 26.88743734
 26.84678121]


Forecasted values for city C004:
[-1.06830074 -1.10802969 -1.12612604 -1.12381678 -1.10415985 -1.071834
 -1.03279609]


Forecasted values for city C005:
[25.91984912 25.90021049 25.82082178 25.69155065 25.52549269 25.33779602
 25.1443629 ]


Forecasted values for city C007:
[20.31669088 20.28394416 20.19183749 20.0470801  19.86077278 19.64794581
 19.42666974]


Forecasted values for city C008:
[3.71762319 3.47984523 3.25162634 3.04445431 2.86498336 2.71451617
 2.58919336]


Forecasted values for city C009:
[20.57799649 20.57987104 20.59843104 20.62678756 20.65683873 20.67998668
 20.68792727]


Forecasted values for city C010:
[19.859