In [2]:
import pandas as pd
from prophet import Prophet

# Read the outlier-free historical weather table, then swap the raw 'date'
# column for its parsed datetime equivalent so later steps get real timestamps.
data = pd.read_csv('data/part_1/historical_weather_without_outliers.csv')
data = data.assign(date=pd.to_datetime(data['date']))

# Fit one Prophet model on a single city's rows
def train_prophet_model(city_data):
    """Fit a default-configured Prophet model on one city's observations.

    Args:
        city_data: DataFrame holding at least a 'date' column and an
            'avg_temp_c' column; any other columns are passed through
            unchanged to Prophet.

    Returns:
        The fitted Prophet instance.
    """
    # Prophet reads the timestamp from 'ds' and the target from 'y'.
    column_map = {'date': 'ds', 'avg_temp_c': 'y'}
    training_frame = city_data.rename(columns=column_map)

    fitted_model = Prophet()
    fitted_model.fit(training_frame)
    return fitted_model

# Map each city_id to a Prophet model trained on that city's rows only
prophet_models = {
    group_key: train_prophet_model(group_rows)
    for group_key, group_rows in data.groupby('city_id')
}

# Produce point forecasts from an already-fitted Prophet model
def forecast_prophet_model(model, steps):
    """Return the last `steps` predicted 'yhat' values from `model`.

    Args:
        model: Fitted object exposing `make_future_dataframe(periods=...)`
            and `predict(frame)`, such as a trained Prophet model.
        steps: Number of periods to extend the frame by, and the number
            of trailing predictions to return.

    Returns:
        A pandas Series named 'yhat' holding the final `steps` rows of the
        prediction frame.
    """
    horizon = model.make_future_dataframe(periods=steps)
    predictions = model.predict(horizon)
    # Only the trailing rows are kept; presumably the frame also contains
    # the historical dates (Prophet's default) -- confirm if that changes.
    return predictions['yhat'].tail(steps)

# Collect a 7-step forecast per city, echoing each one as it is produced
prophet_forecasts = {}

for city_id in prophet_models:
    weekly_yhat = forecast_prophet_model(prophet_models[city_id], steps=7)
    prophet_forecasts[city_id] = weekly_yhat

    # Header line, raw array of predictions, then a blank spacer
    print(f"Forecasted values for city {city_id}:")
    print(weekly_yhat.values)
    print("\n")

# Flatten every city's forecast into one sequence, in dictionary order,
# then number the rows from 1 to build the submission table.
flattened_temps = [
    temp
    for city_forecast in prophet_forecasts.values()
    for temp in city_forecast
]
forecast_data = [
    {'submission_ID': row_number, 'avg_temp_c': round(temp, 2)}
    for row_number, temp in enumerate(flattened_temps, start=1)
]

# Write the table without the DataFrame index column
forecast_df = pd.DataFrame(forecast_data)
forecast_df.to_csv('data/part_1/forcasted_data/prophet.csv', index=False)

print("Forecast data has been saved to 'prophet.csv'")


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/6kxulper.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea5xgirb/voywi59s.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=69366', 'data', 'file=/tmp/tmpea5xgirb/6kxulper.json', 'init=/tmp/tmpea5xgirb/voywi59s.json', 'output', 'file=/tmp/tmpea5xgirb/prophet_modelnvm6hqp5/prophet_model-20240623084601.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
08:46:01 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
08:46:02 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpea

Forecasted values for city C001:
[8.12534245 7.7859671  7.43027215 7.08387369 6.88686383 7.00036552
 7.06663429]


Forecasted values for city C002:
[14.62160551 14.51579733 14.34142385 14.16754365 14.07676506 13.93804321
 13.83855313]


Forecasted values for city C003:
[26.76534691 26.81774132 26.76870766 26.75219564 26.81666036 26.79944366
 26.74890758]


Forecasted values for city C004:
[-1.34358785 -1.42318059 -1.31901495 -1.18273041 -1.29588938 -1.41276121
 -1.21816351]


Forecasted values for city C005:
[24.63367947 24.67141308 24.56640921 24.85579874 24.73533224 24.6443898
 24.82038404]


Forecasted values for city C007:
[19.95925965 20.16326399 20.51458923 20.70686252 20.98839218 20.99685535
 20.58607381]


Forecasted values for city C008:
[3.45774003 3.54523664 3.37724449 3.31059143 3.45934818 3.21205362
 2.85192368]


Forecasted values for city C009:
[19.70425036 19.61883486 19.53278088 19.45923526 19.63583999 19.57534688
 19.34840988]


Forecasted values for city C010:
[19.23