In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
import warnings
warnings.filterwarnings("ignore")

In [5]:
# Historical observations; later cells read its 'city_id', 'date' and
# 'avg_temp_c' columns.
historical_weather = pd.read_csv(filepath_or_buffer='historical_weather.csv')

In [6]:
# submission_key lists the (city_id, date, submission_ID) rows to forecast;
# sample_submission is the template the final output is merged into.
submission_key = pd.read_csv(filepath_or_buffer='submission_key.csv')
sample_submission = pd.read_csv(filepath_or_buffer='sample_submission.csv')

In [7]:
# Convert the 'date' columns of both frames to pandas datetimes.
historical_weather['date'] = historical_weather['date'].pipe(pd.to_datetime)
submission_key['date'] = submission_key['date'].pipe(pd.to_datetime)

In [8]:
def train_sarima(city_data, steps=7):
    """Fit a SARIMA model to one city's temperatures and forecast ahead.

    The model is SARIMAX with order (1, 0, 0) and seasonal order
    (1, 1, 0, 12), i.e. a seasonal period of 12 observations.
    NOTE(review): if the rows are daily this is a 12-day cycle -- confirm
    that this period is intended.

    Parameters
    ----------
    city_data : pandas.DataFrame
        Observations for a single city. Only the 'avg_temp_c' column is
        used, in the row order given, so the rows should already be in
        chronological order.
    steps : int, default 7
        Number of out-of-sample periods to forecast. The default keeps the
        original fixed 7-step horizon.

    Returns
    -------
    The result of the fitted model's ``forecast(steps=steps)`` call, holding
    ``steps`` predicted values.

    Raises
    ------
    ValueError
        If ``steps`` is smaller than 1 or ``city_data`` has no rows.
    """
    if steps < 1:
        raise ValueError(f"steps must be a positive integer, got {steps!r}")

    temperatures = city_data['avg_temp_c']
    if temperatures.empty:
        # Fail with a clear message instead of an obscure error from the
        # model fit on an empty series.
        raise ValueError("city_data has no rows; cannot fit a SARIMA model")

    # Train SARIMA model
    model = SARIMAX(temperatures, order=(1, 0, 0), seasonal_order=(1, 1, 0, 12))
    model_fit = model.fit(disp=False)

    # Forecast
    return model_fit.forecast(steps=steps)


In [9]:
# Empty frame that fixes the column layout of the collected forecasts.
predictions = pd.DataFrame(
    data=None,
    columns=['submission_ID', 'avg_temp_c'],
)

In [10]:
# Collect one frame per city and build `predictions` from scratch at the end.
# Re-running this cell therefore never appends a second copy of every row, and
# the result's dtypes come from the forecasts rather than from an empty frame.
city_frames = []
for city_id in submission_key['city_id'].unique():
    # Forecast step k is the k-th period after the last observation, so both
    # the history and the rows to fill must be in chronological order.
    city_data = historical_weather[historical_weather['city_id'] == city_id].sort_values(
        'date', kind='stable'
    )
    city_rows = submission_key[submission_key['city_id'] == city_id].sort_values(
        'date', kind='stable'
    )

    # Train SARIMA model and predict
    forecast = train_sarima(city_data)

    # Get submission IDs and pair them positionally with the forecast values
    city_submission_ids = city_rows['submission_ID'].to_numpy()
    forecast_values = np.asarray(forecast)
    if len(forecast_values) != len(city_submission_ids):
        raise ValueError(
            f"city_id {city_id!r}: got {len(forecast_values)} forecast value(s) "
            f"for {len(city_submission_ids)} submission row(s)"
        )

    city_predictions = pd.DataFrame(
        {'submission_ID': city_submission_ids, 'avg_temp_c': forecast_values}
    )
    city_frames.append(city_predictions)

# pd.concat rejects an empty list, so fall back to an empty frame with the
# expected columns when there are no cities to forecast.
predictions = (
    pd.concat(city_frames, ignore_index=True)
    if city_frames
    else pd.DataFrame(columns=['submission_ID', 'avg_temp_c'])
)

In [11]:
# Keep one decimal place in the forecast temperatures.
predictions['avg_temp_c'] = predictions['avg_temp_c'].round(decimals=1)

In [12]:
# Attach the forecasts to the submission template, keyed on 'submission_ID'.
# Any other column the template shares with `predictions` (presumably a
# placeholder 'avg_temp_c' -- verify against sample_submission.csv) is dropped
# first; otherwise merge would emit 'avg_temp_c_x' / 'avg_temp_c_y' instead of
# a single 'avg_temp_c' column.
overlapping_columns = [
    column
    for column in predictions.columns
    if column != 'submission_ID' and column in sample_submission.columns
]
merged_submission = sample_submission.drop(columns=overlapping_columns).merge(
    predictions, on='submission_ID', how='left', indicator=True
)

# A left join keeps every template row; fail loudly if any of them received no
# prediction instead of silently writing a file with fewer rows.
unmatched_ids = merged_submission.loc[
    merged_submission['_merge'] == 'left_only', 'submission_ID'
]
if not unmatched_ids.empty:
    raise ValueError(
        f"{len(unmatched_ids)} submission_ID value(s) have no prediction, "
        f"e.g. {unmatched_ids.head().tolist()}"
    )

final_submission = merged_submission.drop(columns='_merge')

In [13]:
# Write the submission without the index column, then confirm on stdout.
final_submission.to_csv(path_or_buf='final_submission.csv', index=False)

print("Prediction complete. Output saved to 'final_submission.csv'")

Prediction complete. Output saved to 'final_submission.csv'
