<a href="https://colab.research.google.com/github/Chaudhari-Amar/econ8310-assignment1/blob/main/assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.varmax import VARMAX

# Load the training and test data from the Colab working directory
train = pd.read_csv('/content/assignment_data_train.csv')
test = pd.read_csv('/content/assignment_data_test.csv')

# Remove the timestamp column (if present); only numeric series are modelled
train = train.drop(columns=['Timestamp'], errors='ignore')
test = test.drop(columns=['Timestamp'], errors='ignore')

# Ensure all columns are numeric; entries that cannot be parsed become NaN
train = train.apply(pd.to_numeric, errors='coerce')
test = test.apply(pd.to_numeric, errors='coerce')

# Identify and drop columns that carry no information in the training data:
# constant columns (one unique value) and columns that are entirely NaN
# (zero unique values, because nunique() ignores NaN). An all-NaN column --
# e.g. a text column wiped out by the coercion above -- would otherwise make
# the row-wise dropna() below discard every observation.
constant_cols = [col for col in train.columns if train[col].nunique() <= 1]
train = train.drop(columns=constant_cols)
test = test.drop(columns=constant_cols, errors='ignore')

# Drop the rows that still contain missing values
train = train.dropna()
test = test.dropna()

# DIFFERENCING: model period-to-period changes instead of levels, aiming for
# stationarity. The first row has no predecessor, so it is NaN and is dropped.
train_diff = train.diff().dropna()

# Fit the VARMA model (VARMAX in statsmodels): order (1, 1), with the
# stationarity and invertibility constraints not enforced.
varma_settings = {
    'order': (1, 1),
    'enforce_stationarity': False,
    'enforce_invertibility': False,
}
model = VARMAX(train_diff, **varma_settings)
modelFit = model.fit(disp=False)

# Forecast for 744 hours (the result is still on the differenced scale)
pred_diff = modelFit.forecast(steps=744)

# Convert predictions back to original scale: undo the first difference by
# accumulating the forecast changes and adding the last observed level.
# Each column is offset by its OWN last value (the Series is aligned to the
# forecast columns by label), not by the last 'trips' value, so every column
# of `pred` is a valid level forecast. The 'trips' column is unaffected.
last_values = train.iloc[-1]  # Last known level of every series before differencing
pred = pred_diff.cumsum().add(last_values, axis='columns')

# Keep only the target series for the output file
pred_df = pred[['trips']]

# Save predictions
pred_df.to_csv('/content/predictions_varma.csv', index=False)
