<a href="https://colab.research.google.com/github/Chaudhari-Amar/econ8310-assignment1/blob/main/assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# SINGLE-CELL SOLUTION FOR COLAB

# (Optional) ensure packages are present
!pip -q install statsmodels pandas numpy

import warnings, pickle, io, sys
from typing import Tuple
import numpy as np, pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing
warnings.filterwarnings("ignore")

TRAIN_URL = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_train.csv"
TEST_URL  = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_test.csv"

def _detect_timestamp_column(df: pd.DataFrame) -> str:
    cands = [c for c in df.columns if any(k in c.lower() for k in ["date","time","timestamp","datetime"])]
    return cands[0] if cands else ""

def _detect_target_column(df: pd.DataFrame, exclude=()) -> str:
    numeric = [c for c in df.columns if c not in exclude and pd.api.types.is_numeric_dtype(df[c])]
    if numeric: return numeric[0]
    for c in ["y","count","trips","trip_count","rides","n"]:
        if c in df.columns: return c
    return df.columns[-1]

def _load(url: str) -> pd.DataFrame:
    """Read a CSV and, when possible, order its rows chronologically.

    The timestamp column is located with ``_detect_timestamp_column``. If one
    is found, it is converted to datetimes and the rows are sorted by it with
    a fresh 0..n-1 index. Any error during that step is ignored and the table
    is returned in whatever state it had when the error occurred.

    Args:
        url: Location passed straight to ``pd.read_csv``.

    Returns:
        The loaded (and possibly sorted) table.
    """
    frame = pd.read_csv(url)
    stamp_col = _detect_timestamp_column(frame)
    if not stamp_col:
        # Nothing that looks like a timestamp: return the rows as read.
        return frame

    try:
        frame[stamp_col] = pd.to_datetime(frame[stamp_col])
        frame = frame.sort_values(stamp_col).reset_index(drop=True)
    except Exception:
        # Best effort only: unparseable timestamps must not abort the load.
        pass
    return frame

def _to_series(df: pd.DataFrame) -> Tuple[pd.Series, pd.Index]:
    """Extract the target column of ``df`` as a Series plus its index.

    The timestamp and target columns are chosen by the detection helpers.
    When the timestamp column exists and already has a datetime dtype, rows
    are sorted by it and the Series is indexed by a ``DatetimeIndex``;
    otherwise the Series keeps the default positional index.

    Args:
        df: Table holding the target values and, optionally, a timestamp.

    Returns:
        ``(series, series.index)`` where the series is named after the
        target column.
    """
    stamp_col = _detect_timestamp_column(df)
    excluded = (stamp_col,) if stamp_col else ()
    target_col = _detect_target_column(df, exclude=excluded)

    has_datetime = bool(stamp_col) and pd.api.types.is_datetime64_any_dtype(df[stamp_col])
    if not has_datetime:
        series = pd.Series(df[target_col].to_numpy(), name=target_col)
        return series, series.index

    ordered = df.sort_values(stamp_col).reset_index(drop=True)
    series = pd.Series(
        ordered[target_col].to_numpy(),
        index=pd.DatetimeIndex(ordered[stamp_col]),
        name=target_col,
    )
    return series, series.index

def build_and_fit(y: pd.Series, seasonal_periods=168):
    """Fit an additive-trend, additive-seasonal exponential smoothing model.

    If the smallest (NaN-ignoring) value of ``y`` is negative, the whole
    series is shifted up by ``abs(min) + 1e-6`` before fitting. The shift is
    recorded on the fitted results as ``_hw_offset`` (0.0 when no shift was
    applied) so that forecasts can be moved back afterwards.

    Args:
        y: Observed series to fit.
        seasonal_periods: Length of one seasonal cycle, in observations.

    Returns:
        ``(model, results)``: the unfitted model object and its fit results.
    """
    lowest = float(np.nanmin(y.to_numpy()))
    needs_shift = lowest < 0
    shift = abs(lowest) + 1e-6 if needs_shift else 0.0
    observed = y + shift if needs_shift else y

    smoother = ExponentialSmoothing(
        observed.astype(float),
        trend="add",
        seasonal="add",
        seasonal_periods=seasonal_periods,
        initialization_method="estimated",
    )
    fitted = smoother.fit(optimized=True, use_brute=True)
    # Remember the shift on the results object for forecast_with_fitted.
    fitted._hw_offset = shift
    return smoother, fitted

def forecast_with_fitted(res, steps: int) -> np.ndarray:
    """Forecast ``steps`` periods ahead and return a flat array.

    If ``res`` carries a non-zero ``_hw_offset`` attribute, that offset is
    subtracted from the forecast and the result is floored at 0.0. Without
    an offset the forecast is returned unchanged (no flooring).

    Args:
        res: Fitted results object exposing ``forecast(steps)``.
        steps: Number of periods to forecast.

    Returns:
        One-dimensional array of forecast values.
    """
    raw = res.forecast(steps)
    shift = getattr(res, "_hw_offset", 0.0)
    if not shift:
        return np.asarray(raw).ravel()

    # Undo the training-time shift, then floor the restored values at zero.
    restored = np.asarray(raw) - shift
    floored = np.maximum(restored, 0.0)
    return np.asarray(floored).ravel()

# --- Load data ---
train_df = _load(TRAIN_URL)
test_df = _load(TEST_URL)

# Reduce each table to its target series and the index that series carries.
y_train, train_index = _to_series(train_df)
y_test, test_index = _to_series(test_df)

# One forecast step per test row; an empty test set falls back to 744 steps.
h = len(y_test) or 744  # expected 744 hours (January)

# --- Build, fit, forecast (REQUIRED NAMES) ---
model, modelFit = build_and_fit(y_train, seasonal_periods=168)
pred = forecast_with_fitted(modelFit, h)

# Save artifacts for convenience (best effort: a failed write is ignored)
try:
    with open("model.pkl", "wb") as f:
        pickle.dump(modelFit, f)
except Exception:
    pass

# Save predictions, keyed by the test timestamps when they line up with the
# forecast horizon and by a plain 0..h-1 counter otherwise.
if isinstance(test_index, pd.DatetimeIndex) and len(test_index) == h:
    key_name, key_values = "timestamp", test_index
else:
    key_name, key_values = "t", np.arange(h)
pd.DataFrame({key_name: key_values, "prediction": pred}).to_csv("predictions.csv", index=False)

# Short run summary
for label, value in (
    ("Training observations:", len(y_train)),
    ("Forecast horizon:", h),
    ("First 5 predictions:", pred[:5]),
):
    print(label, value)

# (Optional) auto-download the CSV in Colab; silently skipped elsewhere
try:
    from google.colab import files
    files.download("predictions.csv")
except Exception:
    pass


Training observations: 8760
Forecast horizon: 744
First 5 predictions: [2018. 2018. 2018. 2018. 2018.]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>