<a href="https://colab.research.google.com/github/Chaudhari-Amar/econ8310-assignment1/blob/main/assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import warnings
warnings.filterwarnings("ignore")

import io
import sys
import pickle
from typing import Optional, Tuple

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

TRAIN_URL = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_train.csv"
TEST_URL  = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_test.csv"

def _detect_datetime_and_target(df: pd.DataFrame) -> Tuple[pd.Series, pd.Series]:
    """
    Heuristically detect the datetime column and the numeric target column.

    Datetime column: the first non-numeric column (in column order) that
    ``pd.to_datetime`` parses without error and that yields at least one
    non-null timestamp.

    Target column: the first numeric-dtype column other than the datetime
    column. If no column has a numeric dtype, the first remaining column
    that yields at least one number under ``pd.to_numeric(errors="coerce")``
    is used; entries that cannot be converted become NaN.

    The input frame is never modified.

    NOTE(review): "first numeric column" is purely positional; if other
    numeric columns precede the intended target, the wrong column is
    returned. Confirm against the actual CSV layout.

    Returns: (datetime_series, target_series) with the target cast to float.
    Raises: ValueError if either column cannot be detected.
    """
    dt_col = None
    parsed_dt = None
    for c in df.columns:
        column = df[c]
        # pd.to_datetime accepts plain numbers (read as epoch offsets), so a
        # numeric column would otherwise be mistaken for the timestamp column.
        if pd.api.types.is_numeric_dtype(column):
            continue
        try:
            candidate = pd.to_datetime(column, errors="raise", utc=False)
        except Exception:
            continue
        # A column that parses to nothing but NaT carries no usable timestamps.
        if candidate.notna().any():
            dt_col, parsed_dt = c, candidate
            break
    if dt_col is None:
        raise ValueError("Could not detect a datetime column. Please ensure one column contains timestamps.")

    # Choose a numeric target column that is not the datetime column.
    target = None
    for c in df.columns:
        if c != dt_col and pd.api.types.is_numeric_dtype(df[c]):
            target = df[c]
            break

    if target is None:
        # Nothing is numeric yet: try coercing the remaining columns, keeping
        # the coerced values local instead of writing them back into df.
        for c in df.columns:
            if c == dt_col:
                continue
            coerced = pd.to_numeric(df[c], errors="coerce")
            if coerced.notna().any():
                target = coerced
                break

    if target is None:
        raise ValueError("Could not detect a numeric target column.")

    return parsed_dt, target.astype(float)


def _prepare_hourly_series(df: pd.DataFrame) -> pd.Series:
    """
    Return an hourly, gap-free time series indexed by timestamp.

    The timestamp and target columns are detected with
    ``_detect_datetime_and_target``. Observations are sorted by time,
    rows sharing the same timestamp are averaged into one value, and the
    result is reindexed onto a continuous hourly range running from the
    earliest to the latest timestamp. Hours with no observation are filled
    by interpolation in both directions.

    NOTE(review): the hourly grid is anchored at the earliest timestamp, so
    observations that do not fall exactly on that grid are dropped by the
    reindex and replaced with interpolated values.

    Raises: ValueError (from detection) if the columns cannot be identified.
    """
    ts, y = _detect_datetime_and_target(df)
    s = pd.Series(y.values, index=pd.to_datetime(ts)).sort_index()

    # reindex() refuses an index with duplicate labels, so collapse repeated
    # timestamps to their mean before building the hourly grid.
    if s.index.has_duplicates:
        s = s.groupby(level=0).mean()

    # Use the offset object rather than the "H" string alias, which recent
    # pandas releases deprecate; the resulting hourly index is the same.
    start, end = s.index.min(), s.index.max()
    full_idx = pd.date_range(start=start, end=end, freq=pd.offsets.Hour())
    s = s.reindex(full_idx)

    # Fill gaps introduced by the reindex (and any NaN targets).
    if s.isna().any():
        s = s.interpolate(limit_direction="both")
    return s


def load_train_test(train_url: str = TRAIN_URL, test_url: str = TEST_URL) -> Tuple[pd.Series, int]:
    """
    Read the train and test CSVs and return (hourly train series, steps).

    ``steps`` is the number of rows in the test CSV. It falls back to 744
    when the test frame is empty or when datetime/target detection on the
    test frame raises.
    """
    fallback_steps = 744

    train_frame = pd.read_csv(train_url)
    test_frame = pd.read_csv(test_url)
    hourly_train = _prepare_hourly_series(train_frame)

    # Detection is run only as a sanity check on the test frame; its result
    # is not needed to count rows.
    try:
        _detect_datetime_and_target(test_frame)
    except Exception:
        return hourly_train, fallback_steps

    # A zero-row test frame also falls back to the default horizon.
    return hourly_train, len(test_frame) or fallback_steps

# Model building (model1) and fitting (modelFit)
def build_expsmooth(train_series: pd.Series,
                    seasonal_periods: int = 168,
                    trend: str = "add",
                    seasonal: str = "add") -> ExponentialSmoothing:
    """
    Construct an unfitted Holt-Winters (Exponential Smoothing) model.

    Defaults:
      - seasonal_periods=168, i.e. a weekly cycle for hourly data (24*7)
      - additive trend and additive seasonality

    Returns the ExponentialSmoothing model object; call ``.fit()`` on it
    to estimate the parameters.
    """
    model_spec = {
        "trend": trend,
        "seasonal": seasonal,
        "seasonal_periods": seasonal_periods,
    }
    return ExponentialSmoothing(train_series, **model_spec)


def fit_and_forecast(train_series: pd.Series,
                     steps: int,
                     seasonal_periods: int = 168) -> Tuple[ExponentialSmoothing, "HoltWintersResults", np.ndarray]:
    """
    Build the model, fit it, and forecast ``steps`` periods ahead.

    Returns a 3-tuple: (unfitted model, fitted results, forecast as a
    NumPy array).
    """
    spec = build_expsmooth(train_series, seasonal_periods=seasonal_periods)
    results = spec.fit(optimized=True, use_brute=True)
    # Convert the forecast to a bare array so callers get values only.
    horizon_values = np.asarray(results.forecast(steps))
    return spec, results, horizon_values

# Fetch the hourly training series and the forecast horizon (row count of
# the test CSV, with load_train_test's own fallback if that is unusable).
_train_series, _steps = load_train_test(train_url=TRAIN_URL, test_url=TEST_URL)

# Unfitted Holt-Winters specification with a 168-period (24*7) season.
model1 = build_expsmooth(_train_series, seasonal_periods=168)

# Estimate the smoothing parameters on the training series.
modelFit = model1.fit(optimized=True, use_brute=True)

# Out-of-sample forecast covering the whole test horizon.
_pred_array = modelFit.forecast(_steps)

# Expose the forecast as a plain list of Python floats.
pred = [float(value) for value in np.asarray(_pred_array)]

# Persist the fitted results to model1.pkl in the working directory so they
# can be reloaded without refitting.
with open("model1.pkl", "wb") as f:
    pickle.dump(modelFit, f)

SyntaxError: incomplete input (ipython-input-2025053874.py, line 133)