In [1]:
import joblib
import numpy as np
import pandas as pd

class NaiveTimeSeriesModel:
    """Naive (persistence) forecaster that replays the last ``lag`` observed rows.

    With ``lag=1`` every forecast step repeats the final row seen by ``fit``.
    With ``lag=k`` the final ``k`` rows are cycled in their original order
    until the requested number of steps has been produced.
    """

    def __init__(self, lag=1):
        """Create an unfitted model.

        Args:
            lag: Number of trailing rows to remember and cycle through.
                Must be an integer >= 1.

        Raises:
            ValueError: If ``lag`` is not a positive integer.
        """
        # lag=0 would make ``iloc[-0:]`` keep the *entire* frame, and a negative
        # lag would drop leading rows instead of keeping trailing ones.
        if isinstance(lag, bool) or not isinstance(lag, (int, np.integer)) or lag < 1:
            raise ValueError("lag must be a positive integer.")
        self.lag = lag
        self.last_observation = None

    def fit(self, data):
        """Remember the last ``lag`` rows of ``data``.

        If ``data`` has fewer than ``lag`` rows, all of its rows are kept.

        Args:
            data: pandas DataFrame with at least one row.

        Raises:
            ValueError: If ``data`` is not a DataFrame or has no rows.
        """
        if not isinstance(data, pd.DataFrame):
            raise ValueError("Input data must be a pandas DataFrame.")
        if len(data) == 0:
            # Fail here with a clear message rather than later inside forecast().
            raise ValueError("Input data must contain at least one row.")
        # Copy so later mutation of the caller's frame cannot alter the model.
        self.last_observation = data.iloc[-self.lag:].copy()

    def forecast(self, steps=1):
        """Return ``steps`` forecast rows built from the remembered rows.

        Args:
            steps: Number of rows to forecast; must be >= 0.

        Returns:
            DataFrame with the same columns and per-column dtypes as the
            fitted data, indexed 1..steps.

        Raises:
            ValueError: If the model has not been fitted or ``steps`` is negative.
        """
        if self.last_observation is None:
            raise ValueError("Model has not been fitted yet. Call `fit` with data.")
        if steps < 0:
            raise ValueError("steps must be non-negative.")

        # Select rows positionally instead of going through ``.values``:
        # a mixed-dtype frame would otherwise collapse to one object array
        # and every forecast column would lose its dtype.
        cycle_positions = np.arange(steps) % len(self.last_observation)
        forecast_frame = self.last_observation.iloc[cycle_positions]
        forecast_frame.index = pd.RangeIndex(1, steps + 1)
        return forecast_frame

In [2]:
# Load the dataset from the cache directory, then split it on the calendar
# year of its index: rows before 2023 are training data, rows in 2023 are test data.
data = pd.read_parquet('../cache/encoded_99q_scaled.parquet')
train_data = data.loc[data.index.year < 2023]
test_data = data.loc[data.index.year == 2023]

In [3]:
# Fit the lag-1 naive baseline on the training split and show a 5-step forecast.
naive = NaiveTimeSeriesModel(lag=1)
naive.fit(train_data)
naive_predictions = naive.forecast(5)
naive_predictions

Unnamed: 0,Timestamp,Actor1Country,Actor1GeoCountry,Actor1Type,Actor2Country,Actor2GeoCountry,Actor2Type,ActionCountry,EventType,GoldsteinScale,NumSources,NumArticles,AvgTone,Magnitude,Impact,Impact_bin,pct_change_15min,pct_change_30min,pct_change_24h,AbsChange
1,1.170298,9,13,9,13,11,9,11,10,-0.104944,-0.677247,-0.106454,1.081663,-0.657705,0.034831,Neutral,-0.012071,-0.018106,0.126743,-0.649587
2,1.170298,9,13,9,13,11,9,11,10,-0.104944,-0.677247,-0.106454,1.081663,-0.657705,0.034831,Neutral,-0.012071,-0.018106,0.126743,-0.649587
3,1.170298,9,13,9,13,11,9,11,10,-0.104944,-0.677247,-0.106454,1.081663,-0.657705,0.034831,Neutral,-0.012071,-0.018106,0.126743,-0.649587
4,1.170298,9,13,9,13,11,9,11,10,-0.104944,-0.677247,-0.106454,1.081663,-0.657705,0.034831,Neutral,-0.012071,-0.018106,0.126743,-0.649587
5,1.170298,9,13,9,13,11,9,11,10,-0.104944,-0.677247,-0.106454,1.081663,-0.657705,0.034831,Neutral,-0.012071,-0.018106,0.126743,-0.649587


In [4]:
# Persist the fitted baseline so it can be reloaded without refitting.
# NOTE(review): pickle-based serializers store classes by reference, and
# NaiveTimeSeriesModel is defined inside this notebook. Whatever process loads
# this file must be able to resolve the same class; consider moving the class
# into an importable module before relying on the saved artifact elsewhere.
joblib.dump(naive, '../cache/naive_model.joblib')

['../cache/naive_model.joblib']