In [None]:
import os
import getpass
import numpy as np
import pandas as pd
from time import time
from functools import reduce
import matplotlib.pyplot as plt

# Sklearn
from sklearn.impute import SimpleImputer

from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Own library
import PCR_model as model

# PCR - Principal component regression

## Data processing


In [None]:
# Go up one level and change into the "Data" folder; the relative file names
# used by the following cells are resolved against this working directory.
os.chdir('../Data')

### 1. Load data

In [None]:
# Load the merged dataset (semicolon-separated) and index it by the parsed "Date" column.
# The file name is defined once and passed to read_csv, so the path variable and the
# file that is actually read cannot drift apart (they used to differ in letter case).
file_path = "merged_dataset.csv"
df = pd.read_csv(file_path, sep=";", parse_dates=["Date"]).set_index("Date")
df.shape

In [None]:
# Positional split of the loaded frame: the first column is the target
# (inflation), all remaining columns are the explanatory variables.
y, X = df.iloc[:, 0], df.iloc[:, 1:]

# Sanity-check the dimensions of both parts
print("y shape:", y.shape)
print("X shape:", X.shape)

### 2. Imputer manglende værdier

In [None]:
# Replace missing values with the median of each column.
# NOTE(review): the imputer is fitted on the whole sample, so the medians also use
# observations from the later evaluation period — confirm this is acceptable.
imputer = SimpleImputer(strategy="median")

X_imputed = imputer.fit_transform(X)  # impute NaN across the entire dataset

# SimpleImputer discards columns whose fitted statistic is NaN (columns without any
# observed value). Label the result with the columns that were actually kept, so the
# DataFrame construction cannot fail on a column-count mismatch.
kept_columns = X.columns[~np.isnan(imputer.statistics_)]

# Convert back to a DataFrame with column names and the original date index
X = pd.DataFrame(X_imputed, columns=kept_columns, index=df.index)

### 3. Standardiser data 

In [None]:
# Fit the scaler on the feature frame first, then produce the standardised matrix
# from the fitted object; both the scaler and X_scaled stay available afterwards.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Forecast

In [None]:
# Call the PCR forecaster from the local PCR_model module with the feature frame,
# the target series, the scaler object and the horizon/window settings below.
df_pcr_rolling_forecast = model.pcr_forecaster(
    X=X,
    y=y,
    forecast_horizon=12,
    last_observation_date="2024-12-01",
    scaler=scaler,
    window_length=108,
    variance_threshold=0.999,
    verbose=True,
)

In [None]:
# Show the object returned by model.pcr_forecaster as this cell's output
df_pcr_rolling_forecast

# Forecasting model with a rolling window

In [None]:
# Run the rolling forecast from the local PCR_model module over the given date range
forecast_results = model.run_rolling_forecast(
    X,
    y,
    forecast_horizon=12,
    start_date="2023-01-01",
    end_date="2024-12-01",
)

In [None]:
# Evaluate the rolling forecasts against the target series y. The helper returns two
# objects: eval_df (shown as the cell output) and merged_df (used by the cells below).
eval_df, merged_df = model.evaluate_forecasts(forecast_results, y)
eval_df

In [None]:
# Keep only the rows whose "Horizon" value equals 0 and show them
is_first_horizon = merged_df["Horizon"].eq(0)
h1 = merged_df.loc[is_first_horizon]
h1

In [None]:
# Average forecast and average realised value over the same block of h1 rows.
# The slice 11:24 covers up to 13 rows, so dividing its sum by a hard-coded 12
# overstated the average; .mean() divides by the number of rows actually selected.
# NOTE(review): if the intent was exactly the last 12 rows, the slice should start
# at 12 instead of 11 — confirm against the dates in h1.
window = slice(11, 24)
sum_r = h1["Inflationsforecast"].iloc[window].mean()
sum_t = h1["y_true"].iloc[window].mean()
print(sum_r)
print(sum_t)

In [None]:
# Plot the forecast error and the squared error of the h1 rows against "Dato".
plt.figure(figsize=(10, 5))
plt.plot(h1["Dato"], h1["error"], label="error", marker="o")
# The legend entry names the column that is plotted (squared_error, not abs_error)
plt.plot(h1["Dato"], h1["squared_error"], label="squared_error", marker="x")
plt.legend()
plt.title("PCR: Error")
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Compare the realised values with the forecasts of the h1 rows over "Dato".
# Each entry is (column in h1, legend label, marker); the order fixes the draw order.
plt.figure(figsize=(10, 5))
for column, legend_label, marker_style in (
    ("y_true", "Faktisk", "o"),
    ("Inflationsforecast", "Forecast (h=1)", "x"),
):
    plt.plot(h1["Dato"], h1[column], label=legend_label, marker=marker_style)
plt.legend()
plt.title("PCR: Forecast vs Faktisk Inflation (h=1)")
plt.grid(True)
plt.tight_layout()
plt.show()