In [None]:
import pandas as pd

# Load the cached parquet file and discard its 'Source' column in one step,
# then preview the first rows of the resulting frame.
data = pd.read_parquet('../cache/encoded_scaled.parquet').drop(columns=['Source'])
data.head()

In [None]:
# X: every column of `data` except the three pct_change_* columns.
X = data.drop(['pct_change_15min', 'pct_change_30min', 'pct_change_24h'], axis='columns')
# y: the 30-minute percentage change column.
y = data.loc[:, 'pct_change_30min']

# sample: the last 100000 rows of `data`, keeping pct_change_30min but without
# the other two pct_change_* columns and without 'Timestamp'.
sample = (
    data
    .tail(100000)
    .drop(['pct_change_15min', 'pct_change_24h', 'Timestamp'], axis='columns')
)
sample.head()

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Quantify multicollinearity with variance inflation factors (VIF).
#
# variance_inflation_factor regresses column i of the matrix it receives on all
# the remaining columns and does not add an intercept on its own. A constant
# column is therefore appended so that each auxiliary regression has an
# intercept; without it the reported VIFs are uncentered and can be badly
# inflated for columns whose mean is not zero.
# NOTE(review): `sample` is only mean-imputed in a later cell; if it still holds
# NaNs at this point the VIFs will not be meaningful - confirm.
vif_design = sample.assign(_vif_intercept=1.0).to_numpy(dtype=float)

# Only the original columns are reported; the appended intercept sits in the
# last position of vif_design and is never used as the regressed column.
vif_data = pd.DataFrame({
    "Variable": sample.columns,
    "VIF": [variance_inflation_factor(vif_design, i) for i in range(sample.shape[1])],
})
vif_data

In [None]:
from sklearn.decomposition import PCA

# Reduce dimensionality, keeping the components that explain 95% of the variance.
#
# The decomposition is fit on the feature columns only: the three pct_change_*
# columns are the quantities being predicted, and 'Timestamp' is excluded from
# the model inputs elsewhere in this notebook, so none of them should shape the
# principal components.
# NOTE(review): PCA does not accept missing values; if `data` contains NaNs they
# need to be imputed before this cell - confirm.
pca_input = data.drop(
    columns=['pct_change_15min', 'pct_change_30min', 'pct_change_24h', 'Timestamp']
)
pca = PCA(n_components=0.95)  # a float in (0, 1) selects components by explained variance
reduced_data = pca.fit_transform(pca_input)

### Dynamic Factor Model (DFM)

In [None]:
from statsmodels.tsa.statespace.dynamic_factor import DynamicFactor

# Estimate a dynamic factor model on every column of `sample`, using
# one unobserved factor (k_factors=1) with factor_order=1, then print
# the text summary of the fitted results.
model = DynamicFactor(endog=sample, k_factors=1, factor_order=1)
results = model.fit()
print(results.summary())

In [None]:
# Number of out-of-sample steps requested from the fitted `results` object.
forecast_steps = 5
forecast = results.get_forecast(steps=forecast_steps)

# Point forecasts, and the interval bounds reported for them.
predicted_values = forecast.predicted_mean
prediction_conf_int = forecast.conf_int()

# Print each table under its own heading (heading and table on separate lines).
print("Predicted Values:", predicted_values, sep="\n")
print("\nConfidence Intervals for Predictions:", prediction_conf_int, sep="\n")

### State-Space Model

In [None]:
# Display the column labels currently present in `sample`.
sample.columns

In [None]:
from statsmodels.tsa.statespace.varmax import VARMAX

# Fill every missing entry with the mean of its column. This rebinds `sample`,
# so later cells see the imputed frame.
sample = sample.fillna(value=sample.mean())

# Columns handed to the model, and the 30-minute change kept as a separate series.
# NOTE(review): `target` is not passed to VARMAX below, so the fitted model
# describes only the five feature columns - confirm this is intended.
features = sample.loc[:, ['Actor1Country', 'Actor2Country', 'GoldsteinScale', 'NumSources', 'NumArticles']]
target = sample.loc[:, 'pct_change_30min']

# VARMAX with order=(1, 1): one autoregressive lag and one moving-average lag.
# disp=False silences the optimizer's progress output during fitting.
model = VARMAX(endog=features, order=(1, 1))
model_fitted = model.fit(disp=False)


In [None]:
# Produce out-of-sample forecasts from the fitted VARMAX results and print them.
forecast_steps = 10  # forecast horizon, in steps
forecast = model_fitted.forecast(forecast_steps)
print(forecast)

### Multivariate Exponential Smoothing

### Canonical Correlation Analysis (CCA)

### 11