In [1]:
# Dependencies
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from statsmodels.tsa.api import VAR

In [37]:
# Load data (FRED-MD)
df = pd.read_csv('current.csv')

# Remove the first row (transformation codes)
transformation_codes = df.iloc[0]  # Transformation codes can be applied if needed
df = df.iloc[1:]

# Set the first column as the index and datetime
df.set_index(df.columns[0], inplace=True)
df.index = pd.to_datetime(df.index)
original_data = df.copy()

# Store inflation
FORECAST_HORIZON = 6
inflation = df.dropna()['CPIAUCSL'].copy()

# Transform and standardize the data
df_logged = (np.sign(df) * np.log1p(np.abs(df))).dropna()

# Standardize based on training data only to avoid data leakage
df_norm = (df_logged - df_logged.mean()) / df_logged.std()

In [38]:
# Apply PCA on the training data
pca = PCA(n_components=0.95)
pca_data = pca.fit_transform(df_norm)
train_pca = pd.DataFrame(pca_data, index=df_norm.index)


In [39]:
# Convert data into sequences
def create_sequences(X, y, time_steps=10):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:i+time_steps])
        ys.append(y[i+time_steps])
    return np.array(Xs), np.array(ys)

time_steps = 4  # Have one quarter of context
X, y = create_sequences(train_pca, inflation, time_steps)

# Split into train and test
split = len(X) - FORECAST_HORIZON # Last quarter for test
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

  ys.append(y[i+time_steps])


In [43]:
# Build the LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Dropout

# Create model
model = Sequential([
    LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    Dense(units=1)
])

# Compile and train
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=50, verbose=1)

Epoch 1/50


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 46506.8164
Epoch 2/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 46450.9102
Epoch 3/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 45654.8242
Epoch 4/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 45142.9102
Epoch 5/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 43659.4453
Epoch 6/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 43899.1211
Epoch 7/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 43800.0469
Epoch 8/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 43645.0469 
Epoch 9/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 40626.5312
Epoch 10/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/ste

<keras.src.callbacks.history.History at 0x7f64c84ac530>

In [None]:
# Evaluate the model
y_pred = model.predict(X_test)
# Inverse normalize the prediction
y_pred = y_pred * y_test.std() + y_test.mean()
y_pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


array([[358.82942281],
       [358.82628925],
       [358.83148488],
       [358.83271176],
       [358.83385124],
       [358.8325758 ]])

In [14]:
# Predictions
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)
print(train_pred.shape, test_pred.shape)
# Set the index
train_pred = pd.Series(train_pred.flatten(), index=train.index[:split])
test_pred = pd.Series(test_pred.flatten(), index=test.index)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
(370, 1) (6, 1)


In [19]:
test_pred

sasdate
2024-07-01    4.807004
2024-08-01    4.778729
2024-09-01    4.754651
2024-10-01    4.730801
2024-11-01    4.709666
2024-12-01    4.716886
dtype: float32