In [5]:
# Dependencies
import pandas as pd
import numpy as np

In [6]:
# Load data (FRED-MD)
df = pd.read_csv(
    '/home/js/macroeconvue/nowcasting/current.csv',
    index_col='sasdate'
    )
# Drop target variable (CPIAUCSL)
target = df['CPIAUCSL'].copy()

In [7]:
# Make the series stationary
def transform_series(series, code):
    if code == 1:
        return series  # No transformation
    elif code == 2:
        return series.diff().dropna()  # First difference
    elif code == 3:
        return series.diff().diff().dropna()  # Second difference
    elif code == 4:
        return np.log(series).dropna()  # Logarithm
    elif code == 5:
        return np.log(series).diff().dropna()  # First difference of logarithm
    elif code == 6:
        return np.log(series).diff().diff().dropna()  # Second difference of logarithm
    elif code == 7:
        return series.pct_change().dropna()  # Percentage change
    else:
        raise ValueError(f"Unknown transformation code: {code}")

transformed_data = {}
transformation_codes = df.iloc[0]  # Assuming the first row contains the codes
data = df.iloc[1:]  # The actual data starts from the second row

for column in data.columns:
    code = transformation_codes[column]
    transformed_data[column] = transform_series(data[column], code)

df = pd.DataFrame(transformed_data).dropna(how='all')  # Drop rows with all NaN values

In [8]:
from sklearn.preprocessing import StandardScaler

# Normalize the data
df = (df - df.mean()) / df.std()
# Drop rows with NaN values after transformation
df = df.dropna()
# Get subsample (everything but last year)
df = df.iloc[:-12]


In [9]:
# Apply PCA fto look for the number of components to retain
from sklearn.decomposition import PCA
# import matplotlib.pyplot as plt

# pca = PCA()
# pca.fit(df)

# cumulative_variance = pca.explained_variance_ratio_.cumsum()

# # Plot cumulative explained variance
# plt.figure(figsize=(10, 6))
# plt.plot(cumulative_variance, marker='o', linestyle='--')
# plt.xlabel('Number of Components')
# plt.ylabel('Cumulative Explained Variance')
# plt.title('Explained Variance by Number of Components')
# plt.grid(True)
# plt.show()


In [10]:
# Apply PCA to keep 90% of variance
pca = PCA(n_components=0.90)
pca.fit(df)
data = pca.transform(df)
print(f"PCA len: {len(data)}, Original: {len(df)}")

# Look at relationship between original features nad cleanred_df
loadings = pca.components_
loadings_df = pd.DataFrame(loadings.T, index=df.columns, columns=[f'PC{i+1}' for i in range(loadings.shape[0])])
display(loadings_df)

PCA len: 378, Original: 378


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,...,PC30,PC31,PC32,PC33,PC34,PC35,PC36,PC37,PC38,PC39
RPI,0.012627,0.088826,-0.064423,-0.114434,0.043183,-0.268795,-0.309271,0.406801,-0.144272,-0.110089,...,0.083199,-0.058561,-0.002593,-0.012743,-0.032287,-0.048993,-0.087040,0.015759,0.060748,-0.013758
W875RX1,0.012698,0.125040,-0.069472,-0.065059,-0.064132,-0.113268,-0.026840,0.133875,0.002898,0.058705,...,0.127318,0.273478,-0.098445,-0.362908,-0.072006,-0.057360,0.187569,-0.071045,-0.137735,0.085632
DPCERA3M086SBEA,0.099447,0.095869,-0.099368,0.048483,0.018754,0.005826,-0.073929,0.046479,-0.046922,0.016520,...,-0.049994,-0.106202,-0.060999,0.039548,-0.059059,0.082720,-0.006850,-0.105442,-0.120503,-0.058497
CMRMTSPLx,0.072797,0.097884,-0.099025,0.030969,0.007682,-0.040521,-0.003400,0.031026,-0.041557,-0.012060,...,-0.036794,-0.078211,-0.020531,0.015241,0.008228,0.036622,-0.091646,-0.128966,-0.013479,0.017506
RETAILx,0.124735,0.067249,-0.082064,0.007449,0.014891,-0.000961,-0.115412,0.089246,-0.051885,-0.001547,...,-0.103753,-0.095476,-0.067517,0.030923,-0.096621,0.034195,-0.035685,-0.174957,-0.136561,-0.048969
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UMCSENTx,0.024300,0.016735,-0.048159,0.037107,-0.078552,0.098853,-0.157247,0.063427,0.034978,0.100359,...,0.011880,0.321511,0.002838,0.393946,0.423744,0.048407,-0.241205,0.035267,0.097881,-0.043795
DTCOLNVHFNM,0.016784,-0.000143,-0.017665,0.101138,0.187414,0.269285,0.104807,0.348211,0.013876,-0.046519,...,-0.148101,0.071346,-0.014960,0.034650,0.069497,0.102337,0.312145,-0.086091,0.360835,-0.168801
DTCTHFNM,0.018820,-0.004483,-0.008027,0.047322,0.262802,0.312254,0.180416,0.448072,0.134205,-0.008469,...,0.075045,-0.028386,0.032730,-0.036002,-0.053509,-0.062759,-0.128561,0.084642,-0.091913,0.025542
INVEST,0.016303,-0.015234,0.004236,-0.024050,0.034324,-0.178973,0.035653,-0.040940,0.048541,0.112709,...,0.221485,-0.132140,-0.137724,-0.076187,-0.125725,0.334812,0.052262,-0.065640,-0.057748,-0.212263


In [12]:
# Prepare data for the LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

# Convert data into sequences
def create_sequences(X, y, time_steps=10):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:i+time_steps])
        ys.append(y[i+time_steps])
    return np.array(Xs), np.array(ys)

time_steps = 12  # Choose based on your data
X, y = create_sequences(data, target.values, time_steps)

# # Split into train and test
split = int(0.8 * len(X))
split = len(data) - 12  # Use the last 12 months for testing
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


In [13]:
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2

# Build the LSTM model
model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, kernel_regularizer=l2(0.01), input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, activation='relu', return_sequences=False, kernel_regularizer=l2(0.01)),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1)
])

# Compile and train
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Evaluate the model
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

2025-03-23 14:34:43.076959: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
  super().__init__(**kwargs)


Epoch 1/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - loss: 4910.3560
Epoch 2/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 3176.6904
Epoch 3/50
[1m 1/12[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - loss: 1324.5220

2025-03-23 14:34:45.520869: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-03-23 14:34:45.682266: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1168.2935
Epoch 4/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 846.7857
Epoch 5/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 506.4486
Epoch 6/50


2025-03-23 14:34:45.995630: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 404.2965
Epoch 7/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 309.5131
Epoch 8/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 235.3949
Epoch 9/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 168.3443
Epoch 10/50
[1m 1/12[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - loss: 136.5215

2025-03-23 14:34:46.731224: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 148.6937
Epoch 11/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 173.8573
Epoch 12/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 129.8931
Epoch 13/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 139.4882
Epoch 14/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 96.1926
Epoch 15/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 126.1105
Epoch 16/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 120.4164
Epoch 17/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 99.6067
Epoch 18/50
[1m 1/12[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - loss: 116.2284

2025-03-23 14:34:48.048355: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m-0s[0m -31438us/step - loss: 119.4588
Epoch 19/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 97.7933
Epoch 20/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 77.9023
Epoch 21/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 80.7318
Epoch 22/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 76.9491
Epoch 23/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 90.7536
Epoch 24/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 63.7607
Epoch 25/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 105.3456
Epoch 26/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 87.6969
Epoch 27/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step -

2025-03-23 14:34:50.487800: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 72.0758
Epoch 35/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 77.1307
Epoch 36/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 48.6789
Epoch 37/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 65.2697
Epoch 38/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 45.8206
Epoch 39/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 59.0567
Epoch 40/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 43.9637
Epoch 41/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 45.2865
Epoch 42/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 56.8996
Epoch 43/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 

ValueError: math domain error

In [None]:
# Evaluate performance
from sklearn.metrics import mean_squared_error
train_mse = mean_squared_error(y_train, train_predict)
test_mse = mean_squared_error(y_test, test_predict)
print(f"Train MSE: {train_mse}")
print(f"Test MSE: {test_mse}")
# Inverse transform to get actual values
# train_predict = target.iloc[:split].values + train_predict.flatten()
# test_predict = target.iloc[split:].values + test_predict.flatten()
# # Plot the results
# import matplotlib.pyplot as plt
# plt.figure(figsize=(14, 7)) 
# plt.plot(target.index[:split], target.iloc[:split], label='Train Actual', color='blue')
# plt.plot(target.index[split:], target.iloc[split:], label='Test Actual', color='orange')
# plt.plot(target.index[:split], train_predict, label='Train Predicted', color='green')
# plt.plot(target.index[split:], test_predict, label='Test Predicted', color='red')
# plt.title('LSTM Model Predictions vs Actual')
# plt.xlabel('Date')
# plt.ylabel('CPIAUCSL')
# plt.legend()
# plt.show()

Train MSE: 67.09467274807682
Test MSE: 4706.87229854542


In [None]:
# Get RMSE for the test set
from sklearn.metrics import mean_squared_error
test_rmse = np.sqrt(mean_squared_error(y_test, test_predict))
print(f"Test RMSE: {test_rmse}")
