In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten, Dropout, BatchNormalization, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
from tensorflow.keras.callbacks import EarlyStopping
from scipy.signal import medfilt
from keras.regularizers import l2
from sklearn.decomposition import PCA
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import KFold
import joblib

# import innvestigate
# import innvestigate.utils as iutils


In [None]:
pip install innvestigate

In [None]:
pip install scikeras

In [None]:
!pip uninstall -y scikit-learn
!pip install scikit-learn==1.5.2

In [None]:
dataset='/content/MeanderingInterploatedUpdated.csv'
df=pd.read_csv(dataset)

In [None]:
df['year'] = df['name'].apply(lambda x: int(x.split('-')[0]))
df['quarter'] = df['name'].apply(lambda x: int(x.split('-')[1]))

# df_encoded = pd.get_dummies(df, columns=['quarter'], drop_first=True)


In [None]:
targets = ['c1_dist', 'c2_dist', 'c3_dist', 'c4_dist','c7_dist','c8_dist']
ts= df[targets]

In [None]:
for i in ts.columns:
  ts[i]=medfilt(ts[i], kernel_size=3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ts[i]=medfilt(ts[i], kernel_size=3)


In [None]:
scaler_ts = StandardScaler()
ts_normalized=scaler_ts.fit_transform(ts)

In [None]:
def add_time_features(df, scaler):
    # Cyclical encoding for quarter
    df['quarter_sin'] = np.sin(2 * np.pi * df['quarter'] / 4)
    df['quarter_cos'] = np.cos(2 * np.pi * df['quarter'] / 4)

    # # Cyclical encoding for year (you can normalize the year value if needed)
    # df['year_sin'] = np.sin(2 * np.pi * (df['year'] - df['year'].min()) / (df['year'].max() - df['year'].min()))
    # df['year_cos'] = np.cos(2 * np.pi * (df['year'] - df['year'].min()) / (df['year'].max() - df['year'].min()))

    df['year_scaled'] = scaler.fit_transform(df[['year']])  # Use double brackets to make it 2D    return df

    return df

scaler_year=StandardScaler()
df = add_time_features(df, scaler_year)


In [None]:
df.head()

Unnamed: 0.1,Unnamed: 0,name,c1_dist,c2_dist,c3_dist,c4_dist,c5_dist,c6_dist,c7_dist,c8_dist,year,quarter,quarter_sin,quarter_cos,year_scaled
0,0,1988-1,80.622577,2.236068,1118.456079,1087.443332,2572.616567,2748.070232,2197.682643,2400.496824,1988,1,1.0,6.123234000000001e-17,-1.67816
1,1,1988-2,89.201242,8.732515,1112.007378,1089.762294,2571.456646,2747.082249,2190.347995,2392.594177,1988,2,1.224647e-16,-1.0,-1.67816
2,2,1988-3,90.426766,10.77033,1091.650127,1090.093574,2567.532084,2743.753998,2166.673026,2367.186516,1988,3,-1.0,-1.83697e-16,-1.67816
3,3,1988-4,75.0,8.062258,892.45336,968.810095,2475.42259,2671.696465,1879.447259,2071.386251,1988,4,-2.449294e-16,1.0,-1.67816
4,4,1989-1,75.485233,8.156297,852.133189,926.992124,2424.462205,2622.730393,1848.25087,2040.697558,1989,1,1.0,6.123234000000001e-17,-1.584681


In [None]:
for col in ts.columns:
    series = df[col]

    # Additive decomposition
    additive_result = seasonal_decompose(series, model="additive", period=7)
    # Multiplicative decomposition
    multiplicative_result = seasonal_decompose(series, model="multiplicative", period=7)

    # Plot additive decomposition
    plt.figure(figsize=(12, 8))
    plt.suptitle(f"Additive Decomposition for {col}", fontsize=16)
    plt.subplot(411)
    plt.plot(series, label="Observed")
    plt.legend(loc="upper left")
    plt.subplot(412)
    plt.plot(additive_result.trend, label="Trend")
    plt.legend(loc="upper left")
    plt.subplot(413)
    plt.plot(additive_result.seasonal, label="Seasonal")
    plt.legend(loc="upper left")
    plt.subplot(414)
    plt.plot(additive_result.resid, label="Residual")
    plt.legend(loc="upper left")
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

    # Plot multiplicative decomposition
    plt.figure(figsize=(12, 8))
    plt.suptitle(f"Multiplicative Decomposition for {col}", fontsize=16)
    plt.subplot(411)
    plt.plot(series, label="Observed")
    plt.legend(loc="upper left")
    plt.subplot(412)
    plt.plot(multiplicative_result.trend, label="Trend")
    plt.legend(loc="upper left")
    plt.subplot(413)
    plt.plot(multiplicative_result.seasonal, label="Seasonal")
    plt.legend(loc="upper left")
    plt.subplot(414)
    plt.plot(multiplicative_result.resid, label="Residual")
    plt.legend(loc="upper left")
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

In [None]:
# redundant_features= pd.concat([ts, ts_ema], axis=1)
# redundant_features_normalized=scaler.fit_transform(redundant_features)

In [None]:
pca = PCA()
pca_components = pca.fit_transform(ts_normalized)

# Explained variance ratio for each component
explained_variance = pca.explained_variance_ratio_

# Cumulative explained variance
cumulative_variance = explained_variance.cumsum()

# Print explained variance for each component
for i, var in enumerate(explained_variance):
    print(f"Component {i+1}: {var:.2%} explained variance")
print(f"Cumulative explained variance: {cumulative_variance}")

Component 1: 61.97% explained variance
Component 2: 32.01% explained variance
Component 3: 5.34% explained variance
Component 4: 0.56% explained variance
Component 5: 0.11% explained variance
Component 6: 0.01% explained variance
Cumulative explained variance: [0.61974363 0.93986644 0.99325438 0.99887277 0.99992648 1.        ]


In [None]:
n_components = (cumulative_variance < 0.98).sum() + 1
print(f"Number of components to retain: {n_components}")

Number of components to retain: 3


In [None]:
pca = PCA(n_components=n_components)
reduced_features = pca.fit_transform(ts_normalized)

reduced_features_df = pd.DataFrame(reduced_features, columns=[f"PC{i+1}" for i in range(n_components)])


In [None]:
def create_windows(features, output, time_features, input_steps):
    X, y = [], []
    for i in range(len(features) - input_steps):
        # Use the features of the current window, including cyclical time features
        X.append(np.concatenate([features[i:i + input_steps], time_features[i:i + input_steps]], axis=1))

        # Output only the features of the next step (output at i + input_steps)
        y.append(output[i + input_steps])

        if i == 0:  # Debugging information for the first window
            print(f"Time features (1st window):\n{time_features[i:i + input_steps]}")
            print(f"Input features (1st window):\n{X[0]}")
            print(f"Output features (1st step after input window):\n{y[0]}")

    return np.array(X), np.array(y)


# Assuming `data` is your time series data, and `time_features` is the DataFrame containing the cyclical time features
input_steps = 4
output_steps = 1


In [None]:
time_features = df[['quarter_sin', 'quarter_cos', 'year_scaled']].values
print(f'time_feat:{time_features.shape}')
scaler_time=StandardScaler()
normalized_time_data = scaler_time.fit_transform(time_features)

X, y = create_windows(reduced_features_df, ts_normalized ,normalized_time_data, input_steps)

time_feat:(147, 3)
Time features (1st window):
[[ 1.40942772e+00  9.65384094e-03 -1.67815976e+00]
 [ 1.90920617e-16 -1.40946078e+00 -1.67815976e+00]
 [-1.40942772e+00  9.65384094e-03 -1.67815976e+00]
 [-3.26894726e-16  1.42876846e+00 -1.67815976e+00]]
Input features (1st window):
[[ 2.65728213e+00 -2.27517620e+00  1.11816363e+00  1.40942772e+00
   9.65384094e-03 -1.67815976e+00]
 [ 2.73707612e+00 -1.91175990e+00  1.13536030e+00  1.90920617e-16
  -1.40946078e+00 -1.67815976e+00]
 [ 2.42255980e+00 -1.84368953e+00  9.82191848e-01 -1.40942772e+00
   9.65384094e-03 -1.67815976e+00]
 [-1.80028181e+00 -1.34339584e+00 -3.67389752e-01 -3.26894726e-16
   1.42876846e+00 -1.67815976e+00]]
Output features (1st step after input window):
[-1.42345209 -0.93232542 -1.26601734 -0.78307871 -1.20903361 -1.12829837]


In [None]:
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [None]:
print(y_train.shape)

(114, 6)


In [None]:
# Define the TCN model
model = Sequential()

# Add Conv1D layers with padding to prevent size issues
model.add(Conv1D(8, 3, activation='relu', padding='same', input_shape=(input_steps, 6), kernel_regularizer=l2(0.06)))
model.add(BatchNormalization())
model.add(Dropout(0.05))
model.add(Conv1D(16, 3, activation='relu', padding='same', kernel_regularizer=l2(0.06)))
model.add(BatchNormalization())
model.add(Dropout(0.05))
# model.add(Conv1D(8, 3, activation='relu', padding='same', kernel_regularizer=l2(0.06)))
# model.add(BatchNormalization())
# model.add(Dropout(0.03))

# Use Global Average Pooling to collapse the time dimension
model.add(GlobalAveragePooling1D())

# Dense layer for predicting the next step (output features for the next step)
model.add(Dense(6, kernel_regularizer=l2(0.08)))  # Assuming 8 features in the output for a single step

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae', 'accuracy'])

# Print model summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:

# lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

kfold = KFold(n_splits=6, shuffle=True)
for train_idx, val_idx in kfold.split(X_train, y_train):
    X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
    y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]
    model.fit(X_train_fold, y_train_fold, validation_data=(X_val_fold, y_val_fold), epochs=100, callbacks=[early_stopping])


Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2s/step - accuracy: 0.2775 - loss: 2.8358 - mae: 0.7552 - val_accuracy: 0.1579 - val_loss: 2.6797 - val_mae: 0.7654
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3062 - loss: 2.7719 - mae: 0.7273 - val_accuracy: 0.1579 - val_loss: 2.6342 - val_mae: 0.7497
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3415 - loss: 2.7314 - mae: 0.7235 - val_accuracy: 0.1053 - val_loss: 2.5903 - val_mae: 0.7340
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.2685 - loss: 2.6378 - mae: 0.6923 - val_accuracy: 0.1053 - val_loss: 2.5493 - val_mae: 0.7190
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3311 - loss: 2.4822 - mae: 0.6472 - val_accuracy: 0.1579 - val_loss: 2.5094 - val_mae: 0.7042
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━

In [None]:
# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.legend()
# # plt.show()

In [None]:
# plt.plot(history.history['accuracy'], label='Training accuracy')
# plt.plot(history.history['val_accuracy'], label='Validation accuracy')
# plt.legend()
# plt.show()

In [None]:
test_loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.0000e+00 - loss: 1.2038 - mae: 0.8073
Test Loss: [1.2037951946258545, 0.8073081970214844, 0.0]


In [None]:
# Predict on Testing data
y_pred_test = model.predict(X_test)

# Flatten the predictions and actual values
y_test_flat = y_test.flatten()
y_pred_test_flat = y_pred_test.flatten()

# Calculate MAE and MSE
mae_test = mean_absolute_error(y_test_flat, y_pred_test_flat)
mse_test = mean_squared_error(y_test_flat, y_pred_test_flat)

# Calculate the range of the actual data
data_range = y_test_flat.max() - y_test_flat.min()

# Convert MAE and MSE to percentage of the data range
mae_percentage = (mae_test / data_range) * 100
mse_percentage = (mse_test / data_range) * 100

# Print the results
print(f"Testing MAE: {mae_test:.4f}")
print(f"Testing MAE as % of Data Range: {mae_percentage:.2f}%")
print(f"Testing MSE: {mse_test:.4f}")
print(f"Testing MSE as % of Data Range: {mse_percentage:.2f}%")

# Optionally include R² for completeness
r2_train = r2_score(y_test_flat, y_pred_test_flat)
print(f"Testing R²: {r2_train:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
Testing MAE: 0.4052
Testing MAE as % of Data Range: 10.25%
Testing MSE: 0.3085
Testing MSE as % of Data Range: 7.81%
Testing R²: 0.5958


In [None]:
# Predict on training data
y_pred_train = model.predict(X_train)

# Flatten the predictions and actual values
y_train_flat = y_train.flatten()
y_pred_train_flat = y_pred_train.flatten()

# Calculate MAE and MSE
mae_train = mean_absolute_error(y_train_flat, y_pred_train_flat)
mse_train = mean_squared_error(y_train_flat, y_pred_train_flat)

# Calculate the range of the actual data
data_range = y_train_flat.max() - y_train_flat.min()

# Convert MAE and MSE to percentage of the data range
mae_percentage = (mae_train / data_range) * 100
mse_percentage = (mse_train / data_range) * 100

# Print the results
print(f"Training MAE: {mae_train:.4f}")
print(f"Training MAE as % of Data Range: {mae_percentage:.2f}%")
print(f"Training MSE: {mse_train:.4f}")
print(f"Training MSE as % of Data Range: {mse_percentage:.2f}%")

# Optionally include R² for completeness
r2_train = r2_score(y_train_flat, y_pred_train_flat)
print(f"Training R²: {r2_train:.4f}")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Training MAE: 0.2740
Training MAE as % of Data Range: 4.04%
Training MSE: 0.1411
Training MSE as % of Data Range: 2.08%
Training R²: 0.8553


In [None]:
joblib.dump(model, '0.85_0.51_filt3_6feat.joblib')

['0.85_0.51_filt3_6feat.joblib']

In [None]:
# Calculate the mean of each feature in the training set
mean_baseline = np.mean(X_train[:, :, :], axis=(0, 1))  # Mean across all timesteps and windows for each feature

# Use the mean of each feature as the baseline prediction for every test sample
y_pred_baseline = np.tile(mean_baseline, (y_test.shape[0], 1))  # Repeat the mean for each test sample

# Evaluate the Baseline model
mse_baseline = mean_squared_error(y_test.flatten(), y_pred_baseline.flatten())
mae_baseline = mean_absolute_error(y_test.flatten(), y_pred_baseline.flatten())
r2_baseline = r2_score(y_test.flatten(), y_pred_baseline.flatten())

print(f"Baseline Model MSE: {mse_baseline:.4f}")
print(f"Baseline Model R2: {r2_baseline:.4f}")
print(f"Baseline Model MAE: {mae_baseline:.4f}")


Baseline Model MSE: 1.2759
Baseline Model R2: -0.6716
Baseline Model MAE: 0.8079


In [None]:
def get_new_time(year, quarter):
  no_of_years=year-2024
  no_of_q=((no_of_years-1)*4)+quarter

  years=[]
  quarters=[]

  if no_of_years==1:
    for i in range(no_of_q):
      quarters.append(i+1)
      years.append(2025)
  else:
     for i in range(no_of_years-1):
      quarters.extend([1,2,3,4])
      for j in range(4):
        years.append(2024+i+1)

      latest=years[-1]

      for i in range(quarter):
        quarters.append(i+1)
        years.append(latest+1)

  return years, quarters, len(quarters)

In [None]:
years, quarters, nstep=get_new_time(2028, 4)

In [None]:

def predict_beyond_test_set(model, last_known_input, n_steps, pca, years, quarters, scaler_year ):
    """
    Predicts beyond the test set by iteratively predicting the next step.

    Parameters:
    - model: Trained model.
    - last_known_input: Last known input data (shape should be (input_steps, num_input_features)).
    - n_steps: Number of steps to predict beyond the test set.
    - scaler: If you need to reverse scale the predictions, provide the scaler (optional).

    Returns:
    - predictions: Predicted values for the next n steps.
    """
    predictions = []
    current_input = last_known_input
    time_df=pd.DataFrame({'year': years, 'quarter': quarters})
    time_df=add_time_features(time_df, scaler_year)
    time_features = time_df[['quarter_sin', 'quarter_cos', 'year_scaled']].values


    for _ in range(n_steps):
        print(f'timesep:{_}')

        # Make prediction for the next step
        if _ ==0:
          pred = model.predict(np.expand_dims(current_input, axis=0))  # Shape (1, input_steps, num_input_features)

          predictions.append(pred.flatten())  # Flatten to get a 1D prediction
        elif _==1:
          redundant_pred=predictions[-1]
          pca_feat=pca.transform(redundant_pred.reshape(1, -1))
          time=time_features[0]
          time_reshaped = time.reshape(1, -1)  # Shape: (1, n_time_features)

          # Concatenate pca_feat and time_reshaped along axis 1
          concatenated = np.concatenate([pca_feat, time_reshaped], axis=1)
          last_known=last_known_input[-3:]
          final_array = np.vstack([last_known, concatenated])
          pred=model.predict(np.expand_dims(final_array, axis=0))
          predictions.append(pred.flatten())

        elif _==2:
          redundant_pred=predictions
          pca_feat=pca.transform(redundant_pred)
          time=time_features[:2]
          # time_reshaped = time.reshape(1, -1)  # Shape: (1, n_time_features)

          # # Concatenate pca_feat and time_reshaped along axis 1
          concatenated = np.concatenate([pca_feat, time], axis=1)
          last_known=last_known_input[-2:]
          final_array = np.vstack([last_known, concatenated])
          pred=model.predict(np.expand_dims(final_array, axis=0))
          predictions.append(pred.flatten())
        elif _==3:
          redundant_pred=predictions
          pca_feat=pca.transform(redundant_pred)
          time=time_features[:3]
          # time_reshaped = time.reshape(1, -1)  # Shape: (1, n_time_features)

          # # Concatenate pca_feat and time_reshaped along axis 1
          concatenated = np.concatenate([pca_feat, time], axis=1)
          last_known=last_known_input[-1:]
          final_array = np.vstack([last_known, concatenated])
          pred=model.predict(np.expand_dims(final_array, axis=0))
          predictions.append(pred.flatten())
        else:
          redundant_pred=predictions[-4:]
          pca_feat=pca.transform(redundant_pred)
          time=time_features[(_-3):_+1, :]
          concatenated = np.concatenate([pca_feat, time], axis=1)
          pred=model.predict(np.expand_dims(concatenated, axis=0))
          predictions.append(pred.flatten())


          # print(time_df.iloc[_])

    return np.array(predictions)

# Example usage:
n_steps = nstep

# print(n_steps)

# Assume `X_test[-1]` is the last input sequence from the test set
last_known_input = X_test[-1]

# Predict the next 20 steps
predictions = predict_beyond_test_set(model, last_known_input, n_steps, pca, years, quarters, scaler_year)

# Print the predictions
# print("Predictions for the next 20 steps:", predictions)


timesep:0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 311ms/step
timesep:1
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
timesep:2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
timesep:3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
timesep:4
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
timesep:5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
timesep:6
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
timesep:7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
timesep:8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
timesep:9
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
timesep:10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
timesep:11
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
timesep:12
[

In [None]:
unscaled_pred=scaler_ts.inverse_transform(predictions)

predictions_df=pd.DataFrame({'year': years, 'quarter': quarters})
for i, col in enumerate(targets):
  predictions_df[col] = unscaled_pred[:, i]

# print(unscaled_pred.shape)  # Check the shape of unscaled_pred
# print(predictions_df.shape)  # Check the shape of predictions_df

# predictions_df.tail()


In [None]:
predictions_df.tail()

Unnamed: 0,year,quarter,c1_dist,c2_dist,c3_dist,c4_dist,c5_dist,c6_dist
19,2027,4,104.15126,26.049616,1040.378296,1042.276611,2517.932373,2680.664307
20,2028,1,109.507614,32.130676,1035.160767,1039.195435,2506.40625,2664.849609
21,2028,2,116.626785,41.385876,1026.501221,1034.259766,2493.473145,2649.077393
22,2028,3,125.237274,51.685062,1020.816101,1034.456665,2509.317139,2677.935303
23,2028,4,135.032623,60.943981,1020.519897,1034.253296,2517.299561,2692.717529


In [None]:
# Flatten the variances from 2D to 1D
train_variances = np.var(y_train, axis=0).flatten()  # Variance for each time series in the training set
test_variances = np.var(y_test, axis=0).flatten()    # Variance for each time series in the test set

# Check if the shapes are now 1D
print(f"Shape of train_variances: {train_variances.shape}")
print(f"Shape of test_variances: {test_variances.shape}")

# Generate time series indices (1, 2, ..., num_features)
time_series_indices = np.arange(1, y_train.shape[1] + 1)

# Plot the variances for both y_train and y_test
plt.figure(figsize=(10, 6))
plt.bar(time_series_indices - 0.2, train_variances, width=0.4, label='Train Variance', color='blue', alpha=0.7)
plt.bar(time_series_indices + 0.2, test_variances, width=0.4, label='Test Variance', color='red', alpha=0.7)

plt.xlabel('Time Series')
plt.ylabel('Variance')
plt.title('Variance of Time Series in Training and Test Data')
plt.xticks(time_series_indices)  # Set x-ticks to the time series indices
plt.legend()
plt.tight_layout()
plt.show()

print(f'train var; {train_variances}')
print(f'test var; {test_variances}')

In [None]:

analyzer = innvestigate.create_analyzer("lrp", model)

# Sample input data (e.g., one instance from your test set)
# X_test shape: (num_samples, input_steps, num_input_features)
sample_input = X_test[0:1]  # Select the first test sample, shape: (1, input_steps, num_input_features)

# Predict the output for the sample
prediction = model.predict(sample_input)

# Perform LRP to analyze the relevance of the input
relevance = analyzer.analyze(sample_input)

# The 'relevance' array has the same shape as the input (1, input_steps, num_input_features)
print("Input shape:", sample_input.shape)
print("Relevance shape:", relevance.shape)


In [None]:

# Model_lstm
model_lstm = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(4, 6)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(6)  # Output layer
])

model_lstm.compile(optimizer='adam', loss='mse', metrics=['mae'])
model_lstm.summary()

history = model_lstm.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1)




  super().__init__(**kwargs)


Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - loss: 1.0695 - mae: 0.7759 - val_loss: 1.1478 - val_mae: 0.6863
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - loss: 0.9612 - mae: 0.7421 - val_loss: 1.1595 - val_mae: 0.6904
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.9760 - mae: 0.7492 - val_loss: 1.1695 - val_mae: 0.6928
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.8460 - mae: 0.6893 - val_loss: 1.1790 - val_mae: 0.6934
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.7646 - mae: 0.6571 - val_loss: 1.1869 - val_mae: 0.6923
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.6913 - mae: 0.6262 - val_loss: 1.1948 - val_mae: 0.6898
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.6853 - mae: 0.6

In [None]:
# Predictions
y_pred = model_lstm.predict(X_test)


# Evaluation Metrics
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R2 Score: {r2:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 951ms/step
MSE: 1.0831
MAE: 0.7685
R2 Score: -3.6281
