In [1]:
# --- LSTM Model for Tp1–Tp13 ---
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Load data
ftraindf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/benchmark_C38U.SI_traindf.csv')
fvaldf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/benchmark_C38U.SI_valdf.csv')
ftestdf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/benchmark_C38U.SI_testdf.csv')
with open('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/features_selected.pkl', 'rb') as handle:
    features_selected = pickle.load(handle)

SEQ_LENGTH = 5
EPOCHS = 50
BATCH_SIZE = 16

results = []

for cluster_id in range(1, 14):
    try:
        print(f'🔁 Training cluster Tp{cluster_id}')
        features = features_selected[cluster_id] + ['yref_Tm0_close']
        ft = ftraindf[features]
        fv = fvaldf[features]
        fs = ftestdf[features]
        all_data = pd.concat([ft, fv, fs])
        scaler = MinMaxScaler()
        scaled_all = scaler.fit_transform(all_data)

        train_len, val_len = len(ft), len(fv)
        train_scaled = scaled_all[:train_len]
        val_scaled = scaled_all[train_len:train_len+val_len]
        test_scaled = scaled_all[train_len+val_len:]

        def create_sequences(data, seq_length=5):
            X, y = [], []
            for i in range(len(data) - seq_length):
                X.append(data[i:i+seq_length])
                y.append(data[i+seq_length][-1])
            return np.array(X), np.array(y)

        X_train, y_train = create_sequences(train_scaled, SEQ_LENGTH)
        X_val, y_val = create_sequences(val_scaled, SEQ_LENGTH)
        X_test, y_test = create_sequences(test_scaled, SEQ_LENGTH)

        model = Sequential([
            LSTM(64, return_sequences=True, input_shape=(SEQ_LENGTH, X_train.shape[2])),
            Dropout(0.2),
            LSTM(64),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)

        y_pred_scaled = model.predict(X_test)
        zero_pad = np.zeros((len(y_pred_scaled), len(features)-1))
        y_pred_combined = np.hstack((zero_pad, y_pred_scaled))
        y_pred = scaler.inverse_transform(y_pred_combined)[:, -1]
        y_true = fs['yref_Tm0_close'].values[SEQ_LENGTH:]

        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        r2 = r2_score(y_true, y_pred)

        print(f'✅ Tp{cluster_id} → RMSE: {rmse:.4f} | R²: {r2:.4f}')
        results.append({'Tp': f'Tp{cluster_id}', 'RMSE': rmse, 'R2': r2})

    except Exception as e:
        print(f'⚠️ Tp{cluster_id} failed: {e}')
        results.append({'Tp': f'Tp{cluster_id}', 'RMSE': None, 'R2': None, 'Error': str(e)})

lstm_results_df = pd.DataFrame(results)
print('\n📊 Summary:')
print(lstm_results_df)
print(f'📈 Overall Mean R²: {lstm_results_df["R2"].mean():.4f}')
lstm_results_df.to_csv('lstm_non_fuzzy_results.csv', index=False)


2025-04-01 00:56:21.563744: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-01 00:56:21.592583: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743440181.623231  170971 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743440181.632232  170971 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743440181.655368  170971 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

🔁 Training cluster Tp1


2025-04-01 00:56:25.090158: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
✅ Tp1 → RMSE: 0.0379 | R²: 0.9295
🔁 Training cluster Tp2


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
✅ Tp2 → RMSE: 0.0575 | R²: 0.8374
🔁 Training cluster Tp3


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step
✅ Tp3 → RMSE: 0.0640 | R²: 0.7990
🔁 Training cluster Tp4


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
✅ Tp4 → RMSE: 0.0752 | R²: 0.7221
🔁 Training cluster Tp5


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
✅ Tp5 → RMSE: 0.0524 | R²: 0.8651
🔁 Training cluster Tp6


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
✅ Tp6 → RMSE: 0.0533 | R²: 0.8606
🔁 Training cluster Tp7


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
✅ Tp7 → RMSE: 0.0621 | R²: 0.8104
🔁 Training cluster Tp8


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
✅ Tp8 → RMSE: 0.0546 | R²: 0.8537
🔁 Training cluster Tp9


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
✅ Tp9 → RMSE: 0.0354 | R²: 0.9384
🔁 Training cluster Tp10


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
✅ Tp10 → RMSE: 0.0606 | R²: 0.8197
🔁 Training cluster Tp11


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
✅ Tp11 → RMSE: 0.0367 | R²: 0.9339
🔁 Training cluster Tp12


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
✅ Tp12 → RMSE: 0.0425 | R²: 0.9114
🔁 Training cluster Tp13


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
✅ Tp13 → RMSE: 0.0578 | R²: 0.8361

📊 Summary:
      Tp      RMSE        R2
0    Tp1  0.037871  0.929533
1    Tp2  0.057530  0.837386
2    Tp3  0.063954  0.799042
3    Tp4  0.075204  0.722123
4    Tp5  0.052393  0.865130
5    Tp6  0.053257  0.860644
6    Tp7  0.062124  0.810378
7    Tp8  0.054571  0.853684
8    Tp9  0.035413  0.938384
9   Tp10  0.060571  0.819742
10  Tp11  0.036666  0.933947
11  Tp12  0.042472  0.911372
12  Tp13  0.057765  0.836056
📈 Overall Mean R²: 0.8552


In [4]:
# --- LSTM Model for Tp1–Tp13 ---
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Load data
ftraindf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/ftraindf.csv')
fvaldf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/fvaldf.csv')
ftestdf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/ftestdf.csv')
with open('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/features_selected.pkl', 'rb') as handle:
    features_selected = pickle.load(handle)

SEQ_LENGTH = 5
EPOCHS = 50
BATCH_SIZE = 16

results = []

for cluster_id in range(1, 14):
    try:
        print(f'🔁 Training cluster Tp{cluster_id}')
        cluster_suffix = f'_c{cluster_id - 1}'
        mapped_features = [f + cluster_suffix for f in features_selected[cluster_id] if f + cluster_suffix in ftraindf.columns]
        features = mapped_features + ['yref_Tm0_close']

        # Optional: warn if any features are missing
        missing = [f for f in features_selected[cluster_id] if f + cluster_suffix not in ftraindf.columns]
        if missing:
            print(f"⚠️ Tp{cluster_id} → Missing features: {missing}")

        ft = ftraindf[features]
        fv = fvaldf[features]
        fs = ftestdf[features]
        all_data = pd.concat([ft, fv, fs])
        scaler = MinMaxScaler()
        scaled_all = scaler.fit_transform(all_data)

        train_len, val_len = len(ft), len(fv)
        train_scaled = scaled_all[:train_len]
        val_scaled = scaled_all[train_len:train_len+val_len]
        test_scaled = scaled_all[train_len+val_len:]

        def create_sequences(data, seq_length=5):
            X, y = [], []
            for i in range(len(data) - seq_length):
                X.append(data[i:i+seq_length])
                y.append(data[i+seq_length][-1])
            return np.array(X), np.array(y)

        X_train, y_train = create_sequences(train_scaled, SEQ_LENGTH)
        X_val, y_val = create_sequences(val_scaled, SEQ_LENGTH)
        X_test, y_test = create_sequences(test_scaled, SEQ_LENGTH)

        model = Sequential([
            LSTM(64, return_sequences=True, input_shape=(SEQ_LENGTH, X_train.shape[2])),
            Dropout(0.2),
            LSTM(64),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)

        y_pred_scaled = model.predict(X_test)
        zero_pad = np.zeros((len(y_pred_scaled), len(features)-1))
        y_pred_combined = np.hstack((zero_pad, y_pred_scaled))
        y_pred = scaler.inverse_transform(y_pred_combined)[:, -1]
        y_true = fs['yref_Tm0_close'].values[SEQ_LENGTH:]

        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        r2 = r2_score(y_true, y_pred)

        print(f'✅ Tp{cluster_id} → RMSE: {rmse:.4f} | R²: {r2:.4f}')
        results.append({'Tp': f'Tp{cluster_id}', 'RMSE': rmse, 'R2': r2})

    except Exception as e:
        print(f'⚠️ Tp{cluster_id} failed: {e}')
        results.append({'Tp': f'Tp{cluster_id}', 'RMSE': None, 'R2': None, 'Error': str(e)})

results_df = pd.DataFrame(results)
print('\n📊 Summary:')
print(results_df)
print(f'📈 Overall Mean R²: {results_df["R2"].mean():.4f}')
results_df.to_csv('lstm_fuzzy_results.csv', index=False)

🔁 Training cluster Tp1


2025-04-01 11:53:25.036739: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
✅ Tp1 → RMSE: 0.0581 | R²: 0.8343
🔁 Training cluster Tp2


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
✅ Tp2 → RMSE: 0.0833 | R²: 0.6588
🔁 Training cluster Tp3


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
✅ Tp3 → RMSE: 0.0808 | R²: 0.6796
🔁 Training cluster Tp4


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
✅ Tp4 → RMSE: 0.0945 | R²: 0.5613
🔁 Training cluster Tp5


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
✅ Tp5 → RMSE: 0.0767 | R²: 0.7113
🔁 Training cluster Tp6


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
✅ Tp6 → RMSE: 0.0539 | R²: 0.8573
🔁 Training cluster Tp7


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
✅ Tp7 → RMSE: 0.0363 | R²: 0.9353
🔁 Training cluster Tp8
⚠️ Tp8 → Missing features: ['x_Tm12_PriceChg', 'x_Tm4_PRoc1', 'x_Tm10_VolChg', 'x_Tm1_PriceChg', 'x_Tm3_PriceChg', 'x_Tm5_PRoc1', 'x_Tm7_VolChg', 'x_Tm13_VolChg', 'x_Tm7_PRoc1', 'x_Tm12_VolChg', 'x_Tm1_VolChg', 'x_Tm5_PriceChg', 'x_Tm6_PriceChg', 'x_Tm9_PRoc1', 'x_Tm11_PRoc1', 'x_Tm8_PriceChg', 'x_Tm4_PriceChg', 'x_Tm13_PriceChg', 'x_Tm7_PriceChg', 'x_Tm11_PriceChg', 'x_Tm10_PriceChg']


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
✅ Tp8 → RMSE: 0.0463 | R²: 0.8948
🔁 Training cluster Tp9
⚠️ Tp9 → Missing features: ['x_Tm4_VolChg', 'x_Tm1_PriceChg', 'x_Tm13_VolChg', 'x_Tm6_VolChg', 'x_Tm9_PRoc1', 'x_Tm4_PRoc1', 'x_Tm7_PRoc1', 'x_Tm13_PriceChg', 'x_Tm4_PriceChg', 'x_Tm8_PriceChg', 'x_Tm10_VolChg', 'x_Tm6_PRoc1', 'x_Tm1_PRoc1', 'x_Tm10_PRoc1', 'x_Tm12_PRoc1', 'x_Tm7_VolChg', 'x_Tm3_PriceChg', 'x_Tm1_VolChg', 'x_Tm6_PriceChg']


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
✅ Tp9 → RMSE: 0.0284 | R²: 0.9603
🔁 Training cluster Tp10
⚠️ Tp10 → Missing features: ['x_Tm1_PriceChg', 'x_Tm4_VolChg', 'x_Tm2_PriceChg', 'x_Tm13_PriceChg', 'x_Tm6_PriceChg', 'x_Tm12_PRoc1', 'x_Tm13_VolChg', 'x_Tm3_PRoc1', 'x_Tm5_PRoc1', 'x_Tm10_PRoc1', 'x_Tm8_PriceChg', 'x_Tm5_VolChg', 'x_Tm9_PriceChg', 'x_Tm11_VolChg', 'x_Tm11_PRoc1', 'x_Tm6_PRoc1']


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
✅ Tp10 → RMSE: 0.0302 | R²: 0.9552
🔁 Training cluster Tp11
⚠️ Tp11 → Missing features: ['x_Tm9_VolChg', 'x_Tm2_PriceChg', 'x_Tm1_PriceChg', 'x_Tm5_VolChg', 'x_Tm5_PriceChg', 'x_Tm7_VolChg', 'x_Tm11_VolChg', 'x_Tm9_PRoc1', 'x_Tm13_PriceChg', 'x_Tm7_PRoc1', 'x_Tm6_VolChg', 'x_Tm6_PriceChg', 'x_Tm10_VolChg', 'x_Tm6_PRoc1', 'x_Tm11_PriceChg', 'x_Tm10_PriceChg', 'x_Tm4_VolChg', 'x_Tm8_PRoc1', 'x_Tm7_PriceChg', 'x_Tm12_PRoc1']


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
✅ Tp11 → RMSE: 0.0304 | R²: 0.9545
🔁 Training cluster Tp12
⚠️ Tp12 → Missing features: ['x_Tm12_VolChg', 'x_Tm6_PRoc1', 'x_Tm8_VolChg', 'x_Tm10_PriceChg', 'x_Tm5_PRoc1', 'x_Tm7_PRoc1', 'x_Tm10_VolChg', 'x_Tm5_VolChg', 'x_Tm2_PriceChg', 'x_Tm10_PRoc1', 'x_Tm13_PriceChg', 'x_Tm3_PRoc1', 'x_Tm6_VolChg', 'x_Tm9_PRoc1', 'x_Tm9_PriceChg', 'x_Tm12_PRoc1', 'x_Tm8_PriceChg', 'x_Tm5_PriceChg']


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
✅ Tp12 → RMSE: 0.0291 | R²: 0.9583
🔁 Training cluster Tp13
⚠️ Tp13 → Missing features: ['x_Tm12_VolChg', 'x_Tm4_PRoc1', 'x_Tm1_PriceChg', 'x_Tm3_PriceChg', 'x_Tm1_PRoc1', 'x_Tm5_VolChg', 'x_Tm6_PRoc1', 'x_Tm13_PRoc1', 'x_Tm11_PriceChg', 'x_Tm7_PRoc1', 'x_Tm10_PriceChg', 'x_Tm9_PriceChg', 'x_Tm11_VolChg', 'x_Tm6_PriceChg', 'x_Tm6_VolChg', 'x_Tm3_PRoc1']


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
✅ Tp13 → RMSE: 0.0307 | R²: 0.9538

📊 Summary:
      Tp      RMSE        R2
0    Tp1  0.058069  0.834336
1    Tp2  0.083340  0.658766
2    Tp3  0.080759  0.679572
3    Tp4  0.094499  0.561266
4    Tp5  0.076655  0.711310
5    Tp6  0.053889  0.857325
6    Tp7  0.036302  0.935255
7    Tp8  0.046271  0.894811
8    Tp9  0.028444  0.960251
9   Tp10  0.030207  0.955172
10  Tp11  0.030433  0.954498
11  Tp12  0.029145  0.958266
12  Tp13  0.030680  0.953756
📈 Overall Mean R²: 0.8396


In [3]:
print(ftraindf.columns.tolist())
print(features_selected[1])


['Date', 'yref_Tm0_close', 'High_c0', 'High_c1', 'High_c2', 'High_c3', 'High_c4', 'High_c5', 'High_c6', 'Low_c0', 'Low_c1', 'Low_c2', 'Low_c3', 'Low_c4', 'Low_c5', 'Low_c6', 'Open_c0', 'Open_c1', 'Open_c2', 'Open_c3', 'Open_c4', 'Open_c5', 'Open_c6', 'x_Tm1_PriceChg_c0', 'x_Tm1_PriceChg_c1', 'x_Tm1_PriceChg_c2', 'x_Tm1_PriceChg_c3', 'x_Tm1_PriceChg_c4', 'x_Tm1_PriceChg_c5', 'x_Tm1_PriceChg_c6', 'x_Tm1_VolChg_c0', 'x_Tm1_VolChg_c1', 'x_Tm1_VolChg_c2', 'x_Tm1_VolChg_c3', 'x_Tm1_VolChg_c4', 'x_Tm1_VolChg_c5', 'x_Tm1_VolChg_c6', 'x_Tm1_PRoc1_c0', 'x_Tm1_PRoc1_c1', 'x_Tm1_PRoc1_c2', 'x_Tm1_PRoc1_c3', 'x_Tm1_PRoc1_c4', 'x_Tm1_PRoc1_c5', 'x_Tm1_PRoc1_c6', 'yref_Tp1_Price', 'x_Tm2_PriceChg_c0', 'x_Tm2_PriceChg_c1', 'x_Tm2_PriceChg_c2', 'x_Tm2_PriceChg_c3', 'x_Tm2_PriceChg_c4', 'x_Tm2_PriceChg_c5', 'x_Tm2_PriceChg_c6', 'x_Tm2_VolChg_c0', 'x_Tm2_VolChg_c1', 'x_Tm2_VolChg_c2', 'x_Tm2_VolChg_c3', 'x_Tm2_VolChg_c4', 'x_Tm2_VolChg_c5', 'x_Tm2_VolChg_c6', 'x_Tm2_PRoc1_c0', 'x_Tm2_PRoc1_c1', 'x_Tm2_PR