In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Load data
ftraindf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/ftraindf.csv')
fvaldf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/fvaldf.csv')
ftestdf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/ftestdf.csv')

with open('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/features_selected.pkl', 'rb') as handle:
    features_selected = pickle.load(handle)

selected_cluster = 1
cluster_suffix = "_c0"  # Assumes all selected features have this suffix
features = [f + cluster_suffix for f in features_selected[selected_cluster]] + ['yref_Tm0_close']

ftraindf = ftraindf[features]
fvaldf = fvaldf[features]
ftestdf = ftestdf[features]

# Combine for consistent scaling
scaler = MinMaxScaler()
all_data = pd.concat([ftraindf, fvaldf, ftestdf])
scaled_all = scaler.fit_transform(all_data)

# Split back
train_len = len(ftraindf)
val_len = len(fvaldf)
test_len = len(ftestdf)

train_scaled = scaled_all[:train_len]
val_scaled = scaled_all[train_len:train_len+val_len]
test_scaled = scaled_all[train_len+val_len:]

# Sequence builder
def create_sequences(data, seq_length=5):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length][-1])  # Predicting 'Close'
    return np.array(X), np.array(y)

SEQ_LENGTH = 5
X_train, y_train = create_sequences(train_scaled, SEQ_LENGTH)
X_val, y_val = create_sequences(val_scaled, SEQ_LENGTH)
X_test, y_test = create_sequences(test_scaled, SEQ_LENGTH)

# Build BiLSTM model
model = Sequential([
    Bidirectional(LSTM(64, return_sequences=True), input_shape=(SEQ_LENGTH, X_train.shape[2])),
    Dropout(0.2),
    Bidirectional(LSTM(64)),
    Dropout(0.2),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=16, verbose=1)

# Predict
y_pred_scaled = model.predict(X_test)

# Inverse transform
zero_pad = np.zeros((len(y_pred_scaled), len(features)-1))
y_pred_combined = np.hstack((zero_pad, y_pred_scaled))
y_pred = scaler.inverse_transform(y_pred_combined)[:, -1]

# Actual values
y_true = ftestdf['yref_Tm0_close'].values[SEQ_LENGTH:]

# Metrics
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
r2 = r2_score(y_true, y_pred)

print(f"✅ RMSE: {rmse:.4f}")
print(f"✅ R² Score: {r2:.4f}")


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Load data
ftraindf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/ftraindf.csv')
fvaldf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/fvaldf.csv')
ftestdf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/ftestdf.csv')

with open('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/features_selected.pkl', 'rb') as handle:
    features_selected = pickle.load(handle)

# Model parameters
SEQ_LENGTH = 5
EPOCHS = 50
BATCH_SIZE = 16
cluster_suffix = "_c0"  # consistent suffix in your dataset

results = []

for cluster_id in range(1, 14):  # Tp1 to Tp13
    try:
        print(f"🔁 Training cluster Tp{cluster_id}")
        
        # Build feature set and subset data
        features = [f + cluster_suffix for f in features_selected[cluster_id]] + ['yref_Tm0_close']
        ft = ftraindf[features]
        fv = fvaldf[features]
        fs = ftestdf[features]

        # Normalize
        all_data = pd.concat([ft, fv, fs])
        scaler = MinMaxScaler()
        scaled_all = scaler.fit_transform(all_data)

        train_len = len(ft)
        val_len = len(fv)
        test_len = len(fs)

        train_scaled = scaled_all[:train_len]
        val_scaled = scaled_all[train_len:train_len+val_len]
        test_scaled = scaled_all[train_len+val_len:]

        # Build sequences
        def create_sequences(data, seq_length=5):
            X, y = [], []
            for i in range(len(data) - seq_length):
                X.append(data[i:i+seq_length])
                y.append(data[i+seq_length][-1])
            return np.array(X), np.array(y)

        X_train, y_train = create_sequences(train_scaled, SEQ_LENGTH)
        X_val, y_val = create_sequences(val_scaled, SEQ_LENGTH)
        X_test, y_test = create_sequences(test_scaled, SEQ_LENGTH)

        # Model
        model = Sequential([
            Bidirectional(LSTM(64, return_sequences=True), input_shape=(SEQ_LENGTH, X_train.shape[2])),
            Dropout(0.2),
            Bidirectional(LSTM(64)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)

        # Predict
        y_pred_scaled = model.predict(X_test)

        # Inverse scale
        zero_pad = np.zeros((len(y_pred_scaled), len(features)-1))
        y_pred_combined = np.hstack((zero_pad, y_pred_scaled))
        y_pred = scaler.inverse_transform(y_pred_combined)[:, -1]

        y_true = fs['yref_Tm0_close'].values[SEQ_LENGTH:]

        # Metrics
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        r2 = r2_score(y_true, y_pred)

        print(f"✅ Tp{cluster_id} → RMSE: {rmse:.4f} | R²: {r2:.4f}")
        results.append({
            'Tp': f'Tp{cluster_id}',
            'RMSE': rmse,
            'R2': r2
        })
    except Exception as e:
        print(f"⚠️ Tp{cluster_id} failed: {e}")
        results.append({
            'Tp': f'Tp{cluster_id}',
            'RMSE': None,
            'R2': None,
            'Error': str(e)
        })

# Convert to DataFrame
results_df = pd.DataFrame(results)
print("\n📊 Summary:")
print(results_df)


In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Load data
ftraindf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/benchmark_C38U.SI_traindf.csv')
fvaldf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/benchmark_C38U.SI_valdf.csv')
ftestdf = pd.read_csv('/home/priya/Desktop/fyp/Src alwin/Src/benchmark_C38U.SI_testdf.csv')


with open('/home/priya/Desktop/fyp/Src alwin/Src/data/C38U.SI/features_selected.pkl', 'rb') as handle:
    features_selected = pickle.load(handle)

# Model parameters
SEQ_LENGTH = 5
EPOCHS = 50
BATCH_SIZE = 16
cluster_suffix = "_c0"  # consistent suffix in your dataset

results = []

for cluster_id in range(1, 14):  # Tp1 to Tp13
    try:
        print(f"🔁 Training cluster Tp{cluster_id}")
        
        # Build feature set and subset data
        features = features_selected[cluster_id] + ['yref_Tm0_close']

        ft = ftraindf[features]
        fv = fvaldf[features]
        fs = ftestdf[features]

        # Normalize
        all_data = pd.concat([ft, fv, fs])
        scaler = MinMaxScaler()
        scaled_all = scaler.fit_transform(all_data)

        train_len = len(ft)
        val_len = len(fv)
        test_len = len(fs)

        train_scaled = scaled_all[:train_len]
        val_scaled = scaled_all[train_len:train_len+val_len]
        test_scaled = scaled_all[train_len+val_len:]

        # Build sequences
        def create_sequences(data, seq_length=5):
            X, y = [], []
            for i in range(len(data) - seq_length):
                X.append(data[i:i+seq_length])
                y.append(data[i+seq_length][-1])
            return np.array(X), np.array(y)

        X_train, y_train = create_sequences(train_scaled, SEQ_LENGTH)
        X_val, y_val = create_sequences(val_scaled, SEQ_LENGTH)
        X_test, y_test = create_sequences(test_scaled, SEQ_LENGTH)

        # Model
        model = Sequential([
            Bidirectional(LSTM(64, return_sequences=True), input_shape=(SEQ_LENGTH, X_train.shape[2])),
            Dropout(0.2),
            Bidirectional(LSTM(64)),
            Dropout(0.2),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)

        # Predict
        y_pred_scaled = model.predict(X_test)

        # Inverse scale
        zero_pad = np.zeros((len(y_pred_scaled), len(features)-1))
        y_pred_combined = np.hstack((zero_pad, y_pred_scaled))
        y_pred = scaler.inverse_transform(y_pred_combined)[:, -1]

        y_true = fs['yref_Tm0_close'].values[SEQ_LENGTH:]

        # Metrics
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        r2 = r2_score(y_true, y_pred)

        print(f"✅ Tp{cluster_id} → RMSE: {rmse:.4f} | R²: {r2:.4f}")
        results.append({
            'Tp': f'Tp{cluster_id}',
            'RMSE': rmse,
            'R2': r2
        })
    except Exception as e:
        print(f"⚠️ Tp{cluster_id} failed: {e}")
        results.append({
            'Tp': f'Tp{cluster_id}',
            'RMSE': None,
            'R2': None,
            'Error': str(e)
        })

# Convert to DataFrame
results_df = pd.DataFrame(results)
print("\n📊 Summary:")
print(results_df)


🔁 Training cluster Tp1


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step
✅ Tp1 → RMSE: 0.1016 | R²: 0.4926
🔁 Training cluster Tp2


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step
✅ Tp2 → RMSE: 0.0403 | R²: 0.9204
🔁 Training cluster Tp3


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step
✅ Tp3 → RMSE: 0.0796 | R²: 0.6887
🔁 Training cluster Tp4


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step
✅ Tp4 → RMSE: 0.0750 | R²: 0.7236
🔁 Training cluster Tp5


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step
✅ Tp5 → RMSE: 0.0489 | R²: 0.8826
🔁 Training cluster Tp6


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step
✅ Tp6 → RMSE: 0.0360 | R²: 0.9362
🔁 Training cluster Tp7


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step
✅ Tp7 → RMSE: 0.0943 | R²: 0.5634
🔁 Training cluster Tp8


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step
✅ Tp8 → RMSE: 0.0599 | R²: 0.8240
🔁 Training cluster Tp9


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
✅ Tp9 → RMSE: 0.0891 | R²: 0.6102
🔁 Training cluster Tp10


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 67ms/step
✅ Tp10 → RMSE: 0.0628 | R²: 0.8064
🔁 Training cluster Tp11


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
✅ Tp11 → RMSE: 0.1295 | R²: 0.1755
🔁 Training cluster Tp12


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
✅ Tp12 → RMSE: 0.1038 | R²: 0.4701
🔁 Training cluster Tp13


  super().__init__(**kwargs)


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
✅ Tp13 → RMSE: 0.0391 | R²: 0.9249

📊 Summary:
      Tp      RMSE        R2
0    Tp1  0.101627  0.492555
1    Tp2  0.040253  0.920388
2    Tp3  0.079593  0.688739
3    Tp4  0.075000  0.723628
4    Tp5  0.048880  0.882610
5    Tp6  0.036021  0.936250
6    Tp7  0.094264  0.563419
7    Tp8  0.059854  0.823980
8    Tp9  0.089069  0.610220
9   Tp10  0.062766  0.806441
10  Tp11  0.129539  0.175532
11  Tp12  0.103850  0.470118
12  Tp13  0.039096  0.924899
