In [2]:
import pandas as pd

# Load financial news and stock price CSVs
news_df = pd.read_excel('/content/data_2007_to_2011_by_date.csv.xlsx', parse_dates=['Date'])
stock_df = pd.read_excel('/content/sensex_2007_2011.csv.xlsx', parse_dates=['Date'])


In [3]:
import numpy as np
import pandas as pd

In [4]:
# Merge on 'date'
merged_df = pd.merge(news_df, stock_df, on='Date')

# Drop rows with missing values after merge
merged_df.dropna(inplace=True)


In [5]:
merged_df.head()

Unnamed: 0.1,Unnamed: 0,Date,headline_text,Sentiment,Close,High,Low,Open,Volume
0,450061,2007-01-02,Q&A: Innovation is the norm across sectors in ...,0.0,13942.240234,13980.540039,13797.44043,13827.769531,9600
1,450062,2007-01-02,Consumer democracy,0.0,13942.240234,13980.540039,13797.44043,13827.769531,9600
2,450063,2007-01-02,Winner Takes It All,0.0,13942.240234,13980.540039,13797.44043,13827.769531,9600
3,450064,2007-01-02,Mosquito scare keeps NRI tourists off Gujarat,0.0,13942.240234,13980.540039,13797.44043,13827.769531,9600
4,450065,2007-01-02,Diversified MFs are a better bet,0.5,13942.240234,13980.540039,13797.44043,13827.769531,9600


In [6]:
# ---------------- STEP 2: Sentiment Analysis ----------------
from textblob import TextBlob

# Function to calculate polarity and subjectivity
def get_sentiment(text):
    blob = TextBlob(text)
    return blob.sentiment.polarity, blob.sentiment.subjectivity
# Ensure 'headline_text' is treated as string and fill potential NaNs with empty string
merged_df['headline_text'] = merged_df['headline_text'].astype(str).fillna('')

# Apply sentiment analysis
merged_df[['polarity', 'subjectivity']] = merged_df['headline_text'].apply(lambda x: pd.Series(get_sentiment(x)))

In [7]:
# ---------------- STEP 3: Abnormal Return Labels ----------------
merged_df['DailyReturn'] = (merged_df['Close'] - merged_df['Close'].shift(1)) / merged_df['Close'].shift(1)

# Compute mu_R (mean) and sigma_R (std) with rolling window = 5
merged_df['mu_R'] = merged_df['DailyReturn'].rolling(window=5).mean()
merged_df['sigma_R'] = merged_df['DailyReturn'].rolling(window=5).std()

# Label abnormal returns
delta = 1  # Define delta
merged_df['abnormal_return'] = (merged_df['DailyReturn'] > (merged_df['mu_R'] + delta * merged_df['sigma_R'])).astype(int)

In [8]:
# ---------------- STEP 4: Feature Engineering ----------------
for lag in range(1, 6):
    merged_df[f'ARSign_t-{lag}'] = merged_df['abnormal_return'].shift(lag)

# Final feature list
features = ['ARSign_t-1', 'ARSign_t-2', 'ARSign_t-3', 'ARSign_t-4', 'ARSign_t-5', 'polarity', 'subjectivity']
merged_df.dropna(subset=features, inplace=True)

In [9]:
# ---------------- STEP 5: Normalize Features ----------------
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
merged_df[features] = scaler.fit_transform(merged_df[features])

In [10]:
cols_to_convert = ['Close', 'High', 'Low', 'Open', 'Volume', 'DailyReturn']
for col in cols_to_convert:
    merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')


In [11]:
merged_df = merged_df.dropna()

In [12]:
features = ['ARSign_t-1', 'ARSign_t-2', 'ARSign_t-3', 'ARSign_t-4', 'ARSign_t-5', 'polarity', 'subjectivity']
target = 'abnormal_return'

In [13]:
import numpy as np

W = 5
X_data = merged_df[features].to_numpy()
y_data = merged_df[target].to_numpy()

X_windows = np.lib.stride_tricks.sliding_window_view(X_data, window_shape=(W,), axis=0)
X_windows = X_windows.reshape(-1, W, len(features))
y_windows = y_data[W:]

In [14]:
# ---------------- STEP 7: Temporal Train-Test Split ----------------
split_index = int(0.8 * len(X_windows))
X_train, X_test = X_windows[:split_index], X_windows[split_index:]
y_train, y_test = y_windows[:split_index], y_windows[split_index:]

In [15]:
# Normalize input (optional but recommended for RNN)
from sklearn.preprocessing import StandardScaler

# Flatten the time steps to apply scaler across all time points
X_train_flat = X_train.reshape(-1, X_train.shape[-1])
X_test_flat = X_test.reshape(-1, X_test.shape[-1])

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_flat).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test_flat).reshape(X_test.shape)

In [16]:
!pip install keras
!pip install tensorflow



In [17]:

# ---------------- STEP 8: Model Architectures ----------------
from keras.models import Sequential
from keras.layers import SimpleRNN, GRU, LSTM, Dense, Dropout, TimeDistributed

# SimpleRNN model
def create_simple_rnn_model():
    model = Sequential()
    model.add(SimpleRNN(64, activation='sigmoid', input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(16, activation='elu'))
    model.add(Dense(1, activation='sigmoid'))
    return model

# GRU model
def create_gru_model():
    model = Sequential()
    model.add(GRU(64, activation='elu', input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(16, activation='elu'))
    model.add(Dense(1, activation='sigmoid'))
    return model

# LSTM model
def create_lstm_model():
    model = Sequential()
    model.add(LSTM(64, return_sequences=True, activation='elu', input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(32, return_sequences=False, activation='elu'))
    model.add(Dropout(0.4))
    model.add(Dense(16, activation='elu'))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [18]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
import numpy as np

In [19]:
# ---------------- STEP 6: Windowing and Splitting ----------------
# This step should be done BEFORE the training loop

W = 5
X_data = merged_df[features].to_numpy()
y_data = merged_df[target].to_numpy()

# Create sliding windows for features
# The number of windows is len(X_data) - W + 1
X_windows = np.lib.stride_tricks.sliding_window_view(X_data, window_shape=(W,), axis=0)
X_windows = X_windows.reshape(-1, W, len(features))

# The target for each window is the abnormal return at the END of the window
# So we take y_data starting from index W-1 (0-based indexing)
y_windows = y_data[W-1:] # Corrected slicing

# Ensure X_windows and y_windows have the same length after windowing
min_len = min(len(X_windows), len(y_windows))
X_windows = X_windows[:min_len]
y_windows = y_windows[:min_len]

# Temporal train-test split
split_index = int(0.8 * len(X_windows))
X_train, X_test = X_windows[:split_index], X_windows[split_index:]
y_train, y_test = y_windows[:split_index], y_windows[split_index:]

# Normalize input (optional but recommended for RNN) - Perform normalization AFTER split
scaler = StandardScaler()
# Flatten the time steps to apply scaler across all time points
X_train_flat = X_train.reshape(-1, X_train.shape[-1])
X_test_flat = X_test.reshape(-1, X_test.shape[-1])

X_train_scaled = scaler.fit_transform(X_train_flat).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test_flat).reshape(X_test.shape)

In [20]:
from keras.models import Sequential
from keras.layers import SimpleRNN, GRU, LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
from keras.saving import save_model
import numpy as np

# Model Definitions (Keep these the same)
def create_simple_rnn_model():
    model = Sequential()
    # Use X_train_scaled.shape[1] and X_train_scaled.shape[2] for input_shape
    model.add(SimpleRNN(64, activation='sigmoid', input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
    model.add(Dense(16, activation='elu'))
    model.add(Dense(1, activation='sigmoid'))
    return model

def create_gru_model():
    model = Sequential()
     # Use X_train_scaled.shape[1] and X_train_scaled.shape[2] for input_shape
    model.add(GRU(64, activation='elu', input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
    model.add(Dense(16, activation='elu'))
    model.add(Dense(1, activation='sigmoid'))
    return model

def create_lstm_model():
    model = Sequential()
     # Use X_train_scaled.shape[1] and X_train_scaled.shape[2] for input_shape
    model.add(LSTM(64, return_sequences=True, activation='elu', input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
    model.add(LSTM(32, return_sequences=False, activation='elu'))
    model.add(Dropout(0.4))
    model.add(Dense(16, activation='elu'))
    model.add(Dense(1, activation='sigmoid'))
    return model

# ---------------- STEP 9: Model Training ----------------
# Use the already prepared X_train_scaled, y_train, X_test_scaled, y_test
# Remove the redundant windowing and splitting logic from here

models = [create_simple_rnn_model(), create_gru_model(), create_lstm_model()]
model_names = ['SimpleRNN', 'GRU', 'LSTM']
predictions = []

for i, model in enumerate(models):
    print(f"Training {model_names[i]} model...")
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)

    # Use the pre-split and pre-scaled data
    model.fit(X_train_scaled, y_train, validation_split=0.2, epochs=50, callbacks=[early_stopping], batch_size=32)

    # Save model in the recommended Keras format
    save_model(model, f'model_{model_names[i]}.keras')

    # Use the pre-split and pre-scaled test data for prediction
    y_pred = model.predict(X_test_scaled)
    predictions.append(y_pred)

    # Ensure y_pred is 1D if y_test is 1D for metric calculation
    if y_pred.ndim > 1 and y_pred.shape[1] == 1:
        y_pred = y_pred.flatten()

    y_bin = (y_pred > 0.5).astype(int)

    print(f"{model_names[i]} Accuracy:", accuracy_score(y_test, y_bin))
    print(f"{model_names[i]} F1-score:", f1_score(y_test, y_bin))
    # roc_auc_score expects prediction probabilities, not binary predictions
    print(f"{model_names[i]} AUC:", roc_auc_score(y_test, y_pred))
    print(f"{model_names[i]} Precision:", precision_score(y_test, y_bin))
    print(f"{model_names[i]} Recall:", recall_score(y_test, y_bin))

  super().__init__(**kwargs)


Training SimpleRNN model...
Epoch 1/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0126 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 2/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0100 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 3/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 3ms/step - accuracy: 0.9986 - loss: 0.0109 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 4/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0102 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 5/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 3ms/step - accuracy: 0.9987 - loss: 0.0097 - val_accuracy: 0.9992 - val_loss: 0.0075
Epoch 6/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0099 - val_accura

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 6ms/step - accuracy: 0.9959 - loss: 0.0198 - val_accuracy: 0.9992 - val_loss: 0.0072
Epoch 2/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 6ms/step - accuracy: 0.9986 - loss: 0.0106 - val_accuracy: 0.9992 - val_loss: 0.0065
Epoch 3/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 7ms/step - accuracy: 0.9987 - loss: 0.0098 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 4/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 6ms/step - accuracy: 0.9986 - loss: 0.0104 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 5/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 7ms/step - accuracy: 0.9987 - loss: 0.0100 - val_accuracy: 0.9992 - val_loss: 0.0064
Epoch 6/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 6ms/step - accuracy: 0.9988 - loss: 0.0094 - val_accuracy: 0.9992 - val_loss: 0.006

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 8ms/step - accuracy: 0.9975 - loss: 0.0218 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 2/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 8ms/step - accuracy: 0.9988 - loss: 0.0095 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 3/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 8ms/step - accuracy: 0.9987 - loss: 0.0103 - val_accuracy: 0.9992 - val_loss: 0.0065
Epoch 4/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 8ms/step - accuracy: 0.9988 - loss: 0.0097 - val_accuracy: 0.9992 - val_loss: 0.0064
Epoch 5/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 8ms/step - accuracy: 0.9987 - loss: 0.0099 - val_accuracy: 0.9992 - val_loss: 0.0063
Epoch 6/50
[1m11489/11489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 8ms/step - accuracy: 0.9987 - loss: 0.0102 - val_accuracy: 0.9992 - val_loss: 0.0064
[1m3591

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [23]:
# ---------------- STEP 10: PSO Ensemble ----------------
import numpy as np
from sklearn.metrics import f1_score
from pyswarm import pso  # pip install pyswarm

# Define predictions from trained models
y_pred_1, y_pred_2, y_pred_3 = predictions
y_true = y_test

# PSO Objective Function: Maximize F1 by minimizing (1 - F1)
def ensemble_f1_loss(weights):
    w1, w2, w3 = weights
    # Ensure y_pred_1, y_pred_2, y_pred_3 are 1D for multiplication
    # The predict output is likely (samples, 1), flatten if necessary
    y_pred_1_flat = y_pred_1.flatten() if y_pred_1.ndim > 1 else y_pred_1
    y_pred_2_flat = y_pred_2.flatten() if y_pred_2.ndim > 1 else y_pred_2
    y_pred_3_flat = y_pred_3.flatten() if y_pred_3.ndim > 1 else y_pred_3

    ensemble_pred = w1 * y_pred_1_flat + w2 * y_pred_2_flat + w3 * y_pred_3_flat
    y_pred_bin = (ensemble_pred > 0.5).astype(int)
    return 1 - f1_score(y_true, y_pred_bin)

# Constraint: weights must sum to 1
def weight_constraint(weights):
    # We want sum(weights) - 1 == 0.
    # For f_ieqcons, constraints are >= 0.
    # A common way to handle sum=1 constraint using f_ieqcons is:
    # 1 - sum(weights) >= 0  AND  sum(weights) - 1 >= 0
    # This means 1 - sum(weights) and sum(weights) - 1 must both be non-negative,
    # which only happens when 1 - sum(weights) = 0 and sum(weights) - 1 = 0,
    # i.e., sum(weights) = 1.
    return np.array([1 - np.sum(weights), np.sum(weights) - 1]) # Return an array of constraints

# Run PSO
lb = [0, 0, 0]  # lower bounds
ub = [1, 1, 1]  # upper bounds

# **Attempt 1: Pass the constraint function directly (against error message, but for testing)**
# This is less likely to work based on the error, but is a quick test.
# If this fails, revert to the original [weight_constraint] and try reinstalling pyswarm.
# best_weights, best_score = pso(
#     ensemble_f1_loss,
#     lb,
#     ub,
#     f_ieqcons=weight_constraint, # <-- Changed from [weight_constraint]
#     swarmsize=30,
#     maxiter=50,
#     debug=True
# )

# **Attempt 2: Keep the list format, but ensure pyswarm is correctly installed**
# This is the correct way to pass constraints according to pyswarm docs.
# If the TypeError persists with this, it suggests an issue with the pyswarm installation or version.
# Reverting to the original call with the list:
best_weights, best_score = pso(
    ensemble_f1_loss,
    lb,
    ub,
    f_ieqcons=weight_constraint, # Pass the function directly, not in a list
    swarmsize=30,
    maxiter=50,
    debug=True
)


# Final Ensemble Prediction using optimized weights
w1, w2, w3 = best_weights
y_pred_1_flat = y_pred_1.flatten() if y_pred_1.ndim > 1 else y_pred_1
y_pred_2_flat = y_pred_2.flatten() if y_pred_2.ndim > 1 else y_pred_2
y_pred_3_flat = y_pred_3.flatten() if y_pred_3.ndim > 1 else y_pred_3
ensemble_pred = w1 * y_pred_1_flat + w2 * y_pred_2_flat + w3 * y_pred_3_flat
ensemble_bin = (ensemble_pred > 0.5).astype(int)

# Report Metrics
print("PSO-Optimized Ensemble Weights:", best_weights)
print("Optimized F1-score:", f1_score(y_true, ensemble_bin))

# Assign best_weights to w1, w2, w3 for the evaluation step (assuming success)
w1, w2, w3 = best_weights

Single constraint function given in f_ieqcons
Best after iteration 1: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 2: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 3: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 4: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 5: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 6: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 7: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 8: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 9: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 10: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 11: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 12: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 13: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 14: [0.41942137 0.78499404 0.63424494] 1e+100
Best after iteration 15: 

In [24]:
# ---------------- STEP 10: PSO Ensemble ----------------
# Assuming PSO implementation is available
# Let y_pred_1, y_pred_2, y_pred_3 be outputs from SimpleRNN, GRU, LSTM
y_pred_1, y_pred_2, y_pred_3 = predictions

# Final prediction: y_final = w1*y1 + w2*y2 + w3*y3
# Use PSO to optimize weights w1, w2, w3 with constraints
# This part requires a PSO implementation


In [27]:
# ---------------- STEP 11: Evaluation ----------------
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, precision_score, recall_score, f1_score
import numpy as np # Ensure numpy is imported if not already

# Compute metrics
# Ensure y_pred_1, y_pred_2, y_pred_3 are flattened before the weighted sum if they are (samples, 1)
y_pred_1_flat = y_pred_1.flatten() if y_pred_1.ndim > 1 else y_pred_1
y_pred_2_flat = y_pred_2.flatten() if y_pred_2.ndim > 1 else y_pred_2
y_pred_3_flat = y_pred_3.flatten() if y_pred_3.ndim > 1 else y_pred_3


y_final_raw = (w1 * y_pred_1_flat + w2 * y_pred_2_flat + w3 * y_pred_3_flat)

# Convert probabilities to binary predictions and ensure it's 1-dimensional
y_final = (y_final_raw > 0.5).astype(int).flatten() # Add .flatten() here

# Now y_final should be (114888,)

# Compute metrics
# y_test is already 1D (from global variables and typical data loading)
accuracy = accuracy_score(y_test, y_final)
balanced_accuracy = balanced_accuracy_score(y_test, y_final)
# roc_auc_score expects probability scores if the target is binary, not binary predictions
# Use the raw predictions for AUC calculation if possible, otherwise the AUC for binary predictions will be 0.5 or 1.0 depending on the distribution
try:
    auc = roc_auc_score(y_test, y_final_raw) # Use raw scores for AUC
except ValueError:
    # Handle case where only one class is present in y_test or y_final
    print("Could not compute AUC: only one class present in y_test or y_final")
    auc = np.nan # Or some other indicator

precision = precision_score(y_test, y_final)
recall = recall_score(y_test, y_final)
f1 = f1_score(y_test, y_final)

# Print metrics
print(f'Accuracy: {accuracy}')
print(f'Balanced Accuracy: {balanced_accuracy}')
print(f'AUC: {auc}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

Accuracy: 0.999416823340993
Balanced Accuracy: 0.5
AUC: 0.5114703522302788
Precision: 0.0
Recall: 0.0
F1 Score: 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [28]:
# ---------------- STEP 12: Save Outputs ----------------
import joblib
import pandas as pd # Ensure pandas is imported

# Save ensemble weights
joblib.dump((w1, w2, w3), 'ensemble_weights.pkl')

# Save best model (assuming the best model is the LSTM model - models[2])
# Check if models list and models[2] exist and are valid
if 'models' in globals() and len(models) > 2 and models[2] is not None:
    try:
        models[2].save('best_model.h5')
    except Exception as e:
        print(f"Could not save best model: {e}")
else:
    print("Could not find 'models' or models[2] to save the best model.")


# Export predictions and evaluation metrics
# y_test and y_final should now both be 1-dimensional arrays
predictions_df = pd.DataFrame({
    'y_test': y_test,
    'y_pred': y_final # y_final is now 1D
})
predictions_df.to_csv('predictions.csv', index=False)

metrics_df = pd.DataFrame({
    'Accuracy': [accuracy],
    'Balanced Accuracy': [balanced_accuracy],
    'AUC': [auc],
    'Precision': [precision],
    'Recall': [recall],
    'F1 Score': [f1]
})
metrics_df.to_csv('evaluation_metrics.csv', index=False)

