In [1]:
import os
import sys
import pandas as pd
import numpy as np
import pickle
import tensorflow as tf
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Locate the project's 'src' directory so its modules can be imported.
# Candidates are tried in order and the first existing directory wins:
#   1. the APM523_SRC_PATH environment variable (explicit override),
#   2. '../src' relative to the working directory (assumes the notebook sits
#      beside 'src', like the '../data' and '../outputs' paths it uses — TODO confirm),
#   3. the original author's absolute path (legacy fallback, also used when
#      nothing exists so the diagnostics below report the same path as before).
_legacy_src_path = r"C:\Users\shali\Documents\shalin\ASU_2nd_SEM\APM 523 Optimization\APM523_HybridSwarm_TextClassification\src"
_src_candidates = [
    os.environ.get("APM523_SRC_PATH"),
    os.path.abspath(os.path.join("..", "src")),
    _legacy_src_path,
]
module_path = next(
    (candidate for candidate in _src_candidates if candidate and os.path.isdir(candidate)),
    _legacy_src_path,
)
if module_path not in sys.path:
    sys.path.append(module_path)

print("SRC Path Exists:", os.path.exists(module_path))
print("SRC Path:", module_path)

try:
    from swarm_algorithms import PSO, GWO, HybridPSOGWO
    from models import build_lstm_model
except ImportError as e:
    # Report the failure, then stop: every later cell needs these names, so
    # continuing would only surface as a confusing NameError further down.
    print("Error importing modules:", e)
    raise
else:
    print("Successfully imported modules!")

# Seed NumPy and TensorFlow so the subset sampling and training are repeatable.
np.random.seed(42)
tf.random.set_seed(42)

SRC Path Exists: True
SRC Path: C:\Users\shali\Documents\shalin\ASU_2nd_SEM\APM 523 Optimization\APM523_HybridSwarm_TextClassification\src
Successfully imported modules!


In [2]:
# Load the preprocessed TF-IDF features and labels, then carve a small
# train/validation split out of the training data for the optimizers.
processed_dir = '../data/processed/'
train_tfidf_path, test_tfidf_path, train_csv_path, test_csv_path = (
    os.path.join(processed_dir, file_name)
    for file_name in (
        'train_tfidf.pkl',
        'test_tfidf.pkl',
        'train_preprocessed.csv',
        'test_preprocessed.csv',
    )
)

# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load feature files that were produced by this project's own preprocessing.
with open(train_tfidf_path, 'rb') as train_file:
    X_train_tfidf = pickle.load(train_file)
with open(test_tfidf_path, 'rb') as test_file:
    X_test_tfidf = pickle.load(test_file)

train_df, test_df = pd.read_csv(train_csv_path), pd.read_csv(test_csv_path)

# Shift the 'Class Index' labels down by one, then one-hot encode into 4 classes.
y_train = train_df['Class Index'].values - 1
y_test = test_df['Class Index'].values - 1
y_train_cat, y_test_cat = (
    tf.keras.utils.to_categorical(labels, num_classes=4)
    for labels in (y_train, y_test)
)

# Sample 10% of the training rows (without replacement) for optimization.
n_train_rows = len(train_df)
subset_size = int(0.1 * n_train_rows)
train_subset_idx = np.random.choice(n_train_rows, subset_size, replace=False)
X_train_tfidf_subset = X_train_tfidf[train_subset_idx]
y_train_cat_subset = y_train_cat[train_subset_idx]

# Hold out 20% of the sampled subset as the validation set.
X_train_tfidf_split, X_val_tfidf, y_train_cat_split, y_val_cat = train_test_split(
    X_train_tfidf_subset,
    y_train_cat_subset,
    test_size=0.2,
    random_state=42,
)

for label, matrix in (
    ("TF-IDF X_train_split shape:", X_train_tfidf_split),
    ("TF-IDF X_val shape:", X_val_tfidf),
    ("TF-IDF X_test shape:", X_test_tfidf),
):
    print(label, matrix.shape)

TF-IDF X_train_split shape: (9600, 5000)
TF-IDF X_val shape: (2400, 5000)
TF-IDF X_test shape: (7600, 5000)


In [3]:
# Particle Swarm Optimization over the LSTM hyper-parameters.
lstm_bounds = [
    (32, 128),   # lstm_units
    (0.2, 0.5),  # dropout_rate
    (16, 64),    # batch_size
]
pso_settings = dict(
    n_particles=5,
    bounds=lstm_bounds,
    model_type='lstm',
    input_dim=X_train_tfidf.shape[1],
    output_dim=4,
    X_train=X_train_tfidf_split,
    y_train=y_train_cat_split,
    X_val=X_val_tfidf,
    y_val=y_val_cat,
)
pso_lstm = PSO(**pso_settings)
best_params_lstm_pso, best_score_lstm_pso = pso_lstm.optimize(max_iter=5)
print(
    "PSO LSTM - Best Params:", best_params_lstm_pso,
    "Best Validation Accuracy:", best_score_lstm_pso,
)

PSO LSTM - Best Params: [90.14571225  0.37378921 47.00566802] Best Validation Accuracy: 0.8791666626930237


In [8]:
# Grey Wolf Optimizer over the same LSTM search space (lstm_bounds).
gwo_settings = dict(
    n_wolves=5,
    bounds=lstm_bounds,
    model_type='lstm',
    input_dim=X_train_tfidf.shape[1],
    output_dim=4,
    X_train=X_train_tfidf_split,
    y_train=y_train_cat_split,
    X_val=X_val_tfidf,
    y_val=y_val_cat,
)
gwo_lstm = GWO(**gwo_settings)
best_params_lstm_gwo, best_score_lstm_gwo = gwo_lstm.optimize(max_iter=5)
print(
    "GWO LSTM - Best Params:", best_params_lstm_gwo,
    "Best Validation Accuracy:", best_score_lstm_gwo,
)

GWO LSTM - Best Params: [128.    0.5  64. ] Best Validation Accuracy: 0.8812500238418579


In [9]:
# Hybrid PSO-GWO optimizer over the same LSTM search space (lstm_bounds).
hybrid_settings = dict(
    n_agents=5,
    bounds=lstm_bounds,
    model_type='lstm',
    input_dim=X_train_tfidf.shape[1],
    output_dim=4,
    X_train=X_train_tfidf_split,
    y_train=y_train_cat_split,
    X_val=X_val_tfidf,
    y_val=y_val_cat,
)
hybrid_lstm = HybridPSOGWO(**hybrid_settings)
best_params_lstm_hybrid, best_score_lstm_hybrid = hybrid_lstm.optimize(max_iter=5)
print(
    "Hybrid PSO-GWO LSTM - Best Params:", best_params_lstm_hybrid,
    "Best Validation Accuracy:", best_score_lstm_hybrid,
)

Hybrid PSO-GWO LSTM - Best Params: [106.53311919   0.46339378  26.69795066] Best Validation Accuracy: 0.8787500262260437


In [10]:
# Train and Evaluate Optimized Models on Full Dataset
def _train_and_evaluate(best_params):
    """Train an LSTM with optimized hyper-parameters and score it on the test set.

    Parameters
    ----------
    best_params : sequence of three numbers
        ``[lstm_units, dropout_rate, batch_size]`` as produced by an
        optimizer's ``optimize`` call. ``lstm_units`` and ``batch_size`` are
        truncated with ``int``; ``dropout_rate`` is passed through unchanged.

    Returns
    -------
    tuple
        ``(model, y_pred, accuracy, f1)``: the fitted model, the raw
        ``model.predict`` output for ``X_test_tfidf``, the test accuracy and
        the weighted F1 score, both computed against ``y_test``.
    """
    # A fresh callback per fit, so no early-stopping state is carried from
    # one model's training run into the next.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=3, restore_best_weights=True)

    model = build_lstm_model(input_dim=X_train_tfidf.shape[1], output_dim=4,
                             lstm_units=int(best_params[0]), dropout_rate=best_params[1])
    model.fit(X_train_tfidf, y_train_cat, epochs=10, batch_size=int(best_params[2]),
              validation_split=0.2, verbose=1, callbacks=[early_stopping])

    y_pred = model.predict(X_test_tfidf, verbose=0)
    # Collapse the per-class outputs to label indices once; both metrics reuse them.
    y_pred_labels = np.argmax(y_pred, axis=1)
    accuracy = accuracy_score(y_test, y_pred_labels)
    f1 = f1_score(y_test, y_pred_labels, average='weighted')
    return model, y_pred, accuracy, f1


# PSO-optimized LSTM
pso_model, y_pred_pso, pso_accuracy, pso_f1 = _train_and_evaluate(best_params_lstm_pso)

# GWO-optimized LSTM
gwo_model, y_pred_gwo, gwo_accuracy, gwo_f1 = _train_and_evaluate(best_params_lstm_gwo)

# Hybrid-optimized LSTM
hybrid_model, y_pred_hybrid, hybrid_accuracy, hybrid_f1 = _train_and_evaluate(best_params_lstm_hybrid)

print("PSO-LSTM - Test Accuracy:", pso_accuracy, "F1-Score:", pso_f1)
print("GWO-LSTM - Test Accuracy:", gwo_accuracy, "F1-Score:", gwo_f1)
print("Hybrid-LSTM - Test Accuracy:", hybrid_accuracy, "F1-Score:", hybrid_f1)

Epoch 1/10
[1m2043/2043[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 17ms/step - accuracy: 0.8032 - loss: 0.6117 - val_accuracy: 0.8858 - val_loss: 0.3206
Epoch 2/10
[1m2043/2043[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 19ms/step - accuracy: 0.9037 - loss: 0.2980 - val_accuracy: 0.8797 - val_loss: 0.3325
Epoch 3/10
[1m2043/2043[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 20ms/step - accuracy: 0.9105 - loss: 0.2684 - val_accuracy: 0.8818 - val_loss: 0.3410
Epoch 4/10
[1m2043/2043[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 20ms/step - accuracy: 0.9156 - loss: 0.2493 - val_accuracy: 0.8818 - val_loss: 0.3541
Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 27ms/step - accuracy: 0.7858 - loss: 0.6803 - val_accuracy: 0.8855 - val_loss: 0.3203
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 25ms/step - accuracy: 0.8999 - loss: 0.3216 - val_accuracy: 0.8811 - val_loss: 0.3300
Epoc

In [11]:
# Persist the three trained models and a CSV summary of the optimization results.
output_dir = '../outputs/'
models_dir, results_dir = (
    os.path.join(output_dir, sub_dir) for sub_dir in ('models', 'results')
)
for directory in (models_dir, results_dir):
    os.makedirs(directory, exist_ok=True)

for trained_model, file_name in (
    (pso_model, 'pso_lstm.keras'),
    (gwo_model, 'gwo_lstm.keras'),
    (hybrid_model, 'hybrid_lstm.keras'),
):
    trained_model.save(os.path.join(models_dir, file_name))

# One row per optimizer: (label, best params, validation accuracy, test accuracy, test F1).
summary_rows = [
    ('PSO-LSTM', best_params_lstm_pso, best_score_lstm_pso, pso_accuracy, pso_f1),
    ('GWO-LSTM', best_params_lstm_gwo, best_score_lstm_gwo, gwo_accuracy, gwo_f1),
    ('Hybrid-PSO-GWO-LSTM', best_params_lstm_hybrid, best_score_lstm_hybrid,
     hybrid_accuracy, hybrid_f1),
]
results = pd.DataFrame({
    'Model': [row[0] for row in summary_rows],
    'Best_Params': [row[1].tolist() for row in summary_rows],
    'Validation_Accuracy': [row[2] for row in summary_rows],
    'Test_Accuracy': [row[3] for row in summary_rows],
    'Test_F1_Score': [row[4] for row in summary_rows],
})
results_csv_path = os.path.join(results_dir, 'swarm_optimization_results.csv')
results.to_csv(results_csv_path, index=False)
print("Swarm optimization results saved to", results_dir)

Swarm optimization results saved to ../outputs/results
