**Section	A:	Problem	Statement	–	Enhancing	Neural	Network	Performance	with Particle	Swarm	Optimization**	


In [1]:
# I coose Predicting	secondary	school	student	performance	using	a	double	particle	swarm optimization-based	categorical	boosting	model
# link - (https://www.sciencedirect.com/science/article/abs/pii/S0952197623008333)

In [2]:
# Use Python's requests Module to download the file directly in Jupyter Notebook
import requests

# URL of the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00320/student.zip"

# Send HTTP request
response = requests.get(url)

# Save the content to a zip file
with open("student-performance.zip", "wb") as file:
    file.write(response.content)

print("Download complete!")

Download complete!


In [3]:
# Extract the ZIP File
import zipfile

# Extract the dataset
with zipfile.ZipFile("student-performance.zip", 'r') as zip_ref:
    zip_ref.extractall("student_performance")

print("Extraction complete!")



Extraction complete!


In [4]:
#  Load the Dataset
import pandas as pd

# Load the student performance dataset
df = pd.read_csv("student_performance/student-mat.csv", sep=";")

# Display the first few rows
df.head()


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10


In [5]:
!pip install pyswarm





[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
from pyswarm import pso

In [7]:
# Import Libraries
import random
import warnings
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from pyswarm import pso  # PSO optimization library

# Suppress warnings
warnings.filterwarnings("ignore")


In [8]:
# PSO-NN Model for Hyperparameter Optimization

import random
import warnings
from sklearn.neural_network import MLPClassifier  # Import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split



# Load dataset (modify based on your actual data)
df = pd.read_csv("student_performance/student-mat.csv", sep=";")
df_encoded = pd.get_dummies(df, drop_first=True)  # One-hot encoding categorical columns

X = df_encoded.drop("G3", axis=1)  # Features (all except target column 'G3')
y = df_encoded["G3"]  # Target variable (final grade 'G3')

In [9]:
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [10]:
# PSO Configuration
SWARM_SIZE = 10
DIMENSIONS = 2  # [learning rate, hidden layer size]
NUM_GENERATIONS = 20
W = 0.729  # Inertia weight
C1 = 1.49  # Cognitive weight
C2 = 1.49  # Social weight
MIN_BOUNDARY = [0.0001, 5]  # Minimum learning rate, minimum hidden neurons
MAX_BOUNDARY = [0.1, 100]  # Maximum learning rate, maximum hidden neurons


In [11]:
# Fitness function
def fitness_function(position):
    lr = position[0]
    hidden = int(position[1])  # Convert to integer for hidden layer size
    if hidden <= 0:
        hidden = 1  # Ensure hidden layer size is positive

    model = MLPClassifier(hidden_layer_sizes=(hidden,), learning_rate_init=lr, max_iter=200, random_state=42)
    model.fit(X_train, y_train)
    
    # Evaluate performance on the test set
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)  # Accuracy as fitness function
    return 1 - acc  # We want to minimize error


In [12]:
# Particle Class (for PSO)
class Particle:
    def __init__(self):
        self.position = [
            random.uniform(MIN_BOUNDARY[0], MAX_BOUNDARY[0]),
            random.uniform(MIN_BOUNDARY[1], MAX_BOUNDARY[1])
        ]
        self.velocity = [random.uniform(-1, 1) for _ in range(DIMENSIONS)]
        self.fitness = fitness_function(self.position)
        self.best_position = list(self.position)
        self.best_fitness = self.fitness
        self.informants = random.sample(range(SWARM_SIZE), 3)
        self.group_best_position = list(self.position)
        self.group_best_fitness = self.fitness

    def update_velocity(self):
        for d in range(DIMENSIONS):
            r1, r2 = random.random(), random.random()
            cognitive = C1 * r1 * (self.best_position[d] - self.position[d])
            social = C2 * r2 * (self.group_best_position[d] - self.position[d])
            self.velocity[d] = W * self.velocity[d] + cognitive + social

    def update_position(self):
        for d in range(DIMENSIONS):
            self.position[d] += self.velocity[d]
            self.position[d] = max(MIN_BOUNDARY[d], min(MAX_BOUNDARY[d], self.position[d]))
        self.fitness = fitness_function(self.position)

    def update_group_best(self, swarm):
        best_informant = min(self.informants, key=lambda i: swarm[i].best_fitness)
        if swarm[best_informant].best_fitness < self.group_best_fitness:
            self.group_best_fitness = swarm[best_informant].best_fitness
            self.group_best_position = list(swarm[best_informant].best_position)

In [13]:
# PSO main loop
swarm = [Particle() for _ in range(SWARM_SIZE)]
global_best = min(swarm, key=lambda p: p.best_fitness)
global_best_position = list(global_best.best_position)
global_best_fitness = global_best.best_fitness

for gen in range(NUM_GENERATIONS):
    for particle in swarm:
        particle.update_group_best(swarm)
        particle.update_velocity()
        particle.update_position()
        if particle.fitness < particle.best_fitness:
            particle.best_fitness = particle.fitness
            particle.best_position = list(particle.position)

    best_particle = min(swarm, key=lambda p: p.best_fitness)
    if best_particle.best_fitness < global_best_fitness:
        global_best_fitness = best_particle.best_fitness
        global_best_position = list(best_particle.best_position)

    print(f"Generation {gen + 1}: Best Accuracy = {1 - global_best_fitness:.4f}")

print("\nPSO Optimization Complete!")
print(f"Best Learning Rate: {global_best_position[0]:.5f}")
print(f"Best Hidden Neurons: {int(global_best_position[1])}")

Generation 1: Best Accuracy = 0.2658
Generation 2: Best Accuracy = 0.3165
Generation 3: Best Accuracy = 0.3165
Generation 4: Best Accuracy = 0.3165
Generation 5: Best Accuracy = 0.3165
Generation 6: Best Accuracy = 0.3165
Generation 7: Best Accuracy = 0.3165
Generation 8: Best Accuracy = 0.3165
Generation 9: Best Accuracy = 0.3165
Generation 10: Best Accuracy = 0.3165
Generation 11: Best Accuracy = 0.3165
Generation 12: Best Accuracy = 0.3165
Generation 13: Best Accuracy = 0.3165
Generation 14: Best Accuracy = 0.3165
Generation 15: Best Accuracy = 0.3165
Generation 16: Best Accuracy = 0.3165
Generation 17: Best Accuracy = 0.3165
Generation 18: Best Accuracy = 0.3165
Generation 19: Best Accuracy = 0.3165
Generation 20: Best Accuracy = 0.3165

PSO Optimization Complete!
Best Learning Rate: 0.10000
Best Hidden Neurons: 95


In [14]:
# Traditionally Optimized Neural Network using Grid Search

#import library

import time
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier


In [15]:
# Define parameter grid for grid search
param_grid = {
    'hidden_layer_sizes': [(10,), (50,), (100,)],
    'learning_rate_init': [0.001, 0.01, 0.1],
    'max_iter': [200]
}

# Initialize the model
mlp = MLPClassifier(random_state=42)

In [16]:
# Perform Grid Search
grid_search = GridSearchCV(mlp, param_grid, cv=5, scoring='accuracy')
start_time = time.time()
grid_search.fit(X_train, y_train)
grid_search_time = time.time() - start_time


In [17]:
# Print the best parameters and the corresponding accuracy
print("Best parameters from Grid Search:", grid_search.best_params_)
print("Best Accuracy from Grid Search:", grid_search.best_score_)

Best parameters from Grid Search: {'hidden_layer_sizes': (100,), 'learning_rate_init': 0.01, 'max_iter': 200}
Best Accuracy from Grid Search: 0.24365079365079367


In [18]:
# Evaluate on the test set
best_mlp = grid_search.best_estimator_
y_pred_grid = best_mlp.predict(X_test)
grid_accuracy = accuracy_score(y_test, y_pred_grid)
print(f"Test Accuracy from Grid Search: {grid_accuracy:.4f}")
print(f"Grid Search Time: {grid_search_time:.2f} seconds")

Test Accuracy from Grid Search: 0.2658
Grid Search Time: 11.23 seconds


In [19]:
# Final Evaluation

# Display comparison results
print("\n--- Final Results Comparison ---")
print(f"PSO-NN Test Accuracy: {1 - global_best_fitness:.4f}")
print(f"Grid Search Test Accuracy: {grid_accuracy:.4f}")
print(f"PSO-NN Time: {sum([particle.fitness for particle in swarm]):.2f} seconds")
print(f"Grid Search Time: {grid_search_time:.2f} seconds")



--- Final Results Comparison ---
PSO-NN Test Accuracy: 0.3165
Grid Search Test Accuracy: 0.2658
PSO-NN Time: 7.62 seconds
Grid Search Time: 11.23 seconds


**Section	B:	Task	Description**

In [20]:
# Problem Selection
#For this task, I choose text classification in Natural Language Processing (NLP) as the problem domain. Specifically, I classify news articles from the 20 Newsgroups dataset into one of 20 categories.

# Objectives:
# Build a neural network to classify the articles.

# Compare the performance of a traditionally optimized NN model (using Grid Search) and a PSO-optimized NN model.

# Success Metrics:
#Accuracy of the model on the test set.
# Time taken for training and optimization.

In [21]:
# import library
!pip install scikit-learn





[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
# Install Required Libraries

!pip install scikit-learn numpy tensorflow pyswarm





[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [23]:
# Load and Preprocess the Dataset

import numpy as np
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score

# Load dataset
newsgroups = fetch_20newsgroups(subset='all')
X, y = newsgroups.data, newsgroups.target

# Convert text to numerical features using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_tfidf = vectorizer.fit_transform(X).toarray()

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.3, random_state=42)



In [24]:
# Build a Simple Neural Network

def create_nn(learning_rate=0.01, hidden_neurons=100):
    """Creates and compiles a simple neural network."""
    model = Sequential([
        Dense(hidden_neurons, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(len(set(y)), activation='softmax')
    ])
    
    model.compile(optimizer=keras.optimizers.Adam(learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# Train and evaluate the traditional model
baseline_model = create_nn()
baseline_model.fit(X_train, y_train, epochs#=5, batch_size=32, verbose=0)
y_pred_baseline = np.argmax(baseline_model.predict(X_test), axis=1)

# Calculate baseline accuracy
baseline_acc = accuracy_score(y_test, y_pred_baseline)
print(f"Baseline Neural Network Accuracy: {baseline_acc:.4f}")



[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Baseline Neural Network Accuracy: 0.8834


In [25]:
#  to Add Grid Search for Hyperparameter Optimization

from sklearn.model_selection import ParameterGrid

# Define hyperparameter grid
param_grid = {
    'learning_rate': [0.001, 0.01, 0.1],  # Different learning rates to try
    'hidden_neurons': [50, 100, 150]      # Different number of neurons
}

# Iterate over all parameter combinations
best_acc = 0
best_params = None

for params in ParameterGrid(param_grid):
    model = create_nn(learning_rate=params['learning_rate'], hidden_neurons=params['hidden_neurons'])
    model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)
    
    y_pred = np.argmax(model.predict(X_test), axis=1)
    acc = accuracy_score(y_test, y_pred)
    
    if acc > best_acc:
        best_acc = acc
        best_params = params

print(f"Best Hyperparameters: {best_params}")
print(f"Best Accuracy: {best_acc:.4f}")


[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Best Hyperparameters: {'hidden_neurons': 150, 'learning_rate': 0.001}
Best Accuracy: 0.8925


In [26]:
# Optimize Using PSO

from pyswarm import pso
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import accuracy_score

# Define available activation functions
activation_functions = ['relu', 'tanh', 'sigmoid']

def create_nn(learning_rate, num_layers, neurons_per_layer, activation_index):
    """Creates and compiles a neural network with given hyperparameters."""
    model = Sequential()
    
    # Add first hidden layer
    model.add(Dense(int(neurons_per_layer), activation=activation_functions[int(activation_index)], input_shape=(X_train.shape[1],)))
    
    # Add additional layers based on num_layers
    for _ in range(int(num_layers) - 1):
        model.add(Dense(int(neurons_per_layer), activation=activation_functions[int(activation_index)]))
    
    # Output layer
    model.add(Dense(len(set(y)), activation='softmax'))
    
    # Compile model
    model.compile(optimizer=keras.optimizers.Adam(learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# Define the fitness function for PSO
def fitness_function(params):
    learning_rate, num_layers, neurons_per_layer, activation_index = params
    num_layers = int(num_layers)
    neurons_per_layer = int(neurons_per_layer)
    activation_index = int(activation_index)
    
    model = create_nn(learning_rate, num_layers, neurons_per_layer, activation_index)
    model.fit(X_train, y_train, epochs=3, batch_size=32, verbose=0)  # Train for fewer epochs
    
    y_pred = np.argmax(model.predict(X_test), axis=1)
    return 1 - accuracy_score(y_test, y_pred)  # Minimize error

# Define search space (Lower Bound and Upper Bound)
lb = [0.0001, 1, 10, 0]  # Lower bounds (learning rate, layers, neurons, activation function index)
ub = [0.1, 3, 200, 2]    # Upper bounds (activation function index 0=relu, 1=tanh, 2=sigmoid)

# Run PSO optimization
best_params, _ = pso(fitness_function, lb, ub, swarmsize=5, maxiter=3)  # Fewer particles, fewer iterations

# Train the optimized model
optimized_model = create_nn(best_params[0], int(best_params[1]), int(best_params[2]), int(best_params[3]))
optimized_model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)
y_pred_pso = np.argmax(optimized_model.predict(X_test), axis=1)

# Calculate PSO-optimized accuracy
pso_acc = accuracy_score(y_test, y_pred_pso)
print(f"PSO-Optimized Neural Network Accuracy: {pso_acc:.4f}")
print(f"Best Hyperparameters: Learning Rate={best_params[0]:.5f}, Layers={int(best_params[1])}, Neurons={int(best_params[2])}, Activation={activation_functions[int(best_params[3])]}")




[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━

In [28]:
pip install statsmodels

Collecting statsmodels
  Downloading statsmodels-0.14.4-cp312-cp312-win_amd64.whl.metadata (9.5 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-1.0.1-py2.py3-none-any.whl.metadata (3.3 kB)
Downloading statsmodels-0.14.4-cp312-cp312-win_amd64.whl (9.8 MB)
   ---------------------------------------- 0.0/9.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.8 MB 330.3 kB/s eta 0:00:30
    --------------------------------------- 0.2/9.8 MB 1.2 MB/s eta 0:00:08
   - -------------------------------------- 0.4/9.8 MB 2.2 MB/s eta 0:00:05
   -- ------------------------------------- 0.7/9.8 MB 3.0 MB/s eta 0:00:04
   --- ------------------------------------ 0.8/9.8 MB 3.3 MB/s eta 0:00:03
   ---- ----------------------------------- 1.1/9.8 MB 3.6 MB/s eta 0:00:03
   ----- ---------------------------------- 1.4/9.8 MB 3.9 MB/s eta 0:00:03
   ------ --------------------------------- 1.7/9.8 MB


[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [29]:
# Compare Results

from statsmodels.stats.contingency_tables import mcnemar
import numpy as np

# Generate confusion matrix for McNemar's test
contingency_table = np.zeros((2, 2))

for i in range(len(y_test)):
    baseline_correct = (y_pred_baseline[i] == y_test[i])
    pso_correct = (y_pred_pso[i] == y_test[i])
    
    if baseline_correct and pso_correct:
        contingency_table[0, 0] += 1  # Both correct
    elif baseline_correct and not pso_correct:
        contingency_table[0, 1] += 1  # Baseline correct, PSO wrong
    elif not baseline_correct and pso_correct:
        contingency_table[1, 0] += 1  # Baseline wrong, PSO correct
    else:
        contingency_table[1, 1] += 1  # Both wrong

# Apply McNemar's test
result = mcnemar(contingency_table, exact=True)

# Print results
print("\nComparison of Traditional vs PSO-Optimized Model:")
print(f"Baseline Model Accuracy: {baseline_acc:.4f}")
print(f"PSO-Optimized Model Accuracy: {pso_acc:.4f}")

# Check statistical significance
print("\nStatistical Test (McNemar's Test):")
print(f"p-value: {result.pvalue:.5f}")
if result.pvalue < 0.05:
    print("The improvement from PSO optimization is **statistically significant**.")
else:
    print("No **statistically significant** improvement from PSO optimization.")



Comparison of Traditional vs PSO-Optimized Model:
Baseline Model Accuracy: 0.8834
PSO-Optimized Model Accuracy: 0.8796

Statistical Test (McNemar's Test):
p-value: 0.21425
No **statistically significant** improvement from PSO optimization.
