<a href="https://colab.research.google.com/github/05satyam/machine-leanring-small-projects/blob/main/ModelToFindBestHyperparamUsingMetaheuristicsPSO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is a trial to find best hyper paramters using PSO as optimizer in machine mode. Credit goes to ChatGpt, which is used as reference to learn about it.

In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Generate synthetic dataset
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
# Define a simple multi-layer perceptron (MLP) model
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_units):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.sigmoid(x)
        x = self.fc2(x)
        return x

In [5]:
# Define the objective function for PSO
def objective_function(params):
    learning_rate, hidden_units = params
    model = MLP(input_dim=X_train.shape[1], hidden_units=round(hidden_units))
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(10):
        optimizer.zero_grad()
        outputs = model(torch.tensor(X_train, dtype=torch.float32))
        loss = criterion(outputs, torch.tensor(y_train, dtype=torch.float32).view(-1, 1))
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        predicted = (model(torch.tensor(X_test, dtype=torch.float32)) > 0).float()
        accuracy = accuracy_score(y_test, predicted)

    return -accuracy  # We want to maximize accuracy, so negate it


In [6]:
# Particle Swarm Optimization (PSO) implementation
def pso(objective_function, num_particles, num_dimensions, num_iterations, domain):
    particles = np.random.uniform(domain[0], domain[1], size=(num_particles, num_dimensions))
    print("particles ", particles)
    velocities = np.random.uniform(-1, 1, size=(num_particles, num_dimensions))

    best_positions = particles.copy()
    best_values = np.array([objective_function(p) for p in particles])

    global_best_index = np.argmin(best_values)
    global_best_position = best_positions[global_best_index]
    global_best_value = best_values[global_best_index]

    for iteration in range(num_iterations):
        for i in range(num_particles):
            inertia_weight = 0.7
            cognitive_weight = 1.4
            social_weight = 1.4

            cognitive_component = cognitive_weight * np.random.rand() * (best_positions[i] - particles[i])
            social_component = social_weight * np.random.rand() * (global_best_position - particles[i])

            velocities[i] = inertia_weight * velocities[i] + cognitive_component + social_component
            particles[i] = particles[i] + velocities[i]

            particles[i] = np.clip(particles[i], domain[0], domain[1])

            current_value = objective_function(particles[i])

            if current_value < best_values[i]:
                best_values[i] = current_value
                best_positions[i] = particles[i]

                if current_value < global_best_value:
                    global_best_value = current_value
                    global_best_position = particles[i]

    return global_best_position


In [7]:
# Set parameters for PSO
num_particles = 30
num_dimensions = 2
num_iterations = 50
domain = [(0.001, 0.1), (10, 100)]  # Range for learning rate and hidden units

# Run PSO for hyperparameter tuning and architecture search
best_hyperparameters = pso(objective_function, num_particles, num_dimensions, num_iterations, domain)

print("Best Hyperparameters (learning_rate, hidden_units):", best_hyperparameters)


particles  [[ 4.459954   45.11527232]
 [ 3.58734146 72.13019651]
 [ 6.37588831 65.24413666]
 [ 6.4832217  82.90532307]
 [ 3.55287535 48.90240128]
 [ 4.8693058  18.60962974]
 [ 9.63174909 98.99871929]
 [ 5.68370641 47.29108944]
 [ 4.73530166 20.56145501]
 [ 7.35436298  2.58520632]
 [ 0.5093239  71.05274632]
 [ 7.2478964  98.72266967]
 [ 5.21902851 77.77445473]
 [ 9.05450836 64.05844374]
 [ 6.33763531 44.27972822]
 [ 9.1419694  30.01947427]
 [ 1.12760356 94.5135597 ]
 [ 7.21673519 63.81611856]
 [ 7.71844444 56.07204949]
 [ 2.38751522 93.18497104]
 [ 4.99656844 62.22052969]
 [ 7.75457284 73.14450565]
 [ 1.52981614 68.94944475]
 [ 3.00983051 21.02421861]
 [ 0.69007475 65.81251002]
 [ 1.67936669 49.04531917]
 [ 8.16459045 28.77802195]
 [ 4.97238567 85.39849638]
 [ 9.8275071  72.63955105]
 [ 7.03138167 48.99909747]]




Best Hyperparameters (learning_rate, hidden_units): [7.28205094 1.63473427]




In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Generate synthetic dataset
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a simple neural network model
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_units):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, 2)  # 2 classes: 0 and 1
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return self.softmax(x)

# Define the training and evaluation process
def train_and_evaluate(learning_rate, hidden_units):
    model = NeuralNetwork(input_dim=X_train.shape[1], hidden_units=hidden_units)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(10):
        optimizer.zero_grad()
        outputs = model(torch.tensor(X_train, dtype=torch.float32))
        loss = criterion(outputs, torch.tensor(y_train, dtype=torch.long))
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        predicted = model(torch.tensor(X_test, dtype=torch.float32))
        _, predicted_labels = torch.max(predicted, 1)
        accuracy = accuracy_score(y_test, predicted_labels)

    return accuracy

# Hyperparameter tuning using grid search
best_accuracy = 0
best_hyperparameters = (0, 0)

learning_rates = [0.001, 0.01, 0.1]
hidden_units_list = [50, 100, 150]

for lr in learning_rates:
    for hidden_units in hidden_units_list:
        accuracy = train_and_evaluate(lr, hidden_units)
        print(f"Learning Rate: {lr}, Hidden Units: {hidden_units}, Accuracy: {accuracy}")

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_hyperparameters = (lr, hidden_units)

print("Best Hyperparameters (learning_rate, hidden_units):", best_hyperparameters)


Learning Rate: 0.001, Hidden Units: 50, Accuracy: 0.64
Learning Rate: 0.001, Hidden Units: 100, Accuracy: 0.65
Learning Rate: 0.001, Hidden Units: 150, Accuracy: 0.785
Learning Rate: 0.01, Hidden Units: 50, Accuracy: 0.84
Learning Rate: 0.01, Hidden Units: 100, Accuracy: 0.87
Learning Rate: 0.01, Hidden Units: 150, Accuracy: 0.85
Learning Rate: 0.1, Hidden Units: 50, Accuracy: 0.84
Learning Rate: 0.1, Hidden Units: 100, Accuracy: 0.84
Learning Rate: 0.1, Hidden Units: 150, Accuracy: 0.84
Best Hyperparameters (learning_rate, hidden_units): (0.01, 100)
