<a href="https://colab.research.google.com/github/Julialunna/Artificial-Intelligence/blob/main/DP-PSO-SGD/DP_wine_PSO_SGD_trained.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.optim as optim
import copy
import random

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from pydp.algorithms.numerical_mechanisms import GaussianMechanism


In [2]:
!pip install python-dp

Collecting python-dp
  Downloading python_dp-1.1.4-cp311-cp311-manylinux1_x86_64.whl.metadata (5.1 kB)
Downloading python_dp-1.1.4-cp311-cp311-manylinux1_x86_64.whl (3.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-dp
Successfully installed python-dp-1.1.4


In [4]:
# Prepare the dataset: x holds the features, y the class labels.
wine = load_wine()
x, y = wine.data, wine.target

# Hold out 20% of the samples as a test split (fixed seed for a stable split).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Standardize the features to zero mean and unit variance, which helps training.
# The scaler is fitted on the training split only; the test split is
# transformed with those same training statistics.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Convert the NumPy arrays to PyTorch tensors:
# float32 for the features, long (int64) for the class labels.
x_train, x_test = (
    torch.tensor(features, dtype=torch.float32) for features in (x_train, x_test)
)
y_train, y_test = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

In [5]:
# Model definition: a small fully connected classifier.
class MLP(torch.nn.Module):
  """Four-layer perceptron with ReLU activations between the linear layers.

  The hidden widths are derived from ``hidden_size``: the first hidden layer
  has ``hidden_size`` units, the second ``hidden_size // 2`` and the third
  ``hidden_size // 4`` (each at least 1). With ``hidden_size=128`` this gives
  the 128 -> 64 -> 32 stack.

  Args:
    input_size: number of input features per sample.
    hidden_size: width of the first hidden layer.
    output_size: number of output logits (one per class).
  """

  def __init__(self, input_size, hidden_size, output_size):
    super(MLP, self).__init__()
    # Later hidden layers shrink by a factor of two each; never below 1 unit.
    second_width = max(1, hidden_size // 2)
    third_width = max(1, hidden_size // 4)
    self.fc1 = torch.nn.Linear(input_size, hidden_size)
    self.fc2 = torch.nn.Linear(hidden_size, second_width)
    self.fc3 = torch.nn.Linear(second_width, third_width)
    self.fc4 = torch.nn.Linear(third_width, output_size)

  def forward(self, x):
    """Return the raw (un-normalised) logits for the batch ``x``."""
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = torch.relu(self.fc3(x))
    # No activation on the last layer: the output is the logits themselves.
    x = self.fc4(x)
    return x



In [14]:
class Particle:
    """One swarm member: a model copy, its personal-best copy and PSO state.

    Each particle owns a deep copy of the template model, a second copy that
    holds its personal best, per-parameter ``position`` and ``velocity``
    dictionaries keyed by parameter name, and an Adam optimizer bound to its
    own model's parameters.
    """

    def __init__(self, model, device):
        """Clone ``model`` onto ``device`` and initialise the PSO state.

        Args:
            model: template ``torch.nn.Module``; it is deep-copied, not mutated.
            device: ``torch.device`` that holds the copies and the PSO tensors.
        """
        self.model = copy.deepcopy(model).to(device)
        self.best_model = copy.deepcopy(model).to(device)

        # Search-space bounds and the scale of the initial velocities.
        low = -10.0
        high = 10.0
        velocity_scale = 0.1

        # Uniform random values in [low, high). These are NOT loaded into the
        # model's weights: pso() recomputes position from the current weights
        # plus the velocity, so this initial value is only a placeholder.
        self.position = {name: torch.rand_like(param).to(device) * (high - low) + low for name, param in model.named_parameters()}

        # Small, normally distributed initial velocities.
        self.velocity = {name: torch.randn_like(param).to(device) * velocity_scale for name, param in model.named_parameters()}

        self.best_score = float('inf')
        self.device = device

        # Adam optimizer over this particle's own model parameters.
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.0001, weight_decay=1e-5)

    @staticmethod
    def _sample_noise(mechanism, like):
        """Draw one independent mechanism sample per element of ``like``.

        Works for tensors of any rank; samples are laid out in row-major
        order and returned with the shape, dtype and device of ``like``.
        """
        # NOTE(review): the int literal 0 is passed through unchanged from the
        # previous implementation; PyDP may pick an integer overload for it and
        # return rounded noise -- confirm whether 0.0 is what is intended.
        samples = [mechanism.add_noise(0) for _ in range(like.numel())]
        noise = torch.tensor(samples, dtype=like.dtype, device=like.device)
        return noise.reshape(like.shape)

    def pso(self, global_best_model, inertia, c1, c2,
            epsilon=1.0, delta=1e-5, sensitivity=3, max_velocity=1.5):
        """Apply one noisy PSO velocity/position update to every parameter.

        For each parameter the velocity becomes the inertia term plus the
        randomly weighted pulls towards this particle's best model and the
        global best model. Gaussian-mechanism noise is then added to every
        element, the result is clamped to ``[-max_velocity, max_velocity]``
        and added to the current weights.

        Args:
            global_best_model: module whose ``state_dict`` is the swarm's best.
            inertia: multiplier applied to the previous velocity.
            c1: weight of the pull towards this particle's personal best.
            c2: weight of the pull towards the global best.
            epsilon: epsilon passed to ``GaussianMechanism``.
            delta: delta passed to ``GaussianMechanism``.
            sensitivity: sensitivity passed to ``GaussianMechanism``.
            max_velocity: symmetric clamp bound applied to the noisy velocity.
        """
        # Loop-invariant work: one mechanism and one state-dict lookup per call
        # instead of one per parameter.
        mechanism = GaussianMechanism(epsilon, delta, sensitivity)
        personal_best = self.best_model.state_dict()
        global_best = global_best_model.state_dict()

        for name, param in self.model.named_parameters():
            # One scalar random weight per parameter tensor for each pull.
            local_rand = random.random()
            global_rand = random.random()

            velocity = (
                inertia*self.velocity[name]
                + c1*local_rand*(personal_best[name].to(self.device) - param.data)
                + c2*global_rand*(global_best[name].to(self.device) - param.data)
            )

            # Independent noise for every element, whatever the tensor's rank.
            # NOTE(review): the clamp runs AFTER the noise is added, so the
            # value fed to the mechanism is not bounded by it -- confirm that
            # `sensitivity` really bounds the un-noised velocity.
            velocity = velocity + self._sample_noise(mechanism, velocity)

            # Clip the noisy velocity.
            velocity = torch.clamp(velocity, -max_velocity, max_velocity)
            self.velocity[name] = velocity

            # Move the weights; position and param.data share the same tensor.
            self.position[name] = param.data + velocity
            param.data = self.position[name]

    def evaluate_weights(self, x, y, criterion):
        """Score the current model on ``(x, y)`` without tracking gradients.

        The model is switched to eval mode and left in it.

        Returns:
            A ``(loss, accuracy)`` tuple: the criterion value as a Python
            float and the accuracy as a percentage in ``[0, 100]``.
        """
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(x)
            loss = criterion(outputs, y)
            # Index of the largest logit along dim 1 is the predicted class.
            _, predicted = torch.max(outputs.data, 1)
            acc = (predicted == y).sum().item() / len(x)
        return loss.item(), acc*100


In [7]:
# PSO and Adam hyperparameters.
pop_size = 10      # number of particles in the swarm
num_epochs = 150   # number of training iterations
# Inertia has no fixed value here; the training loop computes it per epoch.
c1 = 0.8           # weight of the pull towards a particle's personal best
c2 = 0.9           # weight of the pull towards the global best
# Adam settings. NOTE(review): the visible cells do not read these four values
# (Particle builds its optimizer with a hard-coded lr) -- confirm whether they
# are still needed.
learning_rate = 0.008
beta1 = 0.9
beta2 = 0.999
epsilon = 1e-8

In [15]:
# Template network; every particle works on its own deep copy of it.
model = MLP(input_size=x_train.size()[1], hidden_size=128, output_size=3)

particles = [Particle(model, device) for _ in range(pop_size)]

global_best_model = copy.deepcopy(particles[0].model)
global_best_score = float('inf')

criterion = torch.nn.CrossEntropyLoss()

# The particles' models live on `device`, so the data has to be there too;
# otherwise the forward pass fails with a device mismatch when CUDA is used.
# On CPU these calls return the tensors unchanged.
x_train_dev, y_train_dev = x_train.to(device), y_train.to(device)
x_test_dev, y_test_dev = x_test.to(device), y_test.to(device)

# Best test-split accuracy seen so far and the model that produced it.
overall_global_best_accuracy = 0.0
overall_global_best_model = copy.deepcopy(global_best_model)

# PSO training loop.
for epoch in range(num_epochs):
    # Linearly decay the inertia from 0.9 towards 0.4 over the run.
    inertia = 0.9 - ((0.9-0.4)/num_epochs)*epoch
    for particle in particles:
        # Put the model in training mode (evaluate_weights leaves it in eval).
        particle.model.train()

        particle.optimizer.zero_grad()

        # PSO move first, then one Adam step on the moved weights.
        particle.pso(global_best_model, inertia, c1, c2)

        outputs = particle.model(x_train_dev)
        loss = criterion(outputs, y_train_dev)
        loss.backward()
        particle.optimizer.step()

        # Despite the variable names, these are metrics on the TRAINING split.
        val_loss, val_acc = particle.evaluate_weights(x_train_dev, y_train_dev, criterion)

        # Update the particle's personal best when its training loss improves.
        if val_loss < particle.best_score:
            particle.best_score = val_loss
            particle.best_model = copy.deepcopy(particle.model)

    # Determine and update the global best (g-best) model.
    best_particle = min(particles, key=lambda p: p.best_score)
    if best_particle.best_score < global_best_score:
        global_best_score = best_particle.best_score
        global_best_model = copy.deepcopy(best_particle.best_model)

    # evaluate_weights scores the particle's CURRENT model on the test split,
    # so that same model is the one snapshotted when the accuracy improves.
    # NOTE: picking the snapshot by test-split accuracy makes the reported
    # best accuracy optimistic; a separate validation split would avoid that.
    val_loss, val_accuracy = best_particle.evaluate_weights(x_test_dev, y_test_dev, criterion)
    if val_accuracy > overall_global_best_accuracy:
        overall_global_best_accuracy = val_accuracy
        overall_global_best_model = copy.deepcopy(best_particle.model)

    # Report every 10 epochs.
    if (epoch + 1) % 10 == 0:

        print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.2f}, Validation Accuracy: {val_accuracy:.2f}')
print(f'Best Models accuracy: {overall_global_best_accuracy:.2f}%')

Epoch 10/150, Validation Loss: 104524.06, Validation Accuracy: 36.11
Epoch 20/150, Validation Loss: 81216.20, Validation Accuracy: 30.56
Epoch 30/150, Validation Loss: 96129.41, Validation Accuracy: 36.11
Epoch 40/150, Validation Loss: 28831.45, Validation Accuracy: 38.89
Epoch 50/150, Validation Loss: 82050.10, Validation Accuracy: 25.00
Epoch 60/150, Validation Loss: 134831.42, Validation Accuracy: 22.22
Epoch 70/150, Validation Loss: 89520.09, Validation Accuracy: 19.44
Epoch 80/150, Validation Loss: 12485.34, Validation Accuracy: 83.33
Epoch 90/150, Validation Loss: 55408.68, Validation Accuracy: 50.00
Epoch 100/150, Validation Loss: 81900.04, Validation Accuracy: 13.89
Epoch 110/150, Validation Loss: 206224.97, Validation Accuracy: 27.78
Epoch 120/150, Validation Loss: 142040.95, Validation Accuracy: 19.44
Epoch 130/150, Validation Loss: 79012.88, Validation Accuracy: 33.33
Epoch 140/150, Validation Loss: 163398.56, Validation Accuracy: 38.89
Epoch 150/150, Validation Loss: 72398.