# HW 4

### Done in collboration between Quentin Phillips and Steven Jia

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
!pip install torch-pso
from torch_pso import ParticleSwarmOptimizer



In [None]:
# Get gpu, mps or cpu device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [None]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True):
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
            transforms.Resize((128,128)),
            transforms.ToTensor(),
            normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            transforms.Resize((128,128)),
            transforms.RandomCrop(120, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize((227,227)),
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=train_transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=valid_transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    shuffle=True):
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # define transform
    transform = transforms.Compose([
        transforms.Resize((128,128)),
        transforms.ToTensor(),
        normalize,
    ])

    dataset = datasets.CIFAR10(
        root=data_dir, train=False,
        download=True, transform=transform,
    )

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle
    )

    return data_loader


# dataset
train_loader, valid_loader = get_train_valid_loader(data_dir = './data',                                      batch_size = 64,
                       augment = True,random_seed = 123)

test_loader = get_test_loader(data_dir = './data',
                              batch_size = 72)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [None]:
class model1(nn.Module):
    def __init__(self, num_classes=10):
        super(model1, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(1024, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [None]:
def NNRun(model, optimizer, criterion, num_epochs = 20):
  for epoch in range(num_epochs):
      for i, (images, labels) in enumerate(train_loader):
          # Move tensors to the configured device
          images = images.to(device)
          labels = labels.to(device)

          # Forward pass
          outputs = model(images)
          loss = criterion(outputs, labels)

          # Backward and optimize
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

  with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    return correct / total

In [None]:
# PSO algorithm

def particle_swarm_optimization(num_dimensions, num_particles, max_iter,i_min=-10,i_max=10,bounds=None,w=0.5,c1=0.25,c2=0.75):
    # Initialize the particles
    # This creates a data structure such as a dictionary

    if bounds is None:
        particles = [({'position': [np.random.uniform(i_min, i_max) for _ in range(num_dimensions)],
                    'velocity': [np.random.uniform(-1, 1) for _ in range(num_dimensions)],
                    'pbest': 0,
                    'pbest_position': [0, 0]})
                    for _ in range(num_particles)]
    else:
        particles = [({'position': [np.random.uniform(bounds[i][0], bounds[i][1]) for i in range(num_dimensions)],
                    'velocity': [np.random.uniform(-1, 1) for _ in range(num_dimensions)],
                    'pbest': 0,
                    'pbest_position': [0, 0]})
                    for _ in range(num_particles)]

    # Initialize global best
    gbest_value = 0
    gbest_position = [0, 0]

    for _ in range(max_iter):
        for particle in particles:
            position = particle['position']
            velocity = particle['velocity']

            # Calculate the current value
            model =  model1(num_classes=10).to(device)
            optimizer = torch.optim.SGD(model.parameters(), lr=position[0], weight_decay=position[1])
            criterion = nn.CrossEntropyLoss()
            current_value = NNRun(model, optimizer, criterion)

            print(f"position: {position} cr: {current_value} pbest: {particle['pbest']} gbest: {gbest_value}")

            # Update personal best
            if current_value > particle['pbest']:
                particle['pbest'] = current_value
                particle['pbest_position'] = position.copy()
                print(f" if statement part - position: {position} cr: {current_value}")

            # Update global best
            if current_value > gbest_value:
                gbest_value = current_value
                gbest_position = position.copy()
                print(gbest_value)

            # Update particle's velocity and position
            for i in range(num_dimensions):
                r1, r2 = np.random.uniform(), np.random.uniform()
                velocity[i] = w * velocity[i] + c1*r1 * (particle['pbest_position'][i] - position[i]) + c2*r2 * (gbest_position[i] - position[i])
                position[i] += velocity[i]
                # legalize the values to the provided bounds
                if bounds is not None:
                    position[i] = np.clip(position[i],bounds[i][0],bounds[i][1])

    return gbest_position, gbest_value

In [34]:
particle_swarm_optimization(num_dimensions=2, num_particles=7, max_iter=3, bounds=[(0.1, 0.3), (0.0001, 0.01)])

position: [0.2134304219883172, 0.007092218140461653] cr: 0.3222 pbest: 0 gbest: 0
 if statement part - position: [0.2134304219883172, 0.007092218140461653] cr: 0.3222
0.3222
position: [0.12558547816999002, 0.008697257626510899] cr: 0.5342 pbest: 0 gbest: 0.3222
 if statement part - position: [0.12558547816999002, 0.008697257626510899] cr: 0.5342
0.5342
position: [0.13998795433743189, 0.00010326280096908705] cr: 0.8409 pbest: 0 gbest: 0.5342
 if statement part - position: [0.13998795433743189, 0.00010326280096908705] cr: 0.8409
0.8409
position: [0.20640656424164378, 0.0029712282306877166] cr: 0.597 pbest: 0 gbest: 0.8409
 if statement part - position: [0.20640656424164378, 0.0029712282306877166] cr: 0.597
position: [0.2832350070623105, 0.001959581477573873] cr: 0.3378 pbest: 0 gbest: 0.8409
 if statement part - position: [0.2832350070623105, 0.001959581477573873] cr: 0.3378
position: [0.1777089116931845, 0.0021284828449193846] cr: 0.477 pbest: 0 gbest: 0.8409
 if statement part - positi

([0.13998795433743189, 0.00010326280096908705], 0.8409)