# Subiectul 1 - completarea modelului AlexNet
Se va modifica doar celula corespunzatoare paragrafului 2 - "Definirea modelului". Cerinta aferenta se gaseste deasupra ei.

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from tqdm import tqdm

# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Per-channel mean and standard deviation used to normalize the dataset.
# These statistics are computed on the training set only.
normalize = transforms.Normalize(
    mean=[0.4914, 0.4822, 0.4465],
    std=[0.2023, 0.1994, 0.2010],
)

## 1. Pre-procesarea datelor

In [None]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           normalize,
                           valid_size=0.1,
                           shuffle=True):
    """Build the CIFAR-10 training and validation data loaders.

    Both loaders read the CIFAR-10 *training* split. Its indices are
    partitioned so that the first ``valid_size`` fraction (after the optional
    shuffle) feeds the validation loader and the remainder feeds the training
    loader.

    Args:
        data_dir: Directory where CIFAR-10 is stored (downloaded if missing).
        batch_size: Number of samples per batch, for both loaders.
        augment: When true, training images get a random 32-pixel crop
            (padding 4) and a random horizontal flip (p=0.4) before being
            resized. Validation images are never augmented.
        random_seed: Seed handed to ``np.random.seed`` before the indices are
            shuffled; only used when ``shuffle`` is true. Note that this
            reseeds NumPy's global random state.
        normalize: Transform applied after ``ToTensor`` in both pipelines.
        valid_size: Fraction of the training split held out for validation;
            must lie in ``[0, 1]``.
        shuffle: Whether to shuffle the indices before splitting them.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.

    Raises:
        ValueError: If ``valid_size`` is outside ``[0, 1]``.
    """
    # Reject bad fractions up front: a value above 1 would silently leave the
    # training set empty, and a negative one would silently swap the roles of
    # the two splits through negative slicing.
    if not 0 <= valid_size <= 1:
        raise ValueError(
            'valid_size should be in the range [0, 1], got {}'.format(valid_size))

    valid_transform = transforms.Compose([transforms.Resize((227,227)), transforms.ToTensor(), normalize])
    if augment:
        train_transform = transforms.Compose([transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(0.4), transforms.Resize((227,227)), transforms.ToTensor(), normalize])
    else:
        train_transform = valid_transform

    # Two views over the same training split, differing only in the transform.
    train_dataset = datasets.CIFAR10(root=data_dir, train=True, download=True, transform=train_transform)
    valid_dataset = datasets.CIFAR10(root=data_dir, train=True, download=True, transform=valid_transform)

    # Train/validation split sizes
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    # Shuffle the indices reproducibly
    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # Disjoint index sets: the first `split` indices validate, the rest train
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # Data loaders restricted to their respective index subsets
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    normalize,
                    shuffle=True):
    """Build the data loader for the CIFAR-10 test split.

    Args:
        data_dir: Directory where CIFAR-10 is stored (downloaded if missing).
        batch_size: Number of samples per batch.
        normalize: Transform applied after ``ToTensor``; it should carry the
            same statistics that were used for the training data.
        shuffle: Forwarded to the ``DataLoader``.

    Returns:
        A ``DataLoader`` over the CIFAR-10 test split.
    """
    # Deterministic preprocessing only: resize, convert to tensor, normalize.
    preprocessing = [transforms.Resize((227,227)), transforms.ToTensor(), normalize]

    # Test split of CIFAR-10, downloaded on first use.
    test_set = datasets.CIFAR10(
        root=data_dir,
        train=False,
        download=True,
        transform=transforms.Compose(preprocessing),
    )

    return torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=shuffle)


# Instantiate the data loaders.
# Training/validation: augmented training images, fixed seed for the split.
train_loader, valid_loader = get_train_valid_loader(
    data_dir='./data',
    batch_size=64,
    augment=True,
    random_seed=1,
    normalize=normalize,
)

# Test: same normalization statistics as the training data.
test_loader = get_test_loader(
    data_dir='./data',
    batch_size=64,
    normalize=normalize,
)

## 2. Definirea modelului

# ✍ TODO ✍
Completati modelul AlexNet de mai jos cu portiunea ramasa, si anume:
1. strat de dropout cu probabilitate 0.4
1. strat complet conectat de dimensiune $9216\times4096$
1. activare ReLU
1. strat de dropout cu probabilitate 0.4
1. strat complet conectat de dimensiune $4096 \times 1024$
1. activare ReLU
1. strat de dropout cu probabilitate 0.4
1. strat complet conectat de dimensiune $1024 \times 256$
1. activare ReLU
1. strat complet conectat de dimensiune $256 \times num\_classes$

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier with batch normalization.

    Five convolutional stages extract features, which are flattened and fed
    to a four-layer fully connected classifier. The first fully connected
    layer expects 9216 input features, which is what the convolutional stack
    produces for 3x227x227 inputs (256 channels x 6 x 6).

    Args:
        num_classes: Number of output logits (one per class).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Spatial sizes in the comments below assume a 227x227 input.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),    # -> 96 x 55 x 55
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2))                    # -> 96 x 27 x 27
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),   # -> 256 x 27 x 27
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2))                    # -> 256 x 13 x 13
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),  # -> 384 x 13 x 13
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),  # -> 384 x 13 x 13
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),  # -> 256 x 13 x 13
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2))                    # -> 256 x 6 x 6
        # Classifier head: three dropout-regularized hidden layers followed
        # by the output layer, which produces raw logits (no softmax).
        self.fc = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(9216, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(4096, 1024),
            nn.ReLU())
        self.fc2 = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(1024, 256),
            nn.ReLU())
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Image batch; the flattened convolutional features must have
                9216 elements per sample (true for 3x227x227 inputs).

        Returns:
            Tensor with one row per sample and ``num_classes`` columns.
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        # Flatten everything except the batch dimension.
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)

        return out

## 3. Antrenarea retelei

In [None]:
# Hyperparameters
num_classes = 10
num_epochs = 20
batch_size = 64  # not read below; the data loaders were created with their own batch size
learning_rate = 0.005

# Build the model and move it to the selected device (GPU/CPU)
model = AlexNet(num_classes).to(device)


# Loss function: image classification => cross-entropy
criterion = nn.CrossEntropyLoss()
# Optimizer: SGD with momentum and L2 weight decay
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)

# Number of batches per training epoch (used only for progress reporting)
total_step = len(train_loader)

for epoch in tqdm(range(num_epochs)):
    # Training mode: BatchNorm uses batch statistics and any Dropout layers
    # are active. Must be re-enabled every epoch because validation below
    # switches the model to evaluation mode.
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        # Move the tensors to the selected device
        images = images.to(device)
        labels = labels.to(device)

        # Forward propagation
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backpropagation and one optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report the loss of the last batch of the epoch
    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Evaluate on the validation set.
    # Evaluation mode: BatchNorm uses its running statistics (and stops
    # updating them) and Dropout is disabled, so the reported accuracy does
    # not depend on batch composition or random masking.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        # Report the actual number of validation samples seen
        print('Accuracy of the network on the {} validation images: {} %'.format(total, 100 * correct / total))
