In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Mount Google Drive at /content/gdrive; the dataset archive, label CSVs and
# model directory used by later cells all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Local (Colab VM) directory the dataset archive is unpacked into.
directory_to_extract_to = "/content/data/"
# FairFace train/val image archive stored on the mounted Drive.
path_to_zip_file = "/content/gdrive/MyDrive/ethnicity/fairface-img-margin025-trainval.zip"

In [None]:
import zipfile

# Unpack every member of the dataset archive into the local working directory.
# The context manager closes the archive even if extraction fails part-way.
with zipfile.ZipFile(path_to_zip_file, mode='r') as archive:
    archive.extractall(path=directory_to_extract_to)

In [None]:
from torchvision import models
import torch
from torchvision import transforms
from PIL import Image
from dataset_loader import gen_dataloader, make_class_to_index
import time
import copy
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import os
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm



In [None]:
# TensorBoard writer that train_model uses for per-batch loss scalars.
# Called with no arguments, so events go to the library's default log
# directory (a sub-folder of ./runs).
writer = SummaryWriter()

In [None]:
# Image folders produced by unpacking the archive onto the Colab VM.
val_data_path = "/content/data/val/"
train_data_path = "/content/data/train/"

# Label CSVs read straight from the mounted Drive.
val_labels_path = "/content/gdrive/MyDrive/ethnicity/fairface_label_val.csv"
train_labels_path = "/content/gdrive/MyDrive/ethnicity/fairface_label_train.csv"

# Drive directory where trained weights are written.
models_path = "/content/gdrive/MyDrive/ethnicity/models/"

In [None]:
# Per-split preprocessing pipelines, keyed by split name.
# Both end with the same channel-wise normalisation (presumably the statistics
# expected by the pretrained torchvision backbone; verify if the model changes).
# There is no ToPILImage step: presumably the dataset already yields PIL
# images; verify against dataset_loader.
data_transforms = dict(
    # Training: random scale/aspect crop to 224x224 plus horizontal flip for augmentation.
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),  # TODO: check the crop
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    # Validation: deterministic resize followed by a 224x224 centre crop.
    val=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
)



In [None]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

In [None]:
# Placeholder captured as the default value of train_model's `tb` argument
# when that function is defined; train_model never reads it.
tb = None

In [None]:
def train_model(model, dataloaders, criterion, optimizer, scheduler, dataset_sizes, num_epochs=25, tb=tb, log_interval=10):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a ``'train'`` phase followed by a ``'val'`` phase. The
    weights with the highest validation accuracy seen so far are kept and
    restored into ``model`` before returning.

    Relies on the notebook-level ``device`` (batches are moved to it) and
    ``writer`` (TensorBoard scalars). The model is not moved here, so it is
    expected to already live on ``device``.

    Args:
        model: network to train; called as ``model(inputs)``.
        dataloaders: mapping with ``'train'`` and ``'val'`` keys whose values
            yield ``(inputs, labels)`` batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped after every training batch.
        scheduler: LR scheduler stepped once after each training phase.
        dataset_sizes: mapping with ``'train'`` and ``'val'`` keys giving the
            divisor used to average the epoch loss and accuracy.
        num_epochs: number of train+val epochs to run.
        tb: unused; kept so existing callers keep working.
        log_interval: log the batch loss every ``log_interval`` batches.

    Returns:
        ``model`` with the best validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # One monotonically increasing step counter per phase, so TensorBoard
    # points from later epochs do not land on the same x-values as earlier ones.
    global_step = {'train': 0, 'val': 0}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # tqdm wraps the loader itself (not enumerate) so it can pick up
            # the loader length and show a real progress bar.
            progress = tqdm(dataloaders[phase], desc=f'{phase} epoch {epoch}')
            for batch_idx, (inputs, labels) in enumerate(progress):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics (loss is a batch mean, so re-weight by batch size)
                batch_loss = loss.item()
                running_loss += batch_loss * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

                if batch_idx % log_interval == 0:
                    # Tag by phase so validation loss is not mixed into the
                    # training curve.
                    writer.add_scalar(f"Loss/{phase}", batch_loss, global_step[phase])
                    # tqdm.write keeps the message from breaking the progress bar.
                    tqdm.write(f'{batch_idx} Loss: {batch_loss:.4f}')
                global_step[phase] += 1

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            # float() accepts both the accumulated 0-dim tensor and the
            # initial int 0 (when the loader produced no batches).
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Build one loader per split with that split's preprocessing pipeline.
# Each call also returns a size and `classes`; note that `classes` ends up
# holding the value from the validation call, since it is assigned twice.
dataloader_train, train_size, classes = gen_dataloader(
    train_data_path,
    train_labels_path,
    transformers=data_transforms["train"],
    batch_size=128,
)
dataloader_val, val_size, classes = gen_dataloader(
    val_data_path,
    val_labels_path,
    transformers=data_transforms["val"],
    batch_size=128,
)

# Split-keyed views in the shape train_model expects.
dataloaders = dict(train=dataloader_train, val=dataloader_val)
dataset_sizes = dict(train=train_size, val=val_size)
dataset_sizes

In [None]:
# Start from a torchvision ResNet-50 with pretrained weights; its `fc` head is
# replaced in a later cell.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument; confirm against the installed version.
model_ft = models.resnet50(pretrained=True)

In [None]:
# !tensorboard --logdir=runs

In [None]:
# Alternative head kept for reference: a single linear layer sized from the
# backbone's own `fc` input width.
# num_ftrs = model_ft.fc.in_features
# model_ft.fc = nn.Linear(num_ftrs, len(classes))

# Freeze every existing (pretrained) parameter so only the new head is trained.
for param in model_ft.parameters():
    param.requires_grad = False

# New classification head. Its freshly created parameters require gradients.
# 2048 must match the input width of the `fc` layer being replaced.
model_ft.fc = nn.Sequential(
    nn.Linear(2048, 128),
    nn.ReLU(inplace=True),
    nn.Linear(128, len(classes)))

# Moves backbone and head together; no separate move of the head is needed.
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Only the head is trainable, so hand the optimizer just those parameters
# instead of the whole (mostly frozen) model.
optimizer_ft = optim.SGD(model_ft.fc.parameters(), lr=0.001, momentum=0.9)  # TODO: check lr / try switching to Adam

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)  # TODO: try removing

In [None]:
# Run 25 epochs; train_model hands back the model with its best validation
# weights loaded.
model_ft = train_model(
    model=model_ft,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    dataset_sizes=dataset_sizes,
    num_epochs=25,
)
# Make sure buffered TensorBoard events are written out.
writer.flush()

In [None]:
# Persist the trained weights to Drive. nn.Module has no `.save()` method, so
# serialise the state_dict with torch.save instead. To reload, rebuild the same
# architecture and call `load_state_dict(torch.load(path))`.
os.makedirs(models_path, exist_ok=True)  # the target directory may not exist yet
torch.save(model_ft.state_dict(), os.path.join(models_path, "model_25.pth"))