<a href="https://colab.research.google.com/github/Afchis/test/blob/main/testcv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')
!wget --random-wait -P ./data_georg/non_georges/ -i ./drive/MyDrive/test_assignment_cv_engineer_data/non_georges.csv 
!wget --random-wait -P ./data_georg/georges/ -i ./drive/MyDrive/test_assignment_cv_engineer_data/georges.csv 

In [1]:
import os

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models

to_pill = transforms.ToPILImage()

In [38]:
# Args:
IMG_SIZE = 224
BATCH_SIZE = 16
NUM_WORKERS = 8
SPLIT_DATA = 0.8
LEANING_RATE = 0.005
EPOCHS = 20

LABELS = {0 : "george",
          1 : "non_george"}

In [39]:
# Data Preparation:
transform = transforms.Compose([transforms.Resize((IMG_SIZE, IMG_SIZE), interpolation=0),
                                transforms.RandomHorizontalFlip(p=0.5),
                                transforms.RandomRotation(45),
                                transforms.ToTensor()])

data = datasets.ImageFolder(os.path.join("/content/data_georg"), transform=transform)


train_size = int(SPLIT_DATA * len(data))
valid_size = len(data) - train_size
train_data, valid_data = torch.utils.data.random_split(data, [train_size, valid_size])

valid_size = int(SPLIT_DATA * valid_size)
test_size = len(data) - train_size - valid_size
valid_data, test_data = torch.utils.data.random_split(valid_data, [valid_size, test_size])

print("Full dataset len: ", len(data))
print("Train dataset len: " , len(train_data))
print("Valid dataset len: ", len(valid_data))
print("Test dataset len", len(test_data))

train_loader = DataLoader(dataset=train_data,
                          batch_size=BATCH_SIZE,
                          num_workers=NUM_WORKERS,
                          shuffle=True)
valid_loader = DataLoader(dataset=valid_data,
                          batch_size=BATCH_SIZE,
                          num_workers=NUM_WORKERS,
                          shuffle=False)
test_loader = DataLoader(dataset=test_data,
                         batch_size=1,
                         num_workers=NUM_WORKERS,
                         shuffle=False)

Full dataset len:  5700
Train dataset len:  4560
Valid dataset len:  912
Test dataset len 228


In [40]:
# Create model
class ResNet50(nn.Module):
    def __init__(self, pretrained=False):
        super().__init__()
        self.resnet50 = models.resnet50(pretrained=pretrained)
        self.resnet50_layers = list(self.resnet50.children())
        self.cnn = nn.Sequential(*self.resnet50_layers[:-1])
        self.fc = nn.Linear(in_features=2048, out_features=1, bias=True)
        self.sigmoid = nn.Sigmoid()
      
    def forward(self, x):
        out = self.cnn(x)
        out = self.fc(out.reshape(out.size(0), -1))
        out = self.sigmoid(out)
        return out

In [41]:
# Init model, optimizer, scheluler
model = ResNet50(pretrained=True).cuda()

optimizer = torch.optim.SGD(model.parameters(), lr=LEANING_RATE)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.8)

criterion = nn.BCELoss()

In [46]:
def train():
    for epoch in range(EPOCHS):
        model.train()
        loss_acc = 0
        for iter, data in enumerate(train_loader):
            iter += 1
            img, label = data
            img, label = img.cuda(), label.float().cuda()
            out = model(img)
            loss = criterion(out, label.reshape(out.size()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_acc += loss.item()
            loss_train = loss_acc / iter
            print(" "*70, end="\r")
            print("Process ...", "Train iter:", iter, "loss:", loss_train, end="")
        model.eval()
        loss_acc = 0
        for iter, data in enumerate(valid_loader):
            iter += 1
            img, label = data
            img, label = img.cuda(), label.float().cuda()
            out = model(img)
            loss = criterion(out, label.reshape(out.size()))
            loss_acc += loss.item()
            loss_valid = loss_acc / iter
            print(" "*70, end="\r")
            print("Process ...", "Valid iter:", iter, "loss:", loss_valid, end="")
        scheduler.step()
        print(" "*70, end="\r")
        print("Epoch:", epoch, "Train loss:", loss_train, "Valid loss:", loss_valid)


def test():
    correct = 0
    for iter, data in enumerate(test_loader):
        iter += 1
        img, label = data
        img, label = img.cuda(), label.float().cuda()
        out = model(img)
        pred = (out>0.5).float()
        if pred == label: 
            correct += 1.
    accuracy = correct / iter
    print("Model acсuracy: %0.2f" % (accuracy*100))


def inference(idx, labels=LABELS, test_data=test_data):
    pred = model(test_data[idx][0].unsqueeze(0).cuda())
    print(labels[(pred > 0.5).item()])
    # return to_pill(test_data[idx][0])

In [43]:
train()

Epoch: 0 Train loss: 0.4957988835740508 Valid loss: 0.36974116663138074
Epoch: 1 Train loss: 0.3353715497150756 Valid loss: 0.3032502784279355
Epoch: 2 Train loss: 0.27026416324732594 Valid loss: 0.26769989245293435
Epoch: 3 Train loss: 0.2393423610089118 Valid loss: 0.25256026797650155
Epoch: 4 Train loss: 0.21504985377715344 Valid loss: 0.24330604475056916
Epoch: 5 Train loss: 0.18959443934523224 Valid loss: 0.24036219075583576
Epoch: 6 Train loss: 0.187611606501435 Valid loss: 0.23423817747256212
Epoch: 7 Train loss: 0.1615837683574411 Valid loss: 0.23853310436141073
Epoch: 8 Train loss: 0.1641975353030782 Valid loss: 0.22662079840767801
Epoch: 9 Train loss: 0.15383789247195973 Valid loss: 0.2394308867423158
Epoch: 10 Train loss: 0.14900961628739248 Valid loss: 0.22489245128082602
Epoch: 11 Train loss: 0.1317476462507457 Valid loss: 0.241335317623197
Epoch: 12 Train loss: 0.13808104912832117 Valid loss: 0.24278639907246097
Epoch: 13 Train loss: 0.12994026850843637 Valid loss: 0.2264

In [44]:
test()

Model acсuracy: 91.67


In [47]:
inference(19)

non_george
