In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
import torch
from torch import nn
import torchvision as tv
import time

In [None]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [None]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X, y = X.to(dev), y.to(dev)
            trainer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            #print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
            #    (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net,dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))
        


In [None]:
transoforms = tv.transforms.Compose([
    tv.transforms.Grayscale(3),
    tv.transforms.Resize((224,224)),
    tv.transforms.RandomResizedCrop((224,224), scale=(0.9, 1.0), ratio=(0.9,1.1), interpolation=2),
    tv.transforms.ToTensor()
])



In [None]:
face_dataset_train = tv.datasets.ImageFolder(root='/kaggle/input/face-expression-recognition-dataset/images/train',
                                           transform=transoforms)
dataset_train_loader = torch.utils.data.DataLoader(face_dataset_train,
                                             batch_size=64, shuffle=True)

face_dataset_test = tv.datasets.ImageFolder(root='/kaggle/input/face-expression-recognition-dataset/images/validation',
                                           transform=transoforms)
dataset_test_loader = torch.utils.data.DataLoader(face_dataset_test,
                                             batch_size=64, shuffle=True)

In [None]:
dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
dev

In [None]:
# Попробуем модель resnet18

In [43]:
model = tv.models.resnet18(pretrained=True)

for param in model.parameters():
    param.requires_grad = False
    
model.fc = nn.Linear(in_features=512, out_features=7) 

model = model.to(dev)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/checkpoints/resnet18-5c106cde.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 205MB/s]


In [None]:
# В общей сложности прошо 25 эпох и значение loss только колебался

In [44]:
num_epoch = 10

trainer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
train(model, dataset_train_loader, dataset_test_loader, trainer, num_epoch,dev)

epoch 1, loss 7.3438, train acc 0.297, test acc 0.230, time 119.9 sec
epoch 2, loss 5.7989, train acc 0.324, test acc 0.244, time 119.6 sec
epoch 3, loss 6.4226, train acc 0.329, test acc 0.310, time 119.0 sec
epoch 4, loss 5.3514, train acc 0.337, test acc 0.228, time 119.6 sec
epoch 5, loss 6.0385, train acc 0.330, test acc 0.301, time 120.0 sec


KeyboardInterrupt: 

In [None]:
# Попробуем резнет50

In [None]:
model = tv.models.resnet50(pretrained=True)

for param in model.parameters():
    param.requires_grad = False
    
model.fc = nn.Sequential(
    nn.BatchNorm1d(2048),
    nn.Linear(in_features=2048, out_features=512),
    nn.ReLU(),
    nn.Dropout(0.1),

    nn.Linear(512, 7)
)
model = model.to(dev)

In [None]:
num_epoch = 10

trainer = torch.optim.AdamW(model.parameters(), lr=0.01)
train(model, dataset_train_loader, dataset_test_loader, trainer, num_epoch,dev)

In [None]:
# Вывод не сохранился из-за глюка на kaggle

In [None]:
train(model, dataset_train_loader, dataset_test_loader, trainer, num_epoch,dev)

In [None]:
lr = 0.001

for param in model.layer4.parameters():
    param.requires_grad = True
    
trainer = torch.optim.SGD([
                {'params': model.layer4.parameters(), 'lr': lr/50},
                {'params': model.fc.parameters()}
            ], lr=lr, momentum=0.9)

In [None]:
train(model, dataset_train_loader, dataset_test_loader, trainer, num_epoch,dev)

In [None]:
lr = 0.0005

for param in model.layer3.parameters():
    param.requires_grad = True
    
trainer = torch.optim.SGD([
                {'params': model.layer3.parameters(), 'lr': lr/100},
                {'params': model.layer4.parameters(), 'lr': lr/50},
                {'params': model.fc.parameters()}
            ], lr=lr, momentum=0.9)

In [None]:
train(model, dataset_train_loader, dataset_test_loader, trainer, num_epoch,dev)

In [None]:
lr = 0.0001

for param in model.layer3.parameters():
    param.requires_grad = True
    
trainer = torch.optim.SGD([
                {'params': model.layer3.parameters(), 'lr': lr/100},
                {'params': model.layer4.parameters(), 'lr': lr/50},
                {'params': model.fc.parameters()}
            ], lr=lr, momentum=0.9)

In [None]:
train(model, dataset_train_loader, dataset_test_loader, trainer, num_epoch,dev)

In [24]:
lr = 0.00005

for param in model.layer2.parameters():
    param.requires_grad = True
    
trainer = torch.optim.SGD([
                {'params': model.layer2.parameters(), 'lr': lr/100},
                {'params': model.layer3.parameters(), 'lr': lr/75},
                {'params': model.layer4.parameters(), 'lr': lr/50},
                {'params': model.fc.parameters()}
            ], lr=lr, momentum=0.9)

In [None]:
# В общей сложности прошло около 100 эпох 

In [25]:
train(model, dataset_train_loader, dataset_test_loader, trainer, num_epoch,dev)

epoch 1, loss 0.1338, train acc 0.956, test acc 0.648, time 221.8 sec
epoch 2, loss 0.1311, train acc 0.954, test acc 0.651, time 221.4 sec
epoch 3, loss 0.1255, train acc 0.957, test acc 0.656, time 221.9 sec
epoch 4, loss 0.1219, train acc 0.959, test acc 0.652, time 222.7 sec
epoch 5, loss 0.1219, train acc 0.959, test acc 0.653, time 222.2 sec
epoch 6, loss 0.1127, train acc 0.961, test acc 0.648, time 222.1 sec
epoch 7, loss 0.1125, train acc 0.963, test acc 0.651, time 222.4 sec
epoch 8, loss 0.1114, train acc 0.962, test acc 0.648, time 223.9 sec
epoch 9, loss 0.1069, train acc 0.964, test acc 0.659, time 222.5 sec
epoch 10, loss 0.1031, train acc 0.965, test acc 0.650, time 222.0 sec


In [None]:
# Попробуем VGG16

In [36]:
model = tv.models.vgg16(pretrained=True)

for param in model.parameters():
    param.requires_grad = False

for param in model.classifier.parameters():
    param.requires_grad = True    
    
model.classifier[6] = nn.Linear(in_features=4096, out_features=7, bias=True)

model = model.to(dev)

In [None]:
# После 20 эпох улучшения небыло

In [38]:
num_epoch = 10

trainer = torch.optim.AdamW(model.parameters(), lr=0.01)
train(model, dataset_train_loader, dataset_test_loader, trainer, num_epoch,dev)

epoch 1, loss 7.1014, train acc 0.224, test acc 0.258, time 198.4 sec
epoch 2, loss 2.1731, train acc 0.239, test acc 0.252, time 200.3 sec
epoch 3, loss 2.1207, train acc 0.243, test acc 0.207, time 199.7 sec
epoch 4, loss 2.0465, train acc 0.245, test acc 0.258, time 197.4 sec
epoch 5, loss 2.0211, train acc 0.247, test acc 0.260, time 196.2 sec
epoch 6, loss 2.0192, train acc 0.247, test acc 0.258, time 196.9 sec
epoch 7, loss 2.1742, train acc 0.247, test acc 0.258, time 197.0 sec
epoch 8, loss 1.8737, train acc 0.248, test acc 0.258, time 195.5 sec
epoch 9, loss 1.8183, train acc 0.248, test acc 0.258, time 194.5 sec
epoch 10, loss 2.1686, train acc 0.248, test acc 0.258, time 195.8 sec


In [None]:
# Попробуем VGG19

In [40]:
model = tv.models.vgg19(pretrained=True)

for param in model.parameters():
    param.requires_grad = False

for param in model.classifier.parameters():
    param.requires_grad = True    
    
model.classifier[6] = nn.Linear(in_features=4096, out_features=7, bias=True)

model = model.to(dev)

In [None]:
# на 15 эпохе  улучшения небыло

In [42]:
trainer = torch.optim.AdamW(model.parameters(), lr=0.01)
train(model, dataset_train_loader, dataset_test_loader, trainer, 10,dev)

epoch 1, loss 5.7900, train acc 0.232, test acc 0.265, time 208.8 sec
epoch 2, loss 1.9354, train acc 0.245, test acc 0.258, time 207.8 sec
epoch 3, loss 1.9215, train acc 0.246, test acc 0.251, time 208.4 sec
epoch 4, loss 2.0234, train acc 0.246, test acc 0.251, time 208.0 sec
epoch 5, loss 1.9068, train acc 0.246, test acc 0.258, time 207.6 sec


KeyboardInterrupt: 

In [None]:
# Попробуем resnet152

In [8]:
model = tv.models.resnet152(pretrained=True)

for param in model.parameters():
    param.requires_grad = False
    
model.fc = nn.Sequential(
    nn.BatchNorm1d(2048),
    nn.Linear(in_features=2048, out_features=512),
    nn.ReLU(),
    nn.Dropout(0.1),

    nn.Linear(512, 7)
)
model = model.to(dev)

Downloading: "https://download.pytorch.org/models/resnet152-b121ed2d.pth" to /root/.cache/torch/checkpoints/resnet152-b121ed2d.pth
100%|██████████| 230M/230M [00:02<00:00, 82.8MB/s] 


In [11]:
lr = 0.001

for param in model.layer4.parameters():
    param.requires_grad = True
    
trainer = torch.optim.SGD([
                {'params': model.layer4.parameters(), 'lr': lr/50},
                {'params': model.fc.parameters()}
            ], lr=lr, momentum=0.9)

In [12]:
train(model, dataset_train_loader, dataset_test_loader, trainer, 10,dev)

epoch 1, loss 2.2153, train acc 0.359, test acc 0.398, time 295.1 sec
epoch 2, loss 1.4881, train acc 0.430, test acc 0.449, time 239.7 sec
epoch 3, loss 1.4063, train acc 0.466, test acc 0.460, time 240.9 sec
epoch 4, loss 1.3551, train acc 0.491, test acc 0.472, time 241.3 sec
epoch 5, loss 1.2972, train acc 0.510, test acc 0.458, time 241.3 sec
epoch 6, loss 1.2551, train acc 0.529, test acc 0.494, time 241.8 sec
epoch 7, loss 1.1967, train acc 0.550, test acc 0.516, time 242.3 sec
epoch 8, loss 1.1458, train acc 0.572, test acc 0.524, time 240.4 sec
epoch 9, loss 1.1069, train acc 0.590, test acc 0.545, time 239.8 sec
epoch 10, loss 1.0458, train acc 0.612, test acc 0.537, time 239.8 sec


In [13]:
lr = 0.0001

for param in model.layer3.parameters():
    param.requires_grad = True
    
trainer = torch.optim.SGD([
                {'params': model.layer3.parameters(), 'lr': lr/75},
                {'params': model.layer4.parameters(), 'lr': lr/50},
                {'params': model.fc.parameters()}
            ], lr=lr, momentum=0.9)

In [14]:
train(model, dataset_train_loader, dataset_test_loader, trainer, 10,dev)

epoch 1, loss 0.8948, train acc 0.662, test acc 0.590, time 369.5 sec
epoch 2, loss 0.8053, train acc 0.697, test acc 0.598, time 367.5 sec
epoch 3, loss 0.7506, train acc 0.714, test acc 0.613, time 369.0 sec
epoch 4, loss 0.7115, train acc 0.729, test acc 0.610, time 370.4 sec
epoch 5, loss 0.6751, train acc 0.740, test acc 0.618, time 367.9 sec
epoch 6, loss 0.6404, train acc 0.753, test acc 0.621, time 366.6 sec
epoch 7, loss 0.6119, train acc 0.768, test acc 0.625, time 366.9 sec
epoch 8, loss 0.5802, train acc 0.777, test acc 0.627, time 367.2 sec
epoch 9, loss 0.5630, train acc 0.787, test acc 0.628, time 365.8 sec
epoch 10, loss 0.5262, train acc 0.798, test acc 0.620, time 366.8 sec


In [15]:
lr = 0.00005

for param in model.layer2.parameters():
    param.requires_grad = True
    
trainer = torch.optim.SGD([
                {'params': model.layer2.parameters(), 'lr': lr/100},
                {'params': model.layer3.parameters(), 'lr': lr/75},
                {'params': model.layer4.parameters(), 'lr': lr/50},
                {'params': model.fc.parameters()}
            ], lr=lr, momentum=0.9)

In [None]:
train(model, dataset_train_loader, dataset_test_loader, trainer, 10,dev)

epoch 1, loss 0.4970, train acc 0.811, test acc 0.627, time 405.7 sec
epoch 2, loss 0.4757, train acc 0.819, test acc 0.623, time 406.4 sec
epoch 3, loss 0.4603, train acc 0.825, test acc 0.630, time 407.3 sec
epoch 4, loss 0.4473, train acc 0.829, test acc 0.636, time 406.5 sec
epoch 5, loss 0.4331, train acc 0.835, test acc 0.632, time 407.5 sec
epoch 6, loss 0.4160, train acc 0.844, test acc 0.629, time 406.2 sec
epoch 7, loss 0.3992, train acc 0.849, test acc 0.626, time 406.8 sec


In [1]:
# на 18 эпохе этого обучения сввязь с kaggle разорвалась
# значение точности на тесте начало ухудшаться, началось переобучение

In [None]:
# В итоге лучше всего на тесте показала себя модель resnet50