In [6]:
import torch
import os
from torchvision import datasets, models, transforms
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import glob
import copy
import neptune
#import splitfolders # Библиотека для разделения файлов картинок на train test
import matplotlib.gridspec as gridspec
# Turn on cuDNN benchmark mode for the whole notebook.
# NOTE(review): per the PyTorch docs this lets cuDNN time several convolution
# algorithms and keep the fastest one; that presumably suits this notebook
# because every image is resized to a fixed 256x256 below — confirm.
cudnn.benchmark = True

In [7]:
# --- Experiment tracking -----------------------------------------------------
# The Neptune API token is a credential, so it is read from the environment
# instead of being stored in the notebook. Set NEPTUNE_API_TOKEN before
# launching Jupyter; when it is unset, `None` is passed and Neptune reports the
# missing token itself.
# NOTE(review): a token was previously committed in this cell — it should be
# revoked and re-issued.
run = neptune.init_run(
    project="neas1231/Neas1231",
    api_token=os.environ.get("NEPTUNE_API_TOKEN"),
)

# --- Preprocessing -----------------------------------------------------------
# Per-channel mean/std passed to `transforms.Normalize` for both phases
# (the statistics conventionally used with torchvision's pretrained models).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]
IMAGE_SIZE = (256, 256)
BATCH_SIZE = 16
NUM_WORKERS = 4
PHASES = ['train', 'val']

data_transforms = {
    # Training images get light augmentation (small rotation + horizontal flip).
    'train': transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.RandomRotation(10),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]),
    # Validation images are only resized and normalised.
    'val': transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]),
}

# Raw string: the backslashes are path separators, not escape sequences.
data_dir = r'E:\datasets\dataset'
run["config/data_dir"] = data_dir

# One ImageFolder per phase, read from <data_dir>/train and <data_dir>/val.
image_datasets = {
    phase: datasets.ImageFolder(os.path.join(data_dir, phase), data_transforms[phase])
    for phase in PHASES
}

# Only the training loader is shuffled; validation order does not affect the
# epoch-level loss/accuracy.
dataloaders = {
    phase: torch.utils.data.DataLoader(
        image_datasets[phase],
        batch_size=BATCH_SIZE,
        shuffle=(phase == 'train'),
        num_workers=NUM_WORKERS,
    )
    for phase in PHASES
}

# Number of samples per phase, used to average the running loss/accuracy.
dataset_sizes = {phase: len(image_datasets[phase]) for phase in PHASES}

# Class labels as discovered by ImageFolder from the training directory.
class_names = image_datasets['train'].classes

# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

https://app.neptune.ai/neas1231/Neas1231/e/NEAS-40


In [8]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=14):
    """Train ``model`` and return it with the best validation-loss weights loaded.

    Every epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders``. Per-batch loss and accuracy are appended to
    the module-level Neptune ``run`` as plain floats, under
    ``logs/training/batch/*`` for the train phase and
    ``logs/validation/batch/*`` for the val phase. Per-epoch loss and accuracy
    are printed.

    Args:
        model: network to optimise. Inputs are moved to the module-level
            ``device``, so the model is expected to live there already.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer that updates the model's parameters.
        scheduler: learning-rate scheduler, stepped once after each train phase.
        num_epochs: number of train+val epochs to run.

    Returns:
        The same ``model`` object, loaded with the state dict that achieved the
        lowest validation loss (the initial weights if no epoch was run).
    """
    since = time.time()

    # Neptune namespace per phase, so validation batches are not mixed into
    # the training series.
    log_namespace = {'train': 'training', 'val': 'validation'}

    # Snapshot of the best weights seen so far (selected by validation loss).
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_loss = float('inf')

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                # Move the batch to the compute device.
                inputs = inputs.to(device)
                labels = labels.to(device)
                batch_size = inputs.size(0)

                # Gradients are tracked only during the train phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Convert to Python numbers before logging/accumulating so no
                # tensor (or autograd graph) is kept alive by the logger.
                batch_loss = loss.item()
                batch_corrects = (preds == labels).sum().item()
                run[f"logs/{log_namespace[phase]}/batch/loss"].append(batch_loss)
                run[f"logs/{log_namespace[phase]}/batch/acc"].append(batch_corrects / batch_size)

                # Weight the mean batch loss by batch size so the epoch average
                # is exact even when the last batch is smaller.
                running_loss += batch_loss * batch_size
                running_corrects += batch_corrects

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val':
                # Weights are checkpointed on the best validation loss; the
                # best validation accuracy is tracked for reporting only.
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                if epoch_acc > best_acc:
                    best_acc = epoch_acc

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Loss: {best_loss:.4f}')
    print(f'Best val Acc: {best_acc:.4f}')

    model.load_state_dict(best_model_wts)
    return model

In [9]:
# Fine-tuning variant: start from a pretrained ResNet-50 and train every layer.
model_ft = models.resnet50(pretrained=True)

# `fc.in_features` is how ResNet exposes the width of its classifier input;
# other architectures (e.g. EfficientNet) are laid out differently.
num_ftrs = model_ft.fc.in_features

# Replace the classification head with one output per dataset class.
model_ft.fc = nn.Linear(num_ftrs, len(class_names))
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Adam over all parameters of the network.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)

# Multiply the learning rate by 0.1 every 7 scheduler steps
# (`train_model` steps the scheduler once per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# Log the configuration as explicit strings/numbers: module, loss and optimizer
# objects are not Neptune field values, and `model.parameters()` is a generator
# whose repr carries no information.
run["config/model"] = str(model_ft)
run["config/criterion"] = str(criterion)
run["config/optimizer"] = str(optimizer_ft)
run["parameters"] = {
    "lr": optimizer_ft.defaults["lr"],
    "scheduler_step_size": exp_lr_scheduler.step_size,
    "scheduler_gamma": exp_lr_scheduler.gamma,
    "trainable_params": sum(p.numel() for p in model_ft.parameters() if p.requires_grad),
}


In [10]:
# Train the fine-tuned model and persist it. The Neptune run is closed in a
# `finally` so it is stopped even if training is interrupted or saving fails.
try:
    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=20)
    # Full pickled module (requires the same class definitions to load) ...
    torch.save(model_ft, 'resnet-bestloss.pth')
    # ... and the weights only, which can be loaded into a freshly built model.
    torch.save(model_ft.state_dict(), "weights-resnet-bestloss.pth")
finally:
    run.stop()

Epoch 0/19
----------
train Loss: 1.2484 Acc: 0.5655
val Loss: 1.0244 Acc: 0.6464

Epoch 1/19
----------
train Loss: 0.8664 Acc: 0.6984
val Loss: 0.7922 Acc: 0.7331

Epoch 2/19
----------
train Loss: 0.7296 Acc: 0.7518
val Loss: 0.8113 Acc: 0.7415

Epoch 3/19
----------
train Loss: 0.6777 Acc: 0.7635
val Loss: 0.5479 Acc: 0.8098

Epoch 4/19
----------
train Loss: 0.5908 Acc: 0.7934
val Loss: 0.5387 Acc: 0.8319

Epoch 5/19
----------
train Loss: 0.5627 Acc: 0.8050
val Loss: 0.5834 Acc: 0.8227

Epoch 6/19
----------
train Loss: 0.5244 Acc: 0.8214
val Loss: 0.4900 Acc: 0.8227

Epoch 7/19
----------
train Loss: 0.3762 Acc: 0.8694
val Loss: 0.3454 Acc: 0.8800

Epoch 8/19
----------
train Loss: 0.3414 Acc: 0.8835
val Loss: 0.3291 Acc: 0.8809

Epoch 9/19
----------
train Loss: 0.3126 Acc: 0.8923
val Loss: 0.3314 Acc: 0.8827

Epoch 10/19
----------
train Loss: 0.2997 Acc: 0.8968
val Loss: 0.3291 Acc: 0.8855

Epoch 11/19
----------
train Loss: 0.2798 Acc: 0.9018
val Loss: 0.3240 Acc: 0.8910

Ep

In [11]:
# Close the Neptune run created by `neptune.init_run(...)`.
# NOTE(review): presumably kept as its own cell so the run can be stopped by
# hand when the training cell is interrupted before its own `run.stop()`.
run.stop()

In [6]:
# Start a new Neptune run for the feature-extraction experiment.
# The API token is a credential, so it is read from the environment instead of
# being stored in the notebook (set NEPTUNE_API_TOKEN before launching Jupyter).
# NOTE(review): a token was previously committed in this cell — it should be
# revoked and re-issued.
run = neptune.init_run(
    project="neas1231/Neas1231",
    api_token=os.environ.get("NEPTUNE_API_TOKEN"),
)
run["config/data_dir"] = data_dir

# Feature-extraction variant: freeze every pretrained weight ...
model_conv = torchvision.models.resnet50(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# ... then attach a fresh classification head. A newly constructed layer has
# requires_grad=True by default, so it is the only trainable part.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, len(class_names))

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of the final layer are being optimized, as
# opposed to the fine-tuning setup.
optimizer_conv = optim.Adam(model_conv.fc.parameters(), lr=0.001)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

# Log the configuration after the head has been replaced, as explicit
# strings/numbers: module, loss and optimizer objects are not Neptune field
# values, and `model.parameters()` is a generator whose repr is meaningless.
run["config/model"] = str(model_conv)
run["config/criterion"] = str(criterion)
run["config/optimizer"] = str(optimizer_conv)
run["parameters"] = {
    "lr": optimizer_conv.defaults["lr"],
    "scheduler_step_size": exp_lr_scheduler.step_size,
    "scheduler_gamma": exp_lr_scheduler.gamma,
    "trainable_params": sum(p.numel() for p in model_conv.parameters() if p.requires_grad),
}

https://app.neptune.ai/neas1231/Neas1231/e/NEAS-29


In [7]:
# Train the frozen-backbone model for 14 epochs; `optimizer_conv` only holds
# the parameters of the replaced `fc` head.
model_conv = train_model(
    model=model_conv,
    criterion=criterion,
    optimizer=optimizer_conv,
    scheduler=exp_lr_scheduler,
    num_epochs=14,
)

Epoch 0/13
----------
train Loss: 0.8951 Acc: 0.6950
val Loss: 0.5522 Acc: 0.8089

Epoch 1/13
----------
train Loss: 0.6198 Acc: 0.7741
val Loss: 0.4959 Acc: 0.8264

Epoch 2/13
----------
train Loss: 0.5799 Acc: 0.7915
val Loss: 0.5361 Acc: 0.8006

Epoch 3/13
----------
train Loss: 0.5464 Acc: 0.8032
val Loss: 0.5105 Acc: 0.8190

Epoch 4/13
----------
train Loss: 0.5444 Acc: 0.8065
val Loss: 0.4538 Acc: 0.8366

Epoch 5/13
----------
train Loss: 0.5339 Acc: 0.8080
val Loss: 0.4517 Acc: 0.8375

Epoch 6/13
----------
train Loss: 0.5214 Acc: 0.8156
val Loss: 0.4181 Acc: 0.8504

Epoch 7/13
----------
train Loss: 0.4278 Acc: 0.8467
val Loss: 0.4048 Acc: 0.8523

Epoch 8/13
----------
train Loss: 0.4229 Acc: 0.8496
val Loss: 0.3936 Acc: 0.8578

Epoch 9/13
----------
train Loss: 0.4353 Acc: 0.8438
val Loss: 0.4006 Acc: 0.8569

Epoch 10/13
----------
train Loss: 0.4229 Acc: 0.8466
val Loss: 0.3927 Acc: 0.8523

Epoch 11/13
----------
train Loss: 0.4183 Acc: 0.8522
val Loss: 0.3901 Acc: 0.8467

Ep

In [15]:
# Persist the feature-extraction model. The files are named "*-conv", so they
# must hold `model_conv` (the original cell saved `model_ft` here by mistake).
# Full pickled module ...
torch.save(model_conv, 'resnet-conv.pth')
# ... and the weights only, which can be loaded into a freshly built model.
torch.save(model_conv.state_dict(), "weights-resnet-conv.pth")

In [10]:
import gc

# Drop unreachable Python objects first so that tensors with no remaining
# references can actually be released.
gc.collect()

# The CUDA calls are guarded because the notebook also supports a CPU fallback
# (`device` is "cpu" when no GPU is present) and `memory_summary` needs CUDA.
if torch.cuda.is_available():
    # Hand cached, currently unused GPU blocks back to the driver.
    torch.cuda.empty_cache()
    # `memory_summary` returns a multi-line string; print it so the table is
    # rendered instead of shown as an escaped repr.
    print(torch.cuda.memory_summary(device=None, abbreviated=False))



In [12]:
import os
# Configure PyTorch's CUDA caching allocator via its environment option
# (`max_split_size_mb:128`).
# NOTE(review): PyTorch appears to read this variable when the CUDA allocator
# is first initialised. This cell runs after models were already moved to the
# GPU, so it likely has no effect until the kernel is restarted and the
# assignment is moved above the first CUDA call — confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"