In [None]:
import os
import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
from sklearn.metrics import accuracy_score, roc_auc_score
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import numpy as np
def load_csv(csv_file, img_dir):
    """Read an annotation CSV and return image paths together with their labels.

    Args:
        csv_file: Path to a CSV file with an ``id`` column (image file names)
            and a ``target_people`` column (labels).
        img_dir: Directory that is joined in front of every ``id`` value.

    Returns:
        A ``(img_paths, labels)`` tuple: a list of joined paths and the
        ``target_people`` column as a NumPy array, both in CSV row order.
    """
    frame = pd.read_csv(csv_file)
    img_paths = []
    for file_name in frame['id']:
        img_paths.append(os.path.join(img_dir, file_name))
    return img_paths, frame['target_people'].values

class CustomDataset(Dataset):
    """Dataset that loads RGB images from disk and pairs them with labels.

    Args:
        img_paths: Sequence of image file paths.
        labels: Sequence of labels, indexed in parallel with ``img_paths``.
        transform: Optional callable invoked as ``transform(image=array)``;
            the ``'image'`` entry of its result replaces the raw array.
    """

    def __init__(self, img_paths, labels, transform=None):
        self.img_paths, self.labels = img_paths, labels
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        path, target = self.img_paths[idx], self.labels[idx]

        # Decode to RGB and hand a NumPy array to the transform pipeline.
        pixels = np.array(Image.open(path).convert("RGB"))

        if not self.transform:
            return pixels, target
        return self.transform(image=pixels)['image'], target

def prepare_dataloader(csv_file, img_dir, transforms, batch_size=128, shuffle=True):
    """Build a ``DataLoader`` over the images listed in an annotation CSV.

    Args:
        csv_file: Annotation CSV, read through ``load_csv``.
        img_dir: Directory containing the image files.
        transforms: Transform pipeline passed to ``CustomDataset``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the data.

    Returns:
        A ``DataLoader`` wrapping a ``CustomDataset``.
    """
    paths, targets = load_csv(csv_file, img_dir)
    return DataLoader(
        CustomDataset(paths, targets, transform=transforms),
        batch_size=batch_size,
        shuffle=shuffle,
    )


  check_for_updates()


In [None]:
import torch
import torch.nn as nn
import timm
from torchvision import models as torchvision_models

def get_model(model_name, num_classes=2):
    """Create a pretrained backbone whose classification head has ``num_classes`` outputs.

    Supported names: ``'efficientnet_b0'``, ``'mobilenet_v2'``, ``'mobilenet_v3'``
    (torchvision ``mobilenet_v3_large``), ``'shufflenet_v2'`` (torchvision
    ``shufflenet_v2_x1_0``) and ``'regnet'`` (timm ``regnety_002``).

    Args:
        model_name: One of the supported names listed above.
        num_classes: Output width of the replaced final linear layer.

    Returns:
        The constructed model.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    if model_name == 'efficientnet_b0':
        net = torchvision_models.efficientnet_b0(weights='DEFAULT')
        head_index = 1
    elif model_name == 'mobilenet_v2':
        net = torchvision_models.mobilenet_v2(weights='DEFAULT')
        head_index = 1
    elif model_name == 'mobilenet_v3':
        net = torchvision_models.mobilenet_v3_large(weights='DEFAULT')
        head_index = 3
    elif model_name == 'regnet':
        # timm sizes the head itself from `num_classes`.
        return timm.create_model('regnety_002', pretrained=True, num_classes=num_classes)
    elif model_name == 'shufflenet_v2':
        net = torchvision_models.shufflenet_v2_x1_0(weights='DEFAULT')
        net.fc = nn.Linear(net.fc.in_features, num_classes)
        return net
    else:
        raise ValueError(f"Model '{model_name}' is not supported.")

    # The remaining models keep their final Linear layer inside `classifier`.
    old_head = net.classifier[head_index]
    net.classifier[head_index] = nn.Linear(old_head.in_features, num_classes)
    return net

In [None]:
# Training pipeline: random flip / rotation / crop / colour jitter, then
# normalisation and conversion to a tensor.
train_transforms = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=25),
        A.RandomResizedCrop(height=224, width=224, scale=(0.8, 1.0)),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)

# Validation pipeline: deterministic resize with the same normalisation.
valid_transforms = A.Compose(
    [
        A.Resize(height=224, width=224),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)

# Annotation files and image directories, relative to the working directory.
train_csv, valid_csv = 'train.csv', 'valid.csv'
train_dir, valid_dir = 'train', 'valid'

train_loader = prepare_dataloader(
    csv_file=train_csv,
    img_dir=train_dir,
    transforms=train_transforms,
    batch_size=128,
    shuffle=True,
)
valid_loader = prepare_dataloader(
    csv_file=valid_csv,
    img_dir=valid_dir,
    transforms=valid_transforms,
    batch_size=128,
    shuffle=False,
)


In [None]:
def load_model_weights(model_name, weights_path='efficientnet_b0'):
    """Build a model via ``get_model`` and load a saved state dict into it.

    Args:
        model_name: Architecture name forwarded to ``get_model``.
        weights_path: File containing the state dict to load.

    Returns:
        The model, switched to evaluation mode.
    """
    model = get_model(model_name)
    # Deserialize onto the GPU when one is available, otherwise onto the CPU.
    target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    state_dict = torch.load(weights_path, map_location=target)
    model.load_state_dict(state_dict)
    return model.eval()

In [None]:
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
from sklearn.metrics import accuracy_score, roc_auc_score

def continue_training(train_loader, valid_loader, criterion, optimizer = None, model_name=None, model=None, scheduler=None, epochs=5, learn_r=0.001, weights_path='efficientnet_b0', new_save_path='updated_model_weights.pth', max_train_loss=0.4, min_valid_accuracy=0.89):
    """Continue training a two-class classifier, checkpointing after every epoch.

    Training aborts early (returning the model as it is at that moment) when
    either the current batch loss or the running mean training loss exceeds
    ``max_train_loss``, or when an epoch's validation accuracy falls below
    ``min_valid_accuracy``.

    Args:
        train_loader: Iterable of ``(images, labels)`` training batches.
        valid_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer to use; an ``Adam`` with ``lr=learn_r`` is created
            when omitted.
        model_name: Architecture name for ``load_model_weights``; only used
            when ``model`` is omitted.
        model: Model to train; loaded from ``weights_path`` when omitted.
        scheduler: Optional LR scheduler, stepped once per completed epoch.
        epochs: Number of epochs to run.
        learn_r: Learning rate for the default optimizer.
        weights_path: Checkpoint used when ``model`` is omitted.
        new_save_path: Where the final state dict is written when all epochs
            complete without an early stop.
        max_train_loss: Training-loss ceiling that triggers an early stop.
        min_valid_accuracy: Validation-accuracy floor that triggers an early stop.

    Returns:
        The trained (or partially trained) model.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if model is None:
        model = load_model_weights(model_name, weights_path)
    # Also covers caller-supplied models; a no-op when already on `device`.
    model = model.to(device)

    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=learn_r)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        loop = tqdm(enumerate(train_loader), total=len(train_loader), desc=f'Epoch {epoch+1}/{epochs}', leave=False)

        for i, (images, labels) in loop:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            mean_train_loss = running_loss / (i + 1)
            loop.set_postfix(loss=mean_train_loss)
            if mean_train_loss > max_train_loss or batch_loss > max_train_loss:
                print(f"Training stopped due to significant loss increase at Epoch {epoch+1}")
                return model

        model.eval()
        val_loss = 0.0
        all_valid_labels, all_valid_preds, all_valid_probs = [], [], []

        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                val_loss += criterion(outputs, labels).item()

                _, preds = torch.max(outputs, 1)
                # Probability of class 1 is the score fed to ROC AUC.
                probabilities = torch.softmax(outputs, dim=1)[:, 1]

                all_valid_labels.extend(labels.cpu().numpy())
                all_valid_preds.extend(preds.cpu().numpy())
                all_valid_probs.extend(probabilities.cpu().numpy())

        # Per-batch mean, so the printed value and the checkpoint name agree.
        mean_val_loss = val_loss / len(valid_loader)
        valid_accuracy = accuracy_score(all_valid_labels, all_valid_preds)
        valid_roc_auc = roc_auc_score(all_valid_labels, all_valid_probs)

        print(f'Epoch {epoch+1}/{epochs}, Validation Loss: {mean_val_loss:.4f}, '
              f'Validation Accuracy: {valid_accuracy:.4f}, ROC AUC: {valid_roc_auc:.4f}')
        checkpoint_path = f'resnet_50_weights_for_accuracy_{valid_accuracy}_valid_loss_{mean_val_loss}'
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Updated model weights saved to {checkpoint_path}")

        if valid_accuracy < min_valid_accuracy:
            print(f"Training stopped: validation accuracy {valid_accuracy:.4f} is below {min_valid_accuracy} at Epoch {epoch+1}")
            return model
        if scheduler is not None:
            scheduler.step()

    torch.save(model.state_dict(), new_save_path)
    print(f"Updated model weights saved to {new_save_path}")
    return model



In [None]:
from torch.utils.data import Subset

def get_subset_loader(loader, fraction=0.33):
    """Build a shuffled loader over a random sample of another loader's dataset.

    Args:
        loader: Source ``DataLoader``; its ``dataset`` and ``batch_size`` are reused.
        fraction: Share of the dataset to keep; the sample size is
            ``int(len(dataset) * fraction)``.

    Returns:
        A new ``DataLoader`` (``shuffle=True``) over a ``Subset`` of the data.
    """
    source = loader.dataset
    total = len(source)
    # Pick distinct random indices for the sub-sample.
    picked = np.random.choice(total, int(total * fraction), replace=False)
    return DataLoader(Subset(source, picked), batch_size=loader.batch_size, shuffle=True)

In [None]:
def get_pretrained_resnet50(num_classes=2):
    """Return a ResNet-50 with default pretrained weights and a new final layer.

    Args:
        num_classes: Output width of the replacement ``fc`` layer.

    Returns:
        The model with its ``fc`` layer replaced by a fresh ``nn.Linear``.
    """
    backbone = models.resnet50(weights='ResNet50_Weights.DEFAULT')
    # Swap the classification head for one with `num_classes` outputs.
    backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
    return backbone


def train_model(model, train_loader, valid_loader, criterion, optimizer, epochs=10, scheduler=None, early_stopping_threshold=1.3):
    """Train and validate a classifier, checkpointing after every epoch.

    Args:
        model: Model to train; batches are moved to the device its parameters
            live on.
        train_loader: Iterable of ``(images, labels)`` training batches.
        valid_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        epochs: Maximum number of epochs.
        scheduler: Optional LR scheduler, stepped once per completed epoch.
        early_stopping_threshold: Training stops when an epoch's mean validation
            loss exceeds the previous epoch's by more than this factor.

    Returns:
        ``(model, train_losses, valid_losses, valid_accuracies)`` where the
        three lists hold one value per executed epoch.
    """
    # Follow the model's own placement instead of relying on a notebook-global `device`.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses, valid_losses = [], []
    valid_accuracies = []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0

        for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}', leave=False):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        train_loss = running_loss / len(train_loader)
        train_losses.append(train_loss)

        model.eval()
        val_loss, correct, total = 0.0, 0, 0

        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        valid_loss = val_loss / len(valid_loader)
        valid_losses.append(valid_loss)

        accuracy = correct / total
        valid_accuracies.append(accuracy)

        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Accuracy: {accuracy:.4f}')
        checkpoint_path = f'resnet_50_weights_for_accuracy_{accuracy}_valid_loss_{valid_loss}'
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Updated model weights saved to {checkpoint_path}")

        # Stop only on a *significant* rise: the loss must exceed the previous
        # epoch's value by the configured factor, not merely be larger.
        if epoch > 0 and valid_loss > early_stopping_threshold * valid_losses[-2]:
            print(f"Training stopped due to significant loss increase at Epoch {epoch+1}")
            return model, train_losses, valid_losses, valid_accuracies

        if scheduler is not None:
            scheduler.step()

    return model, train_losses, valid_losses, valid_accuracies




In [None]:
def load_resnet_80_percent(path=None):
    """Build a two-class ResNet-50 and load fine-tuned weights into it.

    Args:
        path: State-dict file to load; ``'resnet_50_weights_best'`` is used
            when omitted or empty.

    Returns:
        The ResNet-50 with the loaded weights.
    """
    # `weights=None` is the current spelling of the deprecated `pretrained=False`
    # and matches the other ResNet builders in this notebook.
    model = models.resnet50(weights=None)
    # Two-class output head.
    model.fc = nn.Linear(model.fc.in_features, 2)

    checkpoint = path if path else 'resnet_50_weights_best'
    map_location = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.load_state_dict(torch.load(checkpoint, map_location=map_location))
    return model

In [None]:
# One-epoch fine-tuning sweep over batch size and learning rate. Every run
# restarts from the checkpoint loaded by `load_resnet_80_percent` and trains on
# a random fraction of the training set.

# Defined here so the cell does not depend on `device` leaking from another cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
FRACTION_OF_TRAIN_SET = 0.45

for BATCH in [16, 32, 64, 128]:
    print(f'now BATCH is {BATCH}')
    train_loader = prepare_dataloader(train_csv, train_dir, train_transforms, batch_size=BATCH, shuffle=True)
    train_loader_subset = get_subset_loader(train_loader, fraction=FRACTION_OF_TRAIN_SET)
    valid_loader = prepare_dataloader(valid_csv, valid_dir, valid_transforms, batch_size=BATCH, shuffle=False)
    for learning_rate in [0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001]:
        print(f'now BATCH is {BATCH} and lr is {learning_rate}')
        model = load_resnet_80_percent().to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # optional
        continue_training(train_loader=train_loader_subset,
                          valid_loader=valid_loader,
                          criterion=criterion,
                          optimizer=optimizer,
                          model_name=None,
                          model=model,
                          scheduler=scheduler,
                          epochs=1,
                          learn_r=learning_rate,
                          weights_path=None,
                          new_save_path=f'resnet_lr_{learning_rate}_batch{BATCH}.pth')

now BATCH is 16
now BATCH is 16 and lr is 0.05


                                                                                                                       

Epoch 1/1, Validation Loss: 5.3647, Validation Accuracy: 0.6009, ROC AUC: 0.5056
Updated model weights saved to resnet_lr_0.05_batch16.pth
now BATCH is 16 and lr is 0.01


                                                                                                                       

Epoch 1/1, Validation Loss: 1.0751, Validation Accuracy: 0.6029, ROC AUC: 0.4889
Updated model weights saved to resnet_lr_0.01_batch16.pth
now BATCH is 16 and lr is 0.005


                                                                                                                       

Epoch 1/1, Validation Loss: 0.6660, Validation Accuracy: 0.6029, ROC AUC: 0.5699
Updated model weights saved to resnet_lr_0.005_batch16.pth
now BATCH is 16 and lr is 0.001


                                                                                                                       

Epoch 1/1, Validation Loss: 0.5521, Validation Accuracy: 0.7318, ROC AUC: 0.7890
Updated model weights saved to resnet_lr_0.001_batch16.pth
now BATCH is 16 and lr is 0.0005


                                                                                                                       

Epoch 1/1, Validation Loss: 0.4605, Validation Accuracy: 0.8040, ROC AUC: 0.8594
Updated model weights saved to resnet_lr_0.0005_batch16.pth
now BATCH is 16 and lr is 0.0001


                                                                                                                       

Epoch 1/1, Validation Loss: 0.3596, Validation Accuracy: 0.8516, ROC AUC: 0.9297
Updated model weights saved to resnet_lr_0.0001_batch16.pth
now BATCH is 16 and lr is 5e-05


                                                                                                                       

Epoch 1/1, Validation Loss: 0.3700, Validation Accuracy: 0.8418, ROC AUC: 0.9267
Updated model weights saved to resnet_lr_5e-05_batch16.pth
now BATCH is 16 and lr is 1e-05


                                                                                                                       

Epoch 1/1, Validation Loss: 0.3884, Validation Accuracy: 0.8262, ROC AUC: 0.9137
Updated model weights saved to resnet_lr_1e-05_batch16.pth
now BATCH is 32
now BATCH is 32 and lr is 0.05


                                                                                                                       

KeyboardInterrupt: 

In [None]:
# Train a fresh pretrained ResNet-50 for every (image size, batch size,
# learning rate) combination and save the resulting weights.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()

for size_x in [224, 128]:
    train_transforms = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=25),
        A.RandomResizedCrop(height=size_x, width=size_x, scale=(0.9, 0.9)),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    # Validate at the resolution being trained on, so the reported accuracy
    # belongs to the image size named in the log line and in the file name.
    # Kept under its own name so the notebook-level `valid_transforms` is untouched.
    sweep_valid_transforms = A.Compose([
        A.Resize(size_x, size_x),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    for BATCH in [16, 32, 64, 128, 224]:
        train_loader = prepare_dataloader(train_csv, train_dir, train_transforms, batch_size=BATCH, shuffle=True)
        valid_loader = prepare_dataloader(valid_csv, valid_dir, sweep_valid_transforms, batch_size=BATCH, shuffle=False)

        for learning_rate in [0.00005, 0.00003, 0.00001, 0.000005, 0.000003, 0.000001]:
            print(f'lr is {learning_rate}, batch {BATCH}, im size is {size_x}x{size_x}')
            model = get_pretrained_resnet50(num_classes=2).to(device)
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # optional
            model, train_losses, valid_losses, valid_accuracies = train_model(model, train_loader, valid_loader, criterion, optimizer, epochs=4, scheduler=scheduler)
            # Name the file after the last epoch's accuracy (the state being
            # saved) rather than embedding the whole per-epoch list.
            torch.save(model.state_dict(), f'resnet50_batch_{BATCH}_image_size{size_x}x{size_x}_lr_{learning_rate}_val_accur_{valid_accuracies[-1]:.4f}.pth')

lr is 0.01, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.7331, Valid Loss: 0.6928, Accuracy: 0.5970
Updated model weights saved to resnet_50_weights_for_accuracy_0.5970052083333334_valid_loss_0.6927940410872301


                                                                                                                       

Epoch 2/4, Train Loss: 0.6722, Valid Loss: 0.6690, Accuracy: 0.6029
Updated model weights saved to resnet_50_weights_for_accuracy_0.6028645833333334_valid_loss_0.669025640313824


                                                                                                                       

Epoch 3/4, Train Loss: 0.6654, Valid Loss: 0.6716, Accuracy: 0.6003
Updated model weights saved to resnet_50_weights_for_accuracy_0.6002604166666666_valid_loss_0.6715954405566057


                                                                                                                       

Epoch 4/4, Train Loss: 0.6697, Valid Loss: 0.6926, Accuracy: 0.5970
Updated model weights saved to resnet_50_weights_for_accuracy_0.5970052083333334_valid_loss_0.6925956370929877
lr is 0.005, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.6775, Valid Loss: 0.8958, Accuracy: 0.5573
Updated model weights saved to resnet_50_weights_for_accuracy_0.5572916666666666_valid_loss_0.8958195165420572


                                                                                                                       

Epoch 2/4, Train Loss: 0.6710, Valid Loss: 0.6929, Accuracy: 0.5853
Updated model weights saved to resnet_50_weights_for_accuracy_0.5852864583333334_valid_loss_0.6929230087747177


                                                                                                                       

Epoch 3/4, Train Loss: 0.6674, Valid Loss: 0.6786, Accuracy: 0.5905
Updated model weights saved to resnet_50_weights_for_accuracy_0.5904947916666666_valid_loss_0.6786416471004486


                                                                                                                       

Epoch 4/4, Train Loss: 0.6432, Valid Loss: 0.6418, Accuracy: 0.6204
Updated model weights saved to resnet_50_weights_for_accuracy_0.6204427083333334_valid_loss_0.6418346123148998
lr is 0.001, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.5475, Valid Loss: 0.6921, Accuracy: 0.6888
Updated model weights saved to resnet_50_weights_for_accuracy_0.6888020833333334_valid_loss_0.6920803496614099


                                                                                                                       

Epoch 2/4, Train Loss: 0.4960, Valid Loss: 0.6803, Accuracy: 0.6719
Updated model weights saved to resnet_50_weights_for_accuracy_0.671875_valid_loss_0.6803151126950979


                                                                                                                       

Epoch 3/4, Train Loss: 0.4760, Valid Loss: 0.4772, Accuracy: 0.7734
Updated model weights saved to resnet_50_weights_for_accuracy_0.7734375_valid_loss_0.4771711028491457


                                                                                                                       

Epoch 4/4, Train Loss: 0.4658, Valid Loss: 0.5085, Accuracy: 0.7552
Updated model weights saved to resnet_50_weights_for_accuracy_0.7552083333333334_valid_loss_0.5084767742082477
lr is 0.003, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.6587, Valid Loss: 0.6658, Accuracy: 0.6263
Updated model weights saved to resnet_50_weights_for_accuracy_0.6263020833333334_valid_loss_0.6657711928710341


                                                                                                                       

Epoch 2/4, Train Loss: 0.6120, Valid Loss: 0.6344, Accuracy: 0.6530
Updated model weights saved to resnet_50_weights_for_accuracy_0.6529947916666666_valid_loss_0.6344185822332898


                                                                                                                       

Epoch 3/4, Train Loss: 0.5943, Valid Loss: 0.6072, Accuracy: 0.6673
Updated model weights saved to resnet_50_weights_for_accuracy_0.6673177083333334_valid_loss_0.6072102477774024


                                                                                                                       

Epoch 4/4, Train Loss: 0.5900, Valid Loss: 0.5962, Accuracy: 0.6738
Updated model weights saved to resnet_50_weights_for_accuracy_0.673828125_valid_loss_0.5962317467977604
lr is 0.0005, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.4411, Valid Loss: 0.4461, Accuracy: 0.8203
Updated model weights saved to resnet_50_weights_for_accuracy_0.8203125_valid_loss_0.44610250159166753


                                                                                                                       

Epoch 2/4, Train Loss: 0.4137, Valid Loss: 0.5070, Accuracy: 0.7682
Updated model weights saved to resnet_50_weights_for_accuracy_0.7682291666666666_valid_loss_0.5069823008961976


                                                                                                                       

Epoch 3/4, Train Loss: 0.3991, Valid Loss: 0.3552, Accuracy: 0.8457
Updated model weights saved to resnet_50_weights_for_accuracy_0.845703125_valid_loss_0.35516285492728156


                                                                                                                       

Epoch 4/4, Train Loss: 0.3881, Valid Loss: 0.4973, Accuracy: 0.7936
Updated model weights saved to resnet_50_weights_for_accuracy_0.7936197916666666_valid_loss_0.4973223175232609
Training stopped due to significant loss increase at Epoch 4
lr is 0.0003, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.3952, Valid Loss: 0.3447, Accuracy: 0.8607
Updated model weights saved to resnet_50_weights_for_accuracy_0.8606770833333334_valid_loss_0.3447067642118782


                                                                                                                       

Epoch 2/4, Train Loss: 0.3404, Valid Loss: 0.3887, Accuracy: 0.8346
Updated model weights saved to resnet_50_weights_for_accuracy_0.8346354166666666_valid_loss_0.3886822290563335


                                                                                                                       

Epoch 3/4, Train Loss: 0.3253, Valid Loss: 0.5234, Accuracy: 0.8060
Updated model weights saved to resnet_50_weights_for_accuracy_0.8059895833333334_valid_loss_0.5234181786266466
Training stopped due to significant loss increase at Epoch 3
lr is 0.0001, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.3400, Valid Loss: 0.3319, Accuracy: 0.8763
Updated model weights saved to resnet_50_weights_for_accuracy_0.8763020833333334_valid_loss_0.3318950613029301


                                                                                                                       

Epoch 2/4, Train Loss: 0.2625, Valid Loss: 0.2868, Accuracy: 0.8945
Updated model weights saved to resnet_50_weights_for_accuracy_0.89453125_valid_loss_0.28675911695851636


                                                                                                                       

Epoch 3/4, Train Loss: 0.2257, Valid Loss: 0.3000, Accuracy: 0.8926
Updated model weights saved to resnet_50_weights_for_accuracy_0.892578125_valid_loss_0.2999666694086045


                                                                                                                       

Epoch 4/4, Train Loss: 0.2056, Valid Loss: 0.4017, Accuracy: 0.8542
Updated model weights saved to resnet_50_weights_for_accuracy_0.8541666666666666_valid_loss_0.40174557198770344
Training stopped due to significant loss increase at Epoch 4
lr is 5e-05, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.3399, Valid Loss: 0.2754, Accuracy: 0.8906
Updated model weights saved to resnet_50_weights_for_accuracy_0.890625_valid_loss_0.27541186241433024


                                                                                                                       

Epoch 2/4, Train Loss: 0.2513, Valid Loss: 0.3088, Accuracy: 0.8796
Updated model weights saved to resnet_50_weights_for_accuracy_0.8795572916666666_valid_loss_0.3088111055549234


                                                                                                                       

Epoch 3/4, Train Loss: 0.2106, Valid Loss: 0.3207, Accuracy: 0.8913
Updated model weights saved to resnet_50_weights_for_accuracy_0.8912760416666666_valid_loss_0.3207009731559083


                                                                                                                       

Epoch 4/4, Train Loss: 0.1816, Valid Loss: 0.3532, Accuracy: 0.8874
Updated model weights saved to resnet_50_weights_for_accuracy_0.8873697916666666_valid_loss_0.35324579566561926
lr is 3e-05, batch 16, im size is 224x224


                                                                                                                       

Epoch 1/4, Train Loss: 0.3734, Valid Loss: 0.3077, Accuracy: 0.8783
Updated model weights saved to resnet_50_weights_for_accuracy_0.8782552083333334_valid_loss_0.3076865029676507


                                                                                                                       

Epoch 2/4, Train Loss: 0.2596, Valid Loss: 0.3354, Accuracy: 0.8750
Updated model weights saved to resnet_50_weights_for_accuracy_0.875_valid_loss_0.33537155696346116


                                                                                                                       

KeyboardInterrupt: 

In [None]:
# One-epoch fine-tuning sweep at small learning rates; every run restarts from
# the checkpoint loaded by `load_resnet_80_percent` and trains on a random
# fraction of the training set.
criterion = nn.CrossEntropyLoss()
FRACTION_OF_TRAIN_SET = 0.45

for BATCH in [16, 32, 64]:
    print(f'now BATCH is {BATCH}')
    train_loader = prepare_dataloader(
        train_csv, train_dir, train_transforms, batch_size=BATCH, shuffle=True
    )
    train_loader_subset = get_subset_loader(train_loader, fraction=FRACTION_OF_TRAIN_SET)
    valid_loader = prepare_dataloader(
        valid_csv, valid_dir, valid_transforms, batch_size=BATCH, shuffle=False
    )

    for learning_rate in [0.00005, 0.00003, 0.00001, 0.000005, 0.000003, 0.000001, 0.0000005, 0.0000003, 0.0000001]:
        print(f'now BATCH is {BATCH} and lr is {learning_rate}')
        model = load_resnet_80_percent().to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # optional
        continue_training(
            train_loader=train_loader_subset,
            valid_loader=valid_loader,
            criterion=criterion,
            optimizer=optimizer,
            model_name=None,
            model=model,
            scheduler=scheduler,
            epochs=1,
            learn_r=learning_rate,
            weights_path=None,
            new_save_path=f'resnet_lr_{learning_rate}_batch{BATCH}.pth',
        )

In [None]:
# One-epoch fine-tuning sweep from the largest batch size down, visiting the
# learning rates from smallest to largest.
criterion = nn.CrossEntropyLoss()
FRACTION_OF_TRAIN_SET = 0.45

for BATCH in [64, 32, 16]:
    print(f'now BATCH is {BATCH}')
    train_loader = prepare_dataloader(
        train_csv, train_dir, train_transforms, batch_size=BATCH, shuffle=True
    )
    train_loader_subset = get_subset_loader(train_loader, fraction=FRACTION_OF_TRAIN_SET)
    valid_loader = prepare_dataloader(
        valid_csv, valid_dir, valid_transforms, batch_size=BATCH, shuffle=False
    )

    for learning_rate in [0.0000001, 0.0000003, 0.0000005, 0.000001, 0.000003, 0.000005, 0.00001, 0.00003, 0.00005, 0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03, 0.05]:
        print(f'now BATCH is {BATCH} and lr is {learning_rate}')
        model = load_resnet_80_percent().to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # optional
        continue_training(
            train_loader=train_loader_subset,
            valid_loader=valid_loader,
            criterion=criterion,
            optimizer=optimizer,
            model_name=None,
            model=model,
            scheduler=scheduler,
            epochs=1,
            learn_r=learning_rate,
            weights_path=None,
            new_save_path=f'resnet_lr_{learning_rate}_batch{BATCH}.pth',
        )

now BATCH is 64
now BATCH is 64 and lr is 1e-07


                                                                                                                       

Epoch 1/1, Validation Loss: 0.2736, Validation Accuracy: 0.8939, ROC AUC: 0.9552
Updated model weights saved to resnet_50_weights_for_accuracy_0.8938802083333334_valid_loss_6.566490292549133
Updated model weights saved to resnet_lr_1e-07_batch64.pth
now BATCH is 64 and lr is 3e-07


                                                                                                                       

Epoch 1/1, Validation Loss: 0.2788, Validation Accuracy: 0.8965, ROC AUC: 0.9546
Updated model weights saved to resnet_50_weights_for_accuracy_0.896484375_valid_loss_6.692318812012672
Updated model weights saved to resnet_lr_3e-07_batch64.pth
now BATCH is 64 and lr is 5e-07


                                                                                                                       

Epoch 1/1, Validation Loss: 0.2729, Validation Accuracy: 0.8971, ROC AUC: 0.9555
Updated model weights saved to resnet_50_weights_for_accuracy_0.8971354166666666_valid_loss_6.550626754760742
Updated model weights saved to resnet_lr_5e-07_batch64.pth
now BATCH is 64 and lr is 1e-06


                                                                                                                       

Epoch 1/1, Validation Loss: 0.2719, Validation Accuracy: 0.8952, ROC AUC: 0.9558
Updated model weights saved to resnet_50_weights_for_accuracy_0.8951822916666666_valid_loss_6.525370255112648
Updated model weights saved to resnet_lr_1e-06_batch64.pth
now BATCH is 64 and lr is 3e-06


                                                                                                                       

Epoch 1/1, Validation Loss: 0.2766, Validation Accuracy: 0.8952, ROC AUC: 0.9554
Updated model weights saved to resnet_50_weights_for_accuracy_0.8951822916666666_valid_loss_6.639599561691284
Updated model weights saved to resnet_lr_3e-06_batch64.pth
now BATCH is 64 and lr is 5e-06


                                                                                                                       

KeyboardInterrupt: 

In [None]:
def scheduler(optimizer, now_epoch, initial_lr, epochs, end_lr_rate=0.01):
    """Set a cosine-annealed learning rate on every param group of ``optimizer``.

    The multiplier follows half a cosine period: it equals 1 at ``now_epoch == 0``
    and ``end_lr_rate`` at ``now_epoch == epochs``.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts whose
            ``'lr'`` entry is overwritten.
        now_epoch: current epoch index used as the position on the cosine curve.
        initial_lr: learning rate at the start of the schedule.
        epochs: length of the schedule; must be positive.
        end_lr_rate: final learning rate expressed as a fraction of ``initial_lr``
            (end_lr = initial_lr * end_lr_rate).

    Returns:
        The learning rate that was written into the param groups.

    Raises:
        ValueError: if ``epochs`` is not positive (previously a bare
            ZeroDivisionError for ``epochs == 0``).
    """
    # Imported locally so the function does not rely on a module-level
    # `import math` having been executed in some other cell.
    import math

    if epochs <= 0:
        raise ValueError(f"epochs must be a positive number, got {epochs!r}")

    rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate
    new_lr = rate * initial_lr

    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

    return new_lr

In [None]:
# Training-time augmentation pipeline: random geometric and colour augmentations,
# followed by normalization and conversion to a tensor.
# Transforms that are not given an explicit `p` use the library's default probability.
train_transforms = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=25),
        # scale=(0.9, 0.9): both bounds are equal, so the scale range has zero width.
        # NOTE(review): newer albumentations releases appear to expect `size=(h, w)`
        # instead of `height=`/`width=` — confirm against the installed version.
        A.RandomResizedCrop(height=224, width=224, scale=(0.9, 0.9)),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        # Same mean/std as test_transforms, so train and inference inputs are normalized alike.
        # NOTE(review): these look like the usual ImageNet statistics — confirm.
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
])

In [None]:
import torch
import torch.nn as nn
from sklearn.metrics import roc_auc_score, accuracy_score
from torchvision import models
def load_resnet_best_percent(path=None):
    """Build a ResNet-50 with a 2-output head and load saved weights into it.

    Args:
        path: checkpoint file to load. Any falsy value (``None``, ``''``) selects
            the default file ``'resnet_50_w_897_perc'``.

    Returns:
        The model with the checkpoint's state dict loaded. The function does not
        call ``.to(...)`` or ``.eval()`` on the model; that is left to the caller.
    """
    checkpoint_path = path if path else 'resnet_50_w_897_perc'

    net = models.resnet50(weights=None)
    # Swap the final fully connected layer for a 2-output one.
    net.fc = nn.Linear(net.fc.in_features, 2)

    # Remap stored tensors to CUDA when available, otherwise to CPU.
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    state_dict = torch.load(checkpoint_path, map_location=target_device)
    net.load_state_dict(state_dict)
    return net

# Evaluate the default checkpoint on `valid_loader`: collect class-1 probabilities
# and labels, then report accuracy (0.5 threshold) and ROC AUC.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = load_resnet_best_percent()
# The loader only builds the model and loads weights; move it to the same device
# as the inputs, otherwise CUDA inputs would be fed to a model that was never moved.
model = model.to(device)
model.eval()
all_probs = []
all_labels = []

with torch.no_grad():
    for inputs, labels in valid_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)

        # Under no_grad there is no graph to detach from, so softmax is applied directly.
        probabilities = torch.softmax(outputs, dim=1)

        probs_class_1 = probabilities[:, 1].cpu().numpy()
        all_probs.extend(probs_class_1)
        # Labels are only accumulated for the metrics, so they are not sent to `device`.
        all_labels.extend(labels.cpu().numpy())

        print(f'Batch size: {inputs.size(0)}, Labels size: {labels.size(0)}, Probabilities size: {probs_class_1.size}')

all_labels = np.array(all_labels)
all_probs = np.array(all_probs)

valid_accuracy = accuracy_score(all_labels, (all_probs > 0.5).astype(int))
roc_auc = roc_auc_score(all_labels, all_probs)

print("Validation Accuracy:", valid_accuracy)
print("ROC AUC Score:", roc_auc)
print("Sample Probabilities for Class 1:", all_probs[:5])


Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Batch size: 128, Labels size: 128, Probabilities size: 128
Validation Accuracy: 0.8971354166666666
ROC AUC Score: 0.9554721523917431
Sample Probabilities for Class 1: [0.95841193 0.01654959 0.02925532 0.10077992 0.0406514 ]


In [None]:
# Preview the first 10% of the collected class-1 probabilities.
# The printed "valid<N>.jpg" label is built from the 1-based position in `all_probs`;
# it is not a file name read from the data.
preview_count = int(len(all_probs) * 0.1)
for position, prob in enumerate(all_probs[:preview_count], start=1):
    print(f'valid{position}.jpg, prob: {prob}')

valid1.jpg, prob: 0.9584119319915771
valid2.jpg, prob: 0.016549590975046158
valid3.jpg, prob: 0.02925531566143036
valid4.jpg, prob: 0.10077992081642151
valid5.jpg, prob: 0.04065139591693878
valid6.jpg, prob: 0.8007073998451233
valid7.jpg, prob: 0.049834027886390686
valid8.jpg, prob: 0.05276158079504967
valid9.jpg, prob: 0.0005217966972850263
valid10.jpg, prob: 0.5282792448997498
valid11.jpg, prob: 0.9562975764274597
valid12.jpg, prob: 0.9708375334739685
valid13.jpg, prob: 0.016439098864793777
valid14.jpg, prob: 0.9206658005714417
valid15.jpg, prob: 0.9996880292892456
valid16.jpg, prob: 0.21216605603694916
valid17.jpg, prob: 0.999099612236023
valid18.jpg, prob: 0.0017090424662455916
valid19.jpg, prob: 0.5621709227561951
valid20.jpg, prob: 0.9995854496955872
valid21.jpg, prob: 0.9304478764533997
valid22.jpg, prob: 0.1816292554140091
valid23.jpg, prob: 0.002914269920438528
valid24.jpg, prob: 0.01911434344947338
valid25.jpg, prob: 0.9950626492500305
valid26.jpg, prob: 0.56449294090271
vali

In [None]:
# Inference-time preprocessing with no random augmentation: resize to 224x224,
# normalize with the same mean/std as train_transforms, convert to a tensor.
# NOTE: an identical `test_transforms` definition appears again in a later cell.
test_transforms = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])
print('test data transforms loaded')
class TestDataset(Dataset):
    """Unlabelled image dataset over the regular files of a single directory.

    Each item is ``(image, file_name)``: the image is read as RGB, converted to a
    NumPy array and, if ``transforms`` is given, replaced by
    ``transforms(image=...)["image"]``.

    Files are ordered by a plain string sort of their names, so for example
    ``"test10.jpg"`` comes before ``"test2.jpg"``. Use the returned file name,
    not the index, to identify an image.

    Args:
        image_dir: directory whose files are the dataset items.
        transforms: optional callable invoked as ``transforms(image=array)`` and
            expected to return a mapping with an ``"image"`` entry.
    """

    def __init__(self, image_dir, transforms=None):
        self.image_dir = image_dir
        # Keep regular files only: a sub-directory (e.g. a notebook checkpoint
        # folder) would otherwise be listed and crash when opened as an image.
        self.image_files = sorted(
            name
            for name in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, name))
        )
        self.transforms = transforms

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        file_name = self.image_files[idx]
        image_path = os.path.join(self.image_dir, file_name)
        # The context manager releases the file handle as soon as the pixel data
        # has been copied into the array.
        with Image.open(image_path) as img:
            image = np.array(img.convert("RGB"))

        if self.transforms:
            image = self.transforms(image=image)["image"]

        return image, file_name

In [None]:
# Load the default checkpoint. The inference cell further down calls the loader
# again and rebinds `model`, so this assignment is overwritten there.
model = load_resnet_best_percent()

In [None]:
# Inference-time preprocessing with no random augmentation: resize to 224x224,
# normalize with the same mean/std as train_transforms, convert to a tensor.
# NOTE: this repeats the `test_transforms` definition from an earlier cell.
test_transforms = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])
class TestDataset(Dataset):
    """Unlabelled image dataset over the regular files of a single directory.

    Each item is ``(image, file_name)``: the image is read as RGB, converted to a
    NumPy array and, if ``transforms`` is given, replaced by
    ``transforms(image=...)["image"]``.

    Files are ordered by a plain string sort of their names, so for example
    ``"test10.jpg"`` comes before ``"test2.jpg"``. Use the returned file name,
    not the index, to identify an image.

    Args:
        image_dir: directory whose files are the dataset items.
        transforms: optional callable invoked as ``transforms(image=array)`` and
            expected to return a mapping with an ``"image"`` entry.
    """

    def __init__(self, image_dir, transforms=None):
        self.image_dir = image_dir
        # Keep regular files only: a sub-directory (e.g. a notebook checkpoint
        # folder) would otherwise be listed and crash when opened as an image.
        self.image_files = sorted(
            name
            for name in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, name))
        )
        self.transforms = transforms

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        file_name = self.image_files[idx]
        image_path = os.path.join(self.image_dir, file_name)
        # The context manager releases the file handle as soon as the pixel data
        # has been copied into the array.
        with Image.open(image_path) as img:
            image = np.array(img.convert("RGB"))

        if self.transforms:
            image = self.transforms(image=image)["image"]

        return image, file_name
# Build the test loader over the 'test' directory (relative to the working directory).
test_dir = 'test'
test_dataset = TestDataset(test_dir, transforms=test_transforms)
# shuffle=False: batches follow the dataset's own (sorted-by-name) order.
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# Run the default checkpoint over `test_loader`, collecting for every image its
# file name, arg-max class and class-1 probability.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = load_resnet_best_percent()
# The loader only builds the model and loads weights; move it to the same device
# as the inputs, otherwise CUDA inputs would be fed to a model that was never moved.
model = model.to(device)
model.eval()
all_probs = []
all_labels = []  # reset here as in the original cell; the test set has no labels

predictions = []
image_names = []

with torch.no_grad():
    for images, filenames in test_loader:
        images = images.to(device)
        outputs = model(images)

        # Under no_grad there is no graph to detach from, so softmax is applied directly.
        probabilities = torch.softmax(outputs, dim=1)

        predictions.extend(outputs.argmax(dim=1).cpu().numpy())
        image_names.extend(filenames)
        all_probs.extend(probabilities[:, 1].cpu().numpy())

print('predictions made')


all_probs = np.array(all_probs)
# Preview the first 10% of results. Print the real file name for each row: the
# dataset sorts names as strings, so row i is not necessarily "test{i+1}.jpg".
for i in range(0, int(len(all_probs)*0.1)):
    print(f'{image_names[i]}, prob: {all_probs[i]}')

predictions made
test1.jpg, prob: 0.017498066648840904
test2.jpg, prob: 0.0015255126636475325
test3.jpg, prob: 0.08327808976173401
test4.jpg, prob: 0.49012696743011475
test5.jpg, prob: 0.00018353310588281602
test6.jpg, prob: 0.0007997814100235701
test7.jpg, prob: 0.5294589996337891
test8.jpg, prob: 0.9882142543792725
test9.jpg, prob: 0.012590080499649048
test10.jpg, prob: 0.019112441688776016
test11.jpg, prob: 0.8994684219360352
test12.jpg, prob: 0.06712892651557922
test13.jpg, prob: 0.0010266819735988975
test14.jpg, prob: 0.05527040362358093
test15.jpg, prob: 0.17593280971050262
test16.jpg, prob: 0.07931371033191681
test17.jpg, prob: 0.996394693851471
test18.jpg, prob: 0.9937232136726379
test19.jpg, prob: 0.8809841871261597
test20.jpg, prob: 0.2748587429523468
test21.jpg, prob: 0.002357499673962593
test22.jpg, prob: 0.9579718708992004
test23.jpg, prob: 0.1676693856716156
test24.jpg, prob: 0.7842541337013245
test25.jpg, prob: 0.5094687342643738
test26.jpg, prob: 0.039665255695581436
te

In [None]:
import pandas as pd

# Fill the `target_people` column of the sample submission with the class-1
# probabilities from the test inference cell and save the result.
submission_df = pd.read_csv('sample_submission.csv')

if len(submission_df) == len(all_probs):
    # Align by file name rather than by row position: the test dataset orders
    # files by a string sort, which need not match the submission's row order.
    # Assumes the submission names images in an `id` column, like the training
    # CSV read by load_csv — if that does not hold, fall back to positional
    # assignment (the previous behaviour) and say so.
    prob_by_file = dict(zip(image_names, all_probs))
    can_align_by_name = (
        'id' in submission_df.columns
        and len(image_names) == len(all_probs)
        and submission_df['id'].isin(list(prob_by_file)).all()
    )

    if can_align_by_name:
        submission_df['target_people'] = submission_df['id'].map(prob_by_file).to_numpy()
    else:
        print("Предупреждение: идентификаторы не удалось сопоставить с именами файлов; значения записаны по порядку строк.")
        submission_df['target_people'] = all_probs

    submission_df.to_csv('updated_sample_submission.csv', index=False)
    print("Значения записаны в столбец target_people и сохранены в updated_sample_submission.csv.")
else:
    print("Размерности не совпадают. Проверьте данные.")


Значения записаны в столбец target_people и сохранены в updated_sample_submission.csv.
