In [1]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms, datasets, models

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [2]:
class ResNetUNet(nn.Module):
    def __init__(self, num_classes):
        def block(in_channels, out_channels, kernel, padding):
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
                nn.ReLU(inplace=True),
            )
        super().__init__()
        
        self.base_model = models.resnet18(pretrained=True)
        
        self.base_layers = list(self.base_model.children())                
        
        self.layer0 = nn.Sequential(*self.base_layers[:3])
        self.layer0_1x1 = block(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*self.base_layers[3:5])      
        self.layer1_1x1 = block(64, 64, 1, 0)       
        self.layer2 = self.base_layers[5]        
        self.layer2_1x1 = block(128, 128, 1, 0)  
        self.layer3 = self.base_layers[6] 
        self.layer3_1x1 = block(256, 256, 1, 0)  
        self.layer4 = self.base_layers[7] 
        self.layer4_1x1 = block(512, 512, 1, 0)  
        
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        
        self.conv_up3 = block(256 + 512, 512, 3, 1)
        self.conv_up2 = block(128 + 512, 256, 3, 1)
        self.conv_up1 = block(64 + 256, 256, 3, 1)
        self.conv_up0 = block(64 + 256, 128, 3, 1)
        
        self.conv_original_size0 = block(3, 64, 3, 1)
        self.conv_original_size1 = block(64, 64, 3, 1)
        self.conv_original_size2 = block(64 + 128, 64, 3, 1)
        
        self.conv_last = nn.Conv2d(64, num_classes, 1)
        
    def forward(self, input):
        x_original = self.conv_original_size0(input)
        x_original = self.conv_original_size1(x_original)
        
        layer0 = self.layer0(input)            
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)        
        layer4 = self.layer4(layer3)
        
        layer4 = self.layer4_1x1(layer4)
        x = self.upsample(layer4)
        layer3 = self.layer3_1x1(layer3)
        x = torch.cat([x, layer3], dim=1)
        x = self.conv_up3(x)
 
        x = self.upsample(x)
        layer2 = self.layer2_1x1(layer2)
        x = torch.cat([x, layer2], dim=1)
        x = self.conv_up2(x)

        x = self.upsample(x)
        layer1 = self.layer1_1x1(layer1)
        x = torch.cat([x, layer1], dim=1)
        x = self.conv_up1(x)

        x = self.upsample(x)
        layer0 = self.layer0_1x1(layer0)
        x = torch.cat([x, layer0], dim=1)
        x = self.conv_up0(x)
        
        x = self.upsample(x)
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)        
        
        out = self.conv_last(x)        
        
        return out
    

def validate_model(model, validation_loader, loss_function, device):
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in validation_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = loss_function(outputs, targets)
            val_loss += loss.item()

    return val_loss / len(validation_loader)


def train_model(model, training_loader, validation_loader, num_epochs, loss_function, device):
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=5e-4)
    train_losses = []
    val_losses = []
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for batch_idx, (inputs, targets) in enumerate(training_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            print(f'Epoch {epoch+1}/{num_epochs} - Batch {batch_idx+1}/{len(training_loader)} - Loss: {loss.item()}')

        train_losses.append(train_loss / len(training_loader))
        val_loss = validate_model(model, validation_loader, loss_function, device)
        val_losses.append(val_loss)
        print(f'Epoch {epoch+1} - Train Loss: {train_loss / len(training_loader)}, Val Loss: {val_loss}')

    return train_losses, val_losses

In [5]:
import pandas as pd

gpu_number = 4
device = torch.device(device=f'cuda:{gpu_number}')

test_path = "/home/silevichar/nastya/dataset/Test/Images/"
train_path = "/home/silevichar/nastya/dataset/Train/Images/"

markup = pd.read_csv('/home/silevichar/nastya/dataset/Train/markup.csv', encoding="utf-16", header=None)
train_markup, val_markup = train_test_split(markup, test_size=0.2, random_state=42)

RGB_mean = [0.485, 0.456, 0.406]
RGB_std  = [0.229, 0.224, 0.225]


def get_training_transforms(image_size=224):
    training_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((image_size, image_size)),
        transforms.Normalize(mean=RGB_mean, std=RGB_std)
    ])
    return training_transforms

# Function to get transformations for test data
def get_testing_transforms(image_size=224):
    testing_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((image_size, image_size)),
        transforms.Normalize(mean=RGB_mean, std=RGB_std)
    ])
    return testing_transforms

class BarcodeDataset(Dataset):
    def __init__(self, annotations, image_directory, image_size=224, mode='train'):
        self.annotations = annotations
        self.image_size = image_size
        self.image_directory = image_directory

        if mode == 'train':
            self.transform = get_training_transforms(image_size)
        elif mode == 'test':
            self.transform = get_testing_transforms(image_size)

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        annotation = self.annotations.iloc[index]
        coordinates = np.intp(annotation.values[2:10].reshape(4, 2))
        image = cv2.imread(self.image_directory + annotation[0])
        
        mask = np.zeros(image.shape[:2])
        mask = cv2.drawContours(mask, [coordinates], -1, (1), -1)
        mask = cv2.resize(mask, (self.image_size, self.image_size))
        
        mask = torch.tensor(mask)[None, :, :]
        image = self.transform(image)

        return image, mask

# Load training and validation datasets
training_dataset = BarcodeDataset(train_markup, train_path, mode='train')
validation_dataset = BarcodeDataset(val_markup, train_path, mode='test')

# Create data loaders for training and validation datasets
training_loader = DataLoader(training_dataset, batch_size=32, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=64, shuffle=False)


In [6]:
criterion = nn.BCEWithLogitsLoss(reduction='sum')
model = ResNetUNet(num_classes=1).to(device)

model.load_state_dict(torch.load('model_0.pth'))
model.eval()

train_model(model, training_loader, validation_loader, num_epochs=6, loss_function=criterion, device=device)



Epoch 1/6 - Batch 1/205 - Loss: 51228.96456322415
Epoch 1/6 - Batch 2/205 - Loss: 64156.56449825421
Epoch 1/6 - Batch 3/205 - Loss: 57307.717278671305


KeyboardInterrupt: 

In [7]:
model = ResNetUNet(num_classes=1).to(device)

model.load_state_dict(torch.load('model_5.pth'))
model.eval()

ResNetUNet(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tra

In [9]:
my_answer = pd.read_csv('course_intro_ocr/task3/answer.csv', encoding="utf-16", header=None)

for i, img_name in tqdm(enumerate(my_answer[0])):
    my_answer.at[i, 1] = int('1' * 13)
    my_answer.at[i, 10] = int('1' * 95)

    img_path = os.path.join(test_path, img_name)

    image = cv2.imread(img_path)

    test_transforms = get_testing_transforms()
    tensor_img = test_transforms(image)[None]
    w, h, _ = image.shape
    model.eval()
    with torch.no_grad():
        tensor_img = tensor_img.to(device)
        output_map = torch.sigmoid(model(tensor_img))
        output_map = torch.nn.functional.interpolate(output_map, size=(w, h)).cpu()
    output_map = output_map[0].permute(1, 2, 0).numpy()
    output_map = np.array((output_map > 0.4) * 255, np.uint8)  

    cnts, hierarchy = cv2.findContours(output_map.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)[-2:]
    c = sorted(cnts, key = cv2.contourArea, reverse = True)[0]
    
    rect = cv2.minAreaRect(c)

    coords = np.intp(cv2.boxPoints(rect)).reshape(-1)
    my_answer.iloc[[i], 2:10] = coords

my_answer[1] = 0
my_answer[10] = 0

my_answer.to_csv('answer.csv', encoding='utf-16', header=None, index=False)

0it [00:00, ?it/s]

100it [00:09, 10.30it/s]
