Формирование pipeline'а с проверкой каждой заявки

# Загрузка библиотек и заморозка seed'а

In [1]:
import os
import shutil
import torch
import random
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
from PIL import Image
from torchvision import transforms
from pathlib import Path
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
from IPython.display import clear_output

In [2]:
RANDOM_STATE = 42
seed = RANDOM_STATE

# Export the hash seed through the environment.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# this presumably only affects child processes, not this kernel - confirm.
os.environ["PYTHONHASHSEED"] = str(seed)

# Seed every random number generator the notebook relies on:
# Python's stdlib RNG, NumPy's legacy global RNG and PyTorch (CPU + CUDA).
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Extra switches required for reproducible results on the cuDNN backend.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Анализ заявок по ЗНО

In [3]:
class CustomDataset(Dataset):
    """Dataset over the image files stored directly inside one folder.

    Args:
        data_folder: Directory containing the images (``str`` or ``Path``).
        transform: Optional callable applied to every loaded RGB image.
        filename: When true, ``__getitem__`` returns ``(image, file_name)``
            instead of just ``image``.
    """

    def __init__(self, data_folder, transform=None, filename=False):
        self.data_folder = Path(data_folder)
        self.transform = transform
        self.filename = filename
        # Directory listing order is arbitrary, so sort it to keep the sample
        # order reproducible; sub-directories are skipped because they cannot
        # be opened as images.
        self.data = sorted(
            entry for entry in self.data_folder.iterdir() if entry.is_file()
        )

    def __len__(self):
        """Return the number of image files found in the folder."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the ``idx``-th image as RGB and apply ``transform`` if set.

        Returns:
            The (optionally transformed) image, or ``(image, file_name)`` when
            the dataset was created with ``filename=True``.
        """
        path = self.data[idx]
        # The context manager closes the underlying file handle as soon as
        # the pixel data has been copied by ``convert``.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        if self.filename:
            # ``Path.name`` is separator-agnostic; splitting on a backslash
            # only stripped the directory part on Windows.
            return img, path.name
        return img

In [4]:
# Inference-time preprocessing: resize every photo to 256x256 and convert it
# to a tensor.
test_transforms = transforms.Compose(
    [transforms.Resize(size=(256, 256)), transforms.ToTensor()]
)

In [5]:
# DataLoader settings.
BATCH_SIZE = 64
NUM_WORKERS = 0

# Relative paths used by the notebook: the root folder that is scanned for
# request sub-folders, the per-request sub-folder with photos, and the folder
# the model files are loaded from.
data_folder, photo_folder, models_folder = (
    Path(part) for part in ('data', 'photos', 'models')
)

In [6]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cpu')

In [7]:
def _load_eval_model(weights_path, target_device):
    """Load a TorchScript model onto ``target_device`` in evaluation mode.

    Args:
        weights_path: Path to the serialized TorchScript file.
        target_device: ``torch.device`` the model should live on.

    Returns:
        The loaded model, moved to ``target_device`` with ``eval()`` applied.
    """
    # ``map_location`` remaps the stored tensors while deserializing, so a
    # file saved on a GPU can still be opened on a CPU-only machine.
    model = torch.jit.load(weights_path, map_location=target_device)
    model = model.to(target_device)
    model.eval()
    return model


# All three models are loaded the same way; previously the quality model was
# loaded without ``map_location``, unlike the other two.
model_terminal_presence = _load_eval_model(models_folder / 'model_terminal.pt', device)
model_receipt_presence = _load_eval_model(models_folder / 'model_receipt.pt', device)
model_terminal_quality = _load_eval_model(models_folder / 'model_terminal_quality.pt', device)
clear_output()

In [8]:
def _predict_batch(imgs):
    """Run the three classifiers on one batch of images.

    Args:
        imgs: Batch of image tensors already placed on the models' device.

    Returns:
        Tuple ``(terminal_presence, receipt_presence, terminal_damaged)`` of
        1-D tensors with one entry per image. ``terminal_damaged`` is float32,
        holds the quality model's predicted class for images whose
        terminal-presence class is non-zero, and 0 for all other images.
    """
    terminal_presence = model_terminal_presence(imgs).argmax(dim=1)
    receipt_presence = model_receipt_presence(imgs).argmax(dim=1)

    # A boolean mask keeps the batch dimension for any number of hits;
    # ``imgs[nonzero(...)].squeeze()`` dropped it when exactly one terminal
    # was found in the batch.
    terminal_mask = terminal_presence.bool()
    terminal_damaged = torch.zeros(
        terminal_presence.shape, dtype=torch.float32, device=terminal_presence.device
    )
    # Skip the quality model entirely when no terminal was detected, so it is
    # never called on an empty batch.
    if terminal_mask.any():
        terminal_quality = model_terminal_quality(imgs[terminal_mask]).argmax(dim=1)
        terminal_damaged[terminal_mask] = terminal_quality.float()
    return terminal_presence, receipt_presence, terminal_damaged


def _analyze_request(folder, photos_dir):
    """Classify every photo of one request folder.

    Args:
        folder: Name of the request folder; stored in the ``zno`` column.
        photos_dir: Directory with the request's photos.

    Returns:
        DataFrame with columns ``photo_name``, ``zno``, ``receipt_presence``,
        ``terminal_presence`` and ``terminal_damaged`` (one row per photo);
        empty when the directory yields no photos.
    """
    columns = ['photo_name', 'zno', 'receipt_presence', 'terminal_presence', 'terminal_damaged']
    dataset = CustomDataset(photos_dir, transform=test_transforms, filename=True)
    dataloader = DataLoader(
        dataset, batch_size=BATCH_SIZE,
        shuffle=False, num_workers=NUM_WORKERS
    )

    photo_names, terminal_presence, receipt_presence, terminal_damaged = [], [], [], []
    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for imgs, batch_names in dataloader:
            # The models live on ``device``; the batch has to be moved there too.
            batch_terminal, batch_receipt, batch_damaged = _predict_batch(imgs.to(device))
            # Collect the names of EVERY batch (previously only the last
            # batch's names ended up in the ``photo_name`` column).
            photo_names.extend(batch_names)
            terminal_presence.append(batch_terminal.cpu().numpy())
            receipt_presence.append(batch_receipt.cpu().numpy())
            terminal_damaged.append(batch_damaged.cpu().numpy())

    if not photo_names:
        return pd.DataFrame(columns=columns)

    return pd.DataFrame({
        'photo_name': photo_names,
        'zno': folder,
        'receipt_presence': np.concatenate(receipt_presence),
        'terminal_presence': np.concatenate(terminal_presence),
        'terminal_damaged': np.concatenate(terminal_damaged),
    })


request_frames = []
# Sorted for a reproducible row order (directory listing order is arbitrary).
for folder in tqdm(sorted(os.listdir(data_folder))):
    photos_dir = data_folder / folder / photo_folder
    # Ignore entries that are not request folders with a photo sub-directory
    # (stray files, checkpoints, ...).
    if not photos_dir.is_dir():
        continue
    df_zno = _analyze_request(folder, photos_dir)
    if not df_zno.empty:
        request_frames.append(df_zno)

# Concatenate once instead of growing the frame inside the loop. As before,
# the row index restarts at 0 for every request.
df_results = pd.concat(request_frames, axis=0) if request_frames else pd.DataFrame()

100%|██████████| 3/3 [00:05<00:00,  1.69s/it]


In [9]:
df_results

Unnamed: 0,photo_name,zno,receipt_presence,terminal_presence,terminal_damaged
0,ЗНО0301280225_TS903105961-8b7fb617-41b5-4940-a...,zno_1,1,0,0.0
1,ЗНО0301286912_TS903106654-9ffa2ca1-7ade-4c95-8...,zno_1,0,1,1.0
2,ЗНО0301374435_TS903124534-7f6a5d7f-636c-45a4-b...,zno_1,0,0,0.0
3,ЗНО0301507612_TS903151230-0f864947-4610-43c0-a...,zno_1,1,0,0.0
4,ЗНО0301515974_TS903153162-f6ea7159-f85c-4b6c-8...,zno_1,0,1,0.0
5,ЗНО0301516350_TS903153290-bb9ffefa-8153-479f-b...,zno_1,1,0,0.0
6,ЗНО0301517687_TS903153587-4b266687-641d-4533-9...,zno_1,1,0,0.0
7,ЗНО0301599875_TS903174076-617d2ffc-459d-4044-9...,zno_1,1,0,0.0
8,╨Ч╨Э╨Ю0301636887_TS903180274-11292520-1de5-4b1...,zno_1,0,1,0.0
9,╨Ч╨Э╨Ю0301669289_TS903187277-fb757364-c56d-4f3...,zno_1,0,1,0.0
