In [2]:
import os
import copy
import cv2
import random
import shutil
import torch
import torchvision
import IPython
import json

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import albumentations as A

from albumentations.pytorch import ToTensorV2
from PIL import Image
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm
from torchvision.models import resnet50, ResNet50_Weights
from torchmetrics.classification import BinaryAccuracy, BinaryConfusionMatrix, BinaryPrecision, BinaryRecall, BinaryF1Score
from IPython.display import clear_output
from sklearn.model_selection import train_test_split
from tqdm.notebook import trange
from math import ceil
from glob import glob

%matplotlib inline

In [3]:
device = torch.device('cuda:5') if torch.cuda.is_available() else torch.device('cpu')

----

### Model

In [4]:
class TimeDistributed(nn.Module):
    def __init__(self, module):
        super(TimeDistributed, self).__init__()
        self.module = module

    def forward(self, x):
        if len(x.size()) <= 2:
            return self.module(x)

        x_reshape = x.contiguous().view(-1, x.size(-1))

        y = self.module(x_reshape)
        y = y.contiguous().view(x.size(0), -1, y.size(-1))
        return y

In [5]:
class ImageEmbender(nn.Module):
    def __init__(self, emb_dim, need_freeze=False):
        super(ImageEmbender, self).__init__()

        resnet = resnet50(weights=ResNet50_Weights.DEFAULT)
        resnet.fc = nn.Linear(in_features=2048, out_features=emb_dim, bias=True)
        
        if need_freeze:
            for param in resnet.parameters():
                param.requires_grad = False

        self.resnet = resnet
    
    def forward(self, x):
        x = self.resnet(x)
        return x


class TimeSeriesImageEncoder(nn.Module):
    def __init__(self, emb_dim, hidden_dim, n_layers, bidirectional, dropout):
        super(TimeSeriesImageEncoder, self).__init__()

        self.lstm = nn.LSTM(emb_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            dropout=dropout,
                            batch_first=True
                           )
    
    def forward(self, x):
        output, (hn, cn) = self.lstm(x)
        return output[:, -1, :]


class ClussifictionHead(nn.Module):
    def __init__(self, n_classes, input_dim, hidden_dim):
        super(ClussifictionHead, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, n_classes)
        )
    
    def forward(self, x):
        x = self.model(x)
        return x


In [6]:
class TimeSeriesImagesClassificationModel(nn.Module):
    def __init__(self, emb_size, need_freeze_resnet, 
                enc_hid_dim, enc_n_layers, 
                enc_bidirectional, enc_dropout,
                dec_hid_dim, n_classes):
        super().__init__()
        
        self.embedding = ImageEmbender(
                                    emb_size, 
                                    need_freeze_resnet
                                    )
        

        self.encoder = TimeSeriesImageEncoder(
                                    emb_size, 
                                    enc_hid_dim,
                                    enc_n_layers, 
                                    enc_bidirectional, 
                                    enc_dropout
                                    )

        self.decoder = ClussifictionHead(
                                    n_classes,  
                                    enc_hid_dim, 
                                    dec_hid_dim
                                    )
    
    def forward(self, x):

        embs = []
        for i in range(x.shape[1]):
            embs.append(self.embedding(x[:, i, :, :].squeeze(1)))
        emb_out = torch.stack(embs, axis=1)

        enc_out = self.encoder(emb_out)
        dec_out = self.decoder(enc_out)
        return dec_out
        

In [7]:
test_model = TimeSeriesImagesClassificationModel(
    emb_size=128, 
    need_freeze_resnet=False, 
    enc_hid_dim=128, 
    enc_n_layers=1, 
    enc_bidirectional=False, 
    enc_dropout=0.1,
    dec_hid_dim=256, 
    n_classes=2
)



In [8]:
test_batch = torch.randn(16, 5, 3, 608, 208)

with torch.no_grad():
    logits = test_model(test_batch)

print('output shape:', logits.shape)
assert len(logits.shape) == 2
assert logits.shape[0] == 16
assert logits.shape[1] == 2

output shape: torch.Size([16, 2])


----

### Data

In [9]:
dataframe = pd.read_csv('./statistic_2022_12_07.csv', index_col=0)
dataframe.columns = ['add_date', 'is_touched', 'location', 's3_link', 'scan_result',
       'plt_dir']

In [10]:
dataframe.head()

Unnamed: 0,add_date,is_touched,location,s3_link,scan_result,plt_dir
0,2022-11-08 17:54:35.250071,0,K24-28A2,https://s3.mds.yandex.net/rms-cloud/69b312b3-2...,PLT11431241,data/PLT11431241/69b312b3-237a-4321-b683-4d85c...
1,2022-11-18 14:19:37.885847,0,K20-39A5,https://s3.mds.yandex.net/rms-cloud/4f0f1925-b...,PLT11305829,data/PLT11305829/4f0f1925-b320-4412-b0c5-666c7...
2,2022-11-16 15:59:55.10835,0,K20-50C4,https://s3.mds.yandex.net/rms-cloud/25aa652f-5...,PLT11397576,data/PLT11397576/25aa652f-5ea6-40b1-9af5-8b822...
3,2022-12-07 17:48:12.273726,0,K32-08C5,https://s3.mds.yandex.net/rms-cloud/e6f0cdfb-8...,PLT11494693,data/PLT11494693/e6f0cdfb-8064-4dc5-a26a-068ad...
4,2022-11-01 12:01:39.43983,0,K22-51B3,https://s3.mds.yandex.net/rms-cloud/1bcf8213-b...,PLT11441946,data/PLT11441946/1bcf8213-bb1e-438f-8ab2-8cbe0...


#### Избавимся от нет монотонности is_touched

In [11]:
dataframe['is_noisy_label'] = 0

deltas = []
cnt_reversed = 0
bad_palletes = []
for plts in tqdm(sorted(set(dataframe['scan_result']))):
    subs = dataframe[dataframe['scan_result'] == plts]
    if sum(subs['is_touched']):
        touched_loc = set(subs[subs['is_touched'] == 1]['location'])
        dataframe.loc[(dataframe['scan_result'] == plts) & (dataframe['is_touched'] == 0) & (~dataframe['location'].isin(touched_loc)), 'is_noisy_label'] = 1
        dataframe.loc[(dataframe['scan_result'] == plts) & (dataframe['is_touched'] == 0) & (dataframe['location'].isin(touched_loc)), 'is_noisy_label'] = 2

  0%|          | 0/6530 [00:00<?, ?it/s]

In [12]:
dataframe = dataframe[dataframe['is_noisy_label'] != 1]

In [13]:
class PackageDataset(torch.utils.data.Dataset):
    def __init__(self, dataset, transform, n_ts=5):
        self.dataset = dataset
        self.transform = transform
        self.n_ts = n_ts

        self.plts = list(dataset['scan_result'].unique())
    
    def __len__(self):
        return len(self.plts)
    
    def __getitem__(self, idx):
        plt_info = self.dataset[self.dataset['scan_result'] == self.plts[idx]]
        plt_info = plt_info.sort_values(['add_date'])

        n_ts = min(self.n_ts, len(plt_info))
        idxs = np.random.choice(range(len(plt_info)), self.n_ts)

        imgs_jpeg_path = list(self.dataset.iloc[idxs]['plt_dir'])
        is_touched = list(self.dataset.iloc[idxs]['is_touched'])[-1]
        
        images = []
        for img_jpeg_path in imgs_jpeg_path:
            image = np.array(Image.open(img_jpeg_path))
            if self.transform is not None:
                transformed = self.transform(image=image)
                image = transformed['image']

            images.append(image)
        
        images = torch.stack(images, axis=0)

        return {'images': images, 'targets': is_touched}

In [14]:
test_transform = A.Compose(
    [
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 
        A.Resize(width=208, height=608),
        ToTensorV2()
    ]
)

train_transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.Resize(width=208, height=608),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)

In [15]:
test_dataset = PackageDataset(dataframe, train_transform)

print(test_dataset[1]['images'].shape)
assert len(test_dataset[1]['images'].shape) == 4

torch.Size([5, 3, 608, 208])


----

### Разбивка данных, тестирование

In [16]:
plts = np.unique(dataframe['scan_result'])
train_pallets, test_pallets = train_test_split(plts, test_size=0.2, random_state=42)

data_train, data_test = dataframe.loc[dataframe['scan_result'].isin(train_pallets)], dataframe.loc[dataframe['scan_result'].isin(test_pallets)]
len(data_train) / len(data_test)

3.722775417010211

In [17]:
data_train.head()

Unnamed: 0,add_date,is_touched,location,s3_link,scan_result,plt_dir,is_noisy_label
0,2022-11-08 17:54:35.250071,0,K24-28A2,https://s3.mds.yandex.net/rms-cloud/69b312b3-2...,PLT11431241,data/PLT11431241/69b312b3-237a-4321-b683-4d85c...,0
1,2022-11-18 14:19:37.885847,0,K20-39A5,https://s3.mds.yandex.net/rms-cloud/4f0f1925-b...,PLT11305829,data/PLT11305829/4f0f1925-b320-4412-b0c5-666c7...,0
2,2022-11-16 15:59:55.10835,0,K20-50C4,https://s3.mds.yandex.net/rms-cloud/25aa652f-5...,PLT11397576,data/PLT11397576/25aa652f-5ea6-40b1-9af5-8b822...,0
3,2022-12-07 17:48:12.273726,0,K32-08C5,https://s3.mds.yandex.net/rms-cloud/e6f0cdfb-8...,PLT11494693,data/PLT11494693/e6f0cdfb-8064-4dc5-a26a-068ad...,0
4,2022-11-01 12:01:39.43983,0,K22-51B3,https://s3.mds.yandex.net/rms-cloud/1bcf8213-b...,PLT11441946,data/PLT11441946/1bcf8213-bb1e-438f-8ab2-8cbe0...,0


In [18]:
train_dataset = PackageDataset(data_train, train_transform)

test_dataset = PackageDataset(data_test, test_transform)

In [19]:
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True,
    batch_size=16, num_workers=1
)

val_dataloader = torch.utils.data.DataLoader(
    test_dataset, shuffle=False,
    batch_size=16, num_workers=1
)

In [20]:
data_test

Unnamed: 0,add_date,is_touched,location,s3_link,scan_result,plt_dir,is_noisy_label
15,2022-12-01 15:13:39.533238,0,K22-20B3,https://s3.mds.yandex.net/rms-cloud/980f25f7-1...,PLT11269629,data/PLT11269629/980f25f7-1179-45dd-8061-c2f7a...,0
17,2022-11-09 14:15:49.543392,0,K30-48B2,https://s3.mds.yandex.net/rms-cloud/b837bc1f-6...,PLT11279910,data/PLT11279910/b837bc1f-60a0-4f4b-9d1d-95945...,0
20,2022-10-28 11:23:28.245883,0,K20-46A3,https://s3.mds.yandex.net/rms-cloud/76a9f663-9...,PLT11298783,data/PLT11298783/76a9f663-95f5-4989-a465-4a653...,0
22,2022-10-31 11:27:44.944692,0,K22-14A3,https://s3.mds.yandex.net/rms-cloud/67024eb7-5...,PLT11314936,data/PLT11314936/67024eb7-5771-43f8-978b-e4e79...,0
29,2022-11-15 14:00:11.569384,0,K19-43C4,https://s3.mds.yandex.net/rms-cloud/3450a89e-5...,PLT11247221,data/PLT11247221/3450a89e-5376-44bd-be22-a4033...,0
...,...,...,...,...,...,...,...
76345,2022-11-08 15:12:09.636193,0,K21-48A5,https://s3.mds.yandex.net/rms-cloud/be164a89-2...,PLT11445920,data/PLT11445920/be164a89-258c-45ab-b370-30439...,0
76350,2022-12-01 12:29:03.306582,1,K33-02C2,https://s3.mds.yandex.net/rms-cloud/cb505213-2...,PLT9879030,data/PLT9879030/cb505213-2563-41b8-a1a8-96fa14...,0
76351,2022-11-17 17:30:07.118536,0,K20-28C3,https://s3.mds.yandex.net/rms-cloud/67240edd-a...,PLT11437353,data/PLT11437353/67240edd-a809-44a6-a180-f85cd...,0
76353,2022-12-01 11:53:11.565986,0,K20-51C6,https://s3.mds.yandex.net/rms-cloud/fe213616-8...,PLT11484262,data/PLT11484262/fe213616-82d5-4dc7-b0c7-dee0b...,0


In [21]:
for batch in val_dataloader:
    batch
    with torch.no_grad():
        logits = test_model(batch['images'])
    break

----

### Обучение

In [22]:
def train_loop(model, train_loader, criterion, optimizer):
    train_loss = num_samples = 0.0
    sigmoid = nn.Sigmoid()
    for batch in tqdm(train_loader):
        batch_pred = model(batch['images'].to(device))
        batch_pred = batch_pred.squeeze(-1)
        loss = criterion(sigmoid(batch_pred), batch['targets'].to(device).float())

        num_samples += len(batch_pred)
        train_loss += loss.item()
        
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
            
    train_loss = train_loss / num_samples
    return model, optimizer, train_loss

In [23]:
def test_loop(model, val_loader, criterion, metrics):
    sigmoid = nn.Sigmoid()
    with torch.no_grad():
        logs_num = 0
        val_losses = 0.0
        accuracy = []
        precision = []
        recall = []
        f1_score = []
        confusion_matrix = torch.zeros((2, 2))
        
        for batch in tqdm(val_loader):
            preds = model(batch['images'].to(device))
            preds = sigmoid(preds.squeeze(-1))
            target = batch['targets'].to(device)

            # compute loss
            #
            loss = criterion(preds, batch['targets'].to(device).float())
            val_losses += loss.item()

            # compute metrics
            #
            accuracy.append(metrics['accuracy'](preds, target).cpu())
            precision.append(metrics['precision'](preds, target).cpu())
            recall.append(metrics['recall'](preds, target).cpu())
            f1_score.append(metrics['f1_score'](preds, target).cpu())
            confusion_matrix += metrics['confusion_matrix'](preds, target).cpu()

            logs_num += len(target)

    return val_losses / logs_num, np.mean(accuracy), np.mean(precision), np.mean(recall), np.mean(f1_score), confusion_matrix

In [24]:
def learning_loop(model, optimizer, train_loader, val_loader, 
                loss_fn, metrics, epochs=10):

    plot_result = {
        'train_loss': [], 
        'test_loss': [],
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f1_score': [],
        'confusion_matrix': []
    }

    best_recall = float('inf')
    best_model = None

    for epoch in range(1, epochs+1):
        # train
        #
        print(f'#{epoch}/{epochs}:')
        model, optimizer, train_loss = train_loop(model, train_loader, loss_fn, optimizer)
        plot_result['train_loss'].append(train_loss)

        # test
        #
        test_losses, accuracy, precision, recall, f1_score, confusion_matrix = test_loop(model, val_loader, loss_fn, metrics)
        plot_result['test_loss'].append(test_losses)
        plot_result['accuracy'] += [accuracy]
        plot_result['precision'] += [precision]
        plot_result['recall'] += [recall]
        plot_result['f1_score'] += [f1_score]
        plot_result['confusion_matrix'] += [confusion_matrix]


        if recall < best_recall:
            best_recall = recall
            best_model = copy.deepcopy(model)

        # plot
        #
        clear_output(True)
       
        fig = make_subplots(rows=3, cols=2)

        for idx, (key, value) in enumerate(plot_result.items()):

            if key == 'confusion_matrix':
                print('Confusion Matrix:')
                for matrix in value:
                    cnfs_mtrx = '''
                    TP = {0}; FN = {1}
                    FP = {2}; TN = {3}
                    '''
                    cnfs_mtrx = cnfs_mtrx.format(matrix[1, 1], matrix[1, 0], matrix[0, 1], matrix[0, 0])
                    print(cnfs_mtrx)
                continue

            fig.add_trace(
                go.Scatter(y=value, name=key),
                row=idx // 2 + 1, col= idx % 2 + 1
            )

        fig.update_layout(height=600, width=800, title_text=f'#{epoch}/{epochs}:')
        fig.show()
    
    return model, optimizer, plot_result

In [25]:
def create_model_and_optimizer(net, lr=1e-4, beta1=0.9, beta2=0.999, device=device):
    model = net.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr, [beta1, beta2])
    return model, optimizer

In [26]:
metrics = {
    'accuracy': BinaryAccuracy().to(device),
    'precision': BinaryPrecision().to(device),
    'recall': BinaryRecall().to(device),
    'f1_score': BinaryF1Score().to(device),
    'confusion_matrix': BinaryConfusionMatrix().to(device)
}
loss_fn = nn.BCELoss()

model = TimeSeriesImagesClassificationModel(
    emb_size=128, 
    need_freeze_resnet=False, 
    enc_hid_dim=128, 
    enc_n_layers=1, 
    enc_bidirectional=False, 
    enc_dropout=0.1,
    dec_hid_dim=256, 
    n_classes=1
)

model, optimizer_model = create_model_and_optimizer(model)



In [27]:
model, optimizer_model, plot_result = learning_loop(
    model=model,
    optimizer=optimizer_model,
    train_loader=train_dataloader,
    val_loader=val_dataloader,
    loss_fn=loss_fn,
    metrics=metrics,
    epochs=10
)

Confusion Matrix:

                    TP = 0.0; FN = 22.0
                    FP = 9.0; TN = 1275.0
                    

                    TP = 0.0; FN = 24.0
                    FP = 0.0; TN = 1282.0
                    

                    TP = 2.0; FN = 16.0
                    FP = 17.0; TN = 1271.0
                    

                    TP = 0.0; FN = 20.0
                    FP = 0.0; TN = 1286.0
                    

                    TP = 0.0; FN = 23.0
                    FP = 10.0; TN = 1273.0
                    


#6/10:


  0%|          | 0/327 [00:00<?, ?it/s]

KeyboardInterrupt: 