In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install numpy==1.20.3
!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
!pip install opencv-python
!pip install matplotlib==3.4.2

In [None]:
!git clone https://github.com/NVIDIA/apex
!cd apex
!pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
!rm -rf ./apex

In [None]:
!export CUDA_HOME=/usr/local/cuda-11.0 
!pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex

In [None]:
!conda install -c conda-forge nvidia-apex -y

In [None]:
!pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
!pip install torchtext==0.9.1

In [None]:
!pip freeze

In [None]:
!pip3 freeze > requirements.txt  # Python3


In [None]:
!pip install timm==0.3.2

In [None]:
from apex import amp

In [None]:
import os
os.remove('data.zip')

In [None]:
import shutil
shutil.unpack_archive('data.zip', "./")

In [None]:
import shutil

shutil.rmtree("./data/train_segmentation")

In [None]:
!pip uninstall opencv_python_headless -y
!pip install opencv-contrib-python-headless

In [None]:
import cv2
tps = cv2.createThinPlateSplineShapeTransformer()

In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence

import numpy as np
import cv2
import os
import json
from matplotlib import pyplot as plt

In [None]:
!pip install patool pyunpack

In [None]:
!apt-get install unrar

In [None]:
!pip install gdown

In [None]:
!gdown --id 1LGm-ZWwesqRJwuzXW0ZgPA4JSF-HGdoD

In [None]:
!unzip synth_img.zip

In [None]:
import os
os.mkdir("./data")

In [None]:
shutil.rmtree('./data')

In [None]:
from pyunpack import Archive
Archive('./train.rar').extractall('./data')

In [None]:
with open('./data/train/labels.json') as f:
    train_data = json.load(f)


In [None]:
# Split the labelled samples into a train part and a validation part and
# write each part to its own JSON file.
# The file is decoded explicitly as UTF-8 so the result does not depend on the
# platform's default encoding (labels are presumably Cyrillic -- see the
# alphabet in the config cell).
with open('data/train/labels.json', encoding='utf-8') as f:
    train_data = json.load(f)

# (image name, text) pairs in file order; the split below is positional,
# no shuffling is applied.
train_data = list(train_data.items())
print('train len', len(train_data))

split_coef = 0.75
train_len = int(len(train_data) * split_coef)

train_data_splitted = train_data[:train_len]
val_data_splitted = train_data[train_len:]

print('train len after split', len(train_data_splitted))
print('val len after split', len(val_data_splitted))


# json.dump escapes non-ASCII characters by default, so both outputs are ASCII.
with open('data/train/train_labels_splitted.json', 'w', encoding='utf-8') as f:
    json.dump(dict(train_data_splitted), f)

with open('data/train/val_labels_splitted.json', 'w', encoding='utf-8') as f:
    json.dump(dict(val_data_splitted), f)

In [None]:
os.listdir('./data/train')

In [None]:
import pandas as pd

# CSV variant of the train/validation split: build a {file_name: text} mapping
# from labels.csv, cut it positionally (no shuffling) and dump both parts.
labels = pd.read_csv("./data/train/labels.csv")
data = dict(zip(labels.file_name, labels.text))

train_data = list(data.items())
print('train len', len(train_data))

split_coef = 0.60
train_len = int(len(train_data)*split_coef)

train_data_splitted, val_data_splitted = train_data[:train_len], train_data[train_len:]

print('train len after split', len(train_data_splitted))
print('val len after split', len(val_data_splitted))

# NOTE(review): these files land in data/, while the config cell points at
# data/train/ -- confirm which location is intended.
for out_path, pairs in (
    ('data/train_labels_splitted.json', train_data_splitted),
    ('data/val_labels_splitted.json', val_data_splitted),
):
    with open(out_path, 'w') as f:
        json.dump(dict(pairs), f)

In [None]:
%cd /kaggle/working

In [None]:
os.mkdir("./with_aug")

In [None]:
# Helper that merges individual (image, target text) samples into one batch.
def collate_fn(batch):
    """Combine dataset samples into batched tensors.

    Args:
        batch: iterable of (image, text, enc_text) triples.

    Returns:
        Tuple (images, texts, enc_pad_texts, text_lens): the images stacked
        along a new first dimension, the raw texts as a tuple, the encoded
        texts padded with 0 (batch first), and a LongTensor holding
        len(text) for every sample.
    """
    image_items, texts, encoded_items = zip(*batch)
    lengths = torch.LongTensor(list(map(len, texts)))
    stacked = torch.stack(image_items, 0)
    padded = pad_sequence(encoded_items, batch_first=True, padding_value=0)
    return stacked, texts, padded, lengths


def get_data_loader(
    transforms, json_path, root_path, tokenizer, batch_size, drop_last,
    num_workers=8
):
    """Build a DataLoader over an OCRDataset.

    Args:
        transforms: callable applied to every loaded image (may be None).
        json_path: path to the JSON file mapping image name -> text.
        root_path: directory the image names are joined to.
        tokenizer: object whose ``encode`` turns the texts into int sequences.
        batch_size: number of samples per batch.
        drop_last: forwarded to DataLoader; when true the final incomplete
            batch is dropped. (It used to be accepted but silently ignored.)
        num_workers: number of loader worker processes (default 8, the value
            that was previously hard-coded).

    Returns:
        torch.utils.data.DataLoader using ``collate_fn`` and pinned memory.
    """
    dataset = OCRDataset(json_path, root_path, tokenizer, transforms)
    # NOTE(review): no shuffle is requested, so batches follow file order in
    # every epoch -- confirm that this is intended for the training loader.
    data_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        pin_memory=True,
        collate_fn=collate_fn,
        batch_size=batch_size,
        drop_last=drop_last,
        num_workers=num_workers,
    )
    return data_loader


class OCRDataset(Dataset):
    """Dataset of (image, text, encoded text) samples described by a JSON file.

    The JSON file maps image file names to their target texts. All texts are
    encoded once in the constructor via ``tokenizer.encode``; images are read
    from disk with ``cv2.imread`` on every access.
    """

    def __init__(self, json_path, root_path, tokenizer, transform=None):
        """
        Args:
            json_path: path to the JSON mapping image name -> text.
            root_path: directory that image names are joined to.
            tokenizer: object with ``encode(list_of_str) -> list_of_int_lists``.
            transform: optional callable applied to each loaded image.
        """
        super().__init__()
        self.transform = transform
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        self.data_len = len(data)

        self.img_paths = []
        self.texts = []
        for img_name, text in data.items():
            self.img_paths.append(os.path.join(root_path, img_name))
            self.texts.append(text)
        self.enc_texts = tokenizer.encode(self.texts)

    def __len__(self):
        """Number of samples listed in the JSON file."""
        return self.data_len

    def __getitem__(self, idx):
        """Return (image, text, enc_text) for sample ``idx``.

        Raises:
            FileNotFoundError: if the image cannot be read. ``cv2.imread``
                signals failure by returning None, which would otherwise
                surface later as an unrelated error inside the transforms.
        """
        img_path = self.img_paths[idx]
        text = self.texts[idx]
        enc_text = torch.LongTensor(self.enc_texts[idx])
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        if self.transform is not None:
            image = self.transform(image)
        return image, text, enc_text


class AverageMeter:
    """Accumulates a weighted running sum and exposes its mean.

    Attributes: ``sum`` (weighted total), ``count`` (total weight) and
    ``avg`` (``sum / count``, refreshed on every ``update``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics back to zero."""
        self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Add ``val`` with weight ``n`` and recompute the mean."""
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Experiment configuration passed to train().
# Keys read by code visible in this notebook: "alphabet", "save_dir",
# "num_epochs", "image", "train" and "val". The other keys are not referenced
# by any visible cell.
config_json = {
    # Characters the tokenizer can encode; anything else maps to the OOV token.
    # NOTE(review): 'I' appears to occur twice in this string -- confirm whether
    # both are the same code point, since repeats affect the char -> index map.
    "alphabet": '@!"\'()+,-./0123456789:;=?I[]ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё№IZVX ',
    # Directory where train() stores checkpoints.
    "save_dir": "./with_aug",
    "num_epochs": 50,
    # NOTE(review): train() hard-codes its own lr for SGD and never reads this value.
    "learning_rate": 0.00001,
    "FPN_ON": True,
    "ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG": True,
    "BBOX_REG_LOSS_TYPE": "giou",
    "PIXEL_STD" : [57.375, 57.120, 58.395], 
    "steps": 6000,
    "feature_extract": True,
    "pin_memory": True,
    "NESTEROV": True,
    # Target size handed to the train/val transform builders.
    "image": {
        "width": 256,
        "height": 32
    },
    # Both splits read images from the same directory; only the label files differ.
    "train": {
        "root_path": "./data/train/images",
        "json_path": "./data/train/train_labels_splitted.json",
        "batch_size": 64
    },
    "val": {
        "root_path": "./data/train/images",
        "json_path": "./data/train/val_labels_splitted.json",
        "batch_size": 64
    }
}

In [None]:
!pip install straug 
!pip install opencv-contrib-python-headless
!pip install magickwand -y
!apt-get install libmagickwand-dev -y

In [None]:
!pip install gdown

In [None]:
OOV_TOKEN = '<OOV>'
CTC_BLANK = '<BLANK>'


def get_char_map(alphabet):
    """Build a character -> int mapping from an alphabet.

    Index 0 is reserved for the CTC blank and index 1 for the
    out-of-vocabulary token; alphabet characters are numbered from 2 in order
    of first appearance.

    Repeated characters in ``alphabet`` are counted once. Previously every
    position consumed an index, so a repeated character left a gap: the
    largest index then exceeded ``len(char_map) - 1``, and a class count taken
    from ``len(char_map)`` was too small for the last characters. For
    alphabets without repeats the result is unchanged.

    Args:
        alphabet: iterable of single characters (typically a string).

    Returns:
        dict mapping each character, CTC_BLANK and OOV_TOKEN to a unique index
        in ``range(len(result))``.
    """
    # dict.fromkeys keeps first-seen order while dropping repeats.
    unique_chars = dict.fromkeys(alphabet)
    char_map = {value: idx + 2 for (idx, value) in enumerate(unique_chars)}
    char_map[CTC_BLANK] = 0
    char_map[OOV_TOKEN] = 1
    return char_map


class Tokenizer:
    """Converts words to sequences of integer codes and back, based on the
    character map built from an alphabet."""

    def __init__(self, alphabet):
        self.char_map = get_char_map(alphabet)
        self.rev_char_map = {val: key for key, val in self.char_map.items()}

    def encode(self, word_list):
        """Encode every word as a list of ints; characters missing from the
        map are replaced by the OOV code."""
        oov_code = self.char_map[OOV_TOKEN]
        return [
            [self.char_map.get(char, oov_code) for char in word]
            for word in word_list
        ]

    def get_num_chars(self):
        """Size of the character map (alphabet plus the two special tokens)."""
        return len(self.char_map)

    def decode(self, enc_word_list):
        """Decode sequences of codes into strings.

        Blank and OOV codes are dropped, and a code equal to the one directly
        before it in the raw sequence is skipped (repeat collapsing).
        """
        oov_code = self.char_map[OOV_TOKEN]
        blank_code = self.char_map[CTC_BLANK]
        decoded = []
        for codes in enc_word_list:
            kept = []
            for position, code in enumerate(codes):
                if code == oov_code or code == blank_code:
                    continue
                # position > 0 guards against comparing with codes[-1]
                if position > 0 and code == codes[position - 1]:
                    continue
                kept.append(self.rev_char_map[code])
            decoded.append(''.join(kept))
        return decoded

In [None]:
!pip3 install ASR-metrics

In [None]:
!git clone https://github.com/NVIDIA/apex
%cd apex
!pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./

In [None]:
%cd ./worfking

In [None]:
from apex import amp

In [None]:
from ASR_metrics import utils as metrics

def get_accuracy(y_true, y_pred):
    """Average ``metrics.calculate_cer`` over paired true/predicted strings.

    Despite its name this returns a mean character error rate, not an
    accuracy. A later cell in this notebook defines another ``get_accuracy``
    that replaces this one once it has been run.
    """
    per_pair = [
        metrics.calculate_cer(true, pred)
        for true, pred in zip(y_true, y_pred)
    ]
    return np.mean(per_pair)

In [None]:
import wand
from straug.warp import *
from straug.geometry import *
from straug.blur import *

from PIL import Image
import copy
import cv2
import numpy as np

img = Image.open("./data/train/images/0.png").convert("RGB")

img = MotionBlur()(img, mag=2)
open_cv_image = np.array(img) 
open_cv_image = open_cv_image[:, :, ::-1].copy() 


In [None]:
from straug.warp import *
from straug.geometry import *
from straug.blur import *
from straug.camera import *
from straug.noise import *
from straug.process import *
import numpy as np
from PIL import Image
import random


class ChadAug:
    """Randomly applies ``n`` augmentations to a channel-last image array.

    ``aug_pol`` is a list of groups; on every call ``n`` distinct groups are
    sampled and one augmentation is picked from each.
    """

    def __init__(self, n=1, aug_pol=None):
        """
        Args:
            n: number of groups sampled per call; ``random.sample`` raises
                ValueError if it exceeds the number of groups.
            aug_pol: optional list of groups (lists of callables taking and
                returning a PIL image). The default augmenters are only built
                when this is None, so a custom policy no longer instantiates
                them needlessly.
        """
        if aug_pol is None:
            aug_pol = [
                [ShotNoise()], [Perspective()],
                [DefocusBlur(), MotionBlur()],                     # Blur
                [Brightness(), JpegCompression(), Pixelate()],     # Camera
                [Sharpness(), Color(), AutoContrast()],
            ]
        self.aug_pol = aug_pol
        self.n = n

    def roll_augs(self):
        """Pick ``n`` distinct groups and one random augmentation from each."""
        aug_types = random.sample(self.aug_pol, self.n)
        return [random.choice(a) for a in aug_types]

    def __call__(self, img):
        """Augment ``img`` and return an array with the input's channel order.

        The last axis is reversed before the array is handed to PIL and
        reversed again afterwards (presumably BGR <-> RGB for cv2-loaded
        images -- confirm against the caller).
        """
        img = Image.fromarray(img[:, :, ::-1])

        for aug in self.roll_augs():
            img = aug(img)

        img = np.array(img)[:, :, ::-1]
        return img

In [None]:
class MixedAug:
    """Augments the left and right halves of a PIL image independently and
    stitches them back together.

    ``aug_pol`` is a list of groups; on every call ``n`` distinct groups are
    sampled and one augmentation is picked from each.
    """

    def __init__(self, n=1, aug_pol=None):
        """
        Args:
            n: number of groups sampled per call.
            aug_pol: optional list of groups (lists of callables taking and
                returning a PIL image). The default augmenters are only built
                when this is None.
        """
        if aug_pol is None:
            aug_pol = [
                [DefocusBlur(), MotionBlur()],                     # Blur
                [Brightness(), JpegCompression(), Pixelate()],     # Camera
                [Sharpness(), Color(), AutoContrast()],            # Process
            ]
        self.aug_pol = aug_pol
        self.n = n

    def roll_mixed_augs(self):
        """Pick ``n`` distinct groups and one random augmentation from each."""
        aug_types = random.sample(self.aug_pol, self.n)
        return [random.choice(a) for a in aug_types]

    def __call__(self, img):
        """Apply the rolled augmentations to each half of ``img``.

        Args:
            img: PIL image (its ``.size`` is read), assumed to have three
                channels -- TODO confirm.

        Returns:
            ndarray of both augmented halves side by side. If no augmentation
            was rolled (n == 0) the untouched array is returned.
        """
        w, _ = img.size
        img = np.asarray(img)

        augs = self.roll_mixed_augs()
        if not augs:
            return img

        half = w // 2
        # The last axis is reversed before the augmenters see the data and
        # reversed back on return, as in the original implementation.
        halves = [
            Image.fromarray(img[:, :half, ::-1]),
            Image.fromarray(img[:, half:, ::-1]),
        ]

        # Chain the augmentations. Previously every iteration restarted from
        # the unaugmented halves, so only the last rolled augmentation had any
        # effect when n > 1.
        for aug in augs:
            halves = [aug(part) for part in halves]

        # Bring both halves to the smaller of the two sizes before stacking.
        min_shape = min((np.sum(part.size), part.size) for part in halves)[1]
        # np.hstack needs a real sequence; passing a generator is deprecated
        # and rejected by recent NumPy releases.
        combined = np.hstack(
            [np.asarray(part.resize(min_shape)) for part in halves]
        )
        return combined[:, :, ::-1]

In [None]:
from torchvision.transforms import  AutoAugmentPolicy
from matplotlib import pyplot as plt
from torchvision.transforms import ToPILImage
from torchvision.transforms import AutoAugment  
import albumentations as A
import torch.optim as optim
import wand
from straug.warp import *
from straug.geometry import *
from straug.blur import *
from torchvision.transforms import RandomAffine
from torchvision.transforms import RandomCrop
from torchvision.transforms import RandomRotation
from torchvision import transforms, models
from tqdm import tqdm
import copy
import random 
import PIL
from PIL import Image


class ImageFloppaResize:
    """Resize an image to fit inside (height, width) while keeping its aspect
    ratio, then pad the bottom and right edges up to the target size."""

    def __init__(self, height, width, pad_color=(0, 0, 0), border_m=cv2.BORDER_CONSTANT):
        """
        Args:
            height: target height in pixels.
            width: target width in pixels.
            pad_color: value passed to ``cv2.copyMakeBorder`` for the padding.
            border_m: OpenCV border mode used for the padding.
        """
        self.height = height
        self.width = width
        self.pad_color = pad_color
        self.border_m = border_m

    def __call__(self, image):
        """Return the resized and padded image.

        ``shape[:2]`` is used so that 2-D (single-channel) arrays work as well
        as channel-last ones, and the resize target is clamped to at least one
        pixel per side so extreme aspect ratios cannot request a zero-sized
        output from ``cv2.resize``.
        """
        img_h, img_w = image.shape[:2]
        resize_f = min(self.height / img_h, self.width / img_w)
        dim = (max(1, int(img_w * resize_f)), max(1, int(img_h * resize_f)))
        image = cv2.resize(image, dim, interpolation=cv2.INTER_LINEAR)

        img_h, img_w = image.shape[:2]
        pad_b = max(self.height - img_h, 0)
        pad_r = max(self.width - img_w, 0)
        image = cv2.copyMakeBorder(image.copy(), 0, pad_b, 0, pad_r, self.border_m, value=self.pad_color)
        return image
    
class MagickAug:
    """Applies ``Invert`` (magnitude 2) to an image array and returns the
    result with its last axis reversed."""

    def __call__(self, img):
        # Invert is presumably the straug augmenter brought in by one of the
        # star imports -- confirm.
        inverted = Invert()(Image.fromarray(img), mag=2)
        # Reverse the channel axis and copy, so the result is a standalone array.
        return np.array(inverted)[:, :, ::-1].copy()




class Normalize:
    """Converts an array to float32 and divides it by 255."""

    def __call__(self, img):
        as_float = img.astype(np.float32)
        return as_float / 255

class Alumentated:
    """Albumentations-based augmentation: CLAHE, a small rotation and JPEG
    compression, each applied with its own probability."""

    def __init__(self):
        # Build the pipeline once. It used to be re-created on every call,
        # i.e. for every single image, although its configuration never changes.
        self.transform = A.Compose([
            A.CLAHE(clip_limit=1.0, tile_grid_size=(8, 8), p=0.25, always_apply=False),
            A.Rotate(limit=3, interpolation=1, border_mode=0, p=0.1),
            A.JpegCompression(quality_lower=35, p=0.5),
        ], p=1.0)

    def __call__(self, img):
        """Convert ``img`` from BGR to RGB, run the pipeline and return the
        augmented image (left in RGB order)."""
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        transformed = self.transform(image=img)
        return transformed["image"]

class ToTensor:
    """Wraps a NumPy array as a torch tensor via ``torch.from_numpy``."""

    def __call__(self, arr):
        return torch.from_numpy(arr)


class MoveChannels:
    """Moves the channel axis of an array between the last and the first
    position."""

    def __init__(self, to_channels_first=True):
        # True: last axis -> first; False: first axis -> last.
        self.to_channels_first = to_channels_first

    def __call__(self, image):
        source, destination = (-1, 0) if self.to_channels_first else (0, -1)
        return np.moveaxis(image, source, destination)


class ImageResize:
    """Resizes an image to a fixed (height, width) with bilinear
    interpolation, ignoring the aspect ratio."""

    def __init__(self, height, width):
        self.height, self.width = height, width

    def __call__(self, image):
        # cv2.resize expects the target size as (width, height).
        target_size = (self.width, self.height)
        return cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)



def get_train_transforms(height, width):
    """Compose the training pipeline: random augmentation, aspect-preserving
    resize with padding to (height, width), channels-first layout, scaling by
    1/255 and conversion to a tensor."""
    steps = [
        ChadAug(),
        ImageFloppaResize(height, width),
        MoveChannels(to_channels_first=True),
        Normalize(),
        ToTensor(),
    ]
    return torchvision.transforms.Compose(steps)


def get_val_transforms(height, width):
    """Compose the validation pipeline: the same steps as for training but
    without the random augmentation."""
    steps = [
        ImageFloppaResize(height, width),
        MoveChannels(to_channels_first=True),
        Normalize(),
        ToTensor(),
    ]
    return torchvision.transforms.Compose(steps)

In [None]:
!pip install --upgrade efficientnet-pytorch 


In [None]:
q =  torch.tensor([[3, 64, 32, 256]])
print(q.permute(1, 0))

In [None]:
!pip install onnx

In [None]:
!pip3 install timm==0.5.4 
from timm import create_model

In [None]:
import timm
import timm.optim
from timm import create_model

In [None]:
!pip install catboost==0.10.0

In [None]:
from catboost import CatBoostRegressor
model_с = CatBoostRegressor(iterations=700, l2_leaf_reg=0.07, task_type='GPU',
                          learning_rate=0.0001, silent=True, has_time=True, boosting_type= 'Plain',
                          depth=5, loss_function='MultiRMSE',)


In [None]:
m = nn.Linear(20, 30)
input_m = torch.randn(128, 20)
output = m(input_m)
output

In [None]:
%cd /kaggle/working

In [None]:
import torchvision.models as models
import onnx
model_name = "convnext_base_in22k"
from catboost import CatBoostClassifier, Pool, sum_models

def get_resnet50_backbone(pretrained=True):
    """Build the convolutional feature extractor.

    Despite the name, the network is created from the module-level
    ``model_name`` via timm. Only its ``stages`` are reused; they are preceded
    by a freshly initialised 3 -> 128 channel convolution (kernel 7, stride 3).

    Args:
        pretrained: forwarded to ``timm.create_model``. It used to be ignored,
            so pretrained weights were always requested.

    Returns:
        nn.Sequential(input_conv, stages).
    """
    m = timm.create_model(model_name, pretrained=pretrained)
    input_conv = nn.Conv2d(3, 128, 7, 3, bias=False)
    return nn.Sequential(input_conv, m.stages)



class BiLSTM(nn.Module):
    """Batch-first bidirectional LSTM that returns only the output sequence
    (the hidden and cell states are discarded)."""

    def __init__(self, input_size, hidden_size, num_layers, dropout=0.1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )

    def forward(self, x):
        # nn.LSTM returns (output, (h_n, c_n)); keep the output only.
        return self.lstm(x)[0]



def make_divisible(v, divisor=8, min_value=None, round_limit=.9):
    """Round ``v`` to a nearby multiple of ``divisor``.

    The result is never below ``min_value`` (``divisor`` when that is unset or
    falsy). If it would end up below ``round_limit * v``, one more ``divisor``
    is added.
    """
    lower_bound = min_value if min_value else divisor
    nearest = int(v + divisor / 2) // divisor * divisor
    new_v = max(lower_bound, nearest)
    return new_v + divisor if new_v < round_limit * v else new_v


class DenseNetAtt(nn.Module):
    """Recognition model: convolutional backbone -> adaptive average pooling
    -> bidirectional LSTM -> per-step classifier emitting log-probabilities."""

    def __init__(
        self, number_class_symbols, time_feature_count=32, lstm_hidden=256,
        lstm_len=3,
    ):
        """
        Args:
            number_class_symbols: size of the classifier output.
            time_feature_count: side length of the pooled map; also used as
                the LSTM input size and the classifier's hidden width.
            lstm_hidden: hidden size of each LSTM direction.
            lstm_len: number of stacked LSTM layers.
        """
        super().__init__()
        self.feature_extractor = get_resnet50_backbone(pretrained=True)

        pooled_shape = (time_feature_count, time_feature_count)
        self.avg_pool = nn.AdaptiveAvgPool2d(pooled_shape)

        self.bilstm = BiLSTM(time_feature_count, lstm_hidden, lstm_len)

        head_layers = [
            # The bidirectional LSTM emits 2 * lstm_hidden features per step.
            nn.Linear(lstm_hidden * 2, time_feature_count),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(time_feature_count, number_class_symbols),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return log-softmax scores with the first two axes swapped
        (presumably (time, batch, classes) as expected by CTC loss -- confirm)."""
        features = self.feature_extractor(x)
        batch, channels, height, width = features.size()
        # Fold the channel and height axes together: (b, c * h, w).
        flattened = features.view(batch, channels * height, width)
        # NOTE(review): a 3-D tensor is fed to a 2-D adaptive pool here, so the
        # pooling acts on the last two axes -- confirm this is intended.
        pooled = self.avg_pool(flattened)
        sequence = pooled.transpose(1, 2)
        logits = self.classifier(self.bilstm(sequence))
        log_probs = nn.functional.log_softmax(logits, dim=2)
        return log_probs.permute(1, 0, 2)


In [None]:
# NOTE(review): in the visible notebook `train` is only defined in a later cell,
# so running the cells top to bottom on a fresh kernel raises NameError here;
# this call works only after that later cell has been executed.
train(config_json)

In [None]:
from IPython.display import FileLink
FileLink(r'./model-4-0.3787.ckpt')

In [None]:
%cd /kaggle/working

In [None]:
!conda install cudatoolkit==11.2

In [None]:
from torch.nn.utils.clip_grad import clip_grad_value_
from torch.optim import optimizer
from ASR_metrics import utils as metrics
from torch.nn import CTCLoss
from torch import Tensor
import time
import tqdm

torch.manual_seed(375451*70)

def get_accuracy(y_true, y_pred):
    """Fraction of pairs whose predicted value equals the true value exactly.

    Pairs are formed with ``zip``, so surplus items of the longer input are
    ignored.
    """
    exact_matches = [true == pred for true, pred in zip(y_true, y_pred)]
    return np.mean(exact_matches)


def get_cer(y_true, y_pred):
    """Mean of ``metrics.calculate_cer`` over paired true/predicted strings."""
    per_pair = [
        metrics.calculate_cer(true, pred)
        for true, pred in zip(y_true, y_pred)
    ]
    return np.mean(per_pair)


def val_loop(data_loader, model, tokenizer, device):
    """Evaluate ``model`` on every batch of ``data_loader``.

    Returns:
        (accuracy, cer): per-batch exact-match accuracy and character error
        rate, averaged with batch size as the weight.
    """
    accuracy_meter, cer_meter = AverageMeter(), AverageMeter()
    model.eval()
    for images, texts, _enc_texts, _text_lens in data_loader:
        n_samples = len(texts)
        text_preds = predict(images, model, tokenizer, device)
        accuracy_meter.update(get_accuracy(texts, text_preds), n_samples)
        cer_meter.update(get_cer(texts, text_preds), n_samples)

    return accuracy_meter.avg, cer_meter.avg

def train_loop(data_loader, model, criterion, optimizer, epoch):
    """Run one training epoch and return the batch-size-weighted mean loss.

    Args:
        data_loader: yields (images, texts, enc_pad_texts, text_lens) batches.
        model: network whose output is indexed as (steps, batch, ...) -- the
            step count is taken from ``output.size(0)``.
        criterion: loss called as
            ``criterion(output, targets, output_lengths, target_lengths)``.
        optimizer: optimizer stepping ``model``'s parameters.
        epoch: epoch number, used for logging only.

    Images are moved to the module-level ``DEVICE``.
    """
    loss_avg = AverageMeter()
    model.train()
    print("train loop")
    for images, texts, enc_pad_texts, text_lens in tqdm.tqdm(data_loader):
        model.zero_grad()

        images = images.to(DEVICE)
        batch_size = len(texts)

        output = model(images)
        # Every sample in the batch uses the full output length.
        output_lenghts = torch.full(
            size=(output.size(1),),
            fill_value=output.size(0),
            dtype=torch.long
        )
        loss = criterion(output, enc_pad_texts, output_lenghts, text_lens)
        # Each batch is back-propagated exactly once, so the graph does not
        # need to be retained; retain_graph=True only kept the activations
        # alive longer than necessary.
        loss.backward()
        loss_avg.update(loss.item(), batch_size)

        # Clip the global gradient norm to 2 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 2)

        optimizer.step()

    # Report the learning rate of the last parameter group.
    lr = optimizer.param_groups[-1]['lr']
    print(f'Epoch {epoch}, Loss: {loss_avg.avg:.5f}, LR: {lr:.7f}')
    return loss_avg.avg




def predict(images, model, tokenizer, device):
    """Run ``model`` on ``images`` without gradients and decode the result.

    The arg-max over the last axis is taken, the first two axes are swapped,
    and the resulting integer array is handed to ``tokenizer.decode``.
    """
    model.eval()
    with torch.no_grad():
        scores = model(images.to(device))
    best_codes = scores.detach().cpu().argmax(dim=-1)
    return tokenizer.decode(best_codes.permute(1, 0).numpy())


def get_loaders(tokenizer, config):
    """Create the training and validation data loaders described by ``config``.

    Reads ``config['image']`` for the target size and ``config['train']`` /
    ``config['val']`` for ``json_path``, ``root_path`` and ``batch_size``.

    Returns:
        (train_loader, val_loader)
    """
    def build_loader(split, transforms, drop_last):
        # Look up the split's section and hand everything to get_data_loader.
        section = config[split]
        return get_data_loader(
            transforms=transforms,
            json_path=section['json_path'],
            root_path=section['root_path'],
            tokenizer=tokenizer,
            batch_size=section['batch_size'],
            drop_last=drop_last,
        )

    image_cfg = config['image']
    train_loader = build_loader(
        'train',
        get_train_transforms(
            height=image_cfg['height'], width=image_cfg['width']),
        drop_last=True,
    )
    val_loader = build_loader(
        'val',
        get_val_transforms(
            height=image_cfg['height'], width=image_cfg['width']),
        drop_last=False,
    )
    return train_loader, val_loader


def train(config):
    """Train the recognition model and checkpoint every new best accuracy.

    Reads ``alphabet``, ``save_dir`` and ``num_epochs`` from ``config`` (the
    data sections are consumed by ``get_loaders``). The optimizer is SGD with
    a learning rate fixed in this function, and the rate is halved when the
    validation accuracy stops improving for more than two epochs.
    """
    tokenizer = Tokenizer(config['alphabet'])
    os.makedirs(config['save_dir'], exist_ok=True)
    train_loader, val_loader = get_loaders(tokenizer, config)

    model = DenseNetAtt(number_class_symbols=tokenizer.get_num_chars())
    model.to(DEVICE)

    criterion = torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.000125)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer, mode='max', factor=0.5, patience=2)

    # One validation pass before training starts; its result is not used.
    val_loop(val_loader, model, tokenizer, DEVICE)

    best_acc = -np.inf
    epoch_time_m = 0
    for epoch in range(config['num_epochs']):
        started_at = time.time()
        print("\nEpoch", epoch, "Previous took", epoch_time_m, "minutes")
        train_loop(train_loader, model, criterion, optimizer, epoch)
        acc_avg, cer_avg = val_loop(val_loader, model, tokenizer, DEVICE)
        print(f'acc: {acc_avg}; cer: {cer_avg};')
        scheduler.step(acc_avg)

        # Whole minutes spent on this epoch (training plus validation).
        epoch_time_m = int((time.time() - started_at)/60)
        if not acc_avg > best_acc:
            continue

        best_acc = acc_avg
        model_save_path = os.path.join(
            config['save_dir'], f'model-{epoch}-{acc_avg:.4f}.ckpt')
        torch.save(model.state_dict(), model_save_path)
        print('Model weights saved')

In [None]:
!export CMAKE_ARGS='-DCMAKE_VERBOSE_MAKEFILE=ON'
!export VERBOSE=1