<a href="https://colab.research.google.com/github/Rajsingh92/small-projects/blob/master/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!ls

drive  sample_data


In [1]:
DATA_DIR = "drive/My Drive/captcha_images_v2"
BATCH_SIZE = 8
IMAGE_WIDTH = 300
IMAGE_HEIGHT = 75
NUM_WORKERS = 8
EPOCHS = 200
DEVICE = "cuda"

In [2]:
import numpy as np
import  torch
import albumentations
from PIL import Image
from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

class ClassificationDataset:
    def __init__(self,image_paths,targets,resize=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.aug = albumentations.Compose([albumentations.Normalize(always_apply=True)])
        
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self,item):
        image = Image.open(self.image_paths[item]).convert("RGB")
        targets = self.targets[item]

        if self.resize is not None:
            image = image.resize(
                (self.resize[1],self.resize[0]),resample = Image.BILINEAR
                )

        image = np.array(image)
        augmented = self.aug(image=image)
        image = augmented['image']
        image = np.transpose(image,(2,0,1)).astype(np.float32)

        return {
            "images":torch.tensor(image,dtype=torch.float),
            "targets": torch.tensor(targets,dtype=torch.long),
        }



In [3]:
from tqdm import tqdm
import torch 



def train_fn(model,data_loader,optimizers):
    model.train()
    fin_loss = 0 
    tk = tqdm(data_loader,total=len(data_loader))

    for data in tk:
        for k,v in data.items():
            data[k] = v.to(DEVICE)
        optimizers.zero_grad()
        _,loss = model(**data)
        loss.backward()
        optimizers.step()
        fin_loss+=loss.item()

    return fin_loss/len(data_loader)

def eval_fn(model,data_loader):
    model.eval()
    fin_loss = 0 
    fin_preds = []
    tk = tqdm(data_loader,total=len(data_loader))

    for data in tk:
        for k,v in data.items():
            data[k] = v.to(DEVICE)

    
        batch_preds,loss = model(**data)
        fin_loss+= loss.item()
        fin_preds.append(batch_preds)

    return fin_preds,fin_loss/len(data_loader)
      



In [13]:
import torch
from torch import nn
from torch.nn import functional as F

class CaptchaModel(nn.Module):
    def __init__(self,num_chars):
        super(CaptchaModel,self).__init__()
        self.conv_1 = nn.Conv2d(3,128,kernel_size=(3,3),padding=(1,2))
        self.max_pool_1 = nn.MaxPool2d(kernel_size=(2,2))

        self.conv_2 = nn.Conv2d(128,64,kernel_size=(3,3),padding=(1,2))
        self.max_pool_2 = nn.MaxPool2d(kernel_size=(2,2))

        self.linear_1 = nn.Linear(1152,64)
        self.drop_1 = nn.Dropout(0.2)

        self.gru = nn.GRU(64,32,bidirectional = True,num_layers=2,dropout = 0.25)
        self.output = nn.Linear(64,num_chars+1)

    def forward(self,images,targets=None):
        bs,c,h,w = images.size()
        #print(bs,c,h,w)
        x = F.relu(self.conv_1(images))
        #print(x.size())
        x = self.max_pool_1(x)
        #print(x.size())
        x = F.relu(self.conv_2(x))
        #print(x.size())
        x = self.max_pool_2(x)  #[1, 64, 18, 76]
        #print(x.size())
        x = x.permute(0,3,1,2)  #[1, 76, 64, 18]
        #print(x.size())
        x= x.view(bs,x.size(1),-1)
        #print(x.size())
        x= self.linear_1(x)
        x= self.drop_1(x)
        #print(x.size())
        x,_= self.gru(x)
        #print(x.size())
        x= self.output(x)
        #print(x.size())
        x = x.permute(1,0,2)
        #print(x.size())
        if targets is not None:
            log_softmax_values = F.log_softmax(x,2)
            input_lengths = torch.full(
                size=(bs,),
                fill_value=log_softmax_values.size(0),
                dtype=torch.int32
            )
            print(input_lengths)

            target_lengths = torch.full(
                size=(bs,),
                fill_value=log_softmax_values.size(1),
                dtype=torch.int32
            )
            print(target_lengths)
            loss =nn.CTCLoss(blank=0)(
                log_softmax_values,targets,input_lengths,target_lengths
            )
            return x,loss

        return x,None


if __name__ == "__main__":
    cm = CaptchaModel(19)
    img = torch.rand(5,3,75,300)
    target = torch.randint(1,20,(5,8))
    x,loss = cm(img,target)


        


tensor([76, 76, 76, 76, 76], dtype=torch.int32)
tensor([5, 5, 5, 5, 5], dtype=torch.int32)


In [14]:
import os
import glob
import torch
import numpy as np
import torch.utils.data

from sklearn import preprocessing
from sklearn import model_selection
from sklearn import metrics




def run_training():
    image_files = glob.glob(os.path.join(DATA_DIR, "*.png"))
    targets_orig = [x.split("/")[-1][:-4] for x in image_files]
    targets = [[c for c in x] for x in targets_orig]
    targets_flat = [c for clist in targets for c in clist]

    lbl_enc = preprocessing.LabelEncoder()
    lbl_enc.fit(targets_flat)
    targets_enc = [lbl_enc.transform(x) for x in targets]
    targets_enc = np.array(targets_enc)+1

    (
        train_imgs,
        test_imgs,
        train_targets,
        test_targets,
        train_orig_targets,
        test_orig_targets
    ) = model_selection.train_test_split(image_files,targets_enc,targets_orig,test_size=0.1,random_state=42)

    train_dataset = ClassificationDataset(
        image_paths=train_imgs, 
        targets=train_targets, 
        resize=(IMAGE_HEIGHT, IMAGE_WIDTH)
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=True
    )

    test_dataset = ClassificationDataset(
        image_paths=train_imgs,
        targets=test_targets,
        resize=(IMAGE_HEIGHT, IMAGE_WIDTH)
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=False
    )

    model = CaptchaModel(num_chars=len(lbl_enc.classes_))
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(),lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,factor=0.8,patience=5,verbose=True
    )

    for epoch in range(EPOCHS):
        train_loss = train_fn(model,train_loader,optimizer)
        valid_preds,valid_loss = eval_fn(model,train_loader)


if __name__ == "__main__":
    run_training()




  0%|          | 0/117 [00:00<?, ?it/s][A[A

tensor([76, 76, 76, 76, 76, 76, 76, 76], dtype=torch.int32)
tensor([8, 8, 8, 8, 8, 8, 8, 8], dtype=torch.int32)


RuntimeError: ignored