## CNN Model (Notebook version)
> CNN Captcha Recognition Model 

Import the libraries

In [None]:
import os
import random
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from torchvision import transforms

Load the data. Change the path below to point to your own copy of the dataset.

In [None]:
# Dataset location on Google Drive (uncomment when running on Colab)
# data_dir = Path("/content/drive/MyDrive/Data")

# Dataset location for a local run
data_dir = Path("./dataset")

# Path.glob yields files in an arbitrary, OS-dependent order. The splits
# further down slice this list by position, so fix the order to keep them
# reproducible: sort first, then shuffle with a fixed seed so the splits are
# not biased by the label prefix at the start of each file name.
images = sorted(data_dir.glob("*.jpg"))
random.Random(42).shuffle(images)
print("Number of images found: ", len(images))



Show some samples (Optional)

In [None]:
# Preview up to four images in a 2x2 grid.
sample_images = images[:4]
_, ax = plt.subplots(2, 2, figsize=(5, 3))

# Hide the axes on every cell so unused cells stay blank when fewer than
# four images are available.
for axis in ax.flat:
    axis.axis('off')

for axis, path in zip(ax.flat, sample_images):
    img = cv2.imread(str(path))
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        print("Could not read image: ", path)
        continue
    print("Shape of image: ", img.shape)
    # OpenCV decodes to BGR while matplotlib expects RGB
    axis.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()

Define the custom dataset class

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """In-memory dataset of grayscale captcha images and their text labels.

    Every image is read eagerly in the constructor, converted to grayscale,
    resized to ``(height, width)`` and stored as ``float32`` (pixel values are
    kept as loaded, not rescaled). The label of an image is the part of its
    file name before the first underscore.

    Args:
        images: Sequence of ``pathlib.Path`` objects pointing at image files.
        transform: Optional callable applied to each image on access.
        target_transform: Optional callable applied to each label on access.
        height: Height in pixels that every image is resized to.
        width: Width in pixels that every image is resized to.

    Raises:
        ValueError: If an image file cannot be read by OpenCV.
    """

    def __init__(self, images, transform=None, target_transform=None, height=50, width=200):
        self.transform = transform
        self.target_transform = target_transform
        self.num = len(images)

        self.images = np.zeros((self.num, height, width), dtype=np.float32)
        labels = []

        for i, path in enumerate(images):
            img = cv2.imread(str(path))
            if img is None:
                # cv2.imread returns None rather than raising on failure
                raise ValueError(f"Could not read image: {path}")
            # cv2.imread yields BGR channel order, so use the BGR conversion
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # cv2.resize takes the target size as (width, height)
            self.images[i] = cv2.resize(img, (width, height))
            labels.append(path.name.split("_")[0])

        self.labels = np.array(labels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair at ``index`` with transforms applied."""
        image = self.images[index]
        label = self.labels[index]
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.num

Split the data set

In [None]:
# Positional split of the image list: everything from index 8000 on is the
# test set (2000 images when the dataset holds 10000).
test_data = images[8000:]

# The first 8000 images are divided into training and validation parts
training = images[:8000]
valid_data = training[6000:]  # 2000 for validation
train_data = training[:6000]  # 6000 for training

print("test set size:", len(test_data))
print("validation set size:", len(valid_data))
print("train set size:", len(train_data))

# ToTensor must be instantiated: passing the class itself would make the
# dataset call ToTensor(image), which raises a TypeError.
train_set = CustomDataset(train_data, transform=transforms.ToTensor())
valid_set = CustomDataset(valid_data, transform=transforms.ToTensor())
test_set = CustomDataset(test_data, transform=transforms.ToTensor())

## CNN Model

Coming soon