In [0]:
# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
# authenticate_user() runs the Colab sign-in flow; the resulting application
# default credentials are then handed to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Download a file based on its file ID.
#
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
# The ID below is the archive that gets saved locally as captcha.zip.
file_id = '1Tvi4Vk8MIBsVi4MIHX1zoDe6UsKpWBqj'
downloaded = drive.CreateFile({'id': file_id})
# Write the Drive file's content to the working directory.
downloaded.GetContentFile('captcha.zip') 

In [0]:
!unzip captcha.zip > /dev/null

In [0]:
import os

def make_dirs():
  """Create the ``train``/``val``/``test`` folder tree, one sub-folder per digit.

  The folders are created relative to the current working directory as
  ``<split>/<digit>`` for every split and every digit ``'0'``..``'9'``.
  Existing folders are left untouched (``exist_ok=True``), so the function
  can be called repeatedly.
  """
  split_dirs = ('train', 'val', 'test')
  classes = [str(digit) for digit in range(10)]
  for dir_name in split_dirs:
    for class_name in classes:
      os.makedirs(os.path.join(dir_name, class_name), exist_ok=True)

In [0]:
import glob
from PIL import Image

def divide_data():
  """Cut every CAPTCHA image into five digit crops and file them into splits.

  Input: all files matching ``captcha/*.png``. The label of crop ``i`` is the
  i-th of the five characters immediately before the ``.png`` extension, and
  it must be a decimal digit (``int()`` raises ``ValueError`` otherwise).

  Crop ``i`` (i = 0..4) is the box ``(5 + 15*i, 0, 30 + 15*i, 48)``, i.e. a
  25x48 window; consecutive windows overlap by 10 px.

  Split rule, decided per source image from its position ``j`` in the sorted
  file list: even ``j`` -> ``test``; odd ``j`` divisible by 7 -> ``val``;
  everything else -> ``train``. All five crops of one image go to the same
  split.

  Output: ``<split>/<digit>/<digit>_<n>.png`` where ``n`` counts from 1
  separately for every split and digit. The target folders must already
  exist (see ``make_dirs``); this function does not create them.
  """
  crop_left, crop_right, crop_step, crop_height = 5, 30, 15, 48
  num_digits = 5
  # Next free file index per split and per digit class.
  next_index = {
      'test': [1] * 10,
      'val': [1] * 10,
      'train': [1] * 10,
  }
  # Sorted so that the split assignment does not depend on the arbitrary
  # order in which glob happens to return directory entries.
  for j, filename in enumerate(sorted(glob.glob('captcha/*.png'))):
    if j % 2 == 0:
      split = 'test'
    elif j % 7 == 0:
      split = 'val'
    else:
      split = 'train'
    # The five label characters sit right before the 4-character '.png'.
    digits = filename[-9:-4]
    # The context manager releases the source file handle once all crops
    # of this image have been written.
    with Image.open(filename) as im:
      for i in range(num_digits):
        label = digits[i]
        class_id = int(label)
        box = (crop_left + i * crop_step, 0, crop_right + i * crop_step, crop_height)
        target = os.path.join(split, label, label + '_' + str(next_index[split][class_id]) + '.png')
        im.crop(box).save(target, format='png')
        next_index[split][class_id] += 1


In [None]:
# Build the split folder tree first: divide_data() only saves files and does
# not create missing directories itself.
make_dirs()
divide_data()

In [0]:
import torchvision
import torch
import random
import numpy as np

# Seed every random number generator used by the notebook so runs are repeatable.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
# The flag is spelled `deterministic`; assigning to a misspelled attribute
# name is accepted silently and leaves cuDNN's algorithm selection unchanged.
torch.backends.cudnn.deterministic = True

# ImageFolder derives the class label from the sub-folder name ('0'..'9');
# ToTensor turns each PNG into a float tensor in channel-first layout.
train_dataset = torchvision.datasets.ImageFolder('train', torchvision.transforms.ToTensor())
val_dataset = torchvision.datasets.ImageFolder('val', torchvision.transforms.ToTensor())
batch_size = 10
# Only the training loader shuffles; validation keeps the dataset order.
# NOTE(review): num_workers is tied to batch_size here although the two
# settings are unrelated -- consider a dedicated constant sized to the CPU count.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=batch_size)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, num_workers=batch_size)

In [0]:
import matplotlib.pyplot as plt
def show_tensorImage(tensorImage, title = ''):
    """Display a channel-first image tensor with matplotlib.

    Args:
        tensorImage: image tensor whose first axis is moved to the end before
            plotting (channels-first -> channels-last). It may live on any
            device and may be attached to an autograd graph.
        title: text shown above the image.
    """
    # detach() + cpu(): Tensor.numpy() raises for tensors that require grad
    # or are stored on a GPU, e.g. a batch that was already moved to `device`.
    image = tensorImage.detach().cpu().permute(1, 2, 0).numpy()
    # Clip to the [0, 1] range that imshow expects for float image data.
    plt.imshow(image.clip(0, 1))
    plt.title(title)
    plt.show()
    plt.pause(0.001)

In [0]:
# Sanity check: list the class folders that were created under train/.
%ls train

[0m[01;34m0[0m/  [01;34m1[0m/  [01;34m2[0m/  [01;34m3[0m/  [01;34m4[0m/  [01;34m5[0m/  [01;34m6[0m/  [01;34m7[0m/  [01;34m8[0m/  [01;34m9[0m/


In [0]:
# Number of samples (digit crops) found under the train/ folder.
len(train_dataset)

214285

In [0]:
# Pull a single batch from the training loader to inspect it.
inputs, labels = next(iter(train_dataloader))

In [0]:
# Batch layout check; the recorded output is [10, 3, 48, 25]
# (batch of 10, 3 channels, 48 rows, 25 columns).
inputs.shape

torch.Size([10, 3, 48, 25])

In [0]:
class CaptchaNet(torch.nn.Module):
  """Small CNN that maps one 3x48x25 digit crop to 10 class logits.

  Layout (spatial sizes are for a 48x25 input):
    conv1 -> conv2 -> ReLU -> BatchNorm(6)  -> MaxPool   : 48x25 -> 24x12
    conv3 -> conv4 -> ReLU -> BatchNorm(16) -> MaxPool   : 24x12 -> 10x4
    flatten (16*10*4 = 640) -> fc1(120) -> ReLU -> fc2(84) -> ReLU -> fc3(10)

  The two batch-norm layers are stored as ``norm1`` (6 channels) and
  ``norm2`` (16 channels) and are applied in the forward pass in the order
  listed above; both therefore appear in ``state_dict()`` under those names.
  """

  def __init__(self):
    super(CaptchaNet, self).__init__()
    self.conv1 = torch.nn.Conv2d(in_channels = 3, out_channels = 3,
                                kernel_size = 3, padding = 1) # size = 48*25
    self.conv2 = torch.nn.Conv2d(in_channels = 3, out_channels = 6,
                                kernel_size = 3, padding = 1) # size = 48*25
    self.act1 = torch.nn.ReLU()
    self.norm1 = torch.nn.BatchNorm2d(num_features = 6)
    self.pool1 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2) # size = 24*12

    self.conv3 = torch.nn.Conv2d(in_channels = 6, out_channels = 11,
                                 kernel_size = 3, padding = 0) # size = 22*10
    self.conv4 = torch.nn.Conv2d(in_channels = 11, out_channels = 16,
                                kernel_size = 3, padding = 0) # size = 20*8
    self.act2 = torch.nn.ReLU()
    # Distinct attribute name: reusing `norm1` here would replace the
    # 6-channel layer registered for the first stage.
    self.norm2 = torch.nn.BatchNorm2d(num_features = 16)
    self.pool2 = torch.nn.MaxPool2d(kernel_size = 2, stride = 2) # size = 10*4

    self.fc1 = torch.nn.Linear(10*4*16, 120)
    self.act3 = torch.nn.ReLU()
    self.fc2 = torch.nn.Linear(120, 84)
    self.act4 = torch.nn.ReLU()
    self.fc3 = torch.nn.Linear(84, 10)

  def forward(self, x):
    """Return the class logits for a batch of images.

    Args:
      x: float tensor of shape (batch, 3, 48, 25); fc1 is sized for exactly
        this spatial resolution.

    Returns:
      Tensor of shape (batch, 10) with unnormalised class scores.
    """
    # Stage 1: layers applied in the order they are declared in __init__.
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.act1(x)
    x = self.norm1(x)
    x = self.pool1(x)

    # Stage 2.
    x = self.conv3(x)
    x = self.conv4(x)
    x = self.act2(x)
    x = self.norm2(x)
    x = self.pool2(x)

    # Flatten (channels, height, width) into one feature vector per sample.
    x = x.view(x.size(0), x.size(1)* x.size(2)* x.size(3))
    x = self.fc1(x)
    x = self.act3(x)
    x = self.fc2(x)
    x = self.act4(x)
    x = self.fc3(x)

    return x
  

# Instantiate the network; it is moved to the compute device in a later cell.
model = CaptchaNet() 


In [0]:
# Use the first GPU when one is visible; otherwise fall back to the CPU so
# the notebook still runs on a runtime without CUDA instead of failing in
# model.to(device).
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [0]:
# Objective: multi-class cross-entropy computed on the raw logits.
loss = torch.nn.CrossEntropyLoss()

# Adam in its AMSGrad variant, with a small weight decay on every parameter.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
    amsgrad=True,
)

# Multiply the learning rate by 0.1 after every 10 calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


In [0]:
def train_model(num_epochs):
  """Train the notebook-level ``model`` and plot loss/accuracy curves.

  Every epoch runs a 'train' pass over ``train_dataloader`` (with optimizer
  updates) followed by a 'val' pass over ``val_dataloader`` (no updates),
  then advances ``scheduler`` by one step. The reported loss and accuracy of
  a pass are the means of the per-batch values. After the last epoch four
  matplotlib figures are drawn: train accuracy, val accuracy, train loss and
  val loss.

  Relies on the notebook-level names ``model``, ``loss``, ``optimizer``,
  ``scheduler``, ``device``, ``train_dataloader``, ``val_dataloader`` and
  ``plt``.

  Args:
    num_epochs: number of epochs to run.
  """
  train_loss_history = []
  val_loss_history = []
  train_accuracy_history = []
  val_accuracy_history = []
  for epoch in range(num_epochs):
    print('Epoch {}/{}:'.format(epoch, num_epochs - 1), flush=True)
    for phase in ['train', 'val']:
      is_train = phase == 'train'
      if is_train:
        dataloader = train_dataloader
        model.train()
      else:
        dataloader = val_dataloader
        model.eval()
      running_loss = 0.0
      running_acc = 0.0
      for x_batch, y_batch in dataloader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # Autograd bookkeeping is only needed when the weights get updated;
        # disabling it for validation saves memory and time.
        with torch.set_grad_enabled(is_train):
          # Call the module itself (not .forward) so registered hooks run.
          preds = model(x_batch)
          loss_value = loss(preds, y_batch)
        if is_train:
          optimizer.zero_grad()
          loss_value.backward()
          optimizer.step()
        preds_classes = preds.argmax(dim=1)
        # .item() keeps the running sums as plain Python floats instead of
        # accumulating device tensors.
        running_loss += loss_value.item()
        running_acc += (preds_classes == y_batch).float().mean().item()

      epoch_loss = running_loss / len(dataloader)
      epoch_acc = running_acc / len(dataloader)
      if is_train:
        train_accuracy_history.append(epoch_acc)
        train_loss_history.append(epoch_loss)
      else:
        val_accuracy_history.append(epoch_acc)
        val_loss_history.append(epoch_loss)
      print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc), flush=True)
    # One scheduler step per epoch, after both passes.
    scheduler.step()

  plt.figure(1)
  plt.title('train_accuracy')
  plt.plot(train_accuracy_history)

  plt.figure(2)
  plt.title('val_accuracy')
  plt.plot(val_accuracy_history)

  plt.figure(3)
  plt.title('train loss')
  plt.plot(train_loss_history, 'r')

  plt.figure(4)
  plt.title('val loss')
  plt.plot(val_loss_history, 'r')


In [None]:
# Train for 50 epochs; the StepLR scheduler (step_size 10, gamma 0.1) is
# stepped once per epoch inside train_model.
train_model(50)

In [0]:
# Test split read from the 'test' folder, converted with ToTensor and loaded
# without shuffling.
test_folder = torchvision.datasets.ImageFolder(
    root='test',
    transform=torchvision.transforms.ToTensor(),
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_folder,
    batch_size=batch_size,
    num_workers=batch_size,
)


In [0]:
# Test-set accuracy, computed as the mean of the per-batch accuracies.
model.eval()
accuracy = 0
# Inference only: without no_grad every forward pass would build an autograd
# graph that is never used.
with torch.no_grad():
  for x_batch, y_batch in test_dataloader:
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)
    # Call the module itself (not .forward) so registered hooks run.
    preds = model(x_batch)
    preds_classes = preds.argmax(dim=1)
    accuracy += (y_batch == preds_classes).float().mean()
accuracy = accuracy/len(test_dataloader)
print(accuracy)

In [0]:
# Persist only the learned parameters (state_dict), not the module object.
# NOTE(review): torch.save writes a binary serialized file, so the '.txt'
# extension is misleading; the name is kept because the upload cell reads
# this exact path.
torch.save(model.state_dict(), 'params.txt')

In [0]:
# Import PyDrive and associated libraries.
# This only needs to be done once in a notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# NOTE(review): this repeats the authentication done in the first cell;
# presumably kept so the upload still works if the earlier credentials are no
# longer valid once training has finished -- confirm before removing.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Create a Drive file titled 'params.txt' and upload the saved weights into it.
uploaded = drive.CreateFile({'title': 'params.txt'})
# Use the local file written by torch.save as the content to upload.
uploaded.SetContentFile('params.txt')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))
