In [2]:
! pip install datasets



In [3]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader, Subset, random_split
import torchvision.transforms as transforms
import os
import numpy as np
import cv2
import datasets
import pandas as pd
from datasets import load_dataset, DatasetDict
import sklearn
from sklearn.preprocessing import LabelEncoder

In [4]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
img_path = "drive/MyDrive/data"
font_classes = os.listdir(img_path)
# Ignore .DS_store dir
font_classes = font_classes[1:]
font_classes

['I Love Glitter',
 'alsscrp',
 'AlexBrush',
 'GreatVibes',
 'AguafinaScript',
 'OpenSans',
 'Holligate Signature',
 'Allura',
 'Canterbury',
 'James Fajardo']

In [6]:
width, height = 500, 300

# Resize image
def processImage(image_path):
  orginal_img = cv2.imread(image_path)
  # img_data = np.array(orginal_img)

  # Upscale or downscale based on dimension
  if orginal_img.shape[1] < width or orginal_img.shape[0] < height:
    resize_img = cv2.resize(orginal_img, (width, height), interpolation= cv2.INTER_CUBIC)
  else:
    resize_img = cv2.resize(orginal_img, (width, height), interpolation= cv2.INTER_AREA)

  return resize_img


In [7]:
images = []
labels = []

In [8]:
for font_class in font_classes:
  # Get all the font images for specific font
  font_dir = os.path.join(img_path, font_class)

  for image_file in os.listdir(font_dir):
      image_path = os.path.join(font_dir, image_file)
      images.append(processImage(image_path))
      labels.append(font_class)



encoder = LabelEncoder()
labels = encoder.fit_transform(labels)

data = {'image': images, 'labels': labels}
df = pd.DataFrame(data)

In [22]:
df

Unnamed: 0,image,labels
0,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",I Love Glitter
1,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",I Love Glitter
2,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",I Love Glitter
3,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",I Love Glitter
4,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",I Love Glitter
...,...,...
1787,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",James Fajardo
1788,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",James Fajardo
1789,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",James Fajardo
1790,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",James Fajardo


In [9]:
df.image[0].shape

(300, 500, 3)

In [10]:
new_df = df.copy()

In [11]:
# Convert into CxHxW format from HxWxC
for index, row in new_df.iterrows():

    image_tensor = torch.tensor(row['image'])
    image_tensor_permuted = image_tensor.permute(2, 0, 1)

    new_df.at[index, 'image'] = image_tensor_permuted.numpy()

In [12]:
new_df.image[0].shape

(3, 300, 500)

In [13]:
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx]['image']
        label = self.data.iloc[idx]['labels']
        return image, label

In [14]:
custom_dataset = CustomDataset(new_df)

In [15]:
class ConvNetwork(nn.Module):
  def __init__(self, num_classes = 10):
    # Inheritance
    super(ConvNetwork, self).__init__()
    self.layer_1 = nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2)
    )
    self.layer_2 = nn.Sequential(
        nn.Conv2d(in_channels=16, out_channels=16, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2)
    )
    self.fl = nn.Flatten()
    # self.fc = nn.Linear(32 * 75 * 125, num_classes)
    self.layer_fc = nn.Linear(16 * 75 * 125, num_classes)

  def forward(self, input):
    output = self.layer_1(input)
    output = self.layer_2(output)
    output = self.fl(output)
    final = self.layer_fc(output)
    return final

In [16]:
# Model
model = ConvNetwork(10).to(device)

In [17]:
# Parameters
epochs = 5
batch_size = 16
learning_rate = 0.01

In [18]:
all_data_loader = torch.utils.data.DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)

In [19]:
# Split into train test validation
total_size = new_df.shape[0]
train_size = int(0.7 * total_size)
test_size = int(0.15 * total_size)
validation_size = total_size - train_size - test_size

In [20]:
train_set, test_set, validation_set = torch.utils.data.random_split(all_data_loader.dataset, [train_size, test_size, validation_size])

In [21]:
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)

In [22]:
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [23]:
steps = new_df.shape[0]

for epoch in range(epochs):
  model.train()
  train_loss = 0

  for image, label in train_loader:
    image = image.to(torch.float32)
    # print(image.size())
    output = model(image)
    loss = criterion(output, label)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # loss.item() -> avg loss per batch, so scale the loss
    train_loss += loss.item() * image.size(0)

  train_loss = train_loss / steps

  model.eval()
  val_loss = 0

  with torch.no_grad():
    for image, label in validation_loader:
      image = image.to(torch.float32)
      output = model(image)

      loss = criterion(output, label)
      val_loss += loss.item() * image.size(0)

  val_loss = val_loss / steps

  print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

Epoch [1/5], Train Loss: 67.5880, Validation Loss: 4.4442
Epoch [2/5], Train Loss: 3.0348, Validation Loss: 1.4925
Epoch [3/5], Train Loss: 1.1612, Validation Loss: 1.0080
Epoch [4/5], Train Loss: 0.6842, Validation Loss: 1.1706
Epoch [5/5], Train Loss: 0.5181, Validation Loss: 1.1498


In [35]:
len(test_loader.dataset)

268

In [25]:
# Test mode
model.eval()

with torch.no_grad():
  correct_pred = 0
  total_pred = 0

  for image, label in test_loader:
    image = image.to(torch.float32)
    output = model(image)

    values, prediction = torch.max(output.data, 1)
    total_pred += label.size(0)
    correct_pred += (prediction == label).sum().item()

  print(f'Accuracy -> {100*correct_pred / total_pred}%')


Accuracy -> 82.83582089552239%


In [36]:
# save model
torch.save(model.state_dict(), 'cnnmodel.ckpt')