In [1]:
! pip install datasets



In [2]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader, Subset, random_split
import torchvision.transforms as transforms
import os
import numpy as np
import cv2
import datasets
import pandas as pd
from datasets import load_dataset, DatasetDict
import sklearn
from sklearn.preprocessing import LabelEncoder

In [3]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [4]:
# from google.colab import drive
# drive.mount('/content/drive')

In [5]:
img_path = "/Users/satya/Desktop/take-home/FontClassifier/data"
font_classes = os.listdir(img_path)
# Ignore .DS_store dir
font_classes = font_classes[1:]
font_classes

['I Love Glitter',
 '.DS_Store',
 'Holligate Signature',
 'Canterbury',
 'AguafinaScript',
 'James Fajardo',
 'GreatVibes',
 'Allura',
 'alsscrp',
 'OpenSans']

In [6]:
width, height = 500, 300

# Resize image
def processImage(image_path):
  orginal_img = cv2.imread(image_path)
  # img_data = np.array(orginal_img)

  # Upscale or downscale based on dimension
  if orginal_img.shape[1] < width or orginal_img.shape[0] < height:
    resize_img = cv2.resize(orginal_img, (width, height), interpolation= cv2.INTER_CUBIC)
  else:
    resize_img = cv2.resize(orginal_img, (width, height), interpolation= cv2.INTER_AREA)

  return resize_img


In [7]:
images = []
labels = []

In [8]:
for font_class in font_classes:
  # Get all the font images for specific font
  font_dir = os.path.join(img_path, font_class)
  
  if os.path.isdir(font_dir):  
    for image_file in os.listdir(font_dir):
        if not image_file.startswith('.'):  
            image_path = os.path.join(font_dir, image_file)
            if os.path.isfile(image_path):
                processed_image = processImage(image_path)
                images.append(processed_image)
                labels.append(font_class)
      



encoder = LabelEncoder()
labels = encoder.fit_transform(labels)

data = {'image': images, 'labels': labels}
df = pd.DataFrame(data)

In [9]:
df

Unnamed: 0,image,labels
0,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",5
1,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",5
2,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",5
3,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",5
4,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",5
...,...,...
1609,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",7
1610,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",7
1611,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",7
1612,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",7


In [10]:
df.image[0].shape

(300, 500, 3)

In [11]:
new_df = df.copy()

In [12]:
# Convert into CxHxW format from HxWxC
for index, row in new_df.iterrows():

    image_tensor = torch.tensor(row['image'])
    image_tensor_permuted = image_tensor.permute(2, 0, 1)

    new_df.at[index, 'image'] = image_tensor_permuted.numpy()

In [13]:
new_df.image[0].shape

(3, 300, 500)

In [14]:
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx]['image']
        label = self.data.iloc[idx]['labels']
        return image, label

In [15]:
custom_dataset = CustomDataset(new_df)

In [16]:
class ConvNetwork(nn.Module):
  def __init__(self, num_classes = 10):
    # Random seed for PyTorch
    torch.manual_seed(42)
    
    # Inheritance
    super(ConvNetwork, self).__init__()
    self.layer_1 = nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2)
    )
    self.layer_2 = nn.Sequential(
        nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2)
    )
    self.fl = nn.Flatten()
    self.layer_fc = nn.Linear(32 * 75 * 125, num_classes)
    # self.layer_fc = nn.Linear(16 * 75 * 125, num_classes)
    self.dropout = nn.Dropout(0.5)

  def forward(self, input):
    output = self.layer_1(input)
    output = self.layer_2(output)
    output = self.fl(output)
    final = self.layer_fc(output)
    return final

In [17]:
# Model
model = ConvNetwork(10).to(device)

In [18]:
# Parameters
epochs = 5
batch_size = 16
learning_rate = 0.01

In [19]:
all_data_loader = torch.utils.data.DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)

In [20]:
# Split into train test validation
total_size = new_df.shape[0]
train_size = int(0.7 * total_size)
test_size = int(0.15 * total_size)
validation_size = total_size - train_size - test_size

In [21]:
train_set, test_set, validation_set = torch.utils.data.random_split(all_data_loader.dataset, [train_size, test_size, validation_size])

In [22]:
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)

In [23]:
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [24]:
steps = new_df.shape[0]

for epoch in range(epochs):
  model.train()
  train_loss = 0

  for image, label in train_loader:
    image = image.to(torch.float32)
    # print(image.size())
    output = model(image)
    loss = criterion(output, label)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # loss.item() -> avg loss per batch, so scale the loss
    train_loss += loss.item() * image.size(0)

  train_loss = train_loss / steps

  model.eval()
  val_loss = 0

  with torch.no_grad():
    for image, label in validation_loader:
      image = image.to(torch.float32)
      output = model(image)

      loss = criterion(output, label)
      val_loss += loss.item() * image.size(0)

  val_loss = val_loss / steps

  print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

Epoch [1/5], Train Loss: 197.4840, Validation Loss: 8.5515
Epoch [2/5], Train Loss: 9.8891, Validation Loss: 3.3822
Epoch [3/5], Train Loss: 3.7087, Validation Loss: 1.1341
Epoch [4/5], Train Loss: 1.4260, Validation Loss: 1.2852
Epoch [5/5], Train Loss: 0.4785, Validation Loss: 1.2080


In [25]:
len(test_loader.dataset)

242

In [26]:
# Test mode
model.eval()
preds = []

with torch.no_grad():
  correct_pred = 0
  total_pred = 0

  for image, label in test_loader:
    image = image.to(torch.float32)
    output = model(image)

    values, prediction = torch.max(output.data, 1)
    preds.append(prediction)
    # Add the batch size to total_pred
    total_pred += label.size(0)
    correct_pred += (prediction == label).sum().item()

  print(f'Accuracy -> {100*correct_pred / total_pred}%')


Accuracy -> 85.53719008264463%


In [27]:
preds

[tensor([8, 4, 7, 5, 6, 1, 5, 0, 6, 7, 7, 0, 6, 8, 4, 1]),
 tensor([5, 6, 7, 1, 5, 1, 4, 3, 0, 2, 3, 3, 2, 2, 2, 7]),
 tensor([5, 7, 0, 2, 4, 5, 5, 1, 7, 6, 1, 3, 4, 2, 0, 4]),
 tensor([5, 6, 5, 6, 6, 3, 2, 4, 0, 3, 1, 2, 4, 2, 6, 5]),
 tensor([2, 0, 6, 2, 3, 0, 4, 7, 2, 0, 1, 3, 0, 8, 6, 8]),
 tensor([7, 2, 8, 7, 2, 4, 0, 2, 5, 7, 6, 8, 1, 5, 2, 5]),
 tensor([7, 4, 5, 3, 7, 6, 6, 6, 7, 4, 6, 1, 1, 0, 5, 0]),
 tensor([3, 0, 2, 4, 5, 2, 1, 7, 4, 8, 7, 8, 4, 2, 3, 1]),
 tensor([7, 0, 5, 7, 8, 6, 3, 1, 6, 3, 8, 0, 7, 4, 3, 4]),
 tensor([2, 7, 1, 2, 3, 4, 5, 2, 2, 0, 6, 5, 0, 5, 2, 4]),
 tensor([5, 4, 4, 4, 8, 7, 2, 7, 4, 1, 2, 8, 4, 0, 0, 7]),
 tensor([8, 0, 6, 4, 3, 1, 8, 5, 0, 4, 2, 0, 7, 2, 8, 6]),
 tensor([5, 0, 4, 0, 5, 0, 4, 5, 8, 5, 4, 0, 2, 0, 4, 2]),
 tensor([4, 0, 2, 1, 2, 1, 1, 8, 3, 3, 1, 2, 2, 3, 6, 4]),
 tensor([1, 8, 7, 3, 8, 1, 1, 0, 3, 7, 2, 8, 2, 6, 7, 8]),
 tensor([4, 4])]

In [29]:
# save model
torch.save(model.state_dict(), 'model checkpoint/cnnmodelbetter.ckpt')