In [34]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18, ResNet18_Weights

# Option 1: Simple CNN from scratch
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = x.view(-1, 64 * 4 * 4)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

# Option 2: Transfer Learning with ResNet
class ResNetTransfer(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNetTransfer, self).__init__()
        # Load pretrained ResNet18
        self.resnet = resnet18(weights=ResNet18_Weights.DEFAULT)
        
        # Freeze all layers
        for param in self.resnet.parameters():
            param.requires_grad = False
            
        # Replace the final fully connected layer
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_features, num_classes)

    def forward(self, x):
        return self.resnet(x)

# Example usage:
def main():
    # Create instances of both models
    simple_cnn = SimpleCNN(num_classes=3)
    resnet_transfer = ResNetTransfer(num_classes=3)
    
    # Print model summaries
    print("Simple CNN Architecture:")
    print(simple_cnn)
    print("\nResNet Transfer Learning Architecture:")
    print(resnet_transfer)

if __name__ == "__main__":
    main()

Simple CNN Architecture:
SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=3, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)

ResNet Transfer Learning Architecture:
ResNetTransfer(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kerne

In [35]:
import pandas as pd
import os

In [36]:
# get images from document_classification/
# set labels as the folder name
# create a dataframe
df = pd.DataFrame(columns=['image', 'label'])
data = []
# for each folder in the document_classification folder
for folder in os.listdir('document_classification'):
    # for each image in the folder
    for image in os.listdir(f'document_classification/{folder}'):
        # append the image and label to the list
        data.append({'image': f'document_classification/{folder}/{image}', 'label': folder})
# concatenate the list to the dataframe
df = pd.concat([df, pd.DataFrame(data)], ignore_index=True)
        

In [37]:
df.head()
import matplotlib.pyplot as plt

In [38]:
# in the image column, replace the path with the actual image
df['image'] = df['image'].apply(lambda x: plt.imread(x))


In [39]:
# suffle the dataframe
df = df.sample(frac=1).reset_index(drop=True)


In [40]:
df.head()

Unnamed: 0,image,label
0,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",resume
1,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",scientific_publication
2,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",resume
3,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",email
4,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",email


In [41]:
# transform the images to tensors
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((32, 32)),  # Resize to match the input size expected by SimpleCNN
    transforms.Lambda(lambda x: x.convert('RGB')),
    transforms.ToTensor()
])


In [42]:
# create a dataset
class DocumentDataset(torch.utils.data.Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        self.label_to_idx = {label: idx for idx, label in enumerate(df['label'].unique())}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image = self.df['image'][idx]
        label = self.df['label'][idx]
        if self.transform:
            image = self.transform(image)
        label = self.label_to_idx[label]
        return image, label

In [43]:
# create a dataloader
dataset = DocumentDataset(df, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

# get a batch of data
images, labels = next(iter(dataloader))

# print the shape of the images and labels
print(images.shape)
print(labels)

torch.Size([32, 3, 32, 32])
tensor([1, 1, 0, 1, 1, 2, 1, 2, 1, 0, 2, 2, 1, 1, 0, 0, 1, 1, 1, 2, 1, 0, 2, 1,
        2, 0, 0, 1, 0, 0, 0, 1])


In [None]:
# create a model
model = SimpleCNN(num_classes=3)
epochs = 30
# define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train the model
for epoch in range(epochs):
    for i, (images, labels) in enumerate(dataloader):
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [1/30], Loss: 1.1144
Epoch [2/30], Loss: 1.1031
Epoch [3/30], Loss: 1.0969
Epoch [4/30], Loss: 1.1000
Epoch [5/30], Loss: 1.0852
Epoch [6/30], Loss: 1.0978
Epoch [7/30], Loss: 1.0957
Epoch [8/30], Loss: 1.1208
Epoch [9/30], Loss: 1.0918
Epoch [10/30], Loss: 1.1046
Epoch [11/30], Loss: 1.0294
Epoch [12/30], Loss: 1.0230
Epoch [13/30], Loss: 1.0750
Epoch [14/30], Loss: 1.0731
Epoch [15/30], Loss: 1.0190
Epoch [16/30], Loss: 1.0192
Epoch [17/30], Loss: 1.0184
Epoch [18/30], Loss: 0.9697
Epoch [19/30], Loss: 0.9320
Epoch [20/30], Loss: 1.1083
Epoch [21/30], Loss: 0.8230
Epoch [22/30], Loss: 0.5996
Epoch [23/30], Loss: 1.0095
Epoch [24/30], Loss: 0.7295
Epoch [25/30], Loss: 0.8551
Epoch [26/30], Loss: 1.0753
Epoch [27/30], Loss: 0.8491
Epoch [28/30], Loss: 0.9050


In [None]:
# create a model
model = ResNetTransfer(num_classes=3)
epochs = 30
# define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train the model
for epoch in range(epochs):
    for i, (images, labels) in enumerate(dataloader):
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')


Epoch [1/30], Loss: 0.9255
Epoch [2/30], Loss: 1.1241
Epoch [3/30], Loss: 0.6670
Epoch [4/30], Loss: 0.9026
Epoch [5/30], Loss: 1.3434
Epoch [6/30], Loss: 0.3486
Epoch [7/30], Loss: 0.6882
Epoch [8/30], Loss: 1.0891
Epoch [9/30], Loss: 0.4234
Epoch [10/30], Loss: 1.0342
Epoch [11/30], Loss: 1.7357
Epoch [12/30], Loss: 1.0331
Epoch [13/30], Loss: 0.3549
Epoch [14/30], Loss: 0.7863
Epoch [15/30], Loss: 0.5722
Epoch [16/30], Loss: 1.4541
Epoch [17/30], Loss: 0.4736
Epoch [18/30], Loss: 1.1087
Epoch [19/30], Loss: 0.3596
Epoch [20/30], Loss: 0.2256
Epoch [21/30], Loss: 1.1065
Epoch [22/30], Loss: 0.2877
Epoch [23/30], Loss: 0.3807
Epoch [24/30], Loss: 0.6168
Epoch [25/30], Loss: 0.1290
Epoch [26/30], Loss: 0.6934
Epoch [27/30], Loss: 1.0047
Epoch [28/30], Loss: 0.3987
Epoch [29/30], Loss: 0.3024
Epoch [30/30], Loss: 0.1860
