In [15]:
import pandas as pd
from PIL import Image
import torch
from torch import cuda
import torchvision.transforms as transforms
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import os

In [16]:
#verifying that cuda is available
#torch.__version__
cuda.is_available()

True

In [17]:
#creating a custom pytorch dataset
rvlcdip_path = "./../data/raw/selected"

class CustomDataset(Dataset):
    def __init__(self, img_dir, transform=None, target_transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.img_labels = self._get_img_labels()

    def _get_img_labels(self):
        img_labels = []
        for file in os.listdir(self.img_dir):
            if file.endswith(".txt"):
                with open(os.path.join(self.img_dir, file), 'r') as f:
                    label = f.read().strip()
                img_labels.append({"file": file.replace(".txt", ".tif"), "label": label})
        return pd.DataFrame(img_labels)

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
    
data_transform = transforms.Compose([transforms.Resize((1000, 1000)),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5), (0.5))])

def target_transform(label, num_classes=16):
    one_hot = torch.zeros(num_classes)
    one_hot[int(label)] = 1
    return one_hot
    
dataset = CustomDataset(img_dir=rvlcdip_path, transform=data_transform, target_transform=target_transform)

In [18]:
#creating a custom pytorch dataloader
splits = random_split(dataset, [0.8, 0.2])

train_loader = DataLoader(splits[0], batch_size=4, shuffle=True)
test_loader = DataLoader(splits[1], batch_size=1, shuffle=False)

Model

In [19]:
#Defining a simple cnn model
import torch.nn as nn
import torch.nn.functional as F

#The formula for calculating the output size of a convolutional layer is:
#[(W−K+2P)/S]+1.

#    W is the input volume - 1000
#    K is the Kernel size - 5
#    P is the padding - 0
#    S is the stride - 1


class FirstCNN(nn.Module):
    def __init__(self):
        super(FirstCNN, self).__init__()
        #1,1000,1000
        self.conv1 = nn.Conv2d(1, 16, 5)
        #16,996,996
        self.pool1 = nn.MaxPool2d(2, 2)
        #16,498,498
        self.conv2 = nn.Conv2d(16, 32, 5)
        #32,494,494
        self.pool2 = nn.MaxPool2d(2, 2)
        #32,247,247
        self.fc1 = nn.Linear(32 * 247 * 247, 120)
        #120
        self.fc2 = nn.Linear(120, 84)
        #84
        self.fc3 = nn.Linear(84, 16)
        #16

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 32 * 247 * 247)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
#initializing the model
model = FirstCNN()

#visualizing the model
print(model)

FirstCNN(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1952288, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=16, bias=True)
)


In [20]:
#compiling the model
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
#optimizer = optim.Adam(model.parameters(), lr=0.001)

In [21]:
#training the model
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 5 == 4:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 5))
            running_loss = 0.0

print('Entrainement terminé')

[1,     5] loss: 2.819
[1,    10] loss: 2.776
[1,    15] loss: 2.768
[1,    20] loss: 2.851
[1,    25] loss: 2.795
[1,    30] loss: 2.776
[1,    35] loss: 2.795
[1,    40] loss: 2.719
[1,    45] loss: 2.715
[1,    50] loss: 2.756
[1,    55] loss: 2.786
[1,    60] loss: 2.964
[1,    65] loss: 2.782
[1,    70] loss: 2.752
[1,    75] loss: 2.778
[1,    80] loss: 2.701
[1,    85] loss: 2.797
[1,    90] loss: 2.750
[1,    95] loss: 2.702
[1,   100] loss: 2.774
[1,   105] loss: 2.795
[1,   110] loss: 2.757
[1,   115] loss: 2.740
[1,   120] loss: 2.669
[1,   125] loss: 2.698
[1,   130] loss: 2.766
[1,   135] loss: 2.762
[1,   140] loss: 2.690
[1,   145] loss: 2.689
[1,   150] loss: 2.957
[1,   155] loss: 2.698
[1,   160] loss: 2.739
[1,   165] loss: 2.728
[1,   170] loss: 2.714
[1,   175] loss: 2.640
[1,   180] loss: 2.661
[1,   185] loss: 2.674
[1,   190] loss: 2.526
[1,   195] loss: 2.769
[2,     5] loss: 2.542
[2,    10] loss: 2.676
[2,    15] loss: 2.525
[2,    20] loss: 2.615
[2,    25] 

In [22]:
#predicting with the model
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Precision sur le jeu de test: %d %%' % (
    100 * correct / total))

Accuracy of the network on the test images: 60 %
