In [6]:
import pandas as pd
import os
from tqdm import tqdm

root_path = r'C:\GraduateDesign\GD_20+_test\Datasets'

# Get all the image folder paths
all_paths = os.listdir(root_path)
all_paths.sort()

# Create a new DataFrame
data = pd.DataFrame(columns=['image_path', 'label'])
counter = 0

# Store all images in the DataFrame
for i, path in tqdm(enumerate(all_paths), total=len(all_paths)):
    folder_path = os.path.join(root_path, path)
    if os.path.isdir(folder_path):  # Ensure it's a directory
        all_images = os.listdir(folder_path)
        for image in all_images:
            if image.lower().endswith('.png'):  # Check if the file is a PNG image
                image_name = os.path.splitext(image)[0]
                data.loc[counter] = [os.path.join(folder_path, image), i]
                counter += 1

# Shuffle the dataset
data = data.sample(frac=1).reset_index(drop=True)

print(data.head(5))


100%|██████████| 146/146 [02:13<00:00,  1.09it/s]

                                          image_path  label
0  C:\GraduateDesign\GD_20+_test\Datasets\ps\1346...    108
1  C:\GraduateDesign\GD_20+_test\Datasets\il100\0...     23
2  C:\GraduateDesign\GD_20+_test\Datasets\pn\1410...     99
3  C:\GraduateDesign\GD_20+_test\Datasets\w31\130...    120
4  C:\GraduateDesign\GD_20+_test\Datasets\p23\100...     43





In [7]:
import torch
import pandas as pd
import cv2
import albumentations
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

class GTSRDataset(Dataset):
    def __init__(self, images, labels, tfms=None):
        self.images = images
        self.labels = labels

        # apply augmentations
        if tfms == 0: # if validating
            self.aug = albumentations.Compose([
                # 48x48 resizing is required
                albumentations.Resize(48, 48, always_apply=True),
            ])
        else: # if training
            self.aug = albumentations.Compose([
                # 48x48 resizing is required
                albumentations.Resize(48, 48, always_apply=True),
            ])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        image_path = f"{self.images[index]}"
        image = plt.imread(image_path)
        image = image / 255.
        image = self.aug(image=np.array(image))['image']
        image = np.transpose(image, (2, 0, 1))
        label = self.labels[index]

        return {
            'image': torch.tensor(image, dtype=torch.float),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Update the path to your dataset CSV file
df = data

X = df.image_path.values
y = df.label.values

(xtrain, xtest, ytrain, ytest) = train_test_split(X, y, test_size=0.10, random_state=42)
print(f"Training instances: {len(xtrain)}")
print(f"Validation instances: {len(xtest)}")

train_data = GTSRDataset(xtrain, ytrain, tfms=1)
val_data = GTSRDataset(xtest, ytest, tfms=0)

batch_size = 8
train_data_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    # num_workers=1,
)
val_data_loader = DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=False,
    # num_workers=1,
)

# visualization
visualize = False
if visualize:
    for i in range(1):
        sign_df = data
        sample = train_data[i]
        image = sample['image']
        label = sample['label']
        image = np.array(np.transpose(image, (1, 2, 0)))
        plt.imshow(image)
        plt.title(str(sign_df.loc[int(label), 'SignName']))
        plt.show()

Training instances: 74516
Validation instances: 8280


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim

from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.autograd import Variable

nclasses = 145
DEBUG = False


def gaussian_filter(kernel_shape):
    x = np.zeros(kernel_shape, dtype='float32')

    def gauss(x, y, sigma=2.0):
        Z = 2 * np.pi * sigma ** 2
        return 1. / Z * np.exp(-(x ** 2 + y ** 2) / (2. * sigma ** 2))

    mid = np.floor(kernel_shape[-1] / 2.)
    for kernel_idx in range(0, kernel_shape[1]):
        for i in range(0, kernel_shape[2]):
            for j in range(0, kernel_shape[3]):
                x[0, kernel_idx, i, j] = gauss(i - mid, j - mid)
    return x / np.sum(x)


def LCN(image_tensor, gaussian, mid):
    filtered = gaussian(image_tensor)
    centered_image = image_tensor - filtered[:, :, mid:-mid, mid:-mid]
    sum_sqr_XX = gaussian(centered_image.pow(2))
    denom = sum_sqr_XX[:, :, mid:-mid, mid:-mid].sqrt()
    per_img_mean = denom.mean()
    divisor = torch.max(per_img_mean, denom)
    divisor = np.maximum(divisor.detach().cpu().numpy(), 1e-4)
    new_image = centered_image.detach().cpu() / divisor
    if DEBUG:  # visualize what the network sees
        plt.imshow(np.transpose(filtered[0].detach().cpu().numpy(),
                                (1, 2, 0)).reshape(filtered.shape[2], filtered.shape[3]))
        plt.title('Gaussian')
        plt.show()
        print('GAUSSIAN', filtered)
        print('LCN', new_image)
        plt.imshow(np.transpose(new_image[0, :3].detach().cpu().numpy(),
                                (1, 2, 0)))
        plt.title('LCN')
        plt.show()
    return new_image.cuda()


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 200, kernel_size=7, stride=1, padding=2)
        self.maxpool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.gfilter1 = torch.Tensor(gaussian_filter((1, 200, 9, 9)))
        self.gaussian1 = nn.Conv2d(in_channels=200, out_channels=200,
                                   kernel_size=9, padding=8, bias=False)
        self.gaussian1.weight.data = self.gfilter1
        self.gaussian1.weight.requires_grad = False
        self.conv2 = nn.Conv2d(200, 250, kernel_size=4, stride=1, padding=2)
        self.maxpool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.gfilter2 = torch.Tensor(gaussian_filter((1, 250, 9, 9)))
        self.gaussian2 = nn.Conv2d(in_channels=250, out_channels=250,
                                   kernel_size=9, padding=8, bias=False)
        self.gaussian2.weight.data = self.gfilter2
        self.gaussian2.weight.requires_grad = False
        self.conv3 = nn.Conv2d(250, 350, kernel_size=4, stride=1, padding=2)
        self.maxpool3 = nn.MaxPool2d(2, stride=2)
        self.gfilter3 = torch.Tensor(gaussian_filter((1, 350, 9, 9)))
        self.gaussian3 = nn.Conv2d(in_channels=350, out_channels=350,
                                   kernel_size=9, padding=8, bias=False)
        self.gaussian3.weight.data = self.gfilter3
        self.gaussian3.weight.requires_grad = False
        self.FC1 = nn.Linear(12600, 400)
        self.FC2 = nn.Linear(400, nclasses)

        # spatial attention model, spatial transformers layers
        self.st1 = nn.Sequential(
            nn.MaxPool2d(2, stride=2, ceil_mode=True),
            nn.Conv2d(3, 250, kernel_size=5, stride=1, padding=2),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2, ceil_mode=True),
            nn.Conv2d(250, 250, kernel_size=5, stride=1, padding=2),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2, ceil_mode=True)
        )
        self.FC1_ = nn.Sequential(
            nn.Linear(9000, 250),
            nn.ReLU(True),
            nn.Linear(250, 6)
        )
        self.st2 = nn.Sequential(
            nn.MaxPool2d(2, stride=2, ceil_mode=False),
            nn.Conv2d(200, 150, kernel_size=5, stride=1, padding=2),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2, ceil_mode=False),
            nn.Conv2d(150, 200, kernel_size=5, stride=1, padding=2),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2, ceil_mode=False)
        )
        self.FC2_ = nn.Sequential(
            nn.Linear(800, 300),
            nn.ReLU(True),
            nn.Linear(300, 6)
        )
        self.st3 = nn.Sequential(
            nn.MaxPool2d(2, stride=2, ceil_mode=False),
            nn.Conv2d(250, 150, kernel_size=5, stride=1, padding=2),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2, ceil_mode=False),
            nn.Conv2d(150, 200, kernel_size=5, stride=1, padding=2),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2, ceil_mode=False)
        )
        self.FC3_ = nn.Sequential(
            nn.Linear(200, 300),
            nn.ReLU(True),
            nn.Linear(300, 6)
        )
        self.FC1_[2].weight.data.zero_()
        self.FC1_[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))
        self.FC2_[2].weight.data.zero_()
        self.FC2_[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))
        self.FC3_[2].weight.data.zero_()
        self.FC3_[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def forward(self, x):
        # first layer is the Spatial Transformer Layer
        # ST-1
        h1 = self.st1(x)
        h1 = h1.view(-1, 9000)
        h1 = self.FC1_(h1)
        theta1 = h1.view(-1, 2, 3)
        grid1 = F.affine_grid(theta1, x.size())
        x = F.grid_sample(x, grid1)

        # convolution, Relu and Maxpool , SET #1
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)

        # paper Says to apply LCN here, but LCN Layer Before Convolution Worked for me better
        # ST-2
        h2 = self.st2(x)
        h2 = h2.view(-1, 800)
        h2 = self.FC2_(h2)
        theta2 = h2.view(-1, 2, 3)
        grid2 = F.affine_grid(theta2, x.size())
        x = F.grid_sample(x, grid2)

        # LCN Layer : Based on paper implemntation from the github and Yann Lecun Paper 2009
        mid1 = int(np.floor(self.gfilter1.shape[2] / 2.))
        x = LCN(x, self.gaussian1, mid1)

        # convolution, Relu and Maxpool , SET #2
        x = F.relu(self.conv2(x))
        x = self.maxpool2(x)

        # ST-2
        h3 = self.st3(x)
        h3 = h3.view(-1, 200)
        h3 = self.FC3_(h3)
        theta3 = h3.view(-1, 2, 3)
        grid3 = F.affine_grid(theta3, x.size())
        x = F.grid_sample(x, grid3)

        # LCN Layer : 2
        mid2 = int(np.floor(self.gfilter2.shape[2] / 2.))
        x = LCN(x, self.gaussian2, mid2)

        # convolution, Relu and Maxpool , SET #3
        x = F.relu(self.conv3(x))
        x = self.maxpool3(x)

        # LCN Layer : 3
        mid3 = int(np.floor(self.gfilter3.shape[2] / 2.))
        x = LCN(x, self.gaussian3, mid3)

        # dimensions in accordance to paper
        y = x.view(-1, 12600)
        y = F.relu(self.FC1(y))
        y = self.FC2(y)
        return F.log_softmax(y, dim=-1)

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import matplotlib
import numpy as np

matplotlib.style.use('ggplot')

# learning parameters
epochs = 20
lr = 0.001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# training function
def fit(model, dataloader, optimizer, criterion, train_data):
    print('Training')
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0
    for i, data in tqdm(enumerate(dataloader), total=int(len(train_data) / dataloader.batch_size)):
        image, target = data['image'].to(device), data['label'].to(device)
        plt.show()
        optimizer.zero_grad()
        outputs = model(image)
        loss = criterion(outputs, target)
        train_running_loss += loss.item()
        _, preds = torch.max(outputs.data, 1)
        train_running_correct += (preds == target).sum().item()
        loss.backward()
        optimizer.step()

    train_loss = train_running_loss / len(dataloader.dataset)
    train_accuracy = 100. * train_running_correct / len(dataloader.dataset)
    return train_loss, train_accuracy


# validation function
def validate(model, dataloader, optimizer, criterion, val_data):
    print('Validating')
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0
    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader), total=int(len(val_data) / dataloader.batch_size)):
            image, target = data['image'].to(device), data['label'].to(device)
            outputs = model(image)
            loss = criterion(outputs, target)
            val_running_loss += loss.item()
            _, preds = torch.max(outputs.data, 1)
            val_running_correct += (preds == target).sum().item()

        val_loss = val_running_loss / len(dataloader.dataset)
        val_accuracy = 100. * (val_running_correct / len(dataloader.dataset))
        return val_loss, val_accuracy


# initialize the model
model = Net()
model = model.to(device)
# optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999),
                       eps=1e-8, weight_decay=0.0005)
criterion = nn.CrossEntropyLoss()

train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []
for epoch in range(epochs):
    print(f"Epoch {epoch + 1} of {epochs}")
    train_epoch_loss, train_epoch_accuracy = fit(model, train_data_loader,
                                                 optimizer, criterion,
                                                 train_data)
    val_epoch_loss, val_epoch_accuracy = validate(model, val_data_loader,
                                                  optimizer, criterion,
                                                  val_data)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
    print(f"Train Loss: {train_epoch_loss:.4f}, Train Acc: {train_epoch_accuracy:.2f}")
    print(f'Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_accuracy:.2f}')

# accuracy plots
plt.figure(figsize=(10, 7))
plt.plot(train_accuracy, color='green', label='train accuracy')
plt.plot(val_accuracy, color='blue', label='validataion accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('../outputs/accuracy.png')
plt.show()

# loss plots
plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.plot(val_loss, color='red', label='validataion loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('../outputs/loss.png')
plt.show()

# save model checkpoint
torch.save({
    'epoch': epochs,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': criterion,
}, '../outputs/model.pth')

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
