In [13]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

dataset = datasets.ImageFolder("tiles_dataset", transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

print("Classes found:", dataset.classes)


Classes found: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']


In [30]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import torch.nn.functional as F


In [None]:
#LAYER 1 CONV2D

In [20]:
img = Image.open("tiles_dataset/A/Atile.png").convert("L")

transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),  
])
x = transform(img).unsqueeze(0)  

print("Input shape:", x.shape)


Input shape: torch.Size([1, 1, 64, 64])


In [21]:
conv = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1)

output = conv(x)

print("Output shape:", output.shape)


Output shape: torch.Size([1, 4, 64, 64])


In [25]:
#LAYER 2 MaxPool2d

In [27]:
pool = nn.MaxPool2d(kernel_size=2, stride=2)

pooled = pool(output)
print("Before pooling:", output.shape)
print("After pooling: ", pooled.shape)


Before pooling: torch.Size([1, 4, 64, 64])
After pooling:  torch.Size([1, 4, 32, 32])


In [None]:
#Layer 3 Classifer Header

In [29]:
print(x.shape)

x = torch.flatten(x, 1)

print(x.shape)

torch.Size([1, 1, 64, 64])
torch.Size([1, 4096])


In [33]:
class TileCNN(nn.Module):
    def __init__(self):
        super(TileCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(8192, 128)
        self.fc2 = nn.Linear(128, 24)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [34]:
model = TileCNN()
sample = torch.randn(1, 1, 64, 64)  # 1 grayscale tile
output = model(sample)
print(output.shape)


torch.Size([1, 24])


In [39]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 50

In [44]:
for epoch in range(epochs):
    running_loss = 0.0

    for images, labels in dataloader:
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss:.4f}")


Epoch 1/50, Loss: 0.0008
Epoch 2/50, Loss: 0.0008
Epoch 3/50, Loss: 0.0009
Epoch 4/50, Loss: 0.0009
Epoch 5/50, Loss: 0.0008
Epoch 6/50, Loss: 0.0008
Epoch 7/50, Loss: 0.0008
Epoch 8/50, Loss: 0.0007
Epoch 9/50, Loss: 0.0008
Epoch 10/50, Loss: 0.0007
Epoch 11/50, Loss: 0.0007
Epoch 12/50, Loss: 0.0008
Epoch 13/50, Loss: 0.0007
Epoch 14/50, Loss: 0.0008
Epoch 15/50, Loss: 0.0008
Epoch 16/50, Loss: 0.0007
Epoch 17/50, Loss: 0.0007
Epoch 18/50, Loss: 0.0007
Epoch 19/50, Loss: 0.0007
Epoch 20/50, Loss: 0.0007
Epoch 21/50, Loss: 0.0007
Epoch 22/50, Loss: 0.0007
Epoch 23/50, Loss: 0.0007
Epoch 24/50, Loss: 0.0007
Epoch 25/50, Loss: 0.0007
Epoch 26/50, Loss: 0.0007
Epoch 27/50, Loss: 0.0007
Epoch 28/50, Loss: 0.0006
Epoch 29/50, Loss: 0.0007
Epoch 30/50, Loss: 0.0007
Epoch 31/50, Loss: 0.0006
Epoch 32/50, Loss: 0.0006
Epoch 33/50, Loss: 0.0006
Epoch 34/50, Loss: 0.0006
Epoch 35/50, Loss: 0.0006
Epoch 36/50, Loss: 0.0006
Epoch 37/50, Loss: 0.0006
Epoch 38/50, Loss: 0.0006
Epoch 39/50, Loss: 0.

In [46]:
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

image = Image.open("tiles_dataset/C/Ctile.png")
image_tensor = transform(image).unsqueeze(0)

model.eval()
with torch.no_grad():
    output = model(image_tensor)
    predicted_class = torch.argmax(output, dim=1).item()
idx_to_class = dataset.classes  
predicted_letter = idx_to_class[predicted_class]
print("Predicted:", predicted_letter)



Predicted: C


In [49]:
#if reloaded 

model = TileCNN()  
model.load_state_dict(torch.load("tile_cnn.pth"))
model.eval()


TileCNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=8192, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=24, bias=True)
)