In [270]:

import torch.nn as nn
import torch.optim as optim
import torch
import numpy as np
%matplotlib inline
from os.path import join
import matplotlib.pyplot as plt
from billedeloader import MnistDataloader
import math
import random
from graphviz import Digraph
import pathlib
from PIL import Image
import torchvision
import time
from torch.utils.data import TensorDataset, DataLoader

In [271]:
import torchvision.transforms as transforms, torchvision, matplotlib.pyplot as plt
trainset = torchvision.datasets.CIFAR10(root='./data', 
                                        train=True, 
                                        download=True,
                                        transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]))
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=4, 
                                          shuffle=True)
images, labels = next(iter(trainloader))
#plt.imshow(torchvision.utils.make_grid(images).permute(1, 2, 0) / 2 + 0.5); 
#plt.title(' '.join(trainset.classes[label] for label in labels)); plt.show()
#/Users/frederik/miniforge3/envs/IntelligentSystems/lib/python3.12/site-packages/torchvision/datasets/cifar.py:83
#plt.imshow(trainset.data[2])

In [272]:
training_data = torch.tensor(trainset.data * (1/255)).float()
ygt = torch.tensor(trainset.targets).long()

In [273]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = nn.Sequential(
    nn.Conv3d(1,32, kernel_size=3, padding=1),
    nn.BatchNorm3d(32),
    nn.ReLU(),
    nn.MaxPool3d(kernel_size=(1,2,2)),
    
    nn.Conv3d(32, 64, kernel_size=3, padding=1),
    nn.BatchNorm3d(64),
    nn.ReLU(),
    
    nn.Flatten(),
    nn.Linear(64 * 3 * 16 * 16, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(0.5),  
    nn.Linear(128, 10)
)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
loss_function = nn.CrossEntropyLoss()                  # nn.CrossEntropyLoss() nn.BCELoss() 

# prepare data
#ygt = torch.tensor(trainset.targets).double()
#ygt = torch.nn.functional.one_hot(ygt.long(), num_classes=10)

# --- THE CHANGE: Use .view(-1, 1, 28, 28) instead of .view(-1, 784) ---

# normal data
#training_data = x_train
# Shape becomes: (60000, 1, 28, 28)
#out = training_data.view(-1, 1, 28, 28).float()

# shifted data up
#training_data = x_train_up * (1/255)
#out_up = training_data.view(-1, 1, 28, 28).float()

# shifted data down
#training_data = x_train_down * (1/255)
#out_down = training_data.view(-1, 1, 28, 28).float()

# shifted data left
#training_data = x_train_left * (1/255)
#out_left = training_data.view(-1, 1, 28, 28).float()

# shifted data right
#training_data = x_train_right * (1/255)
#out_right = training_data.view(-1, 1, 28, 28).float()

# labels (No change needed here)
#labels = torch.tensor(y_train)

# all data together (No change needed here, torch.cat works on 4D tensors too)
#all_data = torch.cat((out, out_up, out_down, out_left, out_right), dim=0)
#all_labels = torch.cat((labels, labels, labels, labels, labels), dim=0)

# 1. Hent data som før
training_data = torch.tensor(trainset.data).float()
training_data = training_data / 255.0

# 2. VIGTIGT: Byt om på dimensionerne (Permute)
# Fra (N, 32, 32, 3) -> til (N, 3, 32, 32)
# 0: Batch, 1: Højde, 2: Bredde, 3: Farve -> Vi vil have (0, 3, 1, 2)
training_data = training_data.permute(0, 3, 1, 2)

# 3. Nu kan du reshape til 3D Conv formatet (N, 1, 3, 32, 32)
# Dette er sikkert nu, fordi farverne (3) ligger rigtigt i hukommelsen
training_data = training_data.unsqueeze(1) # Tilføjer 'Channel' dimensionen

# 4. Lav dit dataset
train_all = DataLoader(dataset=TensorDataset(training_data, ygt), 
                       batch_size=100, 
                       shuffle=True)


In [274]:
def training_loop():
    model.train()
    for (data, target) in train_all:
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #print("loss:",loss, "output" ,output.shape, "target",target.shape)

In [275]:
def results():
    testtotal = 0
    fails = []
    model.eval()
    
    # 1. Turn off gradients to speed up testing
    with torch.no_grad():
        for i in range(len(test_data)):
            
            # 2. Get the image and normalize (Don't flatten with concatenate!)
            # Assuming x_test[i] gives you a 28x28 matrix
            image = np.array(test_data[i])
            image_normalized = image * (1/255)
            
            # 3. Reshape for CNN: (1 batch, 1 channel, 28 height, 28 width)
            input_tensor = torch.tensor(image_normalized).float().view(1, 1, 28, 28)
            
            # Optional: If you are using GPU, add .to(device)
            # input_tensor = input_tensor.to(device) 

            # 4. Get Prediction
            output = model(input_tensor)
            prediction = torch.argmax(output).item()
            
            # 5. Check correctness
            if prediction == test_data[i]:
                testtotal += 1
            else:
                fails.append((i, prediction, test_data[i]))

    print(f"{testtotal}/{len(test_data)}. Percentage: {testtotal/len(test_data)*100:.02f}%")
    return testtotal, fails

In [276]:
# First, ensure you have the testset downloaded
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True)

def results():
    testtotal = 0
    fails = []
    model.eval()
    
    # Use the testset data directly
    # testset.data shape is (10000, 32, 32, 3)
    # testset.targets is a list of 10000 labels
    
    with torch.no_grad():
        for i in range(len(testset.data)):
            
            # 1. Get image and normalize
            image = testset.data[i] # Shape: (32, 32, 3)
            label = testset.targets[i]
            
            # Convert to tensor and normalize
            # input_tensor starts as (32, 32, 3)
            input_tensor = torch.tensor(image).float() / 255.0
            
            # 2. Reshape for Conv3d
            # Step A: Permute to move Color to the front: (3, 32, 32)
            input_tensor = input_tensor.permute(2, 0, 1)
            
            # Step B: Add the Batch dimension and the Channel dimension
            # We need: (1 Batch, 1 Channel, 3 Depth, 32 Height, 32 Width)
            input_tensor = input_tensor.view(1, 1, 3, 32, 32)
            
            # 3. Move to GPU (Crucial!)
            input_tensor = input_tensor.to(device)

            # 4. Get Prediction
            output = model(input_tensor)
            prediction = torch.argmax(output).item()
            
            # 5. Check correctness
            if prediction == label:
                testtotal += 1
            else:
                fails.append((i, prediction, label))

    print(f"{testtotal}/{len(testset.data)}. Percentage: {testtotal/len(testset.data)*100:.02f}%")
    return testtotal, fails

In [277]:
def run(times):
    highest = 0
    for _ in range(times):
        training_loop()
        testtotal, fails = results()
        if testtotal > highest:
            highest = testtotal
    print(f"best: {highest} fails: {fails}")


In [278]:
run(5)

6406/10000. Percentage: 64.06%
6739/10000. Percentage: 67.39%
6975/10000. Percentage: 69.75%
7103/10000. Percentage: 71.03%
7052/10000. Percentage: 70.52%
best: 7103 fails: [(2, 0, 8), (15, 6, 8), (22, 0, 4), (24, 4, 5), (32, 6, 4), (35, 3, 2), (37, 9, 1), (40, 0, 4), (49, 4, 6), (52, 5, 0), (57, 5, 7), (58, 5, 4), (59, 3, 6), (61, 5, 3), (64, 5, 6), (69, 3, 7), (70, 3, 2), (76, 0, 9), (83, 3, 7), (84, 5, 2), (85, 7, 5), (87, 0, 7), (88, 9, 8), (91, 6, 3), (97, 2, 0), (106, 6, 3), (109, 5, 7), (112, 2, 6), (115, 0, 3), (118, 6, 2), (119, 2, 7), (121, 5, 3), (128, 3, 5), (132, 9, 8), (139, 0, 9), (140, 3, 6), (143, 5, 3), (145, 5, 7), (147, 5, 2), (149, 3, 2), (154, 3, 0), (158, 3, 5), (164, 0, 8), (172, 1, 9), (184, 5, 3), (188, 3, 4), (190, 6, 5), (191, 3, 8), (194, 6, 7), (197, 3, 8), (198, 3, 7), (199, 3, 8), (201, 0, 1), (210, 5, 7), (211, 9, 4), (213, 7, 9), (214, 1, 8), (215, 3, 0), (216, 0, 7), (218, 3, 8), (219, 7, 2), (221, 3, 6), (223, 5, 4), (226, 7, 6), (228, 3, 7), (232, 3