In [1]:
import numpy as np
import struct
from array import array
from os.path import join


class MnistDataloader(object):
    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        labels = []
        if labels_filepath is not None:
            with open(labels_filepath, 'rb') as file:
                magic, size = struct.unpack(">II", file.read(8))
                if magic != 2049:
                    raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
                labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())
        images = []
        for i in range(size):
            images.append([0] * rows * cols)
        for i in range(size):
            img = np.array(image_data[i * rows * cols:(i + 1) * rows * cols])
            img = img.reshape(28, 28)
            images[i][:] = img            

        return images, labels

    def load_data(self):
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, None)
        return (x_train, y_train), x_test        

input_path = './MNIST_Data_Set'
training_images_filepath = join(input_path, 'train-images.idx3-ubyte')
training_labels_filepath = join(input_path, 'train-labels.idx1-ubyte')
test_images_filepath = join(input_path, 't10k-images.idx3-ubyte')

mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath)
(x_train, y_train), x_test = mnist_dataloader.load_data()

for i in range(len(x_train)):
    flattened_img = np.array(x_train[i]).flatten()
    x_train[i] = flattened_img


In [62]:
np.set_printoptions(linewidth=115)

print(x_train[0])

print(str(y_train[0]))

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   3  18  18  18 126 136 175  26 166 255 247 127   0   0   0   0
   0   0   0   0   0   0   0   0  30  36  94 154 170 253 253 253 253 253 225 172 253 242 195  64   0   0   0   0
   0   0   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251  93  82  82  56  39   0   0   0   0   0
   0   0   0   0   0   0   0  18 219 253 253 253 253 253 198 182 247 241   0   0   0   0   0   0

In [63]:
from sklearn.model_selection import train_test_split

# Split the data into training and test sets
x_train = np.array(x_train)
y_train = np.array(y_train)
#split
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42) # 80% training and 20% test


In [None]:
import optuna
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Subset
import torch.nn.functional as F
from sklearn.model_selection import KFold
import numpy as np
import torch.optim as optim

# Check if GPU is available
print(torch.cuda.is_available())
print(torch.version.cuda)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters: learning rate, number of hidden layers, number of neurons in each hidden layer, activation function, batch size, number of epochs, optimizer

# Define the neural network model
class Net(nn.Module):
    def __init__(self, layer_sizes, activation_fn):
        super(Net, self).__init__()
        self.layers = nn.ModuleList()
        input_size = 784  # images are 28x28 pixels

        for size in layer_sizes:
            self.layers.append(nn.Linear(input_size, size))
            input_size = size

        self.output_layer = nn.Linear(input_size, 10)
        self.activation_fn = activation_fn

    def forward(self, x):
        for layer in self.layers:
            x = self.activation_fn(layer(x))
        x = self.output_layer(x)
        return x
"""
# Define the objective function
def objective(trial):
    # Suggest Hyperparameters
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    num_layers = trial.suggest_int('num_layers', 1, 5)
    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 32, 512) for i in range(num_layers)]
    activation_name = trial.suggest_categorical('activation_fn', ['relu', 'tanh', 'sigmoid'])
    activation_fn = getattr(F, activation_name)
    batch_size = trial.suggest_int('batch_size', 32, 256)
    num_epochs = trial.suggest_int('num_epochs', 5, 25)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'AdamW', 'SGD'])
    
    # Create neural network model
    model = Net(layer_sizes, activation_fn).to(device)
    criterion = nn.CrossEntropyLoss()
    
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    else:
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    
    # Convert data to PyTorch tensors
    x_train_tensor = torch.tensor(x_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)

    # Create TensorDatasets
    train_dataset = TensorDataset(x_train_tensor, y_train_tensor)

    # K-Fold Cross-Validation
    kf = KFold(n_splits=5)
    accuracies = []
    
    for train_index, val_index in kf.split(x_train):
        train_subset = Subset(train_dataset, train_index)
        val_subset = Subset(train_dataset, val_index)

        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

        # Training loop
        for epoch in range(num_epochs):
            model.train()
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = 100 * correct / total
        accuracies.append(accuracy)

    # Return the average accuracy over all folds
    return np.mean(accuracies)

# Create a study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# Print the best hyperparameters
print(study.best_params)
"""
# Current Best Hyperparameters
# {'learning_rate': 0.0003735727270151366, 'hidden_size1': 370, 'hidden_size2': 164, 'activation_fn': 'relu', 'num_epochs': 20} OBSOLETE (But Current Best in Kaggle)
# {'learning_rate': 0.00029516266449741116, 'hidden_size1': 345, 'hidden_size2': 44, 'activation_fn': 'relu', 'batch_size': 40, 'num_epochs': 20}


True
12.6


"\n# Define the objective function\ndef objective(trial):\n    # Suggest Hyperparameters\n    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)\n    num_layers = trial.suggest_int('num_layers', 1, 5)\n    layer_sizes = [trial.suggest_int(f'layer_size_{i}', 32, 512) for i in range(num_layers)]\n    activation_name = trial.suggest_categorical('activation_fn', ['relu', 'tanh', 'sigmoid'])\n    activation_fn = getattr(F, activation_name)\n    batch_size = trial.suggest_int('batch_size', 32, 256)\n    num_epochs = trial.suggest_int('num_epochs', 5, 25)\n    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'AdamW', 'SGD'])\n    \n    # Create neural network model\n    model = Net(layer_sizes, activation_fn).to(device)\n    criterion = nn.CrossEntropyLoss()\n    \n    if optimizer_name == 'Adam':\n        optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n    elif optimizer_name == 'AdamW':\n        optimizer = optim.AdamW(model.parameters(), lr=

In [65]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Daten zu PyTorch-Tensoren konvertieren
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
x_val_tensor = torch.tensor(x_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# Erstellung der TensorDatasets
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

# Erstellung Sie DataLoader
train_loader = DataLoader(train_dataset, batch_size=40, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=40, shuffle=False)

In [66]:
from torch import optim
from torch.optim.lr_scheduler import StepLR

model = Net([128, 64, 32], F.relu).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0001)
# Scheduler für das Lernen
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the same device as the model
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')
    
    scheduler.step()
    
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the same device as the model
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    print(f'Validation Accuracy: {100 * correct / total}%')

Epoch 1, Loss: 0.4372280678742876
Validation Accuracy: 93.93333333333334%
Epoch 2, Loss: 0.17061844429544484
Validation Accuracy: 95.3%
Epoch 3, Loss: 0.11922908545685156
Validation Accuracy: 95.91666666666667%
Epoch 4, Loss: 0.08916343557667764
Validation Accuracy: 96.30833333333334%
Epoch 5, Loss: 0.06741343527207694
Validation Accuracy: 96.5%
Epoch 6, Loss: 0.040204144195158734
Validation Accuracy: 96.93333333333334%
Epoch 7, Loss: 0.03457907491577013
Validation Accuracy: 96.96666666666667%
Epoch 8, Loss: 0.03169619792955927
Validation Accuracy: 97.01666666666667%
Epoch 9, Loss: 0.029513637535759092
Validation Accuracy: 97.03333333333333%
Epoch 10, Loss: 0.02748181124557353
Validation Accuracy: 97.01666666666667%
Epoch 11, Loss: 0.024474887859954227
Validation Accuracy: 97.075%
Epoch 12, Loss: 0.02415807788267557
Validation Accuracy: 97.06666666666666%
Epoch 13, Loss: 0.023925721169119545
Validation Accuracy: 97.04166666666667%
Epoch 14, Loss: 0.02369744116362805
Validation Accuracy

In [67]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Convert test data to PyTorch tensor
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)

# Create a TensorDataset and DataLoader for the test data
test_dataset = TensorDataset(x_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Set the model to evaluation mode
model.eval()

# List to store predictions
predictions = []

# Disable gradient computation
with torch.no_grad():
    for inputs in test_loader:
        inputs = inputs[0]  # Extract the inputs from the tuple
        inputs = inputs.view(inputs.size(0), -1)  # Reshape the inputs to (batch_size, 784)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(predicted.cpu().numpy())

# Convert predictions to a numpy array
predictions = np.array(predictions)

# Now `predictions` contains the predicted class labels for the test data
print(predictions)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [39]:
# Create CSV file for submission to Kaggle
import pandas as pd

# Create a DataFrame with the `Id` and `Label` columns
df = pd.DataFrame({'ID': np.arange(0, len(predictions)), 'Label': predictions})

# Save the DataFrame to a CSV file
df.to_csv('submission.csv', index=False)

# Display the first few rows of the DataFrame
print(df.head())

   ID  Label
0   0      7
1   1      2
2   2      1
3   3      0
4   4      4
