In [1]:
!unzip /content/drive/MyDrive/DataForNotebooks/streetnumbers.zip > /dev/null

In [2]:
!pip install wandb --upgrade > /dev/null
!pip install onnx > /dev/null
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
import wandb
import math

In [3]:
# Log in to Weights & Biases.
# SECURITY: never store an API key in the notebook. wandb.login() prompts for the key
# interactively, or picks it up from the WANDB_API_KEY environment variable.
# A key that was previously committed in this cell must be treated as leaked:
# revoke it at https://wandb.ai/authorize and generate a new one.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
# Sweep definition: random search that maximizes the "test_acc" metric
# logged by train_and_test_model.
metric = {'name': 'test_acc', 'goal': 'maximize'}

# Search space. Entries with 'values' are categorical choices; entries with
# 'distribution': 'uniform' are drawn between 'min' and 'max'.
parameters_dict = {
    'optimizer': {'values': ['adam', 'sgd']},
    'epochs': {'values': [10, 20, 30, 40, 50]},
    'learning_rate': {'distribution': 'uniform', 'min': 0, 'max': 0.01},
    'batch_size': {'values': [128, 256, 512]},
    'dropout': {'distribution': 'uniform', 'min': 0, 'max': 1},
}

# The nested dicts are shared by reference, exactly as when they were
# attached key by key.
sweep_config = {
    'method': 'random',
    'metric': metric,
    'parameters': parameters_dict,
}

In [5]:
# Hyperparameters for a single (non-sweep) run; handed to wandb.init via
# model_pipeline(config).
config = {
    'optimizer': 'adam',
    'epochs': 100,
    'batch_size': 32,
    'learning_rate': 0.0027,
    'dropout': 0.4,
    # Descriptive metadata only; no code in this notebook reads these two keys.
    'dataset': "StreetNumbers",
    'architecture': "ResNet50",
}

In [6]:
def model_pipeline(hyperparameters=None):
  """Run one tracked experiment: build the data loaders and model, then train and evaluate.

  Args:
    hyperparameters: passed to ``wandb.init(config=...)``; may be ``None``
      (the default), which is what ``wandb.agent`` relies on when it calls
      this function without arguments.

  Returns:
    Tuple ``(model, test_loader)`` from the finished run.
  """
  run = wandb.init(entity="ole-elija-dziewas", project="myfirsttest", config=hyperparameters)
  with run:
    # Hyperparameters are read back from wandb.config rather than from the
    # argument, so every helper below sees the same configuration object.
    run_config = wandb.config
    # Prepare data and model
    loaders = createDataloader(run_config)
    train_loader, test_loader = loaders
    model, criterion, optimizer = createModel(run_config)
    train_and_test_model(model, train_loader, test_loader, criterion, optimizer, run_config)

  return model, test_loader

In [7]:
# Build a personal Dataset
class MyDataset(Dataset):
  """Map-style dataset pairing each input with the label stored at the same index."""

  def __init__(self, input_data, output_labels):
    """Keep references to the indexable inputs and labels (no copy is made)."""
    self.input_data, self.output_labels = input_data, output_labels

  def __len__(self):
    # The size is taken from the inputs only; labels are not length-checked.
    return len(self.input_data)

  def __getitem__(self, index):
    """Return the ``(input, label)`` pair found at ``index``."""
    return self.input_data[index], self.output_labels[index]

In [8]:
def createDataloader(config):
  """Load the street-number arrays from disk and wrap them in DataLoaders.

  Reads ``streetnumbers/{train,test}_{inputs,labels}.npy``, converts them to
  tensors, moves them to the GPU when one is available, and batches them with
  ``config.batch_size``.

  Args:
    config: object exposing a ``batch_size`` attribute.

  Returns:
    Tuple ``(train_loader, test_loader)``. Only the training loader shuffles.
  """
  # Activate gpu acceleration
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  def load_inputs(path):
    # NOTE(review): view() only reinterprets the existing memory layout as
    # (N, 3, 256, 256); it does not reorder axes. If the .npy files are stored
    # channels-last (N, 256, 256, 3), this scrambles the pixels and
    # `.permute(0, 3, 1, 2)` is needed instead. Confirm against the data.
    tensor = torch.from_numpy(np.load(path)).view(-1, 3, 256, 256)
    # One bulk transfer instead of one host-to-device copy per sample.
    return tensor.to(device)

  def load_labels(path):
    # Labels are used as stored on disk; only the device changes.
    return torch.from_numpy(np.load(path)).to(device)

  # Setup datasets directly on the target device
  train_dataset = MyDataset(load_inputs("streetnumbers/train_inputs.npy"),
                            load_labels("streetnumbers/train_labels.npy"))
  test_dataset = MyDataset(load_inputs("streetnumbers/test_inputs.npy"),
                           load_labels("streetnumbers/test_labels.npy"))

  train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
  # Evaluation does not depend on sample order, so the test set is not shuffled.
  test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)
  return train_loader, test_loader

In [9]:
# Build model
# ResNet Code from https://blog.paperspace.com/writing-resnet-from-scratch-in-pytorch/
class Bottleneck(nn.Module):
    """Residual block made of a 1x1, a 3x3 and a 1x1 convolution.

    The block outputs ``out_channels * expansion`` channels. ``stride`` is
    applied by the 3x3 convolution. ``i_downsample``, when given, is applied
    to the shortcut so that it can be added to the main path.
    """
    expansion = 4

    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super().__init__()
        widened = out_channels * self.expansion

        # 1x1 convolution to the block's inner width
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)

        # 3x3 convolution; the only layer that uses `stride`
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        # 1x1 convolution to the expanded output width
        self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, stride=1, padding=0)
        self.batch_norm3 = nn.BatchNorm2d(widened)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        shortcut = x.clone()

        out = self.conv1(x)
        out = self.relu(self.batch_norm1(out))

        out = self.conv2(out)
        out = self.relu(self.batch_norm2(out))

        out = self.batch_norm3(self.conv3(out))

        # Project the shortcut only when a downsample module was supplied.
        if self.i_downsample is not None:
            shortcut = self.i_downsample(shortcut)

        out += shortcut
        return self.relu(out)

class ResidualBlock(nn.Module):
    """Two 3x3 conv+batch-norm stages with an additive shortcut.

    ``stride`` is applied by the first convolution. ``downsample``, when
    truthy, transforms the input before it is added to the main path.
    """

    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super().__init__()
        first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.conv1 = nn.Sequential(first_conv, nn.BatchNorm2d(out_channels), nn.ReLU())
        second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        # No ReLU here: the activation is applied after the shortcut is added.
        self.conv2 = nn.Sequential(second_conv, nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(x)) + shortcut)``."""
        out = self.conv2(self.conv1(x))
        # Truthiness test (not `is not None`) is kept as in the original contract.
        shortcut = self.downsample(x) if self.downsample else x
        out += shortcut
        return self.relu(out)

class Block(nn.Module):
    """Basic two-convolution residual block (``expansion = 1``).

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by both 3x3 convolutions.
        i_downsample: optional module applied to the shortcut so that its
            shape matches the main path.
        stride: stride of the first convolution only.
    """
    expansion = 1

    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Block, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        # The second convolution keeps the resolution. Reusing `stride` here
        # would downsample twice and no longer match the shortcut, whose
        # downsample module strides only once.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=1, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``."""
        identity = x.clone()

        # Each convolution has its own batch-norm layer.
        x = self.relu(self.batch_norm1(self.conv1(x)))
        x = self.batch_norm2(self.conv2(x))

        if self.i_downsample is not None:
            identity = self.i_downsample(identity)
        x += identity
        x = self.relu(x)
        return x




class ResNet(nn.Module):
    """ResNet backbone: stem, four residual stages, global pooling, linear head.

    Args:
        ResBlock: block class. It must expose a class attribute ``expansion``
            and accept ``(in_channels, out_channels, i_downsample=None, stride=1)``.
        layer_list: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        num_channels: channels of the input images (default 3).
    """

    def __init__(self, ResBlock, layer_list, num_classes, num_channels=3):
        super().__init__()
        # Running channel count; _make_layer reads and updates it per stage.
        self.in_channels = 64

        # Stem: 7x7 stride-2 convolution followed by a stride-2 max pool.
        self.conv1 = nn.Conv2d(num_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stages: only the first keeps the resolution (stride 1).
        self.layer1 = self._make_layer(ResBlock, layer_list[0], planes=64)
        self.layer2 = self._make_layer(ResBlock, layer_list[1], planes=128, stride=2)
        self.layer3 = self._make_layer(ResBlock, layer_list[2], planes=256, stride=2)
        self.layer4 = self._make_layer(ResBlock, layer_list[3], planes=512, stride=2)

        # Adaptive pooling makes the head independent of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * ResBlock.expansion, num_classes)

    def forward(self, x):
        """Map a batch of images to a ``(batch, num_classes)`` tensor."""
        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = self.relu(x)
        x = self.max_pool(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = x.reshape(x.shape[0], -1)
        return self.fc(x)

    def _make_layer(self, ResBlock, blocks, planes, stride=1):
        """Build one stage of ``blocks`` blocks and advance ``self.in_channels``."""
        stage_width = planes * ResBlock.expansion

        # The first block needs a projected shortcut whenever it changes the
        # resolution or the channel count.
        ii_downsample = None
        if stride != 1 or self.in_channels != stage_width:
            ii_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, stage_width, kernel_size=1, stride=stride),
                nn.BatchNorm2d(stage_width),
            )

        stage = [ResBlock(self.in_channels, planes, i_downsample=ii_downsample, stride=stride)]
        self.in_channels = stage_width

        # Remaining blocks keep the shape, so they use the default stride and no projection.
        for _ in range(blocks - 1):
            stage.append(ResBlock(self.in_channels, planes))

        return nn.Sequential(*stage)

class ResNet34(nn.Module):
    """ResNet variant built from blocks with the ``ResidualBlock`` signature.

    Args:
        block: block class called as ``block(in_channels, out_channels, stride, downsample)``.
        layers: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer (default 9).

    The head is fixed-size: ``fc1`` expects 2048 flattened features after a
    7x7 average pool. With a 512-channel final stage this means a 2x2 map
    after pooling, which the strides below produce for 256x256 inputs.
    """

    def __init__(self, block, layers, num_classes = 9):
        # Must name this class: super(ResNet, self) raises TypeError because
        # ResNet34 does not inherit from ResNet.
        super(ResNet34, self).__init__()
        # Running channel count; _make_layer reads and updates it per stage.
        self.inplanes = 64
        self.conv1 = nn.Sequential(
                        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3),
                        nn.BatchNorm2d(64),
                        nn.ReLU())
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(block, 64, layers[0], stride = 1)
        self.layer1 = self._make_layer(block, 128, layers[1], stride = 2)
        self.layer2 = self._make_layer(block, 256, layers[2], stride = 2)
        self.layer3 = self._make_layer(block, 512, layers[3], stride = 2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc1 = nn.Linear(2048, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` blocks and advance ``self.inplanes``."""
        downsample = None
        # The first block needs a projected shortcut when it changes the
        # resolution or the channel count.
        if stride != 1 or self.inplanes != planes:

            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)


    def forward(self, x):
        """Map a batch of images to a ``(batch, num_classes)`` tensor."""
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)

        return x


class CNN(nn.Module):
  """Plain convolutional classifier that returns raw class scores (logits).

  Args:
    config: object exposing a ``dropout`` attribute (drop probability used
      after each convolutional stack).

  The linear head expects 3136 = 64 * 7 * 7 flattened features, which is what
  the strides and paddings below produce for 256x256 inputs.

  No activation follows the linear layer. The training code in this notebook
  uses ``nn.CrossEntropyLoss``, which applies log-softmax itself and expects
  unnormalized logits; a Sigmoid in front of it would squash every score into
  (0, 1) and cripple the loss. ``argmax`` predictions are unaffected.
  """

  @staticmethod
  def _conv_bn_relu(in_channels, out_channels, kernel_size, stride):
    """Return the [Conv2d, BatchNorm2d, ReLU] triple used throughout the net (padding is always 1)."""
    return [
      nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=1),
      nn.BatchNorm2d(out_channels),
      nn.ReLU(),
    ]

  def __init__(self, config):
    super(CNN, self).__init__()
    # The layers are unpacked into flat Sequentials so that parameter names
    # (conv_layers1.0.weight, ...) stay the same as with hand-written lists.
    self.conv_layers1 = nn.Sequential(
      *self._conv_bn_relu(3, 32, kernel_size=3, stride=1),
      *self._conv_bn_relu(32, 32, kernel_size=3, stride=1),
      *self._conv_bn_relu(32, 32, kernel_size=5, stride=2),
      nn.Dropout(config.dropout)
    )

    self.conv_layers2 = nn.Sequential(
      *self._conv_bn_relu(32, 64, kernel_size=3, stride=1),
      *self._conv_bn_relu(64, 64, kernel_size=3, stride=1),
      *self._conv_bn_relu(64, 64, kernel_size=5, stride=2),
      *self._conv_bn_relu(64, 64, kernel_size=5, stride=2),
      *self._conv_bn_relu(64, 64, kernel_size=5, stride=2),
      *self._conv_bn_relu(64, 64, kernel_size=5, stride=2),
      nn.Dropout(config.dropout)
    )

    # Kept as a Sequential so the parameter names (fc_layer.0.*) are unchanged.
    self.fc_layer = nn.Sequential(
      nn.Linear(3136, 9)
    )

  def forward(self, x):
    """Map a batch of images to a ``(batch, 9)`` tensor of logits."""
    x = self.conv_layers1(x)
    x = self.conv_layers2(x)
    # Flatten everything except the batch dimension.
    x = x.view(x.size(0), -1)
    x = self.fc_layer(x)
    return x

def ResNet50(num_classes):
    """Build a ResNet of Bottleneck blocks with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, num_classes)

def ResNet101(num_classes):
    """Build a ResNet of Bottleneck blocks with stage depths [3, 4, 23, 3]."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, num_classes)

def ResNet152(num_classes):
    """Build a ResNet of Bottleneck blocks with stage depths [3, 8, 36, 3]."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths, num_classes)

In [10]:
def createModel(config):
  """Instantiate the network, the loss and the optimizer for one run.

  Args:
    config: object exposing ``optimizer`` (``"sgd"`` or ``"adam"``) and
      ``learning_rate``.

  Returns:
    Tuple ``(model, criterion, optimizer)``; the model is already on the GPU
    when one is available.

  Raises:
    ValueError: if ``config.optimizer`` is not one of the supported names.
  """
  # Activate gpu acceleration
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  # Instantiate model (9 output classes)
  model = ResNet50(9)
  #model = CNN(config)
  model.to(device)
  criterion = nn.CrossEntropyLoss()
  if config.optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate, momentum=0.9)
  elif config.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate)
  else:
        # Without this branch an unknown name would surface as an
        # UnboundLocalError at the return statement.
        raise ValueError(
            f"Unsupported optimizer {config.optimizer!r}; expected 'sgd' or 'adam'")
  return model, criterion, optimizer

In [11]:
# Training
def train_and_test_model(model, train_loader, test_loader, criterion, optimizer, config):
  """Train for ``config.epochs`` epochs, evaluating on the test loader after each one.

  After every epoch the sample-weighted mean training loss and the test
  accuracy (in percent) are printed and logged to W&B under the keys
  ``epoch``, ``train_loss`` and ``test_acc``.

  Args:
    model: network to optimize (modified in place).
    train_loader: iterable of ``(images, labels)`` training batches.
    test_loader: iterable of ``(images, labels)`` evaluation batches.
    criterion: loss called as ``criterion(outputs, labels)``.
    optimizer: optimizer bound to ``model``'s parameters.
    config: object exposing an ``epochs`` attribute.

  Labels may be one-hot / per-class scores with shape ``(batch, classes)`` or
  plain class indices with shape ``(batch,)``; accuracy handles both.
  """
  # Follow the device the model actually lives on, so batches coming from a
  # CPU-backed loader still work; fall back to the usual probe for
  # parameter-free models.
  first_param = next(model.parameters(), None)
  if first_param is not None:
    device = first_param.device
  else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  #wandb.watch(model, criterion, log="all")
  print("Training the CNN ...")
  for epoch in range(config.epochs):
    train_loss = 0.0
    seen = 0
    model.train()
    for images, labels in train_loader:
      # No-op when the loader already yields tensors on `device`.
      images, labels = images.to(device), labels.to(device)
      # Load batches and fit weights
      optimizer.zero_grad()
      outputs = model(images)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()
      # Weight the batch-mean loss by the batch size so the last, smaller
      # batch does not skew the epoch average.
      train_loss += loss.item() * images.size(0)
      seen += images.size(0)

    # Average train_loss over the samples actually processed
    train_loss = train_loss / seen if seen else float("nan")

    # Test the model
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
      for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        # One-hot labels are reduced to class indices; index labels are used as-is.
        targets = labels.argmax(dim=1) if labels.dim() > 1 else labels
        total += labels.size(0)
        correct += torch.eq(predicted, targets).sum().item()

    test_accuracy = 100.0 * correct / total if total else float("nan")
    print(f"Epoch {epoch+1}/{config.epochs} - Train Loss: {train_loss:.4f} - Test Accuracy: {test_accuracy:.2f}%")
    wandb.log({"epoch": epoch, "train_loss": train_loss, "test_acc": test_accuracy})
  #torch.onnx.export(model, images, "model.onnx")
  #wandb.save("model.onnx")

In [None]:
# Entry point: train once with `config`, or launch a W&B sweep when do_sweep is set.
do_sweep = False
sweep_count = 150
wandb.init(mode="disabled")
if not do_sweep:
  # Single run; exposes `model` and `test_loader` to the cells below.
  model, test_loader = model_pipeline(config)
else:
  # NOTE(review): the sweep is registered under entity "overxaited" / project
  # "NumberRecognition", while model_pipeline calls wandb.init with a
  # different entity and project. Confirm which destination is intended.
  # This branch does not define `model`, which later cells rely on.
  sweep_id = wandb.sweep(sweep_config, entity="overxaited", project="NumberRecognition")
  wandb.agent(sweep_id, model_pipeline, count=sweep_count)

Training the CNN ...
Epoch 1/100 - Train Loss: 2.4139 - Test Accuracy: 13.50%
Epoch 2/100 - Train Loss: 2.2217 - Test Accuracy: 12.88%
Epoch 3/100 - Train Loss: 2.1993 - Test Accuracy: 13.62%
Epoch 4/100 - Train Loss: 2.1937 - Test Accuracy: 14.86%
Epoch 5/100 - Train Loss: 2.1863 - Test Accuracy: 13.67%
Epoch 6/100 - Train Loss: 2.1821 - Test Accuracy: 14.75%


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.111792…

0,1
epoch,▁▂▄▅▇█
test_acc,▃▁▄█▄█
train_loss,█▂▂▁▁▁

0,1
epoch,5.0
test_acc,14.74576
train_loss,2.18214


KeyboardInterrupt: ignored

In [None]:
import random
import matplotlib.pyplot as plt
wandb.init(mode="disabled")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
validation_inputs = np.load("streetnumbers/validation_inputs.npy")
val_tensor = torch.from_numpy(validation_inputs)
# NOTE(review): view() reinterprets memory without reordering axes. Confirm
# the stored layout really is (N, 3, 256, 256).
val_tensor = val_tensor.view(-1, 3, 256, 256)
# Tensor.to() returns a new tensor instead of moving in place, so a bare
# `val_tensor.to(device)` does nothing. Samples are moved one at a time below,
# which also keeps the full validation set out of GPU memory.

# A training run interrupted mid-epoch leaves the model in train mode;
# inference must use the BatchNorm running statistics.
model.eval()

results = []
with torch.no_grad():
  for example in val_tensor:
    example = example.to(device)
    # Forward pass on a batch of one sample
    output = model(example.unsqueeze(0))
    predicted_label = output.argmax(dim=1).item()
    # Predictions are stored with a +1 offset relative to the argmax index.
    results.append(predicted_label + 1)

print(results)

In [None]:
import csv
# Name of the CSV file
filename = 'overxaited_submission01.csv'
# Hard-coded predictions and the sample IDs they belong to (same length, paired by position).
results =[4, 5, 2, 3, 5, 2, 9, 5, 3, 3, 8, 9, 8, 2, 3, 5, 4, 1, 1, 5, 3, 4, 6, 9, 2, 9, 1, 3, 6, 3, 7, 3, 9, 6, 9, 7, 5, 4, 5, 6, 3, 2, 3, 9, 9, 2, 9, 9, 4, 6, 9, 9, 6, 4, 7, 5, 8, 7, 9, 2, 1, 1, 2, 4, 5, 6, 4, 2, 9, 9, 9, 9, 6, 5, 2, 2, 5, 2, 3, 7, 3, 5, 6, 4, 5, 1, 5, 3, 6, 3, 2, 7, 9, 2, 9, 6, 6, 5, 3, 1, 4, 5, 2, 2, 2, 1, 6, 9, 1, 9, 2, 3, 5, 9, 3, 6, 2, 9, 1, 8, 9, 1, 9, 1, 1, 5, 2, 4, 9, 4, 7, 2, 3, 5, 9, 4, 8, 2, 8, 7, 1, 7, 6, 2]
indices = [14, 17, 24, 27, 28, 30, 33, 34, 35, 36, 38, 39, 43, 46, 47, 49, 51, 60, 64, 70, 71, 74, 75, 77, 82, 86, 96, 97, 99, 105, 109, 115, 116, 124, 130, 146, 148, 151, 154, 155, 157, 159, 172, 179, 181, 186, 196, 221, 241, 247, 248, 251, 260, 270, 271, 273, 293, 298, 308, 309, 313, 317, 322, 332, 343, 348, 354, 355, 362, 365, 367, 380, 386, 392, 395, 396, 398, 406, 408, 409, 413, 414, 420, 421, 422, 426, 428, 431, 433, 434, 437, 439, 441, 446, 451, 454, 456, 458, 461, 468, 469, 481, 493, 495, 496, 511, 513, 518, 520, 521, 532, 533, 538, 558, 562, 565, 567, 571, 576, 584, 585, 587, 597, 599, 601, 604, 622, 629, 634, 636, 652, 655, 657, 665, 675, 684, 689, 693, 698, 701, 702, 709, 711, 712]
# Open the CSV file for writing (newline='' lets the csv module control line endings)
with open(filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)

    # Header row
    writer.writerow(['Sample ID', 'Prediction'])

    # One row per (sample id, prediction) pair
    for sample_id, prediction in zip(indices, results):
        writer.writerow([sample_id, prediction])

In [None]:
# Pick a random index position in the validation set
index = random.randint(0, len(validation_inputs) - 1)

# Fetch the corresponding example (no ground-truth label is available here)
example = val_tensor[index]
print(index)
example = example.to(device)
# Forward pass on a batch of one sample. eval() makes BatchNorm use its running
# statistics instead of updating them from this single image, and no_grad()
# avoids building an autograd graph that is never used.
model.eval()
with torch.no_grad():
  output = model(example.unsqueeze(0))
# Displayed labels carry a +1 offset relative to the argmax index.
predicted_label = output.argmax(dim=1).item() +1

# Bring the sample into (height, width, channels) form for imshow.
# NOTE(review): reshape() reinterprets memory and does not swap axes. This only
# shows the true image if the underlying data is stored channels-last;
# otherwise `example.permute(1, 2, 0)` is needed. Confirm against the data.
example = example.reshape((256, 256, 3))


# Show the image with the predicted label as title
plt.imshow(example.to('cpu'))
plt.title(f', Vorhersage: {predicted_label}')
plt.show()