In [1]:
#trained using all images, normalized to 0,1
#differente kernel size from base model
#both changes had a slight improvemnt over the base model

In [1]:
#basic
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt

#dataset/dataloader stuff
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision.io import read_image
import torchvision.transforms as transforms 
from torchvision.io import ImageReadMode

#network stuff
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
#pointless now
#datasets dont need gpus to function but usually on top
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
RANDOM_SEED = 42
DEVICE

'cuda'

In [8]:
class LeBirbs(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.image_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        img_path = self.img_labels.iloc[idx]['image_path']
        img_path = img_path[1:]
        img_path = os.path.join(self.image_dir,img_path)
        image = read_image(img_path, mode = ImageReadMode.RGB)
        image = image.to(torch.float)
        image = normalize(image)
        label = self.img_labels.iloc[idx]['label']
        label = torch.tensor(label)
        
        if self.transform:
            transform = transforms.transforms.CenterCrop(128)
            image = transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [9]:
#initiate the dataset object
train_dataset = LeBirbs('train_images.csv','train_images',True,None)
test_dataset = LeBirbs('test_images_path.csv','test_images',True,None)
train_dataloader = DataLoader(train_dataset,batch_size=128,shuffle=True)
test_dataloader = DataLoader(test_dataset,batch_size=128,shuffle=False)

In [10]:
#implementation without atributes
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(13456,7000)
        self.fc2 = nn.Linear(7000, 1000)
        self.fc3 = nn.Linear(1000, 201)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
net.to(DEVICE)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=13456, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_features=1000, bias=True)
  (fc3): Linear(in_features=1000, out_features=201, bias=True)
)

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
num_epochs = 25

In [12]:
%%time
for epoch in range(num_epochs):

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        inputs, labels = data[0].to(DEVICE), data[1].to(DEVICE)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(epoch,running_loss)

print('Finished Training')

0 1311.4973359107971
1 1299.4948244094849
2 1291.2570519447327
3 1285.2879219055176
4 1281.8681416511536
5 1279.7553009986877
6 1278.1751046180725
7 1277.1524214744568
8 1276.391191482544
9 1275.5489678382874
10 1274.5972609519958
11 1274.4927048683167
12 1273.5810570716858
13 1272.896478652954
14 1271.8993310928345
15 1270.9317827224731
16 1269.4513001441956
17 1266.51953125
18 1260.7991743087769
19 1245.6558094024658
20 1210.621669769287
21 1145.4528732299805
22 1059.5380175113678
23 953.6598377227783
24 859.0756077766418
Finished Training
CPU times: total: 9h 44min 33s
Wall time: 1h 41min 37s


In [None]:
#can save trained models, too large file size for github
#PATH = 'Nets/DK_1_net.pth'
#torch.save(net.state_dict(), PATH)

In [13]:
test_file = pd.read_csv('test_images_sample.csv')

all_predictions = []
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data[0].to(DEVICE), data[1].to(DEVICE)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        predictions = torch.Tensor.tolist(predicted)
        all_predictions.extend(predictions)

In [14]:
test_file['label'] = all_predictions
test_file.to_csv('test_images_predictions_dk.csv',index=False)