In [2]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes, so edits to the local Libs.* files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:

import csv
import os
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from Libs.Dataset import Dataset
from Libs.Model import Net
from Libs.train import train_model, eval_model
# Seed every random number generator imported by this notebook so that a
# Restart & Run All reproduces the same results.
# Python's `random` is seeded as well: it was previously left unseeded, and
# the project code in Libs (and older torchvision transforms) may draw from
# it -- TODO confirm which of them actually do.
random.seed(0)
np.random.seed(0)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(0)

<torch._C.Generator at 0x7f3594e2ff30>

In [4]:
# Number of samples per mini-batch; used as batch_size for the train, test
# and eval DataLoaders created later in this notebook.
BATCH_SIZE = 256

In [5]:
# Locations of the data, relative to the notebook's working directory.
# Image folder passed to Dataset to build the train / held-out split.
dir_training = "./data/training_data/training_data"
# Image folder passed to Dataset (eval=True) for the submission predictions.
# NOTE(review): the doubled slash looks unintentional.
dir_testing = "./data//testing_data/testing_data"
# CSV read with pandas; its "label" column defines the set of classes.
csv_file = "data/training_labels.csv"

In [6]:

# Build the two lookup tables that translate between class labels and the
# integer ids used as network targets.
df = pd.read_csv(csv_file)

# dict.fromkeys keeps only the first occurrence of each label, in file order,
# so ids are handed out in order of first appearance (0, 1, 2, ...).
label_ids = {name: idx for idx, name in enumerate(dict.fromkeys(df["label"]))}

# Inverse mapping: integer id -> label, used when decoding predictions.
id_labels = dict(zip(label_ids.values(), label_ids.keys()))

## Dataset preprocessing

In [7]:
# Training-time image pipeline: resize to a fixed 400x400 input, apply light
# random augmentation (horizontal flip, rotation of up to 15 degrees), then
# convert to a tensor and normalise.
train_trans = transforms.Compose([
    transforms.Resize((400, 400)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),  # range [0, 255] -> [0.0, 1.0]
    # Normalize expects sequences of per-channel values. `(0.5)` is just the
    # float 0.5; `(0.5,)` is a one-element tuple that broadcasts over the
    # channel dimension and yields the same numbers.
    transforms.Normalize((0.5,), (0.5,)),
])

# Load the labelled images once, then split them into a training part and a
# held-out part using the project's own train_test_split().
full_dataset = Dataset(dir_training, csv_file, label_ids, transform=train_trans)
train_dataset, test_dataset = full_dataset.train_test_split()
print("train dataset size: ", train_dataset.data.shape[0])
print("test dataset size: ", test_dataset.data.shape[0])
print(test_dataset.data[0])

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=False,
    num_workers=4,
)

# NOTE(review): test_dataset is produced from a Dataset built with the
# augmenting train_trans, so the held-out images presumably get random
# flips/rotations too -- confirm inside Libs.Dataset.
# Shuffling is pointless for evaluation, so the held-out loader keeps a fixed
# order.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=4,
)



train dataset size:  10066
test dataset size:  1119
011396.jpg


## Model

In [8]:
# Build the network, loss, optimizer and LR scheduler, optionally resume from
# a saved checkpoint, then run training via Libs.train.train_model.

# Epoch number of the checkpoint to resume from; <= 0 trains from scratch.
TRAIN_EPOCH_LOAD = 10
# Directory holding the "<epoch>.pth" checkpoint files.
MODEL_DIR = "./model/"
# Passed to train_model as the epoch at which training stops.
end_epoch = 15
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = Net(use_att=True)
if torch.cuda.device_count() > 1:
    # More than one GPU is visible: wrap the model for data-parallel execution.
    model = nn.DataParallel(model)
    print("torch.cuda.device_count(): ", torch.cuda.device_count())

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# mode='max' means the monitored metric is expected to grow.
# NOTE(review): with the default relative threshold mode, threshold=0.9 means
# a value only counts as an improvement when it exceeds 1.9x the best one,
# which would make the scheduler cut the LR after almost every `patience`
# window -- confirm this is intended.
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=2, threshold=0.9)


if TRAIN_EPOCH_LOAD <= 0:
    start_epoch = 0
else:
    start_epoch = TRAIN_EPOCH_LOAD
    checkpoint_path = '{}.pth'.format(os.path.join(MODEL_DIR, str(TRAIN_EPOCH_LOAD)))
    # Deserialize onto the CPU. Without map_location the tensors are restored
    # onto the GPU they were saved from, which fails when that GPU is busy;
    # load_state_dict copies the values into the existing parameters anyway.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(checkpoint["model"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    # Make sure the optimizer's per-parameter buffers (e.g. momentum) live on
    # the training device.
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)

    # Restore the scheduler's bookkeeping INTO the scheduler created above.
    # Rebinding `lrscheduler` to the unpickled object would leave it attached
    # to the optimizer that was pickled with it, so its LR reductions would
    # never reach the `optimizer` used for training.
    saved_scheduler = checkpoint["scheduler"]
    if not isinstance(saved_scheduler, dict):
        # Checkpoint stores the scheduler object itself; state_dict() drops
        # its stale optimizer reference.
        saved_scheduler = saved_scheduler.state_dict()
    lrscheduler.load_state_dict(saved_scheduler)


model.train()
model.to(device)


model, training_losses, training_accs, test_accs = train_model(model, train_loader, test_loader, criterion, optimizer, lrscheduler, start_epoch, end_epoch)


torch.cuda.device_count():  10


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 10.76 GiB total capacity; 147.55 MiB already allocated; 22.12 MiB free; 156.00 MiB reserved in total by PyTorch)

## Predict

In [15]:
# Evaluate a saved checkpoint on the held-out split, predict a label for every
# image in the testing folder and write the result to submission.csv.
# Relies on `model`, `device`, `test_loader` and `id_labels` from earlier cells.
predict = {}  # image name -> predicted label
load_epoch = 10  # epoch number of the checkpoint to evaluate

checkpoint_path = '{}.pth'.format(os.path.join("./model/", str(load_epoch)))
# Deserialize onto the CPU so loading does not depend on the GPU the checkpoint
# was saved from; load_state_dict copies the weights into the existing model.
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu")["model"])
model.eval()
print("Test accuracy of epoch {}: {}".format(load_epoch, eval_model(model, test_loader)))

# Inference-time pipeline: same resize/normalisation as training, without the
# random augmentation.
eval_trans = transforms.Compose([
    transforms.Resize((400, 400)),
    transforms.ToTensor(),  # range [0, 255] -> [0.0, 1.0]
    # One-element tuples rather than bare floats: Normalize expects sequences.
    transforms.Normalize((0.5,), (0.5,)),
])
eval_dataset = Dataset(dir_testing, csv_file, label_ids=None, transform=eval_trans, eval=True)
# No shuffling: keeps the prediction order, and therefore the row order of the
# submission file, deterministic.
eval_loader = torch.utils.data.DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

with torch.no_grad():
    for b, (imgs, img_names) in enumerate(eval_loader):
        if b % 30 == 0:
            print(b)  # coarse progress indicator

        output = model(imgs.to(device))
        # Highest-scoring class per image, fetched from the device once per
        # batch instead of once per element.
        class_ids = torch.argmax(output, 1).tolist()
        for img_name, class_id in zip(img_names, class_ids):
            predict[img_name] = id_labels[class_id]

with open('submission.csv', 'w', newline='') as csvfile:
    fieldnames = ["id", "label"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for key, value in predict.items():
        writer.writerow({fieldnames[0]: key, fieldnames[1]: value})
print("Done")
    

Accuracy of the network on the test images: 86 %
Test accuracy of epoch 10: 86.23771224307417
0
Done
