In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.utils.data as data
import os


# dataloader

In [2]:
TEST_DATA_PATH = './testdata'

In [3]:
TRAIN_DATA_PATH = './traindata'

In [4]:
# data transform, you can add different transform methods 

train_transform = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(), 
    transforms.Resize(224),
    transforms.ToTensor()
])

dataset = datasets.ImageFolder(TRAIN_DATA_PATH, transform=train_transform)
TOTAL_SIZE = len(dataset)

# split your data into train and val
ratio = 0.9 #90%
train_len = round(TOTAL_SIZE * ratio)
valid_len = round(TOTAL_SIZE * (1-ratio))

train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_len, valid_len])

# your setting
train_data_loader = data.DataLoader(train_dataset, batch_size=15, shuffle=True, num_workers = 2)
val_data_loader = data.DataLoader(val_dataset, batch_size = 10, num_workers = 2)
print(dataset)
print(dataset.class_to_idx)

Dataset ImageFolder
    Number of datapoints: 124
    Root location: ./traindata
    StandardTransform
Transform: Compose(
               RandomRotation(degrees=[-30.0, 30.0], interpolation=nearest, expand=False, fill=0)
               RandomHorizontalFlip(p=0.5)
               ColorJitter(brightness=None, contrast=None, saturation=None, hue=None)
               Resize(size=224, interpolation=bilinear)
               ToTensor()
           )
{'airplane': 0, 'bird': 1, 'car': 2, 'cat': 3, 'dog': 4, 'horse': 5}


# DATALOADER FOR THE WHOLE DATA

In [None]:
test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor()
])
testDataset = datasets.ImageFolder(TEST_DATA_PATH, transform = test_transform)
testDataLoader = data.DataLoader(test_dataset, num_workers = 2)
trainDataset = datasets.ImageFolder(TRAIN_DATA_PATH, transform = train_transform)
trainDataset = data.DataLoader(trainDataSet, shuffle = True, batch_size = 15, num_workers = 2)

In [5]:
# I have written the function for you this time, but it's strongly recommended that you 
# understand how to do training and validation


def train(model, data_loader, optimizer, epoch, verbose=True):
    model.train()
    loss_avg = 0.0
    for batch_idx, (data, target) in enumerate(data_loader):
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()
        output = model(data)
        
        # loss function
        loss = loss_fn(output, target)
        
        loss_avg = loss.item()
        
        # do back propagation
        loss.backward()
        optimizer.step()
        
        #print
        verbose_step = len(data_loader) // 10
        if batch_idx % verbose_step == 0 and verbose:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(data_loader.dataset),
                100. * batch_idx / len(data_loader), loss.item()))
    return loss_avg / len(data_loader)

def valid(model, data_loader):
    with torch.no_grad():
        model.eval()
        valid_loss = 0
        correct = 0
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            valid_loss +=  loss_fn(output, target) # sum up batch loss
            pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum().item() 

        valid_loss /= len(data_loader.dataset)
        print('\nValid set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            valid_loss, correct, len(data_loader.dataset),
            100. * correct / len(data_loader.dataset)))
    return float(correct) / len(data_loader.dataset)

In [7]:
class BasicBlock(nn.Module):
    
    expansion = 1
    
    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        dilation: int = 1,
    ):
        super(BasicBlock, self).__init__()
        

        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
            
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.norm1 = nn.BatchNorm2d(planes)
        self.activ = nn.ReLU(inplace = True)
        
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.norm2 = nn.BatchNorm2d(planes)
        self.stride = stride
        
        self.shortcut = nn.Sequential()
        if stride != 1 or inplanes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inplanes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )
        
    def forward(self, x):

        y = self.conv1(x)
        y = self.norm1(y)
        y = self.activ(y)

        y = self.conv2(y)
        y = self.norm2(y)

        y += self.shortcut(x)
        y = self.activ(y)
        return y
            

In [13]:
############## Build the model here ##########
class ResNet(nn.Module):
    def __init__(
        self,
        layer,
        block,
        num_classes=6,
    ):
    
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(64, block, layer[0], stride=1)
        self.layer2 = self._make_layer(128, block, layer[1], stride=2)
        self.layer3 = self._make_layer(256, block, layer[2], stride=2)
        self.layer4 = self._make_layer(512, block, layer[3], stride=2)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        self.activ = nn.ReLU(inplace=True)
        

    def _make_layer(self, planes, block, layer, stride):
        strides = [stride] + [1]*(layer-1)
        layers = []
        for stride in strides:
            layers.append(block(self.inplanes, planes, stride))
            self.in_planes = planes * block.expansion   
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.activ(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.pool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out
 


In [9]:
# using gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
####################  implement your optimizer ###################################
## yo can use any training methods if you want (ex:lr decay, weight decay.....)
model = ResNet([2,2,2,2], BasicBlock)
model.to(device=device)
lr = 0.1
optimizer = optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# start training
epochs = 300
acc = 0.0
for epoch in range(epochs):
    #model.train()
    train(model, train_data_loader, optimizer, epoch)
    accuracy = valid(model, val_data_loader)
    if accuracy > acc:
        acc = accuracy
        print("-------------saving model--------------")
        # save the model
        torch.save(model, "model.pth")

RuntimeError: Given groups=1, weight of size [128, 64, 3, 3], expected input[15, 128, 112, 112] to have 64 channels, but got 128 channels instead

In [None]:
test_transform = transforms.Compose([transforms.Resize((224,224)),
                                    transforms.ToTensor()
                                    ])

test_data = 
test_data_loader  = 

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load the model so that you don't need to train the model again
test_model = torch.load("model.pth").to(device)

In [None]:
def test(model,data_loader):
    with torch.no_grad():
        model.eval()
        valid_loss = 0
        correct = 0
        bs = test_data_loader.batch_size
        result = []
        for i, (data, target) in enumerate(test_data_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred =                                                # get the index of the max log-probability
            arr = pred.data.cpu().numpy()
            for j in range(pred.size()[0]):
                file_name = test_data.samples[i*bs+j][0].split('/')[-1]
                result.append((file_name,pred[j].cpu().numpy()[0]))
    return result

In [None]:
result = test(test_model,test_data_loader)

# Write results to csv