In [9]:
import os

#os.environ["LD_PRELOAD"] = "/opt/conda/lib/libiomp5.so"
os.environ["OMP_NUM_THREADS"] = "8"
os.environ["MKL_NUM_THREADS"] = "8"
#os.environ["MKL_VERBOSE"] = "1"
os.environ["OMP_DISPLAY_ENV"] = "TRUE"
#os.environ["OMP_PROC_BIND"] = "CLOSE"
#os.environ["KMP_AFFINITY"] = "verbose,granularity=fine,compact,1,0"
os.environ["KMP_AFFINITY"] = "granularity=verbose,fine,proclist=[0,1,2,3,4,5,6,7],explicit"
os.environ["KMP_BLOCKTIME"] = "1"
#os.environ["OMP_SCHEDULE"] = "STATIC"
#os.environ["OMP_WAIT_POLICY"] = "active"
#os.environ["KMP_LIBRARY"] = "turnaround"



In [10]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torch.utils.data.sampler import SubsetRandomSampler

print(torch.__version__)

2.0.0+cpu


In [14]:
def data_loader(data_dir, batch_size, random_seed=42, valid_size=0.1, shuffle=True, test=False):

    transform = transforms.Compose([transforms.ToTensor()])
    
    
    
#    if test:
#        dataset = datasets.Kitti(root=data_dir, train=True, download=True, transform=transform)
#        data_loader = torch.utils.data.Kitti(dataset, batch_size=batch_size, shuffle=shuffle)
#        return data_loader

    # load the dataset
    train_dataset = datasets.Kitti(root=data_dir, train=True, download=True, transform=transform)
    valid_dataset = datasets.Kitti(root=data_dir, train=False, download=True, transform=transform)

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
 
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)

In [15]:
class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.layer4 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.fc1 = nn.Sequential(
            nn.Linear(256*2*2, 512),
            nn.Dropout(0.25))
        
        self.fc2 = nn.Linear(512, num_classes)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [16]:
#optimizer = torch.optim.Adam
max_lr = 0.00001
grad_clip = 0.1
weight_decay = 0.005

# Device configuration
device = 'cpu'
num_classes = 10
num_epochs = 10
batch_size = 64
learning_rate = 0.0001
data_dir="/tmp"

# CIFAR10 dataset 
train_loader, valid_loader = data_loader(data_dir=data_dir, batch_size=batch_size)
test_loader = data_loader(data_dir=data_dir, batch_size=batch_size, test=True)

#model = VGG_test(num_classes).to(device)
model = CNN(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
#scheduler = torch.optim.lr_scheduler.OneCycleLR
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)
optimizer = torch.optim.Adam(model.parameters(), max_lr, weight_decay=weight_decay)
#scheduler = scheduler(optimizer, max_lr, epochs=num_epochs, steps_per_epoch=len(train_loader))

In [17]:
%%time
# Train the model
total_step = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
       
    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
            
    # Validation
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
    
        print('Accuracy of the network on the {} validation images: {:.2f} %'.format(5000, 100 * correct / total)) 

RuntimeError: stack expects each tensor to be equal size, but got [3, 375, 1242] at entry 0 and [3, 376, 1241] at entry 1

In [None]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print('Accuracy of the network on the {} test images: {} %'.format(10000, 100 * correct / total))   