In [1]:
import os
import warnings
warnings.filterwarnings('ignore')

os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
#random seeds
torch.manual_seed(44)
np.random.seed(44)

In [4]:
#Data loaders

def get_train_valid_loader(data_dir, batch_size = 16, valid_size = 0.1):
    normalize = transforms.Normalize(
        mean = [0.4914, 0.4822, 0.4465],
        std = [0.2023, 0.1994, 0.2010],
    )
    
    #define transforms
    valid_transform = transforms.Compose([
        # transforms.Resize((224,224)),
        transforms.ToTensor(),
        normalize,
    ])
    
    train_transform = transforms.Compose([
            # transforms.Resize((224,224)),
            transforms.ToTensor(),
            normalize,
    ])
    
    # load the dataset
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=False, transform=train_transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=False, transform=valid_transform,
    )
    
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)
 
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)

In [5]:
def get_test_loader(data_dir, batch_size = 16):
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # define transform
    transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        normalize,
    ])

    dataset = datasets.CIFAR10(
        root=data_dir, train=False,
        download=False, transform=transform,
    )

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size
    )

    return data_loader

In [6]:
# DenseNet model

In [7]:
class Block(nn.Module):
    
    def __init__(self, in_planes, out_planes):
        super(Block, self).__init__()

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=1, stride=1,padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
        # self.droprate = dropRate
        
    def forward(self, x):
    
        out = self.bn1(x)
        out = self.relu(out)
        out = self.conv1(out)
        # if self.droprate > 0:
        #     out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
            
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv2(out)
        # if self.droprate > 0:
        #     out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
            
        return torch.cat([x, out], 1)


In [8]:
class TransitionBlock(nn.Module):
    
    def __init__(self, in_planes, out_planes):
        super(TransitionBlock, self).__init__()
        
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        # self.droprate = dropRate
        
    def forward(self, x):
        
        out = self.bn1(x)
        out = self.relu(out)
        out = self.conv1(out)
        
        # if self.droprate > 0:
        #     out = F.dropout(out, p=self.droprate, inplace=False, training=self.training)
            
        return F.avg_pool2d(out, 2)

In [9]:
class DenseBlock(nn.Module):
    
    def __init__(self, nb_layers, in_planes, growth_rate, block):
        super(DenseBlock, self).__init__()
        
        self.layer = self._make_layer(block, in_planes, growth_rate, nb_layers)
        
    def _make_layer(self, block, in_planes, growth_rate, nb_layers):
        layers = []
        for i in range(nb_layers):
            layers.append(block(in_planes+i*growth_rate, growth_rate))
            
        return nn.Sequential(*layers)
    
    def forward(self, x):
        return self.layer(x)

In [10]:
class DenseNet(nn.Module):
    def __init__(self, depth, num_classes, growth_rate=12, reduction=0.5):
        super(DenseNet, self).__init__()
        
        in_planes = 2 * growth_rate
        n = (depth - 4) / 3
        block = Block
        n = int(n)
        # 1st conv before any dense block
        # self.conv1 = nn.Conv2d(3, in_planes, kernel_size=7, stride=2, padding=2, bias=False)
        # self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=1, padding=1, bias=False)
        # 1st block
        self.block1 = DenseBlock(n, in_planes, growth_rate, block)
        in_planes = int(in_planes+n*growth_rate)
        self.trans1 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)))
        in_planes = int(math.floor(in_planes*reduction))
        # 2nd block
        self.block2 = DenseBlock(n, in_planes, growth_rate, block)
        in_planes = int(in_planes+n*growth_rate)
        self.trans2 = TransitionBlock(in_planes, int(math.floor(in_planes*reduction)))
        in_planes = int(math.floor(in_planes*reduction))
        # 3rd block
        self.block3 = DenseBlock(n, in_planes, growth_rate, block)
        in_planes = int(in_planes+n*growth_rate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(in_planes, num_classes)
        self.in_planes = in_planes

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.block1(out))
        out = self.trans2(self.block2(out))
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.in_planes)
        return self.fc(out)

In [35]:
model = DenseNet( 50, 10, 12, reduction=0.5)

In [36]:
model = model.to(device)

In [37]:
#loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay = 0.0001)  


In [38]:
num_epochs = 10
batch_size = 16

In [39]:
train_loader, valid_loader = get_train_valid_loader(data_dir = 'C:\DATA\WIDS',
                                                    batch_size = 16,
                                                    valid_size = 0.1
                                                   )

test_loader = get_test_loader(data_dir = 'C:\DATA\WIDS',
                              batch_size = 16)

In [40]:
print(len(valid_loader),len(train_loader))

313 2813


In [41]:
# Initialize lists to store results
total_step = len(train_loader)
train_loss_list = []
valid_loss_list = []
valid_accuracy_list = []

In [42]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        train_loss = criterion(outputs, labels)
        
       # print("b" , i, "of 2813" , epoch, end = "\r") 
        # Backward and optimize
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        del images, labels, outputs
        torch.cuda.empty_cache()
        print("c" , i, "of", len(train_loader) , epoch, end = "\r") 
      
    # Store training loss for later visualization
    train_loss_list.append(train_loss.item())

            
    # Validation
    with torch.no_grad():
        correct = 0
        total = 0
        valid_loss = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            val_loss = criterion(outputs, labels)
            valid_loss += val_loss.item()
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
    
        # Calculate and store validation accuracy and loss
        accuracy = 100 * correct / total
        valid_accuracy_list.append(accuracy)
        valid_loss_list.append(valid_loss / len(valid_loader))  # Average loss per batch
    
    print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Validation Loss: {:.4f}, Validation Accuracy: {:.2f} %'
          .format(epoch + 1, num_epochs, i + 1, total_step, train_loss.item(), valid_loss / len(valid_loader), accuracy))

Epoch [1/10], Step [2813/2813], Train Loss: 1.9146, Validation Loss: 2.0232, Validation Accuracy: 17.70 %
Epoch [2/10], Step [2813/2813], Train Loss: 2.0460, Validation Loss: 2.0522, Validation Accuracy: 17.36 %
Epoch [3/10], Step [2813/2813], Train Loss: 1.9905, Validation Loss: 2.0125, Validation Accuracy: 18.28 %
c 2764 of 2813 3

KeyboardInterrupt: 

In [None]:
#plots

plt.figure(figsize=(18, 5))

# Plot Training Loss
plt.subplot(2, 3, 1)
plt.plot(train_loss_list, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.legend()

# Plot Validation Loss
plt.subplot(2, 3, 2)
plt.plot(valid_loss_list, label='Validation Loss', color='red')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Validation Loss over Epochs')
plt.legend()

# Plot Validation Accuracy
plt.subplot(2, 3, 3)
plt.plot(valid_accuracy_list, label='Validation Accuracy', color='orange')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Validation Accuracy over Epochs')
plt.legend()
plt.show()            