In [1]:
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


In [2]:
class CustomDataset(Dataset):
    """Image-classification dataset driven by a plain-text index file.

    Every non-blank line of ``txt_file`` holds an image path followed by an
    integer class label, separated by whitespace. Samples are read lazily with
    OpenCV in ``__getitem__``.

    Args:
        txt_file: Path of the index file.
        transform: Optional callable applied to the resized RGB image array.
            When omitted, the image is returned as a float32 tensor in
            channel-first layout without any rescaling of pixel values.
    """

    def __init__(self, txt_file, transform=None):
        self.img_files, self.labels = self.load_img_files(txt_file)
        self.transform = transform

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.img_files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        Raises:
            FileNotFoundError: If OpenCV could not read the image file.
        """
        img = cv2.imread(self.img_files[idx])

        # The read result is None (rather than an exception) on failure.
        if img is None:
            raise FileNotFoundError(f"Image {self.img_files[idx]} not found")

        img = cv2.resize(img, (256, 256))
        # OpenCV loads images as BGR; everything downstream expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform:
            img = self.transform(img)
        else:
            # HWC -> CHW, converted to float32 with the original value range.
            img = torch.tensor(img.transpose((2, 0, 1)), dtype=torch.float32)

        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return img, label

    def load_img_files(self, filename):
        """Parse the index file into parallel lists of paths and labels.

        Blank lines are ignored. The label is taken as the last
        whitespace-separated token of a line, so image paths may themselves
        contain spaces and the separator may be a tab or several spaces.

        Returns:
            A ``(img_files, labels)`` tuple of equally long lists.

        Raises:
            ValueError: If a non-blank line has no label or the label is not
                an integer.
        """
        img_files, labels = [], []

        with open(filename, 'r') as f:
            for line_no, raw_line in enumerate(f, start=1):
                entry = raw_line.strip()
                if not entry:
                    continue  # tolerate blank / trailing empty lines

                parts = entry.rsplit(None, 1)
                if len(parts) != 2:
                    raise ValueError(
                        f"{filename}:{line_no}: expected '<image path> <label>', got {entry!r}"
                    )

                fn, label = parts
                try:
                    label_id = int(label)
                except ValueError:
                    raise ValueError(
                        f"{filename}:{line_no}: label {label!r} is not an integer"
                    ) from None

                img_files.append(fn)
                labels.append(label_id)

        return img_files, labels

In [3]:
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 conv + batch-norm pairs.

    The input is added back onto the output of the second batch norm before
    the final ReLU. When the stride is not 1 or the channel count changes, the
    skip path is a 1x1 conv + batch norm; otherwise it is an empty
    ``nn.Sequential`` (identity).
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path: only the first conv carries the stride.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip path: identity when shapes already match, projection otherwise.
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        y = self.bn1(self.conv1(x)).relu()
        y = self.bn2(self.conv2(y))
        y += self.shortcut(x)
        return y.relu()

In [4]:
class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The last 1x1 conv widens the output to ``expansion * planes`` channels.
    The stride is applied by the 3x3 conv. The skip path is a 1x1 conv +
    batch norm whenever the stride is not 1 or the input channel count differs
    from the output; otherwise it is an empty ``nn.Sequential`` (identity).
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path: reduce, spatial conv (strided), expand.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Skip path: identity when shapes already match, projection otherwise.
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        y = self.bn1(self.conv1(x)).relu()
        y = self.bn2(self.conv2(y)).relu()
        y = self.bn3(self.conv3(y))
        y += self.shortcut(x)
        return y.relu()

In [5]:
class ResNet(nn.Module):
    """ResNet-style classifier assembled from a configurable residual block.

    Args:
        block: Block class. It must expose an ``expansion`` attribute and be
            constructible as ``block(in_planes, planes, stride)``.
        num_blocks: Sequence whose first four entries give the number of
            blocks in each of the four stages.
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=1000):
        super().__init__()
        # Channel count entering the next block; advanced by _make_layer.
        self.in_planes = 64

        # Stem: 7x7 strided conv, batch norm, 3x3 strided max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first starts with stride 2.
        self.layer1 = self._make_layer(block, planes=64, num_blocks=num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, planes=128, num_blocks=num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, planes=256, num_blocks=num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, planes=512, num_blocks=num_blocks[3], stride=2)

        # Head: global average pool followed by a linear classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block receives ``stride``."""
        per_block_strides = [stride, *([1] * (num_blocks - 1))]
        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes this block's (expanded) output width.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        features = self.maxpool(torch.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        pooled = self.avgpool(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

In [6]:
def ResNet18(num_classes=1000):
    """Build a ResNet from ``BasicBlock`` with stage depths 2, 2, 2, 2."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(block=BasicBlock, num_blocks=stage_depths, num_classes=num_classes)

def ResNet34(num_classes=1000):
    """Build a ResNet from ``BasicBlock`` with stage depths 3, 4, 6, 3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block=BasicBlock, num_blocks=stage_depths, num_classes=num_classes)

def ResNet26(num_classes=1000):
    """Build a ResNet from ``Bottleneck`` with stage depths 2, 2, 2, 2."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(block=Bottleneck, num_blocks=stage_depths, num_classes=num_classes)

def ResNet50(num_classes=1000):
    """Build a ResNet from ``Bottleneck`` with stage depths 3, 4, 6, 3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block=Bottleneck, num_blocks=stage_depths, num_classes=num_classes)

In [7]:
# Function to find the latest checkpoint
def find_latest_checkpoint(model_name):
    """Locate the checkpoint with the highest epoch number for ``model_name``.

    Looks inside the directory ``model_name`` for files named
    ``<model_name>..._epoch<N>.pth`` and picks the one with the largest
    numeric ``N``. Epochs are compared as integers rather than as strings, so
    ``epoch100`` correctly ranks above ``epoch99``. Files with a matching
    prefix and suffix but no parseable epoch number are ignored.

    Args:
        model_name: Used both as the directory to scan and as the required
            file-name prefix.

    Returns:
        ``(path, epoch)`` for the newest checkpoint, or ``(None, 0)`` when the
        directory does not exist or contains no usable checkpoint.
    """
    if not os.path.isdir(model_name):
        return None, 0

    best_file, best_epoch = None, -1
    for fname in os.listdir(model_name):
        if not (fname.startswith(model_name) and fname.endswith('.pth')):
            continue

        pieces = fname.split('_epoch')
        if len(pieces) < 2:
            continue  # e.g. "<model_name>-notes.pth": no epoch tag at all

        digits = pieces[1].split('.')[0]
        if not digits.isdecimal():
            continue  # epoch tag present but not a plain number

        epoch = int(digits)
        # Ties (e.g. "epoch5" vs "epoch05") are broken by name so the result
        # does not depend on directory listing order.
        if epoch > best_epoch or (epoch == best_epoch and fname > best_file):
            best_file, best_epoch = fname, epoch

    if best_file is None:
        return None, 0
    return os.path.join(model_name, best_file), best_epoch

In [8]:
# Training hyperparameters
num_epochs = 30
batch_size = 16

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Training-time preprocessing; the random horizontal flip is data augmentation.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Evaluation preprocessing: identical but without the random flip, so that
# validation/test metrics are deterministic and comparable across epochs.
eval_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the training, validation and test datasets
train_dataset = CustomDataset(txt_file='train.txt', transform=transform)
val_dataset = CustomDataset(txt_file='val.txt', transform=eval_transform)
test_dataset = CustomDataset(txt_file='test.txt', transform=eval_transform)

train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)


def evaluate(model, loader, criterion, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over all samples in ``loader``.

    The model is switched to eval mode and gradients are disabled. Both values
    are plain Python floats; they are NaN when the loader yields no samples.
    """
    model.eval()
    total_loss, num_correct, num_samples = 0.0, 0, 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            # criterion averages over the batch; weight by batch size so the
            # final figure is a per-sample mean even with a short last batch.
            total_loss += criterion(outputs, labels).item() * inputs.size(0)
            num_correct += (outputs.argmax(dim=1) == labels).sum().item()
            num_samples += inputs.size(0)

    if num_samples == 0:
        return float('nan'), float('nan')
    return total_loss / num_samples, num_correct / num_samples


####################################################################################################
# Initialize the ResNet model, loss function, and optimizer
# Size the classifier from the largest label id so that label sets which are
# not contiguous from 0 still index a valid output unit.
num_classes = max(train_dataset.labels, default=-1) + 1
model = ResNet34(num_classes).to(device)
model_name = 'ResNet34'
os.makedirs(model_name, exist_ok=True)

latest_checkpoint, start_epoch = find_latest_checkpoint(model_name)

if latest_checkpoint:
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only machine.
    model.load_state_dict(torch.load(latest_checkpoint, map_location=device))
    print(f"Loaded checkpoint '{latest_checkpoint}' (epoch {start_epoch})")
else:
    start_epoch = 0

# Log file locations
batch_loss_log_path = os.path.join(model_name, f'{model_name}-batch_loss_log.txt')
epoch_log_path = os.path.join(model_name, f'{model_name}-epoch_log.txt')
test_log_path = os.path.join(model_name, f'{model_name}-test_log.txt')
# Append when resuming so the history of earlier epochs is not wiped.
log_mode = "a" if start_epoch > 0 else "w"

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# The context manager guarantees all three logs are flushed and closed even if
# training is interrupted by an exception.
with open(batch_loss_log_path, log_mode) as batch_loss_log, \
        open(epoch_log_path, log_mode) as epoch_log, \
        open(test_log_path, log_mode) as test_log:

    # Training loop; starts after the last completed epoch when resuming.
    for epoch in range(start_epoch, num_epochs):
        print(f'Epoch {epoch + 1}, start')
        model.train()

        for i, (inputs, labels) in enumerate(train_loader, 0):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            if i % 10 == 9:    # log the loss of every 10th mini-batch
                batch_loss_log.write(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {loss.item():.3f}\n')

        # One checkpoint per epoch; the zero-padded epoch number is what
        # find_latest_checkpoint parses when resuming.
        model_save_path = os.path.join(model_name, f'{model_name}_epoch{epoch+1:02}.pth')
        torch.save(model.state_dict(), model_save_path)

        # Validation
        epoch_val_loss, epoch_val_acc = evaluate(model, val_loader, criterion, device)
        epoch_log.write(f'Epoch {epoch + 1}, Validation Loss: {epoch_val_loss:.3f}, Validation Accuracy: {epoch_val_acc:.3f}\n')
        print(f'Validation Loss: {epoch_val_loss:.3f}, Validation Accuracy: {epoch_val_acc:.3f}')

    # Final evaluation on the held-out test set
    epoch_test_loss, epoch_test_acc = evaluate(model, test_loader, criterion, device)
    test_log.write(f'Epoch {num_epochs}, Test Loss: {epoch_test_loss:.3f}, Test Accuracy: {epoch_test_acc:.3f}\n')
    print(f'Test Loss: {epoch_test_loss:.3f}, Test Accuracy: {epoch_test_acc:.3f}')

print('Finished Training')

Using device: cuda
Epoch 1, start


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Validation Loss: 2.998, Validation Accuracy: 0.173
Epoch 2, start


  return F.conv2d(input, weight, bias, self.stride,


Validation Loss: 2.675, Validation Accuracy: 0.289
Epoch 3, start
Validation Loss: 2.254, Validation Accuracy: 0.340
Epoch 4, start
Validation Loss: 1.980, Validation Accuracy: 0.416
Epoch 5, start
Validation Loss: 1.621, Validation Accuracy: 0.520
Epoch 6, start
Validation Loss: 1.458, Validation Accuracy: 0.567
Epoch 7, start
Validation Loss: 1.356, Validation Accuracy: 0.620
Epoch 8, start
Validation Loss: 1.455, Validation Accuracy: 0.580
Epoch 9, start
Validation Loss: 1.331, Validation Accuracy: 0.618
Epoch 10, start
Validation Loss: 3.020, Validation Accuracy: 0.531
Epoch 11, start
Validation Loss: 1.558, Validation Accuracy: 0.624
Epoch 12, start
Validation Loss: 1.517, Validation Accuracy: 0.613
Epoch 13, start
Validation Loss: 1.566, Validation Accuracy: 0.624
Epoch 14, start
Validation Loss: 1.701, Validation Accuracy: 0.618
Epoch 15, start
Validation Loss: 1.647, Validation Accuracy: 0.616
Epoch 16, start
Validation Loss: 1.830, Validation Accuracy: 0.602
Epoch 17, start
Va