In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import sys
sys.path.append('..')
import module.dataloader as dataloader
from tqdm import tqdm
from model_architecture import Network
from model_architecture_improved import NetworkImproved, CustomCNN

In [None]:
# --- Proposal / batching hyper-parameters --------------------------------
PROPOSAL_SIZE = (128, 128)
BATCH_SIZE = 100
BALANCE = 0.5

# Per-channel statistics shared by both pipelines below
# (presumably the usual ImageNet mean/std -- confirm if the backbone changes).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: NumPy array -> PIL image -> colour augmentation ->
# tensor in [0, 1] -> channel-wise normalisation.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Validation pipeline: same conversion and normalisation, no augmentation.
normalize_only = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Positional directory arguments handed to PotholeDataset, in the order the
# constructor is called with them; the first and third are the same folder.
_DATASET_DIRS = (
    '../Potholes/annotated-images/',
    '../Potholes/labeled_proposals/',
    '../Potholes/annotated-images/',
)

# Keyword arguments that are identical for every split.
_SHARED_DATASET_KWARGS = dict(
    proposals_per_batch=BATCH_SIZE,
    proposal_size=PROPOSAL_SIZE,
    balance=BALANCE,
)

dataset_train = dataloader.PotholeDataset(
    *_DATASET_DIRS, transform=transform, split='train', **_SHARED_DATASET_KWARGS
)
dataset_val = dataloader.PotholeDataset(
    *_DATASET_DIRS, transform=normalize_only, split='val', **_SHARED_DATASET_KWARGS
)
# dataset_test = dataloader.PotholeDataset('../Potholes/annotated-images/', '../Potholes/labeled_proposals/', '../Potholes/annotated-images/', proposals_per_batch=BATCH_SIZE, proposal_size=PROPOSAL_SIZE, balance=BALANCE, split='test')

# The loaders yield one dataset item at a time (batch_size=1); presumably each
# item already bundles the proposals of one image -- confirm in module.dataloader.
train_loader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=1,
    shuffle=True,
    num_workers=4,
)
val_loader = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=1,
    shuffle=False,
)
# test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=1, shuffle=False)


In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
print(device)

def initialize_weights(model):
    """Re-initialise the parameters of the supported layers inside ``model``.

    Iterates over ``model.modules()`` (which includes ``model`` itself) and
    initialises each layer in place according to its type:

    * ``nn.Conv2d``: Kaiming-normal weights (``mode='fan_out'``,
      ``nonlinearity='relu'``); the bias is left untouched.
    * ``nn.BatchNorm2d`` / ``nn.BatchNorm1d``: weight set to 1, bias set to 0.
    * ``nn.Linear``: Xavier-normal weights, bias set to 0.

    Layers built without the optional parameters (``bias=False`` or
    ``affine=False``) expose ``None`` for them; those are skipped instead of
    raising inside ``nn.init``.

    Args:
        model: an ``nn.Module``; modified in place.

    Returns:
        None.
    """
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
            # weight/bias are None when the layer was built with affine=False.
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)
            # bias is None when the layer was built with bias=False.
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)



# Build the network for the configured proposal size.
model = Network(proposal_size=PROPOSAL_SIZE)
# initialize_weights() already iterates over model.modules(), so one direct
# call covers every layer. model.apply(initialize_weights) would invoke it
# again for each submodule and redo the same initialisation repeatedly.
initialize_weights(model)
model.to(device)
# Initialize the optimizer
# optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# NOTE(review): with Adam, weight_decay is added to the gradient as an L2 term;
# torch.optim.AdamW applies decoupled decay if that is what is intended.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

cuda


In [None]:



def train(model, optimizer, num_epochs=10, pos_weight=4.0, lr_step_size=1, lr_gamma=0.1):
    """Train ``model`` and evaluate it on the validation loader after every epoch.

    Reads the notebook-level ``train_loader``, ``val_loader`` and ``device``.
    Every loader item is a dict; only element ``[0]`` of its
    ``'proposal_images'`` and ``'labels'`` entries is used, i.e. one optimiser
    step is taken per loader item. Labels get a trailing dimension and are
    cast to float so they match the model output passed to the loss.

    The loss is binary cross-entropy on logits with the positive class
    weighted by ``pos_weight``. Accuracy is computed per loader item (fraction
    of predictions with ``sigmoid(logit) > 0.5`` that equal the label) and
    averaged over the items processed in the epoch.

    Args:
        model: network returning one logit per proposal.
        optimizer: optimiser already bound to ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.
        pos_weight: weight of the positive class in the loss.
        lr_step_size: ``step_size`` forwarded to ``StepLR`` (in epochs).
        lr_gamma: ``gamma`` forwarded to ``StepLR``.

    Returns:
        dict with keys ``'train_acc'``, ``'val_acc'``, ``'train_loss'`` and
        ``'val_loss'``; each maps to a list with one value per epoch.

    NOTE(review): with the default ``lr_step_size=1`` / ``lr_gamma=0.1`` the
    learning rate is multiplied by 0.1 after every epoch, which may be why the
    recorded training loss stops improving after the third epoch. Consider a
    larger step size or gamma.
    """
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step_size, gamma=lr_gamma)

    # Built once here rather than on every loss evaluation.
    pos_weight_tensor = torch.tensor([float(pos_weight)], device=device)

    def loss_fun(output, target):
        """Positive-weighted binary cross-entropy on raw logits."""
        return F.binary_cross_entropy_with_logits(output, target, pos_weight=pos_weight_tensor)

    def unpack(batch):
        """Move one loader item to the device and shape its labels for the loss."""
        images = batch['proposal_images'][0].to(device)
        labels = batch['labels'][0].to(device).unsqueeze(1).float()
        return images, labels

    def batch_accuracy(output, labels):
        """Fraction of proposals in this item whose thresholded prediction matches."""
        predicted = torch.sigmoid(output.detach()) > 0.5
        return (labels == predicted).sum().cpu().item() / len(labels)

    out_dict = {
              'train_acc': [],
              'val_acc': [],
              'train_loss': [],
              'val_loss': []}

    for epoch in tqdm(range(num_epochs), unit='epoch'):
        # ---- training pass ----
        model.train()
        train_correct = 0
        train_loss = []
        for single_image_dict in tqdm(train_loader, total=len(train_loader)):
            proposal_image, label = unpack(single_image_dict)
            # Zero the gradients computed for each weight
            optimizer.zero_grad()
            # Forward pass through the network
            output = model(proposal_image)
            # Compute the loss
            loss = loss_fun(output, label)
            # Backward pass through the network
            loss.backward()
            # Update the weights
            optimizer.step()

            train_loss.append(loss.item())
            train_correct += batch_accuracy(output, label)

        # One scheduler step per epoch.
        lr_scheduler.step()

        # ---- validation pass (no gradients, eval-mode layers) ----
        val_loss = []
        val_correct = 0
        model.eval()
        with torch.no_grad():
            for single_val_dict in val_loader:
                proposal_image_val, label_val = unpack(single_val_dict)
                output = model(proposal_image_val)
                val_loss.append(loss_fun(output, label_val).cpu().item())
                val_correct += batch_accuracy(output, label_val)

        # Average the per-item accuracies over the items actually processed,
        # instead of relying on the length of a global dataset object.
        out_dict['train_acc'].append(train_correct / max(len(train_loss), 1))
        out_dict['val_acc'].append(val_correct / max(len(val_loss), 1))
        out_dict['train_loss'].append(np.mean(train_loss))
        out_dict['val_loss'].append(np.mean(val_loss))

        # The "test" figures printed below are computed on the validation loader.
        print(f"Loss train: {np.mean(train_loss):.3f}\t test: {np.mean(val_loss):.3f}\t",
              f"Accuracy train: {out_dict['train_acc'][-1]*100:.1f}%\t test: {out_dict['val_acc'][-1]*100:.1f}%")

    return out_dict

In [7]:
# Keep the per-epoch metrics under a name so later cells can plot or save them
# rather than relying on the transient Out[...] entry; the bare expression on
# the last line still displays the dict as the cell output.
history = train(model, optimizer)
history

100%|██████████| 463/463 [00:35<00:00, 13.09it/s]
 10%|█         | 1/10 [00:48<07:15, 48.34s/epoch]

Loss train: 0.745	 test: 1.284	 Accuracy train: 76.5%	 test: 73.7%


100%|██████████| 463/463 [00:36<00:00, 12.77it/s]
 20%|██        | 2/10 [01:37<06:29, 48.73s/epoch]

Loss train: 0.660	 test: 1.256	 Accuracy train: 78.8%	 test: 76.0%


100%|██████████| 463/463 [00:36<00:00, 12.83it/s]
 30%|███       | 3/10 [02:26<05:41, 48.79s/epoch]

Loss train: 0.635	 test: 1.258	 Accuracy train: 79.4%	 test: 75.9%


100%|██████████| 463/463 [00:36<00:00, 12.80it/s]
 40%|████      | 4/10 [03:15<04:53, 48.89s/epoch]

Loss train: 0.635	 test: 1.378	 Accuracy train: 79.6%	 test: 76.1%


100%|██████████| 463/463 [00:36<00:00, 12.83it/s]
 50%|█████     | 5/10 [04:04<04:04, 48.91s/epoch]

Loss train: 0.627	 test: 1.222	 Accuracy train: 79.5%	 test: 76.3%


100%|██████████| 463/463 [00:36<00:00, 12.72it/s]
 60%|██████    | 6/10 [04:53<03:16, 49.08s/epoch]

Loss train: 0.629	 test: 1.261	 Accuracy train: 79.2%	 test: 75.6%


100%|██████████| 463/463 [00:35<00:00, 13.10it/s]
 70%|███████   | 7/10 [05:41<02:26, 48.85s/epoch]

Loss train: 0.632	 test: 1.219	 Accuracy train: 79.4%	 test: 75.5%


100%|██████████| 463/463 [00:35<00:00, 13.07it/s]
 80%|████████  | 8/10 [06:29<01:37, 48.58s/epoch]

Loss train: 0.630	 test: 1.001	 Accuracy train: 79.6%	 test: 75.4%


100%|██████████| 463/463 [00:34<00:00, 13.43it/s]
 90%|█████████ | 9/10 [07:17<00:48, 48.29s/epoch]

Loss train: 0.626	 test: 1.076	 Accuracy train: 79.6%	 test: 76.4%


100%|██████████| 463/463 [00:36<00:00, 12.72it/s]
100%|██████████| 10/10 [08:07<00:00, 48.72s/epoch]

Loss train: 0.632	 test: 1.085	 Accuracy train: 79.3%	 test: 76.6%





{'train_acc': [0.764614431945842,
  0.788123208879375,
  0.7942306250526412,
  0.7960226581720582,
  0.794760249939252,
  0.791658771587995,
  0.7942055819650233,
  0.7960749125770411,
  0.7964621019673941,
  0.7928781214550903],
 'val_acc': [0.73681853878471,
  0.7598549255973275,
  0.7586481425699927,
  0.7610626320596421,
  0.7632519994444198,
  0.7563805245412273,
  0.7551878956841381,
  0.7536868752565169,
  0.7636374655830314,
  0.7661844605639295],
 'train_loss': [0.744725150500724,
  0.6601844908866717,
  0.6349266919731836,
  0.6352231899682698,
  0.627282217692093,
  0.6292411004621575,
  0.631784806241227,
  0.6299278192527866,
  0.6256811970253485,
  0.6318037473228528],
 'val_loss': [1.283623400962714,
  1.2564314004748758,
  1.2584266275769533,
  1.3777752044526013,
  1.2215861396657095,
  1.2610091237407741,
  1.2190603114137746,
  1.0012043223838614,
  1.0757801502642006,
  1.0847427530421152]}

In [8]:
# Persist only the parameters/buffers (state_dict) of the trained model.
trained_state = model.state_dict()
torch.save(trained_state, 'model_2.pth')