In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import sys
sys.path.append('..')
import module.dataloader as dataloader
from tqdm import tqdm

from model_architecture import Network

In [9]:
PROPOSAL_SIZE = (128, 128)
BATCH_SIZE = 50
BALANCE = 0.5

transform = transforms.Compose([
    transforms.ToPILImage(),  # Convert NumPy array to PIL Image
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(),    # Convert PIL Image to Tensor [0,1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # Normalize the tensor
                       std=[0.229, 0.224, 0.225])
])

normalize_only = transforms.Compose([
    transforms.ToPILImage(),  # Convert NumPy array to PIL Image
    transforms.ToTensor(),    # Convert PIL Image to Tensor [0,1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # Normalize the tensor
                       std=[0.229, 0.224, 0.225])
])


dataset_train = dataloader.PotholeDataset(
    '../Potholes/annotated-images/',
    '../Potholes/labeled_proposals/',
    '../Potholes/annotated-images/',
    transform=transform,
    proposals_per_batch=BATCH_SIZE,
    proposal_size=PROPOSAL_SIZE,
    balance=BALANCE,
    split='train'
)

dataset_val = dataloader.PotholeDataset(
    '../Potholes/annotated-images/',
    '../Potholes/labeled_proposals/',
    '../Potholes/annotated-images/',
    transform=normalize_only, 
    proposals_per_batch=BATCH_SIZE,
    proposal_size=PROPOSAL_SIZE,
    balance=BALANCE,
    split='val'
)
# dataset_test = dataloader.PotholeDataset('../Potholes/annotated-images/', '../Potholes/labeled_proposals/', '../Potholes/annotated-images/', proposals_per_batch=BATCH_SIZE, proposal_size=PROPOSAL_SIZE, balance=BALANCE, split='test')


train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=1, shuffle=True, num_workers=4)
val_loader = torch.utils.data.DataLoader(dataset_val, batch_size=1, shuffle=False)
# test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=1, shuffle=False)


In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

def init_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)

model = Network(proposal_size=PROPOSAL_SIZE)
model.apply(init_weights)
model.to(device)
#Initialize the optimizer
# optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

cuda


In [None]:
def train(model, optimizer, num_epochs=10):
    def loss_fun(output, target):
        return F.cross_entropy(output, target, reduction='mean', weight=torch.tensor([1.0, 2.0]).to(device))
    
    out_dict = {
              'train_acc': [],
              'val_acc': [],
              'train_loss': [],
              'val_loss': []}
  
    for epoch in tqdm(range(num_epochs), unit='epoch'):
        model.train()
        train_correct = 0
        train_loss = []
        # for minibatch_no, (data, target) in tqdm(enumerate(dataset), total=len(dataset)):
        for idx, (single_image_dict) in tqdm(enumerate(train_loader), total=len(train_loader)):
            # for proposal, label, proposal_image in zip(single_image_dict['proposals'], single_image_dict['labels'], single_image_dict['proposal_images']):
            proposal_image, label = single_image_dict['proposal_images'][0].to(device), single_image_dict['labels'][0].to(device)
            #Zero the gradients computed for each weight
            optimizer.zero_grad()
            #Forward pass your image through the network
            output = model(proposal_image)
            #Compute the loss
            loss = loss_fun(output, label)
            #Backward pass through the network
            loss.backward()
            #Update the weights
            optimizer.step()

            train_loss.append(loss.item())
            #Compute how many were correctly classified
            output = nn.functional.softmax(output)
            predicted = output.argmax(1)
            train_correct += (label==predicted).sum().cpu().item()

        #Comput the test accuracy
        val_loss = []
        val_correct = 0
        model.eval()
        for single_val_dict in val_loader:
            # for proposal_val, label_val, proposal_image_val in zip(single_val_dict['proposals'], single_val_dict['labels'], single_val_dict['proposal_images']):
            proposal_image_val, label_val = single_val_dict['proposal_images'][0].to(device), single_val_dict['labels'][0].to(device)
            with torch.no_grad():
                output = model(proposal_image_val)

            val_loss.append(loss_fun(output, label_val).cpu().item())
            output = nn.functional.softmax(output)
            predicted = output.argmax(1)
            val_correct += (label_val==predicted).sum().cpu().item()

        out_dict['train_acc'].append(train_correct/len(dataset_train)/BATCH_SIZE)
        out_dict['val_acc'].append(val_correct/len(dataset_val)/BATCH_SIZE)
        out_dict['train_loss'].append(np.mean(train_loss))
        out_dict['val_loss'].append(np.mean(val_loss))

        print(f"Loss train: {np.mean(train_loss):.3f}\t test: {np.mean(val_loss):.3f}\t",
              f"Accuracy train: {out_dict['train_acc'][-1]*100:.1f}%\t test: {out_dict['val_acc'][-1]*100:.1f}%") # Dividing by 5 because of the batch_size
        
    return out_dict

In [15]:
train(model, optimizer)

  labels = torch.tensor(labels[selected_indices], dtype=torch.long) # Class indices should be long for torch to work.
  labels = torch.tensor(labels[selected_indices], dtype=torch.long) # Class indices should be long for torch to work.

  labels = torch.tensor(labels[selected_indices], dtype=torch.long) # Class indices should be long for torch to work.
  output = nn.functional.softmax(output)
100%|██████████| 463/463 [00:16<00:00, 28.93it/s]
  labels = torch.tensor(labels[selected_indices], dtype=torch.long) # Class indices should be long for torch to work.
  output = nn.functional.softmax(output)
 10%|█         | 1/10 [00:20<03:07, 20.79s/epoch]

Loss train: 0.450	 test: 1.139	 Accuracy train: 71.9%	 test: 57.6%


100%|██████████| 463/463 [00:16<00:00, 28.69it/s]
 20%|██        | 2/10 [00:41<02:47, 20.90s/epoch]

Loss train: 0.431	 test: 1.299	 Accuracy train: 72.8%	 test: 55.1%


100%|██████████| 463/463 [00:15<00:00, 28.99it/s]
 30%|███       | 3/10 [01:02<02:25, 20.84s/epoch]

Loss train: 0.439	 test: 1.434	 Accuracy train: 72.6%	 test: 57.8%


100%|██████████| 463/463 [00:16<00:00, 28.90it/s]
 40%|████      | 4/10 [01:23<02:04, 20.81s/epoch]

Loss train: 0.435	 test: 1.276	 Accuracy train: 72.4%	 test: 55.5%


100%|██████████| 463/463 [00:15<00:00, 29.08it/s]
 50%|█████     | 5/10 [01:44<01:43, 20.77s/epoch]

Loss train: 0.425	 test: 1.003	 Accuracy train: 73.2%	 test: 58.6%


100%|██████████| 463/463 [00:16<00:00, 28.86it/s]
 60%|██████    | 6/10 [02:04<01:23, 20.79s/epoch]

Loss train: 0.420	 test: 1.641	 Accuracy train: 73.6%	 test: 54.8%


100%|██████████| 463/463 [00:15<00:00, 29.02it/s]
 70%|███████   | 7/10 [02:25<01:02, 20.76s/epoch]

Loss train: 0.421	 test: 1.380	 Accuracy train: 73.7%	 test: 55.8%


100%|██████████| 463/463 [00:16<00:00, 28.76it/s]
 80%|████████  | 8/10 [02:46<00:41, 20.81s/epoch]

Loss train: 0.415	 test: 1.873	 Accuracy train: 74.0%	 test: 54.7%


100%|██████████| 463/463 [00:16<00:00, 28.78it/s]
 90%|█████████ | 9/10 [03:07<00:20, 20.91s/epoch]

Loss train: 0.409	 test: 1.213	 Accuracy train: 73.9%	 test: 56.1%


100%|██████████| 463/463 [00:16<00:00, 28.80it/s]
100%|██████████| 10/10 [03:28<00:00, 20.90s/epoch]

Loss train: 0.424	 test: 1.563	 Accuracy train: 73.7%	 test: 56.3%





{'train_acc': [0.7189200863930886,
  0.7281209503239741,
  0.7263930885529157,
  0.7241900647948164,
  0.7315334773218143,
  0.7359827213822894,
  0.7367602591792657,
  0.7400431965442764,
  0.7390064794816414,
  0.7368898488120951],
 'val_acc': [0.5761616161616162,
  0.5507070707070707,
  0.5775757575757576,
  0.554949494949495,
  0.5860606060606061,
  0.5480808080808081,
  0.5577777777777778,
  0.5468686868686868,
  0.5612121212121213,
  0.5632323232323233],
 'train_loss': [0.4495132342141854,
  0.43061876235847596,
  0.43865889613365766,
  0.434578953934411,
  0.4252537651305322,
  0.42024923193210145,
  0.4210601620874961,
  0.4151010038270806,
  0.4093325950897281,
  0.4244507958159076],
 'val_loss': [1.1385483312787432,
  1.2992016257661763,
  1.433503128287166,
  1.275570178423265,
  1.0034209306191917,
  1.6413017074869136,
  1.3798924570131783,
  1.8726759173653342,
  1.2132604037872468,
  1.56289015469527]}

In [None]:
torch.save(model.state_dict(), 'pothole_detection_model.pth')