In [None]:

# Mount Google Drive inside the Colab runtime at /gdrive.
# Background: https://datascience.stackexchange.com/questions/29480/uploading-images-folder-from-my-system-into-google-colab
# Running this cell asks you to log in with your Google account and enter an authorization code.
import os
from google.colab import drive
drive.mount('/gdrive')




Mounted at /gdrive


In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
from pandas.core.common import flatten
import copy
import numpy as np
import random

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

import glob
from tqdm import tqdm

In [None]:
# Annotate and create CSV
import pandas as pd
import os
import torch
# Dataset layout on the mounted drive: <root2>/training/{00-damage,01-whole}/
root2= '/gdrive/MyDrive/project/car-damage-dataset/data1a'
training = root2+'/training'
validation = root2+'/validation'
damaged = '/00-damage/'
whole = '/01-whole/'

# Use the GPU when CUDA is available, otherwise the CPU.
device = ("cuda" if torch.cuda.is_available() else "cpu")

# Not read by any visible cell; kept in case another cell still refers to them.
max_idx=0
max_idx_2=0

# Label convention used throughout the notebook: 1 = whole car, 0 = damaged car.
# Each frame is built in one step. The previous per-row `df['label'][idx] = ...`
# was chained assignment, which pandas warns about and which silently does
# nothing once Copy-on-Write is enabled. Listing each directory once (sorted)
# also keeps file names and labels aligned and gives a stable starting order.
train_df = pd.DataFrame({"img_name": sorted(os.listdir(training+whole))}).assign(label=1)
train_df_2 = pd.DataFrame({"img_name": sorted(os.listdir(training+damaged))}).assign(label=0)

# Combine both classes and shuffle the rows; the fixed seed makes the
# generated CSV identical from run to run.
frames = [train_df,train_df_2]
result_train_df = pd.concat(frames)
result_train_df=result_train_df.sample(frac=1, random_state=42).reset_index(drop=True)
print(result_train_df)

# Written to the current working directory; the dataset cell reads it back.
result_train_df.to_csv (r'train_csv.csv', index = False, header=True)


       img_name label
0     0587.JPEG     0
1     0399.JPEG     0
2     0900.JPEG     1
3      0179.jpg     1
4     0890.JPEG     0
...         ...   ...
1835  0450.JPEG     0
1836  0886.JPEG     0
1837  0918.JPEG     1
1838  0479.JPEG     0
1839  0874.JPEG     1

[1840 rows x 2 columns]


In [None]:
# Create a custom Dataset class
from torch.utils.data import Dataset
import pandas as pd
import os
from PIL import Image
import torch

class WholeorDamaged(Dataset):
    """Binary image dataset driven by a CSV of (file name, label) rows.

    Rows whose label equals 1 are loaded from the "whole" directory; all
    other rows are loaded from the "damaged" directory.

    Args:
        root_dir_whole_train: directory holding the images labelled 1.
        root_dir_damaged_train: directory holding the remaining images.
        annotation_file: anything ``pandas.read_csv`` accepts; column 0 is
            the image file name and column 1 is the label.
        transform: optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, root_dir_whole_train, root_dir_damaged_train, annotation_file, transform=None):
        self.annotations = pd.read_csv(annotation_file)
        self.root_dir1 = root_dir_whole_train
        self.root_dir2 = root_dir_damaged_train
        self.transform = transform

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index``; the label is a float tensor."""
        file_name = self.annotations.iloc[index, 0]
        label = self.annotations.iloc[index, 1]

        # The label decides which class directory the file lives in.
        folder = self.root_dir1 if label == 1 else self.root_dir2
        image = Image.open(os.path.join(folder, file_name)).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return (image, torch.tensor(float(label)))

In [None]:
# do not run below

In [None]:
from torchvision.models import resnet50, ResNet50_Weights

import torch.nn as nn
import torchvision.models as models

In [None]:
# Model

class CNN(nn.Module):
    """Pretrained Inception v3 with a replaced head for binary classification.

    The backbone's final ``fc`` layer is swapped for a ``num_classes``-wide
    linear layer; its output goes through dropout and a sigmoid.

    Args:
        train_CNN: stored on the instance; this class does not read it
            itself (the notebook freezes/unfreezes parameters separately).
        num_classes: width of the replacement head (1 for a single
            probability output).
    """

    def __init__(self, train_CNN=False, num_classes=1):
        super(CNN, self).__init__()
        self.train_CNN = train_CNN
        self.inception = models.inception_v3(pretrained=True, aux_logits=True)
        self.inception.dropout=nn.Dropout(0.3)
        self.inception.fc = nn.Linear(self.inception.fc.in_features, num_classes)

        self.dropout = nn.Dropout(0.4)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        """Return per-sample probabilities with the class dimension squeezed out.

        Args:
            images: batch of images fed to the Inception backbone.

        Returns:
            Tensor of sigmoid outputs with dimension 1 removed.
        """
        features = self.inception(images)
        # With aux_logits=True, torchvision's Inception v3 returns a
        # (logits, aux_logits) namedtuple in train mode and a plain tensor in
        # eval mode. Keep only the main logits so the layers below always get
        # a tensor; previously train mode raised inside dropout.
        if isinstance(features, tuple):
            features = features[0]
        features_2 = self.dropout(features)
        return self.sigmoid(features_2).squeeze(1)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

from tqdm import tqdm
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = ("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# Preprocessing applied to every image the dataset returns:
# resize, take a random 299x299 crop (Inception v3's input size), convert to
# a [0, 1] tensor, then normalise.
#
# The normalisation uses the ImageNet mean/std. torchvision builds the
# pretrained Inception v3 with transform_input=True, so the model itself
# converts ImageNet-normalised input to the (0.5, 0.5) scaling its weights
# expect. Normalising with 0.5/0.5 here meant that conversion was applied to
# already-rescaled data.
transform = transforms.Compose(
        [
            transforms.Resize((356, 356)),
            transforms.RandomCrop((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]
    )

In [None]:
# Training hyperparameters and DataLoader settings.
num_epochs = 40  # number of passes over the training loader in train()
learning_rate = 0.00002  # passed to the Adam optimizer
train_CNN = False  # requires_grad value given to every backbone parameter except the fc layers
batch_size = 64  # samples per batch for both loaders
shuffle = True  # forwarded to the DataLoader constructors
pin_memory = True  # forwarded to the DataLoader constructors
num_workers = 1  # forwarded to the DataLoader constructors

In [None]:
dataset = WholeorDamaged(training+whole,training+damaged,"train_csv.csv",transform=transform)

# Hold out a fixed number of samples for validation and derive the training
# size from the dataset, so the split keeps working when the number of images
# on disk changes (a hard-coded [1540, 300] only fits exactly 1840 rows).
val_size = 300
train_size = len(dataset) - val_size
if train_size <= 0:
    raise ValueError(
        f"Dataset has {len(dataset)} samples; need more than {val_size} to hold out a validation set"
    )

# A seeded generator keeps the train/validation membership stable across runs.
train_set, validation_set = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42)
)

train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers,pin_memory=pin_memory)
# Evaluation does not depend on sample order, so the validation loader is not shuffled.
validation_loader = DataLoader(dataset=validation_set, shuffle=False, batch_size=batch_size,num_workers=num_workers, pin_memory=pin_memory)

In [None]:
# Build the network on the selected device, with binary cross-entropy on the
# sigmoid outputs and Adam over the model's parameters.
model = CNN().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Any backbone parameter whose name contains "fc.weight" or "fc.bias" stays
# trainable; every other backbone parameter follows the train_CNN switch.
for param_name, parameter in model.inception.named_parameters():
    is_fc_param = "fc.weight" in param_name or "fc.bias" in param_name
    parameter.requires_grad = True if is_fc_param else train_CNN

In [None]:
# Accuracy (as a fraction) that a validation check must reach before
# check_accuracy saves the model weights; check_accuracy raises it whenever
# a check meets or beats it.
max_accuracy = 0.89

In [None]:
def check_accuracy(loader, model):
    """Measure classification accuracy of ``model`` over ``loader``.

    A score of at least 0.5 counts as class 1. When ``loader`` is not the
    training loader and the accuracy over the whole loader reaches the global
    ``max_accuracy``, that global is updated and the model's state dict is
    saved to ``model_weights.pth`` under the project directory on the drive.

    Args:
        loader: DataLoader yielding ``(images, labels)`` batches.
        model: network returning one score per sample.

    Returns:
        The accuracy as a percentage string with two decimals, e.g. ``"89.58"``
        (``"0.00"`` for an empty loader).

    NOTE(review): the model is left in eval mode on return, exactly as before
    (the original trailing ``model.train()`` sat after the ``return`` and never
    ran). Callers that want train-mode behaviour afterwards must call
    ``model.train()`` themselves, and should first confirm the model's forward
    handles Inception v3's train-mode output, which is a tuple of main and
    auxiliary logits when ``aux_logits=True``.
    """
    global max_accuracy

    is_train_loader = loader is train_loader
    if is_train_loader:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on validation data")

    num_correct = 0
    num_samples = 0
    model.eval()
    root = '/gdrive/MyDrive/project/car_damaged_or_not/'
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            # Threshold on-device instead of looping over elements in Python.
            predictions = (scores >= 0.5).float()
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)

    # Guard against an empty loader rather than dividing by zero.
    accuracy = num_correct / num_samples if num_samples else 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy*100:.2f}")

    # Decide on checkpointing only after the full pass. Comparing the running
    # ratio after each batch let a lucky first few batches set the "best"
    # accuracy and overwrite the saved weights.
    if not is_train_loader and num_samples and accuracy >= max_accuracy:
        max_accuracy = accuracy
        print('max accuracy is:', max_accuracy)
        torch.save(model.state_dict(), root+'model_weights.pth')

    return f"{accuracy*100:.2f}"

In [None]:
def train():
    """Run ``num_epochs`` passes over ``train_loader``, updating the global model.

    Every fifth epoch (starting with epoch 0) the validation accuracy is
    computed before the epoch's batches and shown on the progress bar.

    NOTE(review): ``check_accuracy`` puts the model in eval mode and does not
    switch it back, so the batches that follow the epoch-0 check run with the
    model in eval mode despite the ``model.train()`` call below.
    """
    model.train()
    for epoch in range(num_epochs):
        progress = tqdm(train_loader, total=len(train_loader), leave=True)

        if epoch % 5 == 0:
            val_acc = check_accuracy(validation_loader, model)
            progress.set_postfix(val_acc=val_acc)

        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            # Forward pass and loss for this batch.
            batch_loss = criterion(model(batch_images), batch_labels)

            # Standard backward step: clear old gradients, backpropagate, update.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            progress.set_description(f"Epoch [{epoch}/{num_epochs}]")
            progress.set_postfix(loss=batch_loss.item())


In [None]:
# Start the training loop when this code runs as the main module.
if __name__ == "__main__":
    train()

In [None]:
# Show the highest accuracy value check_accuracy has stored in max_accuracy (a fraction, not a percentage).
print(max_accuracy)

0.8958333333333334


In [None]:
# Run the training loop again; train() reuses the global model and optimizer, so this continues from their current state.
train()