In [2]:
import numpy as np
import pandas as pd 
import cv2
import os
import tqdm
import glob

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.utils import shuffle

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class OralCancerDataset(Dataset):
    """Image dataset backed by an image folder and an optional label CSV.

    With ``path_to_csv`` set, the CSV is read, its rows are shuffled and then
    split into a validation part (the first ``val_ratio`` fraction of the
    shuffled rows) and a training part (the remaining rows). Items are
    ``(image, label)`` pairs built from the ``Name`` and ``Diagnosis`` columns.

    With ``path_to_csv=None`` the dataset serves the unlabelled images
    ``image_<idx>.jpg`` found in ``path_to_images`` (useful for the test set).
    Items are then ``(image, name)`` pairs.

    Args:
        path_to_images: Directory that contains the image files.
        path_to_csv: CSV file with ``Name`` and ``Diagnosis`` columns, or
            ``None`` for an unlabelled image folder.
        validation: If true, expose the validation part of the split,
            otherwise the training part.
        val_ratio: Fraction of the rows assigned to the validation part.
            Note that with the default of 1.0 every row goes to the
            validation part and the training part is empty.
        seed: Seed of the shuffle that precedes the split. Two datasets
            built from the same CSV with the same ``seed`` and ``val_ratio``
            see the same row order, so their training and validation parts
            never overlap. Pass ``None`` for a different shuffle on every
            construction.
    """

    def __init__(self, path_to_images, path_to_csv=None, validation=False, val_ratio=1.0, seed=0):
        self.path_to_images = path_to_images
        self.path_to_csv = path_to_csv
        self.v = validation
        self.v_r = val_ratio

        if self.path_to_csv is not None:
            dat = pd.read_csv(self.path_to_csv)
            # Seeded shuffle: an unseeded one gives every instance its own row
            # order, which lets validation rows leak into the training part
            # when the two parts are built by two separate instances.
            dat = dat.sample(frac=1.0, random_state=seed).reset_index(drop=True)

            # The first `n_val` shuffled rows are validation, the rest training.
            n_val = int(len(dat) * val_ratio)
            if self.v:
                self.df = dat.iloc[:n_val].reset_index(drop=True)
            else:
                self.df = dat.iloc[n_val:].reset_index(drop=True)

    def __len__(self):
        if self.path_to_csv is not None:
            return len(self.df)
        # Unlabelled mode: one item per .jpg file in the image folder.
        return len(glob.glob(os.path.join(self.path_to_images, '*.jpg')))

    def __getitem__(self, idx):
        if self.path_to_csv is not None:
            row = self.df.iloc[idx]
            return self._read_image(row['Name']), row['Diagnosis']

        # Unlabelled mode: files are addressed by their running index.
        name = 'image_' + str(idx) + '.jpg'
        return self._read_image(name), name

    def _read_image(self, name):
        """Load ``name`` from the image folder exactly as stored on disk.

        Raises:
            FileNotFoundError: If the file is missing or cannot be decoded.
        """
        path = os.path.join(self.path_to_images, name)
        image = cv2.imread(path, -1)  # -1: cv2.IMREAD_UNCHANGED
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the path rather than later inside batch collation.
            raise FileNotFoundError('Could not read image: {}'.format(path))
        return image

In [3]:
class VGG_4Cancer_Classification(nn.Module):
    """VGG-style CNN with six 3x3 convolution blocks and a two-layer classifier.

    Channel widths are 32-32-64-64-128-128; every second block ends in a
    2x2 max-pool, so the spatial size is divided by 8 overall. The classifier
    is sized for a 16x16x128 feature map, which is what a 128x128 input
    produces. Inputs of another spatial size are average-pooled to 16x16
    before the classifier instead of failing on a shape mismatch.

    Args:
        num_classes: Number of output logits.
        hidden_dim: Width of the hidden fully connected layer.

    Input:  float tensor of shape (N, 3, H, W).
    Output: logits of shape (N, num_classes).
    """

    # Spatial size of the feature map the first linear layer is sized for.
    _POOLED_SIZE = (16, 16)

    def __init__(self, num_classes=2, hidden_dim=4096):
        super(VGG_4Cancer_Classification, self).__init__()
        self.layer1 = self._conv_block(3, 32)
        self.layer2 = self._conv_block(32, 32, pool=True)
        self.layer3 = self._conv_block(32, 64)
        self.layer4 = self._conv_block(64, 64, pool=True)
        self.layer5 = self._conv_block(64, 128)
        self.layer6 = self._conv_block(128, 128, pool=True)
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(self._POOLED_SIZE[0] * self._POOLED_SIZE[1] * 128, hidden_dim),
            nn.ReLU())
        self.fc2 = nn.Sequential(
            nn.Linear(hidden_dim, num_classes))

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        """Build Conv3x3 -> BatchNorm -> ReLU, optionally followed by a 2x2 max-pool."""
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        # Only resample when the feature map is not already 16x16, so the
        # computation for 128x128 inputs is exactly the un-pooled one.
        if tuple(out.shape[-2:]) != self._POOLED_SIZE:
            out = F.adaptive_avg_pool2d(out, self._POOLED_SIZE)
        out = out.reshape(out.size(0), -1)  # flatten to (N, 16*16*128)
        out = self.fc(out)
        out = self.fc2(out)
        return out

In [4]:
path_to_csv = 'Data/train.csv'
path_to_train_images = 'Data/train'
path_to_test_images = 'Data/test'

# Mini-batch size shared by all three loaders.
loader_batch_size = 32
# Fraction of the labelled rows held out for validation.
val_split_ratio = 0.3
# Seed applied before each labelled dataset is built (see below).
split_seed = 0

# Each labelled dataset shuffles the CSV rows in its constructor before
# splitting them. Re-seeding NumPy's global RNG right before each construction
# makes any shuffle that draws from it produce the same row order for both
# datasets, so the training and validation parts stay complementary instead of
# overlapping.
np.random.seed(split_seed)
train_dataset = OralCancerDataset(path_to_train_images, path_to_csv, validation=False, val_ratio=val_split_ratio)
np.random.seed(split_seed)
val_dataset = OralCancerDataset(path_to_train_images, path_to_csv, validation=True, val_ratio=val_split_ratio)

test_dataset = OralCancerDataset(path_to_test_images)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=loader_batch_size,
    shuffle=True,
    num_workers=0,
)

# Evaluation does not benefit from shuffling; a fixed order keeps the
# validation batches identical from epoch to epoch.
val_dataloader = DataLoader(
    val_dataset,
    batch_size=loader_batch_size,
    shuffle=False,
    num_workers=0,
)

# The test order must stay fixed so predictions line up with the file names.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=loader_batch_size,
    shuffle=False,
    num_workers=0,
)

In [5]:
# Upper bound on the number of validation images: number of batches times the
# batch size of 32. The last batch may be smaller, so len(val_dataset) is the
# exact count.
#len(train_dataloader)*32 +
len(val_dataloader)*32 

14688

In [None]:
# --- Hyperparameters ---------------------------------------------------------
num_classes = 2
num_epochs = 25
batch_size = 32
learning_rate = 0.005
l1_lambda = 0.001       # weight of the L1 penalty added to the loss in training
weight_decay_ = 0.001   # L2 penalty, handed to the optimizer as weight decay

# --- Model, loss and optimizer -----------------------------------------------
model = VGG_4Cancer_Classification(num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.95,
    weight_decay=weight_decay_,
)



In [None]:
# import torchsummary
# torchsummary.summary(model,(3,128,128))

In [None]:
#TRAINING
# Each epoch runs one pass over the training loader (with an extra L1 penalty
# on all parameters) followed by one pass over the validation loader. Per-epoch
# loss and accuracy are collected in `df_train` / `df_val`.

total_step = len(train_dataloader)  # optimisation steps per epoch
df_train={'Loss':[], 'Accuracy':[]}
df_val={'Loss':[], 'Accuracy':[]}

for epoch in range(num_epochs):
    print ('Epoch [{}/{}]' 
                   .format(epoch+1, num_epochs))

    # ---- Training pass ----
    # Dropout and BatchNorm must be in training mode; the validation pass below
    # switches them to eval mode, so this has to be re-enabled every epoch.
    model.train()
    i=0
    correct = 0
    total = 0
    running_loss = 0.0
    for (images, labels) in tqdm.tqdm(train_dataloader):
        # Move tensors to the configured device; the permute turns the
        # channels-last batch (dim 3 = channels) into the channels-first
        # layout expected by Conv2d.
        images = images.to(device).permute(0,3,1,2).float()
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # L1 penalty over every model parameter
        l1_norm = sum(p.abs().sum() for p in model.parameters())
        loss = loss + l1_lambda * l1_norm

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running statistics, weighted by batch size so that a smaller last
        # batch does not skew the epoch mean.
        _, predicted = torch.max(outputs.detach(), 1)
        n_batch = labels.size(0)
        total += n_batch
        correct += (predicted == labels).sum().item()
        running_loss += loss.item() * n_batch
        i=i+1

    # Epoch mean of the optimised (penalised) loss rather than the value of
    # whichever batch happened to come last.
    train_loss = running_loss / max(total, 1)
    train_acc = 100 * correct / max(total, 1)
    df_train['Loss'].append(train_loss)
    df_train['Accuracy'].append(train_acc)
    print ('Epoch [{}/{}], Step [{}/{}], Accuracy: {:0.4f}; Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i, total_step, train_acc, train_loss))

    # ---- Validation pass ----
    # eval() disables Dropout and makes BatchNorm use its running statistics,
    # so validation neither sees random masking nor alters the model state.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        running_loss_val = 0.0
        for images, labels in val_dataloader:
            images = images.to(device).permute(0,3,1,2).float()
            labels = labels.to(device)

            outputs = model(images)
            loss_val = criterion(outputs, labels)
            _, predicted = torch.max(outputs, 1)
            n_batch = labels.size(0)
            total += n_batch
            correct += (predicted == labels).sum().item()
            running_loss_val += loss_val.item() * n_batch
            del images, labels, outputs

        val_loss = running_loss_val / max(total, 1)
        val_acc = 100 * correct / max(total, 1)
        df_val['Loss'].append(val_loss)
        df_val['Accuracy'].append(val_acc)
        # `total` is the exact number of validation images that were evaluated.
        print('Accuracy of the network on the {} validation images: {:.4f} % ; Loss - {:.4f}'.format(total, val_acc, val_loss)) 


In [None]:
#SAVING EVAL DATA FOR INFERENCE
# Write the per-epoch training and validation metrics to one CSV file each.
tr_filename='Inference/Trial_2/train.csv'
val_filename='Inference/Trial_2/val.csv'

train_summary = pd.DataFrame(df_train)
val_summary = pd.DataFrame(df_val)

for summary, filename in ((train_summary, tr_filename), (val_summary, val_filename)):
    # to_csv does not create missing directories, so make sure the target
    # folder exists before writing.
    os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
    summary.to_csv(filename, index = False)
    print("File {} saved successfully!".format(filename))

In [None]:
# Test Prediction
# Predict a class index for every test image and store (file name, prediction)
# pairs in a CSV file.
df={'Name':[], 'Diagnosis':[]}
i=0
n_done=0

# Inference must run in eval mode: otherwise Dropout randomly masks activations
# and BatchNorm normalises with the statistics of each test batch.
model.eval()
with torch.no_grad():
    for images, names in test_dataloader:
        images = images.to(device)

        # channels-last batch (dim 3 = channels) -> channels-first for Conv2d
        images=images.permute(0,3,1,2)
        outputs = model(images.float()).cpu()
        _, predicted = torch.max(outputs, 1)

        df['Name'].extend(names)
        df['Diagnosis'].extend(predicted.tolist())
        i+=1
        n_done+=len(names)
        # Report the real number of processed images, not batches * 32.
        print('Test images {}/{} done.'.format(n_done,len(test_dataset)),end='\r') 

test_preds = pd.DataFrame(df)
test_filename='test_preds_2.csv'

test_preds.to_csv(test_filename, index = False)
print("File {} saved successfully!".format(test_filename))


In [None]:
#SAVING MODEL DATA 
model_path="Model/final_model_2.pt"
# torch.save does not create missing directories, so make sure the target
# folder exists before writing.
os.makedirs(os.path.dirname(model_path) or '.', exist_ok=True)
# NOTE: this pickles the whole module object, so loading the file later needs
# the model class definition to be available under the same name.
torch.save(model,model_path)
print("Model saved successfully!\nPath: {}".format(model_path))