In [None]:
import os
import json
import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
import torch

# Custom class for recieving data and transforming it
class SkinCancerDataset(Dataset):

  def __init__(self, data_dict, data_dir, transforms = None):
    self.info = pd.DataFrame(data_dict)
    self.root = data_dir
    self.transforms = transforms

  def __len__(self):
    return len(self.info)

  def __getitem__(self, idx):
    if torch.is_tensor(idx):
        idx = idx.tolist()
    img_name = os.path.join(self.root,
                            self.info.iloc[idx, 0])+".jpg"
    image = Image.open(img_name).convert("RGB")
    if self.transforms is not None:
        img_as_tensor = self.transforms(image)
    label = self.info.iloc[idx, 1]

    return (img_as_tensor, torch.tensor(int(label)))

In [None]:
# Connect to Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install kaggle
import os
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive/kaggle' # Folder to download dataset into



In [None]:
!kaggle datasets download -d fanconic/skin-cancer-malignant-vs-benign

Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 5, in <module>
    from kaggle.cli import main
  File "/usr/local/lib/python3.10/dist-packages/kaggle/__init__.py", line 23, in <module>
    api.authenticate()
  File "/usr/local/lib/python3.10/dist-packages/kaggle/api/kaggle_api_extended.py", line 403, in authenticate
    raise IOError('Could not find {}. Make sure it\'s located in'
OSError: Could not find kaggle.json. Make sure it's located in /root/.kaggle. Or use the environment method.


In [None]:
import zipfile

# Define the path to zip file
file_path = '/content/skin-cancer-malignant-vs-benign.zip'

# Unzip the file to a specific destination
with zipfile.ZipFile(file_path, 'r') as zip_ref:
    zip_ref.extractall('/content/drive/MyDrive/kaggle')

In [None]:
import os

# folder path
dir_path = r'/content/drive/MyDrive/kaggle/data/train/benign'
b_count = 0
# Iterate directory
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        b_count += 1
print('Benign File count:', b_count)

# folder path
dir_path = r'/content/drive/MyDrive/kaggle/data/train/malignant'
m_count = 0
# Iterate directory
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        m_count += 1
print('Malignant File count:', m_count)


Benign File count: 0
Malignant File count: 1


In [None]:
import os

# folder path
dir_path = r'/content/drive/MyDrive/kaggle/data/test/benign'
b_count_test = 0
# Iterate directory
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        b_count_test += 1
print('Benign File count:', b_count_test)

# folder path
dir_path = r'/content/drive/MyDrive/kaggle/data/test/malignant'
m_count_test = 0
# Iterate directory
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        m_count_test += 1
print('Malignant File count:', m_count_test)

Benign File count: 0
Malignant File count: 1


In [None]:
# import requests
# import zipfile
# from pathlib import Path

# # Setup path to data folder
# data_path = Path("data/")
# image_path = data_path / "skin-cancer-malignant-vs-benign"

# # If the image folder doesn't exist, download it and prepare it...
# if image_path.is_dir():
#     print(f"{image_path} directory exists.")
# else:

#     !kaggle datasets download -d fanconic/skin-cancer-malignant-vs-benign
#     print(f"Did not find {image_path} directory, creating one...")
#     image_path.mkdir(parents=True, exist_ok=True)

#     # # Download pizza, steak, sushi data
#     # with open(data_path / "skin-cancer-malignant-vs-benign.zip", "wb") as f:
#     #     f.write('skin-cancer-malignant-vs-benign.zip')

#     # Unzip pizza, steak, sushi data
#     with zipfile.ZipFile(data_path / "skin-cancer-malignant-vs-benign.zip", "r") as zip_ref:
#         print("Unzipping data...")
#         zip_ref.extractall(image_path)

# # # Unzip pizza, steak, sushi data
# # with zipfile.ZipFile(image_path / "skin-cancer-malignant-vs-benign.zip", "r") as zip_ref:
# #     print("Unzipping data...")
# #     zip_ref.extractall(image_path)

data_path = "/content/drive/MyDrive/kaggle/data"

In [None]:
import os
import csv
import glob

# Rename raw benign training data
os.chdir(r"/content/drive/MyDrive/kaggle/data/train/benign")
for index, oldfile in enumerate(glob.glob("*.jpg"), start=1):
    newfile = 'benign-{}.jpg'.format(index)
    os.rename (oldfile, newfile)

# Rename raw malignant training data
os.chdir(r"/content/drive/MyDrive/kaggle/data/train/malignant")
for index, oldfile in enumerate(glob.glob("*.jpg"), start=1):
    newfile = 'malignant-{}.jpg'.format(index)
    os.rename (oldfile, newfile)

In [None]:
import os
import csv
import glob

# Rename raw benign testing data
os.chdir(r"/content/drive/MyDrive/kaggle/data/test/benign")
for index, oldfile in enumerate(glob.glob("*.jpg"), start=1):
    newfile = 'benign-{}.jpg'.format(index)
    os.rename (oldfile, newfile)

# Rename raw malignant testing data
os.chdir(r"/content/drive/MyDrive/kaggle/data/test/malignant")
for index, oldfile in enumerate(glob.glob("*.jpg"), start=1):
    newfile = 'malignant-{}.jpg'.format(index)
    os.rename (oldfile, newfile)

In [None]:
os.listdir("/content/drive/MyDrive/kaggle/data/train/malignant")

['malignant-1.jpg',
 'malignant-2.jpg',
 'malignant-3.jpg',
 'malignant-4.jpg',
 'malignant-5.jpg',
 'malignant-6.jpg',
 'malignant-7.jpg',
 'malignant-8.jpg',
 'malignant-9.jpg',
 'malignant-10.jpg',
 'malignant-11.jpg',
 'malignant-12.jpg',
 'malignant-13.jpg',
 'malignant-14.jpg',
 'malignant-15.jpg',
 'malignant-16.jpg',
 'malignant-17.jpg',
 'malignant-18.jpg',
 'malignant-19.jpg',
 'malignant-20.jpg',
 'malignant-21.jpg',
 'malignant-22.jpg',
 'malignant-23.jpg',
 'malignant-24.jpg',
 'malignant-25.jpg',
 'malignant-26.jpg',
 'malignant-27.jpg',
 'malignant-28.jpg',
 'malignant-29.jpg',
 'malignant-30.jpg',
 'malignant-31.jpg',
 'malignant-32.jpg',
 'malignant-33.jpg',
 'malignant-34.jpg',
 'malignant-35.jpg',
 'malignant-36.jpg',
 'malignant-37.jpg',
 'malignant-38.jpg',
 'malignant-39.jpg',
 'malignant-40.jpg',
 'malignant-41.jpg',
 'malignant-42.jpg',
 'malignant-43.jpg',
 'malignant-44.jpg',
 'malignant-45.jpg',
 'malignant-46.jpg',
 'malignant-47.jpg',
 'malignant-48.jpg',
 

In [None]:
os.listdir("/content/drive/MyDrive/kaggle/data/train/benign")

['benign-1.jpg',
 'benign-2.jpg',
 'benign-3.jpg',
 'benign-4.jpg',
 'benign-5.jpg',
 'benign-6.jpg',
 'benign-7.jpg',
 'benign-8.jpg',
 'benign-9.jpg',
 'benign-10.jpg',
 'benign-11.jpg',
 'benign-12.jpg',
 'benign-13.jpg',
 'benign-14.jpg',
 'benign-15.jpg',
 'benign-16.jpg',
 'benign-17.jpg',
 'benign-18.jpg',
 'benign-19.jpg',
 'benign-20.jpg',
 'benign-21.jpg',
 'benign-22.jpg',
 'benign-23.jpg',
 'benign-24.jpg',
 'benign-25.jpg',
 'benign-26.jpg',
 'benign-27.jpg',
 'benign-28.jpg',
 'benign-29.jpg',
 'benign-30.jpg',
 'benign-31.jpg',
 'benign-32.jpg',
 'benign-33.jpg',
 'benign-34.jpg',
 'benign-35.jpg',
 'benign-36.jpg',
 'benign-37.jpg',
 'benign-38.jpg',
 'benign-39.jpg',
 'benign-40.jpg',
 'benign-41.jpg',
 'benign-42.jpg',
 'benign-43.jpg',
 'benign-44.jpg',
 'benign-45.jpg',
 'benign-46.jpg',
 'benign-47.jpg',
 'benign-48.jpg',
 'benign-49.jpg',
 'benign-50.jpg',
 'benign-51.jpg',
 'benign-52.jpg',
 'benign-53.jpg',
 'benign-54.jpg',
 'benign-55.jpg',
 'benign-56.jpg',
 

In [None]:
os.listdir("/content/drive/MyDrive/kaggle/data/test/malignant")

['malignant-1.jpg',
 'malignant-2.jpg',
 'malignant-3.jpg',
 'malignant-4.jpg',
 'malignant-5.jpg',
 'malignant-6.jpg',
 'malignant-7.jpg',
 'malignant-8.jpg',
 'malignant-9.jpg',
 'malignant-10.jpg',
 'malignant-11.jpg',
 'malignant-12.jpg',
 'malignant-13.jpg',
 'malignant-14.jpg',
 'malignant-15.jpg',
 'malignant-16.jpg',
 'malignant-17.jpg',
 'malignant-18.jpg',
 'malignant-19.jpg',
 'malignant-20.jpg',
 'malignant-21.jpg',
 'malignant-22.jpg',
 'malignant-23.jpg',
 'malignant-24.jpg',
 'malignant-25.jpg',
 'malignant-26.jpg',
 'malignant-27.jpg',
 'malignant-28.jpg',
 'malignant-29.jpg',
 'malignant-30.jpg',
 'malignant-31.jpg',
 'malignant-32.jpg',
 'malignant-33.jpg',
 'malignant-34.jpg',
 'malignant-35.jpg',
 'malignant-36.jpg',
 'malignant-37.jpg',
 'malignant-38.jpg',
 'malignant-39.jpg',
 'malignant-40.jpg',
 'malignant-41.jpg',
 'malignant-42.jpg',
 'malignant-43.jpg',
 'malignant-44.jpg',
 'malignant-45.jpg',
 'malignant-46.jpg',
 'malignant-47.jpg',
 'malignant-48.jpg',
 

In [None]:
labels = {} # Labels for training images
for r in range(1, b_count + 1):
    labels["benign-{}".format(r)] = "0"

In [None]:
for r in range(1, m_count + 1):
    labels["malignant-{}".format(r)] = "1"

In [None]:
labels_test = {} # Labels for testing images
for r in range(1, b_count_test + 1):
    labels_test["benign-{}".format(r)] = "0"

In [None]:
for r in range(1, m_count_test + 1):
    labels_test["malignant-{}".format(r)] = "1"

In [None]:
labels["benign-1440"]

'0'

In [None]:
labels["malignant-1"]

'1'

In [None]:
import json

with open('labels.json', 'w') as f:
    json.dump(labels, f) # Dump dictionary into training_labels file

NameError: ignored

In [None]:
import json

with open("/content/drive/MyDrive/kaggle/c_data_test/labels_test.json", "w") as f:
  json.dump(labels_test, f) # Dump dictionary into testing_labels_file

In [None]:
with open('/content/drive/MyDrive/kaggle/c_data/labels.json') as f:
    targets = json.load(f)
# targets

In [None]:
with open('/content/drive/MyDrive/kaggle/c_data_test/labels_test.json') as f:
    targets = json.load(f)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

## Defining the NN architecture

# Load the pretrained VGG16 architechture from pytorch
vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

penultimate_layer = nn.Linear(4096, 512)
last_layer = nn.Linear(512, 2) # Last layer only 0 or 1

#Changing layers
vgg16.classifier[3] = penultimate_layer
vgg16.classifier[6] = last_layer

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:06<00:00, 79.2MB/s]


In [None]:
import numpy as np
import json
import torch.utils.data
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20
# percentage of training set to use as validation
valid_size = 0.2

# test_size = 0.1

In [None]:
import os

# Directory
directory = "c_data"

# Parent Directory path
parent_dir = "/content/drive/MyDrive/kaggle"

# Path
path = os.path.join(parent_dir, directory)

# Create the directory
os.mkdir(path)
print("Directory '% s' created" % directory)

FileExistsError: ignored

In [None]:
import os

# Directory
directory = "c_data_test"

# Parent Directory path
parent_dir = "/content/drive/MyDrive/kaggle"

# Path
path = os.path.join(parent_dir, directory)

# Create the directory
# 'GeeksForGeeks' in
# '/home / User / Documents'
os.mkdir(path)
print("Directory '% s' created" % directory)

Directory 'c_data_test' created


In [None]:
os.listdir("/content/drive/MyDrive/kaggle/c_data_test")

[]

In [None]:
import os
import shutil

# Moving being and malignant training images into one file for ease of access

source_folder = r"/content/drive/MyDrive/kaggle/data/train/benign/"
destination_folder = r"/content/drive/MyDrive/kaggle/c_data/"

# fetch all files
for file_name in os.listdir(source_folder):
    # construct full file path
    source = source_folder + file_name
    destination = destination_folder + file_name
    # move only files
    if os.path.isfile(source):
        shutil.move(source, destination)
        print('Moved:', file_name)

source_folder = r"/content/drive/MyDrive/kaggle/data/train/malignant/"
destination_folder = r"/content/drive/MyDrive/kaggle/c_data/"

# fetch all files
for file_name in os.listdir(source_folder):
    # construct full file path
    source = source_folder + file_name
    destination = destination_folder + file_name
    # move only files
    if os.path.isfile(source):
        shutil.move(source, destination)
        print('Moved:', file_name)

Moved: benign-1.jpg
Moved: benign-2.jpg
Moved: benign-3.jpg
Moved: benign-4.jpg
Moved: benign-5.jpg
Moved: benign-6.jpg
Moved: benign-7.jpg
Moved: benign-8.jpg
Moved: benign-9.jpg
Moved: benign-10.jpg
Moved: benign-11.jpg
Moved: benign-12.jpg
Moved: benign-13.jpg
Moved: benign-14.jpg
Moved: benign-15.jpg
Moved: benign-16.jpg
Moved: benign-17.jpg
Moved: benign-18.jpg
Moved: benign-19.jpg
Moved: benign-20.jpg
Moved: benign-21.jpg
Moved: benign-22.jpg
Moved: benign-23.jpg
Moved: benign-24.jpg
Moved: benign-25.jpg
Moved: benign-26.jpg
Moved: benign-27.jpg
Moved: benign-28.jpg
Moved: benign-29.jpg
Moved: benign-30.jpg
Moved: benign-31.jpg
Moved: benign-32.jpg
Moved: benign-33.jpg
Moved: benign-34.jpg
Moved: benign-35.jpg
Moved: benign-36.jpg
Moved: benign-37.jpg
Moved: benign-38.jpg
Moved: benign-39.jpg
Moved: benign-40.jpg
Moved: benign-41.jpg
Moved: benign-42.jpg
Moved: benign-43.jpg
Moved: benign-44.jpg
Moved: benign-45.jpg
Moved: benign-46.jpg
Moved: benign-47.jpg
Moved: benign-48.jpg
M

In [None]:
import os
import shutil

# Moving being and malignant testing images into one file for ease of access

source_folder = r"/content/drive/MyDrive/kaggle/data/test/benign/"
destination_folder = r"/content/drive/MyDrive/kaggle/c_data_test/"

# fetch all files
for file_name in os.listdir(source_folder):
    # construct full file path
    source = source_folder + file_name
    destination = destination_folder + file_name
    # move only files
    if os.path.isfile(source):
        shutil.move(source, destination)
        print('Moved:', file_name)

source_folder = r"/content/drive/MyDrive/kaggle/data/test/malignant/"
destination_folder = r"/content/drive/MyDrive/kaggle/c_data_test/"

# fetch all files
for file_name in os.listdir(source_folder):
    # construct full file path
    source = source_folder + file_name
    destination = destination_folder + file_name
    # move only files
    if os.path.isfile(source):
        shutil.move(source, destination)
        print('Moved:', file_name)

Moved: benign-1.jpg
Moved: benign-2.jpg
Moved: benign-3.jpg
Moved: benign-4.jpg
Moved: benign-5.jpg
Moved: benign-6.jpg
Moved: benign-7.jpg
Moved: benign-8.jpg
Moved: benign-9.jpg
Moved: benign-10.jpg
Moved: benign-11.jpg
Moved: benign-12.jpg
Moved: benign-13.jpg
Moved: benign-14.jpg
Moved: benign-15.jpg
Moved: benign-16.jpg
Moved: benign-17.jpg
Moved: benign-18.jpg
Moved: benign-19.jpg
Moved: benign-20.jpg
Moved: benign-21.jpg
Moved: benign-22.jpg
Moved: benign-23.jpg
Moved: benign-24.jpg
Moved: benign-25.jpg
Moved: benign-26.jpg
Moved: benign-27.jpg
Moved: benign-28.jpg
Moved: benign-29.jpg
Moved: benign-30.jpg
Moved: benign-31.jpg
Moved: benign-32.jpg
Moved: benign-33.jpg
Moved: benign-34.jpg
Moved: benign-35.jpg
Moved: benign-36.jpg
Moved: benign-37.jpg
Moved: benign-38.jpg
Moved: benign-39.jpg
Moved: benign-40.jpg
Moved: benign-41.jpg
Moved: benign-42.jpg
Moved: benign-43.jpg
Moved: benign-44.jpg
Moved: benign-45.jpg
Moved: benign-46.jpg
Moved: benign-47.jpg
Moved: benign-48.jpg
M

In [None]:
transform = transforms.Compose([
    transforms.Resize(size=224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]) # Imagenet standards
    ])
#data directory
data_dir = "/content/drive/MyDrive/kaggle/c_data/"
with open('/content/drive/MyDrive/kaggle/c_data/labels.json') as f:
    targets = json.load(f)

data_dir_test = "/content/drive/MyDrive/kaggle/c_data_test/"
with open('/content/drive/MyDrive/kaggle/c_data_test/labels_test.json') as f:
    targets_test = json.load(f)

In [None]:
train_data = SkinCancerDataset(targets.items(), data_dir, transform)
test_data = SkinCancerDataset(targets_test.items(), data_dir_test, transform)

# obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)

# Randomize order
num_train_test = len(test_data)
indices_test = list(range(num_train_test))
np.random.shuffle(indices_test)

In [None]:
# Slicing Train/Valid in 80/20 ratio
split = int(np.floor(valid_size * num_train))
test_idx, valid_idx, train_idx = indices_test[:], indices[:split], indices[split:]

In [None]:
# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
test_sampler = SubsetRandomSampler(test_idx)

In [None]:
# prepare data loaders (combine dataset and sampler)
train_loader = DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = DataLoader(train_data, batch_size=batch_size,
    sampler=valid_sampler, num_workers=num_workers)
test_loader = DataLoader(test_data, batch_size=batch_size,
    sampler=test_sampler, num_workers=num_workers)

In [None]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn

# check if CUDA is available. If so, device will be gpu
device = "cuda" if torch.cuda.is_available() else "cpu"

vgg16.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
# specify loss function
criterion = nn.CrossEntropyLoss()
# specify optimize
optimizer = optim.Adam(params=vgg16.classifier.parameters(), lr = 0.001)

In [None]:
# Freeze training for all "features" layers
for param in vgg16.features.parameters():
    param.requires_grad = False

In [None]:
def accuracy_fn(y_true, y_pred):
  # correct = torch.eq(y_true, y_pred).sum().item()
  # acc = (correct/len(y_pred)) * 100
  # return acc

In [None]:
# Saving library
from pathlib import Path

# Create model directory path
MODEL_PATH = Path("/content/drive/MyDrive/kaggle/models/")
MODEL_PATH.mkdir(parents=True,
                 exist_ok=True)

# Create model save path
MODEL_NAME = "vgg16_skin_cancer.pt"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

In [None]:
from tqdm.auto import tqdm
from timeit import default_timer as timer
# number of epochs to train the model

start = timer()

n_epochs = 10

valid_loss_min = np.Inf # track change in validation loss(starting with infinity)
train_loss_min = np.Inf

for epoch in tqdm(range(1, n_epochs+1)):

  # keep track of training and validation loss
  train_loss = 0.0
  valid_loss = 0.0

  ### Train the model
  vgg16.train()
  for data, target in train_loader:
    # move tensors to GPU if CUDA is available
    data, target = data.to(device), target.to(device)
    # forward pass: compute predicted outputs by passing inputs to the model
    output = vgg16(data)
    # calculate the batch loss
    loss = criterion(output, target)
    # clear the gradients of all optimized variables
    optimizer.zero_grad()
    # backward pass: compute gradient of the loss with respect to model parameters
    loss.backward()
    # perform a single optimization step (parameter update)
    optimizer.step()
    # update training loss
    train_loss += loss.item()*data.size(0)

  ### Validation
  vgg16.eval()

  with torch.inference_mode():
    for data, target in valid_loader:
      # move tensors to GPU if CUDA is available
      data, target = data.to(device), target.to(device)
      # forward pass: compute predicted outputs by passing inputs to the model
      output = vgg16(data)
      # calculate the batch loss
      loss = criterion(output, target)
      # update average validation loss
      valid_loss += loss.item()*data.size(0)

  # calculate average losses
  train_loss = train_loss/len(train_loader.dataset)
  valid_loss = valid_loss/len(valid_loader.dataset)

  # print training/validation statistics
  print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
      epoch, train_loss, valid_loss))

  # save model if validation loss has decreased (this is done to make sure the model doesn't overfit on the training data and detect patterns that aren't there)
  if valid_loss <= valid_loss_min:
      print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
      valid_loss_min,
      valid_loss))
      # Save the model state dict
      torch.save(obj=vgg16.state_dict(), f=MODEL_SAVE_PATH)
      valid_loss_min = valid_loss

  # if train_loss <= train_loss_min:
  #     print('Training loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
  #     train_loss_min,
  #     train_loss))
  #     # Save the model state dict
  #     torch.save(obj=vgg16.state_dict(), f=MODEL_SAVE_PATH)
  #     train_loss_min = train_loss
  # torch.save(obj=vgg16.state_dict(), f=MODEL_SAVE_PATH)

end = timer()

print(f"Time to train: {end - start}")

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 	Training Loss: 0.497714 	Validation Loss: 0.062430
Validation loss decreased (inf --> 0.062430).  Saving model ...
Epoch: 2 	Training Loss: 0.265590 	Validation Loss: 0.059148
Validation loss decreased (0.062430 --> 0.059148).  Saving model ...
Epoch: 3 	Training Loss: 0.178262 	Validation Loss: 0.056004
Validation loss decreased (0.059148 --> 0.056004).  Saving model ...
Epoch: 4 	Training Loss: 0.116954 	Validation Loss: 0.085280
Epoch: 5 	Training Loss: 0.099517 	Validation Loss: 0.153448
Epoch: 6 	Training Loss: 0.108818 	Validation Loss: 0.076947
Epoch: 7 	Training Loss: 0.094306 	Validation Loss: 0.233880
Epoch: 8 	Training Loss: 0.083714 	Validation Loss: 0.136680
Epoch: 9 	Training Loss: 0.058495 	Validation Loss: 0.172894
Epoch: 10 	Training Loss: 0.040200 	Validation Loss: 0.135620
Time to train: 1054.812142344


In [None]:
import numpy as np
import torch
import torch.nn as nn

## track test loss (Classification)
batch_size = 20

    ## if GPU is_available
#test_on_gpu = torch.cuda.is_available()
# test_on_gpu = False
# if not test_on_gpu:
#     print('CUDA is not available.  Testing on CPU ...')
# else:
#     print('CUDA is available.  Testing on GPU ...')

# Load model
vgg16.load_state_dict(torch.load(MODEL_SAVE_PATH))

    ##Model should be on GPU with dataset
# if test_on_gpu:
#     vgg16.cuda()

#same criterion as training
criterion = nn.CrossEntropyLoss()
number_of_classes = 2
class_correct = list(0. for i in range(number_of_classes))
class_total = list(0. for i in range(number_of_classes))

#Keeping track of test loss
test_loss = 0.0

vgg16.eval()
# iterate over test data
for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    # move tensors to GPU if CUDA is available
    # if test_on_gpu:
    #     data, target = data.cuda(), target.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output = vgg16(data)
    # calculate the batch loss
    loss = criterion(output, target)
    # update test loss
    test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)
    # compare predictions to true label
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.cpu().numpy())
    print(correct)
    # calculate test accuracy for each object class
    for i in range(len(correct)):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

# average test loss
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

## Test Error in each class
classes = ['Benign', 'Malignant']

for i in range(number_of_classes):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

#Overall Accuracy
print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

[ True  True False False False  True  True  True  True  True  True  True
  True  True False  True  True  True  True  True]
[ True  True  True  True  True False  True  True  True  True  True  True
 False  True  True  True  True  True  True  True]
[ True  True False  True False  True  True  True  True False  True  True
  True  True  True  True  True  True  True  True]
[False  True  True  True  True  True  True  True  True False  True  True
  True  True  True  True  True  True  True  True]
[ True  True  True  True  True False  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
[ True  True  True False  True  True  True  True False  True False  True
  True  True  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True False  True]
[ True  True  True  True  True  True  True  True  True  True  True False
  True  True  True False  True False  True False]
[ True False  Tr

In [None]:
# Import tqdm.auto
from tqdm.auto import tqdm

# 1. Make predictions with trained model
y_preds = []
vgg16.eval()
with torch.inference_mode():
  for X, y in tqdm(test_loader, desc="Making predictions..."):
    # Send the data and targets to target device
    X, y = X.to(device), y.to(device)
    # Do the forward pass
    y_logit = vgg16(X)
    # Turn predictions from logits -> prediction probabilities -> prediction labels
    y_pred = torch.softmax(y_logit, dim=1).argmax(dim=1)
    y_preds.append(y_pred.cpu())

# Contcatenate list of predictions into tensor
# print(y_preds)
y_pred_tensor = torch.cat(y_preds)
y_pred_tensor[:10]

In [None]:
# See if require packages are installed and if not, install them...
try:
  import torchmetrics, mlxtend
  print(f"mlxtend version: {mlxtend.__version__}")
  assert int(mlxtend.__version__.splot(".")[1]) >= 19, "mlxtend version should be 0.19.0 or higher"
except:
  !pip install -q torchmetrics -U
  import torchmetrics, mlxtend

In [None]:
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

confmat = ConfusionMatrix(num_classes=len(test_loader), task='multiclass')
confmat_tensor = confmat(preds=y_pred_tensor, target=torch.tensor([target for data, target in test_loader]))
print(y_pred_tensor[:10])
print(test_data.targets[:10])

# 3. Plot the confusion matrix
fig, ax = plot_confusion_matrix(
    conf_mat=confmat_tensor.numpy(),    # matplotlib like numpy
    class_names=class_names,
    figsize=(10, 7)
    )

In [None]:
MODEL_NAME_2 = "vgg16_model.pt"
MODEL_SAVE_PATH_2 = MODEL_PATH / MODEL_NAME_2
torch.save(vgg16, MODEL_SAVE_PATH_2)

In [None]:
import torch
from torch.utils.mobile_optimizer import optimize_for_mobile

LIGHT_MODEL_NAME = "torchscript_small.pt"
LIGHT_MODEL_SAVE_PATH = MODEL_PATH / LIGHT_MODEL_NAME

model = torch.load(MODEL_SAVE_PATH_2, map_location="cpu")
model.eval()
example = torch.rand(1, 3, 224, 224)
traced_script_module = torch.jit.trace(model, example)
optimized_traced_model = optimize_for_mobile(traced_script_module)
optimized_traced_model._save_for_lite_interpreter(LIGHT_MODEL_NAME)


In [None]:
import os
import shutil

source_folder = r"/content/"
destination_folder = r"/content/drive/MyDrive/kaggle/models/"

file_name = "torchscript_small.pt"
# construct full file path
source = source_folder + file_name
destination = destination_folder + file_name
# move only files
if os.path.isfile(source):
    shutil.move(source, destination)
    print('Moved:', file_name)

Moved: torchscript_small.pt


In [None]:
import torch
import torchvision
vgg16 = torch.load("/content/drive/MyDrive/kaggle/models/vgg16_model.pt", map_location=torch.device('cpu'))
vgg16.eval()
example = torch.rand(1, 3, 224, 224)
traced_script_module = torch.jit.trace(vgg16, example)
traced_script_module.save("/content/drive/MyDrive/kaggle/models/vgg_16_torchscript.pt")

In [None]:
import numpy as np
import torch
import torch.nn as nn

## track test loss (Classification)
batch_size = 20

    ## if GPU is_available
#test_on_gpu = torch.cuda.is_available()
test_on_gpu = False
# if not test_on_gpu:
#     print('CUDA is not available.  Testing on CPU ...')
# else:
#     print('CUDA is available.  Testing on GPU ...')

# Load model
# vgg16.load_state_dict(torch.load(MODEL_SAVE_PATH))

    ##Model should be on GPU with dataset
# if test_on_gpu:
#     vgg16.cuda()

#same criterion as training
criterion = nn.CrossEntropyLoss()
number_of_classes = 2
class_correct = list(0. for i in range(number_of_classes))
class_total = list(0. for i in range(number_of_classes))

#Keeping track of test loss
test_loss = 0.0

vgg16.eval()
# iterate over test data
for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    # move tensors to GPU if CUDA is available
    # if test_on_gpu:
    #     data, target = data.cuda(), target.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output = vgg16(data)
    # calculate the batch loss
    loss = criterion(output, target)
    # update test loss
    test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)
    # compare predictions to true label
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.cpu().numpy())
    print(correct)
    # calculate test accuracy for each object class
    for i in range(len(correct)):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

# average test loss
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

## Test Error in each class
classes = ['Benign', 'Malignant']

for i in range(number_of_classes):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

#Overall Accuracy
print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

[False  True  True  True  True  True  True  True  True  True False  True
  True  True False  True  True False  True False]
[False  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]
[ True  True  True False  True  True False  True  True  True  True  True
  True  True  True  True  True False  True  True]
[ True  True  True  True False  True  True False  True  True  True  True
  True  True  True  True  True  True  True  True]
[ True  True  True  True  True False  True  True  True  True  True  True
  True  True  True False  True False  True  True]
[ True  True  True  True  True  True  True  True  True False  True False
 False  True  True  True  True  True False  True]
[ True False  True  True  True  True  True  True  True False  True False
 False  True  True  True  True  True False  True]
[ True  True  True  True False False  True  True  True False False  True
 False  True  True  True  True  True  True  True]
[ True False  Tr

In [None]:
pip install onnx



In [None]:
import torch
import onnx
import torchvision

# Load  PyTorch model
vgg16 = torch.load("/content/drive/MyDrive/kaggle/models/vgg16_model.pt", map_location=torch.device('cpu'))

# Set  input shape of the model
input_shape = (1, 3, 224, 224)

# Export  PyTorch model to ONNX format
torch.onnx.export(vgg16, torch.randn(input_shape), 'vgg16.onnx', opset_version=11)

In [None]:
!pip install onnx-tf
import onnx_tf

# Load  ONNX model
onnx_model = onnx.load('vgg16.onnx')

# Convert ONNX model to TensorFlow format
tf_model = onnx_tf.backend.prepare(onnx_model)
# Export  TensorFlow  model
tf_model.export_graph("vgg16.tf")

Collecting onnx-tf
  Downloading onnx_tf-1.10.0-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.1/226.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-addons (from onnx-tf)
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons->onnx-tf)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons, onnx-tf
Successfully installed onnx-tf-1.10.0 tensorflow-addons-0.23.0 typeguard-2.13.3



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



KeyError: ignored

In [None]:
import onnx
from onnx import helper

onnx_model = onnx.load(onnx_model_path)

# Define a mapping from old names to new names
name_map = {"input.1": "input_1"}

# Initialize a list to hold the new inputs
new_inputs = []

# Iterate over the inputs and change their names if needed
for inp in onnx_model.graph.input:
    if inp.name in name_map:
        # Create a new ValueInfoProto with the new name
        new_inp = helper.make_tensor_value_info(name_map[inp.name],
                                                inp.type.tensor_type.elem_type,
                                                [dim.dim_value for dim in inp.type.tensor_type.shape.dim])
        new_inputs.append(new_inp)
    else:
        new_inputs.append(inp)

# Clear the old inputs and add the new ones
onnx_model.graph.ClearField("input")
onnx_model.graph.input.extend(new_inputs)

# Go through all nodes in the model and replace the old input name with the new one
for node in onnx_model.graph.node:
    for i, input_name in enumerate(node.input):
        if input_name in name_map:
            node.input[i] = name_map[input_name]

# Save the renamed ONNX model
onnx.save(onnx_model, 'resnet50-new.onnx')

In [None]:
import torch
import onnx
import tensorflow as tf
import onnx_tf
# from torchvision.models import resnet50

# Load the PyTorch ResNet50 model
vgg16 = torch.load("/content/drive/MyDrive/kaggle/models/vgg16_model.pt", map_location=torch.device('cpu'))
vgg16.eval()

# Export the PyTorch model to ONNX format
input_shape = (1, 3, 224, 224)
dummy_input = torch.randn(input_shape)
onnx_model_path = 'vgg16.onnx'
torch.onnx.export(vgg16, dummy_input, onnx_model_path, verbose=False)

# # Load the ONNX model
# onnx_model = onnx.load(onnx_model_path)

# # Convert the ONNX model to TensorFlow format
# tf_model_path = 'vgg16.pb'
# tf_rep = onnx_tf.backend.prepare(onnx_model)
# tf_rep.export_graph(tf_model_path)


# # Convert the TensorFlow model to TensorFlow Lite format
# converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(tf_model_path)
# tflite_model = converter.convert()

# # Save the TensorFlow Lite model to a file
# with open('vgg16.tflite', 'wb') as f:
#     f.write(tflite_model)


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [None]:
import onnx
from onnx import helper

onnx_model = onnx.load(onnx_model_path)

# Define a mapping from old names to new names
name_map = {"input.1": "input_1"}

# Initialize a list to hold the new inputs
new_inputs = []

# Iterate over the inputs and change their names if needed
for inp in onnx_model.graph.input:
    if inp.name in name_map:
        # Create a new ValueInfoProto with the new name
        new_inp = helper.make_tensor_value_info(name_map[inp.name],
                                                inp.type.tensor_type.elem_type,
                                                [dim.dim_value for dim in inp.type.tensor_type.shape.dim])
        new_inputs.append(new_inp)
    else:
        new_inputs.append(inp)

# Clear the old inputs and add the new ones
onnx_model.graph.ClearField("input")
onnx_model.graph.input.extend(new_inputs)

# Go through all nodes in the model and replace the old input name with the new one
for node in onnx_model.graph.node:
    for i, input_name in enumerate(node.input):
        if input_name in name_map:
            node.input[i] = name_map[input_name]

# Save the renamed ONNX model
onnx.save(onnx_model, 'vgg16-new.onnx')

In [None]:
# Load the ONNX model
onnx_model_path_new = "vgg16-new.onnx"
onnx_model = onnx.load(onnx_model_path_new)

# Convert the ONNX model to TensorFlow format
tf_model_path = 'vgg16.tf'
tf_rep = onnx_tf.backend.prepare(onnx_model)
tf_rep.export_graph(tf_model_path)


converter = tf.lite.TFLiteConverter.from_saved_model(tf_model_path)
tflite_model = converter.convert()
open('vgg16.tflite', 'wb').write(tflite_model)

INFO:absl:Function `__call__` contains input name(s) x, y with unsupported characters which will be renamed to transpose_53_x, mul_3_y in the SavedModel.
INFO:absl:Found untraced functions such as gen_tensor_dict while saving (showing 1 of 1). These functions will not be directly callable after loading.
INFO:absl:Writing fingerprint to vgg16.tf/fingerprint.pb


478324404

In [None]:
import os
import shutil

source_folder = r"/content/"
destination_folder = r"/content/drive/MyDrive/kaggle/models/"

file_name = "resnet18.tflite"
# construct full file path
source = source_folder + file_name
new_file_name = "vgg16.tflite"
destination = destination_folder + new_file_name
# move only files
if os.path.isfile(source):
    shutil.move(source, destination)
    print('Moved:', file_name)

Moved: resnet18.tflite
