<a href="https://colab.research.google.com/github/MaverickTopG/Glioma_AI_Detector_Project/blob/main/Train_Classification_Model_for_Glioma.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fetch the classification scripts; later cells run train.py and onnx_export.py from this checkout.
!git clone https://github.com/dusty-nv/pytorch-classification.git

Cloning into 'pytorch-classification'...
remote: Enumerating objects: 113, done.[K
remote: Counting objects: 100% (63/63), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 113 (delta 45), reused 54 (delta 39), pack-reused 50[K
Receiving objects: 100% (113/113), 33.20 KiB | 4.15 MiB/s, done.
Resolving deltas: 100% (58/58), done.


In [None]:
# Install the training dependencies. `&&` stops the cell if the checkout is
# missing instead of running pip from the wrong directory.
!cd pytorch-classification && pip install -r requirements.txt

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->-r requirements.txt (line 1))
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->-r requirements.txt (line 1))
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->-r requirements.txt (line 1))
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->-r requirements.txt (line 1))
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->-r requirements.txt (line 1))
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch->-r requirements.txt (line 1))
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-man

In [None]:
# Upload your dataset as a zip to Google Drive first, then mount Drive at
# /content/drive so the cells below can read it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Copy the dataset folder from Drive so it always lands at data/Data, which is
# where the split script (folder = "Data") and train.py (data/Data) look for it.
# mkdir -p + trailing slash make the result independent of whether data/ already exists.
!mkdir -p /content/pytorch-classification/data && cp -r /content/drive/MyDrive/Data /content/pytorch-classification/data/

In [None]:
# Link your Google Drive in the Files tab to the left before running this cell.
# `&&` ensures nothing is extracted into the wrong directory if the cd fails.
!cd pytorch-classification/data && unzip -q /content/drive/MyDrive/archive.zip

In [None]:
# Script to split the dataset into train/test/val folders.
import os
from math import ceil
from random import sample,choice
from shutil import move
# Work from the data directory so relative paths such as folder = "Data" resolve here.
# Note: this changes the kernel's working directory for every later cell.
os.chdir("/content/pytorch-classification/data")
def get_model_class_names(data_directory: str) -> list:
    """Return the class names found under ``<data_directory>/train``, sorted.

    Every sub-directory of the train folder is treated as one class; stray
    non-directory entries (e.g. ``.DS_Store``) are ignored.

    The names are sorted because ``os.listdir`` returns entries in arbitrary
    order, while the list is later indexed with the model's predicted class
    index.
    NOTE(review): assumes the training script assigns class indices in
    alphabetical folder order (torchvision ``ImageFolder`` behaviour) — confirm.

    Args:
        data_directory: Dataset root containing a ``train`` sub-directory.

    Returns:
        Alphabetically sorted list of class (folder) names.
    """
    train_directory = os.path.join(data_directory, "train")
    return sorted(
        entry
        for entry in os.listdir(train_directory)
        if os.path.isdir(os.path.join(train_directory, entry))
    )

def list_class_images(data_directory: str, class_name: str, extension_filters = ('jpg', 'png', 'jpeg', 'webp')) -> dict:
    """List the image files of one class in each of the train/test/val folders.

    Side effects: ``<data_directory>/<category>/<class_name>`` is created for
    every category if missing. If the train folder for the class is empty and
    an unsplit ``<data_directory>/<class_name>`` folder exists, that folder is
    moved into ``<data_directory>/train`` so its images become the train set.

    Args:
        data_directory: Dataset root.
        class_name: Name of the class folder.
        extension_filters: Lower-case file extensions (without the dot) that
            count as images.

    Returns:
        Dict with keys ``'train'``, ``'test'`` and ``'val'``; each value is a
        sorted list of image file names (not full paths).
    """
    class_images = {}
    for category in ['train', 'test', 'val']:
        image_directory = os.path.join(data_directory, category, class_name)
        os.makedirs(image_directory, exist_ok=True)
        if category == "train" and not os.listdir(image_directory):
            unsplit_directory = os.path.join(data_directory, class_name)
            # Only adopt the unsplit folder when it really exists; otherwise the
            # rmdir + move would raise and leave the train folder missing.
            if os.path.isdir(unsplit_directory):
                os.rmdir(image_directory)
                move(unsplit_directory, os.path.join(data_directory, category))
        # os.listdir order is arbitrary; sort so results are deterministic.
        class_images[category] = sorted(
            file_name
            for file_name in os.listdir(image_directory)
            if os.path.splitext(file_name)[1].lower().lstrip(".") in extension_filters
        )
    return class_images


def _move_class_images(data_directory: str, class_name: str, file_names: list, source_category: str, destination_category: str) -> None:
    """Move the named image files of one class from one split folder to another."""
    source_folder = os.path.join(data_directory, source_category, class_name)
    destination_folder = os.path.join(data_directory, destination_category, class_name)
    for file_name in file_names:
        os.rename(os.path.join(source_folder, file_name), os.path.join(destination_folder, file_name))


def split_class_images(data_directory: str, class_name: str, test_percent: float = 0.1, val_percent: float = 0.1):
    """Rebalance one class so test and val hold the requested share of its images.

    Target sizes are ``ceil(percent * total)`` for test and val; everything
    else belongs to train. Files are moved on disk between
    ``<data_directory>/{train,test,val}/<class_name>``:

    1. splits holding more than their target give randomly chosen images
       back to train;
    2. splits holding fewer than their target receive randomly chosen images
       from train.

    Args:
        data_directory: Dataset root.
        class_name: Class folder to rebalance.
        test_percent: Fraction of the class's images to place in test.
        val_percent: Fraction of the class's images to place in val.
    """
    class_images = list_class_images(data_directory, class_name)
    total_images = sum(map(len, class_images.values()))
    print(f'There are {total_images} images of the class {class_name}.')
    test_image_count = int(ceil(test_percent * total_images))
    val_image_count = int(ceil(val_percent * total_images))
    train_image_count = total_images - test_image_count - val_image_count
    print(f'Image dataset split: Train={train_image_count}, Test={test_image_count}, Val={val_image_count}.')

    category_counts = {
        "test": test_image_count,
        "val": val_image_count
    }

    # Pass 1: return surplus images from over-full splits to train.
    for category_name, category_count in category_counts.items():
        surplus = len(class_images[category_name]) - category_count
        if surplus > 0:
            randomly_selected_images = sample(class_images[category_name], surplus)
            _move_class_images(data_directory, class_name, randomly_selected_images, category_name, 'train')
            class_images[category_name] = sorted(set(class_images[category_name]).difference(randomly_selected_images))
            class_images['train'] = sorted(set(class_images['train']).union(randomly_selected_images))
        elif surplus == 0:
            print(f'No changes are necessary for class {class_name} {category_name}.')

    # Pass 2: top up under-full splits from train.
    for category_name, category_count in category_counts.items():
        shortfall = category_count - len(class_images[category_name])
        if shortfall > 0:
            # sample() raises ValueError when asked for more items than exist,
            # which happens for very small classes; move what is available.
            move_image_count = min(shortfall, len(class_images['train']))
            if move_image_count < shortfall:
                print(f'Warning: only {move_image_count} of {shortfall} images available to move to {category_name} for class {class_name}.')
            randomly_selected_images = sample(class_images['train'], move_image_count)
            _move_class_images(data_directory, class_name, randomly_selected_images, 'train', category_name)
            class_images[category_name] = sorted(set(class_images[category_name]).union(randomly_selected_images))
            class_images['train'] = sorted(set(class_images['train']).difference(randomly_selected_images))

from random import seed

# Fixed seed so a fresh run selects the same images for each split
# (given the same file listing order).
SPLIT_SEED = 42
seed(SPLIT_SEED)

# Dataset root, relative to the working directory set by os.chdir above.
folder = "Data" #replace with folder name
classes = get_model_class_names(folder)
print(f"The classes in our dataset at {folder} are: {classes}")
# Rebalance every class into train/test/val in turn.
for class_name in classes:
    split_class_images(folder, class_name)


The classes in our dataset at Data are: ['notumor', 'glioma']
There are 2000 images of the class notumor.
Image dataset split: Train=1600, Test=200, Val=200.
There are 1621 images of the class glioma.
Image dataset split: Train=1295, Test=163, Val=163.


In [None]:
# Train on the split dataset; checkpoints go to models/ inside the checkout.
# `&&` prevents train.py from being looked up in the wrong directory if the cd fails.
!cd /content/pytorch-classification && python3 train.py --model-dir=models data/Data

2024-07-04 15:29:35.757655: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-04 15:29:35.757761: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-04 15:29:35.891748: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-04 15:29:35.899272: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
To start tensorboard run:  tensorboard --log-

In [None]:
# onnxruntime is imported further down to run the exported model;
# onnx is presumably required by the export step — verify against onnx_export.py.
!pip install onnx onnxruntime

Collecting onnx
  Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting onnxruntime
  Downloading onnxruntime-1.18.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx, humanfriendl

In [None]:
# Export the trained checkpoint in models/ to ONNX for the inference cells below.
# `&&` stops the cell if the checkout directory is missing.
!cd /content/pytorch-classification && python3 onnx_export.py --model-dir=models

Namespace(input='model_best.pth.tar', output='', model_dir='models', no_activation=False)
=> running on device cuda:0
=> loading checkpoint:  models/model_best.pth.tar
=> using model:  resnet18
=> reshaped ResNet fully-connected layer with: Linear(in_features=512, out_features=2, bias=True)
=> adding nn.Softmax layer to model
Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), pa

In [None]:
import matplotlib.pyplot as plt

def plot_image(array, i, labels):
    """Show an image with ``labels[i]`` as its title and no axis ticks.

    Args:
        array: Image data accepted by ``imshow``.
        i: Index into ``labels`` selecting the title text.
        labels: Sequence of label strings.
    """
    axes = plt.gca()
    axes.imshow(array)
    axes.set_title(labels[i])
    # Tick marks carry no meaning for a picture, so hide them on both axes.
    axes.set_xticks([])
    axes.set_yticks([])
    plt.show()

In [None]:
import onnxruntime as ort
import numpy as np
from PIL import Image
# Open an ONNX Runtime session on the model in models/
# (presumably the file written by the onnx_export.py cell above).
sess=ort.InferenceSession("/content/pytorch-classification/models/resnet18.onnx")

In [None]:
#show predictions
# Per-channel mean/std applied to the model input.
# NOTE(review): assumes train.py normalises inputs with the standard ImageNet
# statistics used for torchvision models — confirm against the cloned script.
IMAGENET_MEAN=np.array([0.485,0.456,0.406],np.float32)
IMAGENET_STD=np.array([0.229,0.224,0.225],np.float32)
# The model's output index refers to class folders in alphabetical order
# (NOTE(review): ImageFolder-style indexing assumed), so label with a sorted copy.
labels=sorted(classes)

# Pick a random test image of a random class.
category=choice(classes)
name=choice(list_class_images(folder,category)['test'])
# Force 3 channels: grayscale or RGBA files would otherwise break the layout below.
img=Image.open(os.path.join(folder,"test",category,name)).convert("RGB")
img=np.array(img.resize((224,224)),np.float32)/255
# HWC -> CHW with an added batch axis. transpose keeps height and width in
# order, whereas swapaxes(1,3) would feed the network a transposed image.
batch=np.transpose((img-IMAGENET_MEAN)/IMAGENET_STD,(2,0,1))[np.newaxis]
results=sess.run(['output_0'],{'input_0':np.ascontiguousarray(batch)})
print(results)
print(f"actual class: {category}")
# Display the un-normalised image, titled with the predicted class.
plot_image(img,np.argmax(results[0][0]),labels)