# 1. Dependencies


In [1]:
from torchvision import models
import torchvision
import torch 
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import transforms
import torchvision.io as io
import torch.nn.functional as F

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os
from PIL import Image


In [15]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Print the device
print(device)

cpu


  return torch._C._cuda_getDeviceCount() > 0


# 2. Preparing dataset

In [2]:
dataset_path = os.listdir('dataset/lung')

img_type = os.listdir('dataset/lung')
print (img_type)  #what kinds of rooms are in this dataset

['Benign', 'Malignant']


In [3]:
print("Types of rooms found: ", len(dataset_path))

Types of rooms found:  2


In [4]:
imgs = []

for ctype in img_type:
 # Get all the file names
 all_imgs = os.listdir('dataset/lung' + '/' +ctype)
 #print(all_rooms)

 # Add them to the list
 for img in all_imgs:
    imgs.append((ctype, str('dataset/lung' + '/' +ctype) + '/' + img))
    #print(rooms[:3])

imgs

[('Benign', 'dataset/lung/Benign/B108.jpg'),
 ('Benign', 'dataset/lung/Benign/B56.jpg'),
 ('Benign', 'dataset/lung/Benign/B94.jpg'),
 ('Benign', 'dataset/lung/Benign/B33.jpg'),
 ('Benign', 'dataset/lung/Benign/B64.jpg'),
 ('Benign', 'dataset/lung/Benign/B19.jpg'),
 ('Benign', 'dataset/lung/Benign/B63.jpg'),
 ('Benign', 'dataset/lung/Benign/B98.jpg'),
 ('Benign', 'dataset/lung/Benign/B113.jpg'),
 ('Benign', 'dataset/lung/Benign/B15.jpg'),
 ('Benign', 'dataset/lung/Benign/B97.jpg'),
 ('Benign', 'dataset/lung/Benign/B85.jpg'),
 ('Benign', 'dataset/lung/Benign/B70.jpg'),
 ('Benign', 'dataset/lung/Benign/B39.jpg'),
 ('Benign', 'dataset/lung/Benign/B48.jpg'),
 ('Benign', 'dataset/lung/Benign/B6.jpg'),
 ('Benign', 'dataset/lung/Benign/B54.jpg'),
 ('Benign', 'dataset/lung/Benign/B84.jpg'),
 ('Benign', 'dataset/lung/Benign/B51.jpg'),
 ('Benign', 'dataset/lung/Benign/B20.jpg'),
 ('Benign', 'dataset/lung/Benign/B93.jpg'),
 ('Benign', 'dataset/lung/Benign/B78.jpg'),
 ('Benign', 'dataset/lung/Benig

In [5]:
img_df = pd.DataFrame(data=imgs, columns=['img type', 'image'])
print(img_df.head())

  img type                         image
0   Benign  dataset/lung/Benign/B108.jpg
1   Benign   dataset/lung/Benign/B56.jpg
2   Benign   dataset/lung/Benign/B94.jpg
3   Benign   dataset/lung/Benign/B33.jpg
4   Benign   dataset/lung/Benign/B64.jpg


In [6]:
img_df['img type'] = img_df['img type'].map({'Benign': 0, 'Malignant': 1})

In [7]:
img_df

Unnamed: 0,img type,image
0,0,dataset/lung/Benign/B108.jpg
1,0,dataset/lung/Benign/B56.jpg
2,0,dataset/lung/Benign/B94.jpg
3,0,dataset/lung/Benign/B33.jpg
4,0,dataset/lung/Benign/B64.jpg
...,...,...
676,1,dataset/lung/Malignant/M169.jpg
677,1,dataset/lung/Malignant/M43.jpg
678,1,dataset/lung/Malignant/M382.jpg
679,1,dataset/lung/Malignant/M541.jpg


In [8]:
# Let's check how many samples for each category are present
print("Total number of images in the dataset: ", len(img_df))

Total number of images in the dataset:  681


In [9]:
img_count = img_df['img type'].value_counts()

print("images in each category: ")
print(img_count)

images in each category: 
img type
1    561
0    120
Name: count, dtype: int64


In [10]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, image_paths, labels):
        self.image_paths = image_paths
        self.labels = labels

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        image = Image.open(self.image_paths[index])  # Open the image using PIL
        image = transforms.Resize((227, 227))(image)  # Apply the transformation
        image = transforms.ToTensor()(image)  # Convert the image to a tensor
        image = image.type(torch.float)  # Convert the tensor to float
        label = torch.tensor(self.labels[index])  # Return the target data as a tensor
        return image, label

In [11]:
dataset = CustomDataset(img_df['image'], img_df['img type'])
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
dataloader.batch_size

32

In [12]:
image, label = dataset[0]
image[0]

tensor([[0.1490, 0.1490, 0.1490,  ..., 0.1490, 0.1490, 0.1490],
        [0.1490, 0.1490, 0.1490,  ..., 0.1490, 0.1490, 0.1490],
        [0.1490, 0.1490, 0.1490,  ..., 0.1490, 0.1490, 0.1490],
        ...,
        [0.1490, 0.1490, 0.1490,  ..., 0.1490, 0.1490, 0.1490],
        [0.1490, 0.1490, 0.1490,  ..., 0.1490, 0.1490, 0.1490],
        [0.1490, 0.1490, 0.1490,  ..., 0.1490, 0.1490, 0.1490]])

# 3. Model

In [13]:
class ImageClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=(11,11), stride=(4,4), padding=(2,2)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            nn.Conv2d(64, 192, kernel_size=(5,5), stride=(1,1), padding=(2,2)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            nn.Conv2d(192, 384, kernel_size=(3,3), stride=(1,1), padding=(1,1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=(3,3), stride=(1,1), padding=(1,1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=(3,3), stride=(1,1), padding=(1,1)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6,6))

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(256 * 6 * 6, 4096),#, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(4096, 4096),#, bias=True),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2),#, bias=True)
        )

    def forward(self, x):
        x = x.type(torch.float)
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        probas = F.softmax(logits, dim=1)
        return logits

In [14]:
clf = ImageClassifier().to('cuda')
opt = torch.optim.Adam(clf.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss().to('cuda')

RuntimeError: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero.

In [None]:
print(clf)

ImageClassifier(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias

# 4. Training

In [None]:
for epoch in range(20):
    for batch in dataloader:
        X, y = batch
        y = torch.tensor(y)
        X, y = X.to('cuda'), y.to('cuda')

        yhat = clf(X)
        loss = loss_fn(yhat, y)

        # backpropagation

        opt.zero_grad()
        loss.backward()
        opt.step()

    print(f"Epoch: {epoch} loss is {loss.item()}")

  y = torch.tensor(y)


Epoch: 0 loss is 0.06148592010140419
Epoch: 1 loss is 0.5718265771865845
Epoch: 2 loss is 0.19258993864059448
Epoch: 3 loss is 0.7645230889320374
Epoch: 4 loss is 0.11946531385183334
Epoch: 5 loss is 0.5352098941802979
Epoch: 6 loss is 0.4162778854370117
Epoch: 7 loss is 0.6985313892364502
Epoch: 8 loss is 0.5190131068229675
Epoch: 9 loss is 0.49405595660209656
Epoch: 10 loss is 1.1807283163070679
Epoch: 11 loss is 0.5170320868492126
Epoch: 12 loss is 0.3845941424369812
Epoch: 13 loss is 0.48044919967651367
Epoch: 14 loss is 0.4013504981994629
Epoch: 15 loss is 0.35077643394470215
Epoch: 16 loss is 0.3636312782764435
Epoch: 17 loss is 0.21407286822795868
Epoch: 18 loss is 0.43625378608703613
Epoch: 19 loss is 0.5350552201271057


In [None]:
with open('modelv1.pt', "wb") as f:
    torch.save(clf.state_dict(), f)

RuntimeError: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
