# Data Preprocessing

In [1]:
import torch
from torchvision import transforms
from PIL import Image
import os
import numpy as np

transform = transforms.Compose([
    transforms.ToTensor(),
])

folder_path = '../../data/processed/Combined/Train/'

if os.path.exists(folder_path + '.DS_Store'):
    os.remove(folder_path + '.DS_Store')

image_paths = [os.path.join(folder_path, img) for img in os.listdir(folder_path)]

print(image_paths)

mean_sum = torch.zeros(3)
std_sum = torch.zeros(3)

for img_path in image_paths:
    img = Image.open(img_path).convert('RGB')
    img_tensor = transform(img)
    mean_sum += img_tensor.mean(dim=(1, 2))
    std_sum += img_tensor.std(dim=(1, 2))

mean = mean_sum / len(image_paths)
std = std_sum / len(image_paths)

print("Mean:", mean)
print("Standard deviation:", std)


['../../data/processed/Combined/Train/BUSI_benign (33).png', '../../data/processed/Combined/Train/BUSI_benign (9).png', '../../data/processed/Combined/Train/BUSI_malignant (39).png', '../../data/processed/Combined/Train/BUSI_benign (125).png', '../../data/processed/Combined/Train/Dataset_B_benign (3).png', '../../data/processed/Combined/Train/BUSI_benign (64).png', '../../data/processed/Combined/Train/BUSI_malignant (137).png', '../../data/processed/Combined/Train/Dataset_B_benign (19).png', '../../data/processed/Combined/Train/BUSI_malignant (81).png', '../../data/processed/Combined/Train/Dataset_B_malignant (37).png', '../../data/processed/Combined/Train/BUSI_normal (7).png', '../../data/processed/Combined/Train/BUSI_benign (172).png', '../../data/processed/Combined/Train/BUSI_malignant (121).png', '../../data/processed/Combined/Train/BUSI_benign (72).png', '../../data/processed/Combined/Train/BUSI_malignant (97).png', '../../data/processed/Combined/Train/BUSI_benign (164).png', '../

In [2]:
from torch.utils.data import Dataset
from PIL import Image
import os

def pil_loader(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

class TrainDS(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_files = os.listdir(root_dir)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.image_files[idx])
        image = pil_loader(img_name)
        label = self._get_label(img_name)
        if self.transform:
            image = self.transform(image)
        return image, label

    def _get_label(self, filename):
        if 'normal' in filename:
            label = 0
        elif 'benign' in filename:
            label = 1
        elif 'malignant' in filename:
            label = 2
        return label

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.3137, 0.3137, 0.3136], std=[0.1942, 0.1942, 0.1941]),
])

dataset = TrainDS(root_dir=folder_path, transform=transform)

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
])

folder_path = '../../data/processed/Combined/Test/'

if os.path.exists(folder_path + '.DS_Store'):
    os.remove(folder_path + '.DS_Store')

image_paths = [os.path.join(folder_path, img) for img in os.listdir(folder_path)]

print(image_paths)

mean_sum = torch.zeros(3)
std_sum = torch.zeros(3)

for img_path in image_paths:
    img = Image.open(img_path).convert('RGB')
    img_tensor = transform(img)
    mean_sum += img_tensor.mean(dim=(1, 2))
    std_sum += img_tensor.std(dim=(1, 2))

mean = mean_sum / len(image_paths)
std = std_sum / len(image_paths)

print("Mean:", mean)
print("Standard deviation:", std)

['../../data/processed/Combined/Test/BUSI_malignant (160).png', '../../data/processed/Combined/Test/BUSI_benign (349).png', '../../data/processed/Combined/Test/BUSI_benign (430).png', '../../data/processed/Combined/Test/BUSI_normal (128).png', '../../data/processed/Combined/Test/BUSI_benign (308).png', '../../data/processed/Combined/Test/BUSI_malignant (208).png', '../../data/processed/Combined/Test/BUSI_malignant (176).png', '../../data/processed/Combined/Test/BUSI_malignant (199).png', '../../data/processed/Combined/Test/BUSI_benign (426).png', '../../data/processed/Combined/Test/BUSI_benign (324).png', '../../data/processed/Combined/Test/BUSI_normal (112).png', '../../data/processed/Combined/Test/BUSI_benign (373).png', '../../data/processed/Combined/Test/BUSI_benign (365).png', '../../data/processed/Combined/Test/BUSI_normal (98).png', '../../data/processed/Combined/Test/BUSI_normal (104).png', '../../data/processed/Combined/Test/BUSI_benign (332).png', '../../data/processed/Combin

In [4]:
def pil_loader(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

class TestDS(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_files = os.listdir(root_dir)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.image_files[idx])
        image = pil_loader(img_name)  # Load image using pil_loader
        label = self._get_label(img_name)
        if self.transform:
            image = self.transform(image)
        return image, label

    def _get_label(self, filename):
        if 'normal' in filename:
            label = 0
        elif 'benign' in filename:
            label = 1
        elif 'malignant' in filename:
            label = 2
        return label

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.3152, 0.3152, 0.3152], std=[0.1914, 0.1914, 0.1914]),
])

eval_dataset = TestDS(root_dir=folder_path, transform=transform)


In [5]:
print(eval_dataset[0])

(tensor([[[ 2.1026,  2.0002,  2.0002,  ...,  2.4510,  2.4919,  2.4305],
         [ 1.8773,  1.8978,  1.8978,  ...,  2.5329,  2.5944,  2.5124],
         [ 1.8978,  1.8978,  1.7953,  ...,  2.5534,  2.5329,  2.4510],
         ...,
         [-1.0731, -1.0526, -1.0526,  ..., -1.6263, -1.6263, -1.6263],
         [-1.0321, -1.0321, -1.0526,  ..., -1.6263, -1.6263, -1.6263],
         [-0.9502, -0.8682, -0.8477,  ..., -1.6263, -1.6263, -1.6263]],

        [[ 2.1026,  2.0002,  2.0002,  ...,  2.4510,  2.4919,  2.4305],
         [ 1.8773,  1.8978,  1.8978,  ...,  2.5329,  2.5944,  2.5124],
         [ 1.8978,  1.8978,  1.7953,  ...,  2.5534,  2.5329,  2.4510],
         ...,
         [-1.0731, -1.0526, -1.0526,  ..., -1.6263, -1.6263, -1.6263],
         [-1.0321, -1.0321, -1.0526,  ..., -1.6263, -1.6263, -1.6263],
         [-0.9502, -0.8682, -0.8477,  ..., -1.6263, -1.6263, -1.6263]],

        [[ 2.1026,  2.0002,  2.0002,  ...,  2.4510,  2.4919,  2.4305],
         [ 1.8773,  1.8978,  1.8978,  ...,  

# Loading & Modifying ResNet-50 Model

In [6]:
import torch
import ssl
from contextlib import redirect_stdout
ssl._create_default_https_context = ssl._create_unverified_context

model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)

with redirect_stdout(None):
    model.eval()

Using cache found in /Users/ahmedmahmoud/.cache/torch/hub/pytorch_vision_v0.10.0


In [7]:
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(eval_dataset, batch_size=32, shuffle=True)

In [8]:
num_features = model.fc.in_features
print(num_features)

2048


In [9]:
num_classes = 3
new_fc_layers = [
    torch.nn.Linear(num_features, 512),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(512, 256),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(256, num_classes)
]
model.fc = torch.nn.Sequential(*new_fc_layers)

In [10]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [11]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to('cpu'), labels.to('cpu')
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()


In [12]:
model.eval()

correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Accuracy on test set: {accuracy}')

Accuracy on test set: 0.5669014084507042
