# Data Preprocessing

In [1]:
import torch
from torchvision import transforms
from PIL import Image
import os

transform = transforms.Compose([
    transforms.ToTensor(),
])

folder_path = '../../data/processed/BUSI/'

if os.path.exists(folder_path + '.DS_Store'):
    os.remove(folder_path + '.DS_Store')

image_paths = [os.path.join(folder_path, img) for img in os.listdir(folder_path)]

print(image_paths)

mean_sum = torch.zeros(3)
std_sum = torch.zeros(3)

for img_path in image_paths:
    img = Image.open(img_path).convert('RGB')
    img_tensor = transform(img)
    mean_sum += img_tensor.mean(dim=(1, 2))
    std_sum += img_tensor.std(dim=(1, 2))

mean = mean_sum / len(image_paths)
std = std_sum / len(image_paths)

mean = mean.tolist()
std = std.tolist()

print("Mean:", mean)
print("Standard deviation:", std)


['../../data/processed/BUSI/benign (328).png', '../../data/processed/BUSI/benign (282).png', '../../data/processed/BUSI/malignant (206).png', '../../data/processed/BUSI/benign (144).png', '../../data/processed/BUSI/benign (82).png', '../../data/processed/BUSI/benign (390).png', '../../data/processed/BUSI/benign (113).png', '../../data/processed/BUSI/malignant (178).png', '../../data/processed/BUSI/normal (52).png', '../../data/processed/BUSI/malignant (197).png', '../../data/processed/BUSI/benign (406).png', '../../data/processed/BUSI/malignant (51).png', '../../data/processed/BUSI/benign (369).png', '../../data/processed/BUSI/benign (386).png', '../../data/processed/BUSI/normal (44).png', '../../data/processed/BUSI/benign (105).png', '../../data/processed/BUSI/malignant (47).png', '../../data/processed/BUSI/benign (410).png', '../../data/processed/BUSI/malignant (181).png', '../../data/processed/BUSI/benign (294).png', '../../data/processed/BUSI/normal (108).png', '../../data/processe

In [2]:
from torch.utils.data import Dataset
from PIL import Image
import os

def pil_loader(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

class BUSIDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_files = os.listdir(root_dir)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.image_files[idx])
        image = pil_loader(img_name)
        label = self._get_label(img_name)
        if self.transform:
            image = self.transform(image)
        return image, label

    def _get_label(self, filename):
        if 'normal' in filename:
            label = 0
        elif 'benign' in filename:
            label = 1
        elif 'malignant' in filename:
            label = 2
        return label

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

dataset = BUSIDataset(root_dir='../../data/processed/BUSI/', transform=transform)

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
])

folder_path = '../../data/processed/Dataset (B)/'

if os.path.exists(folder_path + '.DS_Store'):
    os.remove(folder_path + '.DS_Store')

image_paths = [os.path.join(folder_path, img) for img in os.listdir(folder_path)]

print(image_paths)

mean_sum = torch.zeros(3)
std_sum = torch.zeros(3)

for img_path in image_paths:
    img = Image.open(img_path).convert('RGB')
    img_tensor = transform(img)
    mean_sum += img_tensor.mean(dim=(1, 2))
    std_sum += img_tensor.std(dim=(1, 2))

mean = mean_sum / len(image_paths)
std = std_sum / len(image_paths)

mean = mean.tolist()
std = std.tolist()

print("Mean:", mean)
print("Standard deviation:", std)

['../../data/processed/Dataset (B)/benign (82).png', '../../data/processed/Dataset (B)/malignant (51).png', '../../data/processed/Dataset (B)/benign (105).png', '../../data/processed/Dataset (B)/malignant (47).png', '../../data/processed/Dataset (B)/malignant (10).png', '../../data/processed/Dataset (B)/benign (94).png', '../../data/processed/Dataset (B)/benign (57).png', '../../data/processed/Dataset (B)/benign (41).png', '../../data/processed/Dataset (B)/malignant (6).png', '../../data/processed/Dataset (B)/benign (16).png', '../../data/processed/Dataset (B)/benign (61).png', '../../data/processed/Dataset (B)/benign (36).png', '../../data/processed/Dataset (B)/benign (20).png', '../../data/processed/Dataset (B)/benign (109).png', '../../data/processed/Dataset (B)/benign (77).png', '../../data/processed/Dataset (B)/benign (98).png', '../../data/processed/Dataset (B)/benign (2).png', '../../data/processed/Dataset (B)/malignant (30).png', '../../data/processed/Dataset (B)/malignant (26)

In [4]:
def pil_loader(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

class DatasetB(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_files = os.listdir(root_dir)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.image_files[idx])
        image = pil_loader(img_name)  # Load image using pil_loader
        label = self._get_label(img_name)
        if self.transform:
            image = self.transform(image)
        return image, label

    def _get_label(self, filename):
        if 'normal' in filename:
            label = 0
        elif 'benign' in filename:
            label = 1
        elif 'malignant' in filename:
            label = 2
        return label

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

eval_dataset = DatasetB(root_dir='../../data/processed/Dataset (B)/', transform=transform)


In [5]:
print(eval_dataset[0])

(tensor([[[-0.3168, -0.4203, -0.4462,  ...,  1.3921,  1.0814,  0.6930],
         [-0.4721, -0.3686, -0.2909,  ...,  1.0814,  1.0814,  0.9260],
         [-0.3168, -0.1096, -0.0061,  ...,  0.1234, -0.0579, -0.0579],
         ...,
         [-1.0417, -1.1194, -1.0676,  ..., -0.5239, -0.6275, -0.7051],
         [-1.1453, -1.0417, -0.8346,  ..., -0.7828, -0.7310, -0.9382],
         [-0.8605, -0.6534, -0.4462,  ..., -1.1712, -0.8605, -0.8605]],

        [[-0.3168, -0.4203, -0.4462,  ...,  1.3921,  1.0814,  0.6930],
         [-0.4721, -0.3686, -0.2909,  ...,  1.0814,  1.0814,  0.9260],
         [-0.3168, -0.1096, -0.0061,  ...,  0.1234, -0.0579, -0.0579],
         ...,
         [-1.0417, -1.1194, -1.0676,  ..., -0.5239, -0.6275, -0.7051],
         [-1.1453, -1.0417, -0.8346,  ..., -0.7828, -0.7310, -0.9382],
         [-0.8605, -0.6534, -0.4462,  ..., -1.1712, -0.8605, -0.8605]],

        [[-0.3168, -0.4203, -0.4462,  ...,  1.3921,  1.0814,  0.6930],
         [-0.4721, -0.3686, -0.2909,  ...,  

# Loading & Modifying ResNet-50 Model

In [6]:
import torch
import ssl
from contextlib import redirect_stdout
ssl._create_default_https_context = ssl._create_unverified_context

model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)

with redirect_stdout(None):
    model.eval()


Using cache found in /Users/ahmedmahmoud/.cache/torch/hub/pytorch_vision_v0.10.0


In [7]:
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(eval_dataset, batch_size=16, shuffle=True)

In [8]:
num_features = model.fc.in_features
print(num_features)

2048


In [9]:
num_classes = 3
new_fc_layers = [
    torch.nn.Linear(num_features, 512),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(512, 256),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(256, num_classes)
]
model.fc = torch.nn.Sequential(*new_fc_layers)

In [10]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [11]:
num_epochs = 20

# model.history

for epoch in range(num_epochs):
    model.train()
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to('cpu'), labels.to('cpu')
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()


In [None]:
model.eval()

correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Accuracy on test set: {accuracy}')