In [None]:
# Mount Google Drive inside the Colab VM; the later cells read the dataset
# archive and the extracted images from under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader,Dataset, WeightedRandomSampler
import os
from PIL import Image
import pandas as pd
from torch.optim import lr_scheduler
from torch.utils.data import random_split
from collections import Counter


In [None]:
# Location of the zipped dataset on the mounted Drive.
# NOTE(review): this name shadows the built-in dir() and is not referenced by
# any other cell visible here (the unzip command repeats the path literally).
dir="/content/drive/MyDrive/histopathology_images.zip"

In [None]:
!unzip /content/drive/MyDrive/histopathology_images.zip -d /content/drive/MyDrive/project_data1


[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_21.png  
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_23.png  
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_25.png  
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_29.png  
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_3.png  
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_33.png  
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_35.png  
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_37.png  
 extracting: /content/drive/MyDrive/project_data1/histopathology_images/valid/0/batch_70_sample_41.p

In [None]:
# Root of the extracted dataset; the valid/ and test/ splits are loaded from
# sub-folders of this path a few cells below.
data_dir="/content/drive/MyDrive/project_data1/histopathology_images"

In [None]:
# Preprocessing shared by the train, validation and test datasets:
# resize to 224x224, convert the PIL image to a tensor, then normalise each
# of the three channels with the mean/std values listed below.
# NOTE(review): these look like the usual ImageNet statistics that go with
# torchvision's pretrained models — confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [None]:

# Validation and test splits are read with ImageFolder, which takes the class
# label from the sub-folder each image sits in.
val_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'valid'), transform=transform)
test_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'test'), transform=transform)

# Evaluation loaders are not shuffled: the metrics do not depend on sample
# order, and a fixed order keeps evaluation runs comparable.
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)


In [None]:

def make_weights_for_balanced_classes(images, nclasses):
    """Return one sampling weight per item, inversely proportional to class size.

    Args:
        images: iterable of items whose element ``[1]`` is an integer class
            label in ``range(nclasses)`` (e.g. ``(path, label)`` pairs).
        nclasses: number of classes.

    Returns:
        A list of floats, one per item in input order. An item of class ``c``
        gets ``N / count[c]`` where ``N`` is the total number of items.
        An empty input yields an empty list.
    """
    # Read the labels once; the input is traversed a single time.
    labels = [item[1] for item in images]

    count = [0] * nclasses
    for label in labels:
        count[label] += 1

    total = float(len(labels))
    # A class with no samples gets weight 0.0 instead of raising
    # ZeroDivisionError; that weight is never assigned to any item anyway.
    weight_per_class = [total / c if c else 0.0 for c in count]

    return [weight_per_class[label] for label in labels]

In [None]:
class BatchFilterDataset(Dataset):
    """Images of one batch number, collected from both class folders.

    Scans ``root_dir/0`` and ``root_dir/1`` and keeps every file whose name
    starts with ``batch_<batch_number>_``. The folder name, as an int, is the
    label.

    Args:
        root_dir: directory containing the ``0`` and ``1`` class folders.
        batch_number: batch id matched against the filename prefix.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, root_dir, batch_number, transform=None):
        self.root_dir = root_dir
        self.batch_number = batch_number
        self.transform = transform
        self.image_paths = []  # full path of each selected image
        self.labels = []       # integer label aligned with image_paths
        self._load_images()

    def _load_images(self):
        """Fill ``image_paths`` / ``labels`` with the files of this batch."""
        # The trailing underscore stops batch 1 from also matching batch 10, 11, ...
        prefix = f'batch_{self.batch_number}_'
        for label in ['0', '1']:
            folder_path = os.path.join(self.root_dir, label)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> sample mapping reproducible between runs and machines.
            for filename in sorted(os.listdir(folder_path)):
                if filename.startswith(prefix):
                    self.image_paths.append(os.path.join(folder_path, filename))
                    self.labels.append(int(label))

    def __len__(self):
        """Number of selected images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``."""
        img_path = self.image_paths[idx]
        # The context manager closes the file handle as soon as the pixels
        # have been decoded into the converted copy.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label



In [None]:
class MultiBatchDataset(Dataset):
    """Images of several batch numbers taken from a single class folder.

    Scans ``root_dir/<label>`` and keeps every file whose name starts with
    ``batch_<n>_`` for any ``n`` in ``batch_numbers``. Every sample carries
    the same ``label``.

    Args:
        root_dir: directory containing the class folders.
        batch_numbers: iterable of batch ids to include.
        label: class folder to read from, also used as the sample label.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, root_dir, batch_numbers, label=1, transform=None):
        self.root_dir = root_dir
        self.batch_numbers = batch_numbers
        self.label = label
        self.transform = transform
        self.image_paths = []  # full path of each selected image
        self.labels = []       # label aligned with image_paths
        self._load_images()

    def _load_images(self):
        """Fill ``image_paths`` / ``labels``, grouped by batch number."""
        batch_numbers = list(self.batch_numbers)
        if not batch_numbers:
            # Nothing requested: do not touch the filesystem at all.
            return

        folder_path = os.path.join(self.root_dir, str(self.label))
        # List the folder once instead of once per batch number, and sort
        # because os.listdir returns entries in arbitrary order.
        filenames = sorted(os.listdir(folder_path))

        for batch_number in batch_numbers:
            # The trailing underscore stops batch 1 from also matching batch 10, 11, ...
            prefix = f'batch_{batch_number}_'
            for filename in filenames:
                if filename.startswith(prefix):
                    self.image_paths.append(os.path.join(folder_path, filename))
                    self.labels.append(self.label)

    def __len__(self):
        """Number of selected images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``."""
        img_path = self.image_paths[idx]
        # The context manager closes the file handle as soon as the pixels
        # have been decoded into the converted copy.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


In [None]:
# NOTE: this rebinds data_dir from the dataset root to its train/ split.
# Re-running the earlier valid/test cell after this one would therefore look
# for train/valid and train/test.
data_dir = '/content/drive/MyDrive/project_data1/histopathology_images/train'
# Batch 1: images from both class folders.
batch_number = 1
train_dataset = BatchFilterDataset(root_dir=data_dir, batch_number=batch_number, transform=transform)
#train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)

# Batches 2-7: images from the class-1 folder only.
batch_numbers_others = list(range(2, 8))
train_set_others = MultiBatchDataset(data_dir, batch_numbers_others, label=1, transform=transform)

In [None]:
# Batches 10-19: images from the class-1 folder only.
batch_numbers_others1 = list(range(10,20))
train_set_others3 = MultiBatchDataset(data_dir, batch_numbers_others1, label=1, transform=transform)

In [None]:
# Batches 260-269: images from the class-1 folder only.
# NOTE(review): `o` is not added to the combined training set in any cell
# visible here — confirm whether that is intentional.
bl=list(range(260,270))
o = MultiBatchDataset(data_dir, bl, label=1, transform=transform)

In [None]:
# Batch 150: images from both class folders.
bn=150
tdm=BatchFilterDataset(root_dir=data_dir, batch_number=bn, transform=transform)

In [None]:
# Batch 9: images from both class folders.
batch_number2 = 9
td=BatchFilterDataset(root_dir=data_dir, batch_number=batch_number2, transform=transform)

In [None]:
# Batch 277: images from both class folders.
batch_number3 = 277
td2=BatchFilterDataset(root_dir=data_dir, batch_number=batch_number3, transform=transform)

In [None]:
# Class balance of batch 9 (`td`).
# The dataset already keeps its labels in memory, so count them directly
# instead of iterating the dataset, which would open and transform every image.
class_counts = dict(Counter(td.labels))

# Print the class counts
for class_label, count in class_counts.items():
    print(f"Class {class_label}: {count} images")

Class 0: 442 images
Class 1: 458 images


In [None]:
# ConcatDataset chains datasets end to end behind a single index space.
from torch.utils.data.dataset import ConcatDataset

# Start of the combined training set: batch 1 (both classes) followed by the
# class-1 images of batches 2-7.
train_set_combined = ConcatDataset([train_dataset, train_set_others])

In [None]:
# Rebuild the combined set from its source datasets instead of wrapping the
# previous value of train_set_combined. The sample order is identical, but
# re-running this cell no longer appends td / train_set_others3 / tdm a
# second time.
train_set_combined = ConcatDataset([
    train_dataset,       # batch 1, both classes
    train_set_others,    # batches 2-7, class 1
    td,                  # batch 9, both classes
    train_set_others3,   # batches 10-19, class 1
    tdm,                 # batch 150, both classes
])

In [None]:
# Final training set, rebuilt from its source datasets so that re-running this
# cell cannot append td2 twice. The sample order matches the incremental build.
train_set_combined = ConcatDataset([
    train_dataset,       # batch 1, both classes
    train_set_others,    # batches 2-7, class 1
    td,                  # batch 9, both classes
    train_set_others3,   # batches 10-19, class 1
    tdm,                 # batch 150, both classes
    td2,                 # batch 277, both classes
])

In [None]:
# Shuffled mini-batches of 32 drawn from the combined training set.
train_loader = DataLoader(train_set_combined, batch_size=32, shuffle=True, num_workers=2)

In [None]:
#weights = make_weights_for_balanced_classes(train_dataset,2)
#sampler = WeightedRandomSampler(weights, len(train_dataset), replacement=True)

In [None]:
def _iter_labels(dataset):
    """Yield the labels of ``dataset`` without loading any image.

    Descends through (possibly nested) ConcatDataset objects and reads the
    ``labels`` list kept by the leaf datasets.
    """
    if isinstance(dataset, ConcatDataset):
        for part in dataset.datasets:
            yield from _iter_labels(part)
    else:
        yield from dataset.labels


# Class balance of the whole training set. These counts feed the class weights
# of the loss computed further down.
class_counts = dict(Counter(_iter_labels(train_set_combined)))

# Print the class counts
for class_label, count in class_counts.items():
    print(f"Class {class_label}: {count} images")

Class 0: 1868 images
Class 1: 4446 images


In [None]:
# Sanity check: shape of the first transformed training image.
train_dataset[0][0].shape

torch.Size([3, 224, 224])

In [None]:
# Sanity check: shape of the first transformed validation image.
val_dataset[0][0].shape

torch.Size([3, 224, 224])

In [None]:
# Sanity check: shape of the first transformed test image.
test_dataset[0][0].shape

torch.Size([3, 224, 224])

In [None]:
# Load torchvision's AlexNet with pretrained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of a `weights=` argument — confirm against the installed version.
model=torchvision.models.alexnet(pretrained=True)

# Freeze the convolutional feature extractor so only the classifier layers
# receive gradient updates.
for param in model.features.parameters():
    param.requires_grad=False





In [None]:

# Replace classifier layer 6 with a new 2-output linear layer that takes the
# same number of input features.
num_ftrs = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_ftrs, 2)


In [None]:
# Show the architecture to confirm the replaced classifier head.
print(model)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [None]:
# Inverse-frequency class weights for the loss: total / (num_classes * count).
# `class_counts` must hold the per-class totals of the full training set (the
# counting cell run on train_set_combined). The total is taken from those same
# counts so numerator and denominator describe the same data; the previous
# len(train_dataset) covered batch 1 only.
num_classes = 2
missing = [i for i in range(num_classes) if class_counts.get(i, 0) == 0]
if missing:
    raise ValueError(f"no training samples for class(es) {missing}; cannot derive class weights")
total_samples = sum(class_counts.values())
class_weights = [total_samples / (num_classes * class_counts[i]) for i in range(num_classes)]
class_weights = torch.tensor(class_weights, dtype=torch.float32)

In [None]:

# Select the device and move the model first, so the optimizer is constructed
# over the parameters in their final location.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=model.to(device)

# Class-weighted cross entropy; the weight tensor must sit on the same device
# as the logits.
class_weights=class_weights.to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Hand the optimizer only the parameters that are still trainable; the frozen
# feature extractor never receives gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.001, momentum=0.9)

# Multiply the learning rate by 0.1 every 7 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


In [None]:
def _tally_per_class(labels, predicted, class_correct, class_total):
    """Add one batch's per-class sample and hit counts to the running lists.

    Args:
        labels: 1-D integer tensor of true classes.
        predicted: 1-D integer tensor of predicted classes, same length.
        class_correct: list updated in place with correct predictions per class.
        class_total: list updated in place with samples seen per class.
    """
    # One tensor reduction per class instead of a Python loop over every
    # sample, which would synchronise with the device for each element.
    for cls in range(len(class_total)):
        is_cls = labels == cls
        class_total[cls] += is_cls.sum().item()
        class_correct[cls] += (is_cls & (predicted == cls)).sum().item()


num_epochs = 10
for epoch in range(num_epochs):
    # ---------------- training pass ----------------
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # Reset each epoch, so after the loop these describe the final epoch only.
    class_correct_train = [0] * 2
    class_total_train = [0] * 2
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # loss is a batch average; scale by the batch size so a smaller final
        # batch contributes proportionally.
        running_loss += loss.item() * inputs.size(0)

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        _tally_per_class(labels, predicted, class_correct_train, class_total_train)

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, len(train_loader), loss.item()))

    # Normalise by the number of samples actually iterated this epoch.
    # train_loader draws from the combined set, so len(train_dataset)
    # (batch 1 only) inflated the reported loss.
    epoch_loss = running_loss / total
    train_acc = correct / total
    print('Epoch [{}/{}], Train Loss: {:.4f}, Train Accuracy: {:.4f}'.format(epoch+1, num_epochs, epoch_loss, train_acc))

    # ---------------- validation pass ----------------
    model.eval()
    correct = 0
    total = 0
    class_correct_val = [0] * 2
    class_total_val = [0] * 2
    with torch.no_grad():
        for i, (inputs,labels) in enumerate(val_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            _tally_per_class(labels, predicted, class_correct_val, class_total_val)

    val_acc = correct / total if total > 0 else 0
    print('Epoch [{}/{}], Validation Accuracy: {:.4f}'.format(epoch+1, num_epochs, val_acc))

    # Advance the LR schedule exactly once per epoch. Stepping twice per epoch
    # made StepLR(step_size=7) decay the rate about twice as often as configured.
    scheduler.step()
print('Training finished.')

# ---------------- test pass ----------------
correct=0
total=0
true_positives = 0
false_positives = 0
false_negatives = 0
class_correct = [0] * 2
class_total = [0] * 2
model.eval()
with torch.no_grad():
    for i, (inputs,labels) in enumerate(test_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)

        # Get predictions
        _, predicted = torch.max(outputs, 1)

        # Update counts
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        _tally_per_class(labels, predicted, class_correct, class_total)

        # Class 1 is treated as the positive class.
        true_positives += ((predicted == 1) & (labels == 1)).sum().item()
        false_positives += ((predicted == 1) & (labels == 0)).sum().item()
        false_negatives += ((predicted == 0) & (labels == 1)).sum().item()
accuracy = correct / total if total > 0 else 0

precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0

# Calculate Recall
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0

# Calculate F1-score
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Accuracies are fractions in [0, 1]; scale by 100 wherever a '%' sign is
# printed so the figure matches its unit.
print("Correct:", correct)
print("Total:", total)
print('Test Accuracy: {:.2f}%'.format(100 * accuracy))
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1_score)

# The training and validation per-class figures come from the last epoch only.
print("Class Accuracies - Training:")
for i in range(2):
    class_accuracy_train = class_correct_train[i] / class_total_train[i] if class_total_train[i] > 0 else 0
    print('Class {} Accuracy: {:.2f}%'.format(i, 100 * class_accuracy_train))

print("Class Accuracies - Validation:")
for i in range(2):
    class_accuracy_val = class_correct_val[i] / class_total_val[i] if class_total_val[i] > 0 else 0
    print('Class {} Accuracy: {:.2f}%'.format(i, 100 * class_accuracy_val))

print("Class Accuracies - Test:")
for i in range(2):
    class_accuracy = class_correct[i] / class_total[i] if class_total[i] > 0 else 0
    print('Class {} Accuracy: {:.2f}%'.format(i, 100 * class_accuracy))


Epoch [1/10], Batch [100/198], Loss: 0.3108
Epoch [1/10], Train Loss: 2.5475, Train Accuracy: 0.8470
Epoch [1/10], Validation Accuracy: 0.8107
Epoch [2/10], Batch [100/198], Loss: 0.2446
Epoch [2/10], Train Loss: 2.0833, Train Accuracy: 0.8830
Epoch [2/10], Validation Accuracy: 0.8149
Epoch [3/10], Batch [100/198], Loss: 0.1855
Epoch [3/10], Train Loss: 1.8890, Train Accuracy: 0.8926
Epoch [3/10], Validation Accuracy: 0.8160
Epoch [4/10], Batch [100/198], Loss: 0.3300
Epoch [4/10], Train Loss: 1.6975, Train Accuracy: 0.9009
Epoch [4/10], Validation Accuracy: 0.8137
Epoch [5/10], Batch [100/198], Loss: 0.2029
Epoch [5/10], Train Loss: 1.5314, Train Accuracy: 0.9157
Epoch [5/10], Validation Accuracy: 0.8168
Epoch [6/10], Batch [100/198], Loss: 0.3875
Epoch [6/10], Train Loss: 1.5179, Train Accuracy: 0.9169
Epoch [6/10], Validation Accuracy: 0.8175
Epoch [7/10], Batch [100/198], Loss: 0.1873
Epoch [7/10], Train Loss: 1.5045, Train Accuracy: 0.9167
Epoch [7/10], Validation Accuracy: 0.8159