In [1]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
%pip install scikit-learn
%pip install matplotlib

Looking in indexes: https://download.pytorch.org/whl/cu118
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils import resample
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np

In [3]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transformations for the dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [4]:
# Define dataset paths
train_data_path = 'C:\\projects\\BloodNets\\blood_data\\train'
test_data_path = 'C:\\projects\\BloodNets\\blood_data\\test'

# Load datasets
train_data = datasets.ImageFolder(train_data_path, transform=transform)
test_data = datasets.ImageFolder(test_data_path, transform=transform)


In [17]:
# Define and initialize models
def initialize_model(model_name, num_classes):
    if model_name == 'shufflenet':
        model = models.shufflenet_v2_x1_0(pretrained=True)
        num_features = model.fc.in_features
        model.fc = nn.Linear(num_features, num_classes)
    elif model_name == 'mobilenet':
        model = models.mobilenet_v2(pretrained=True)
        num_features = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(num_features, num_classes)
    elif model_name == 'resnet50':
        model = models.resnet50(pretrained=True)
        num_features = model.fc.in_features
        model.fc = nn.Linear(num_features, num_classes)
    else:
        raise ValueError(f"Unsupported model name: {model_name}")
    return model


In [18]:
# Training function
def train(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0.0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * inputs.size(0)
    return total_loss / len(loader.dataset)

In [19]:
# Testing function
def test(model, loader, criterion, device):
    model.eval()
    total_loss = 0.0
    all_labels, all_preds = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    metrics = {
        'loss': total_loss / len(loader.dataset),
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds, average='weighted'),
        'recall': recall_score(all_labels, all_preds, average='weighted'),
        'f1_score': f1_score(all_labels, all_preds, average='weighted')
    }
    return metrics

In [20]:
# Define a custom boosting class
class DeepBoosting:
    def __init__(self, models, n_estimators=3):
        self.models = models
        self.n_estimators = n_estimators
        self.model_weights = np.zeros(len(models))
        
    def fit(self, X_train):
        sample_weights = np.ones(len(X_train)) / len(X_train)
        
        for i, model in enumerate(self.models):
            train_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, len(sample_weights))
            train_loader = torch.utils.data.DataLoader(X_train, batch_size=32, sampler=train_sampler)
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            criterion = nn.CrossEntropyLoss()
            
            for epoch in range(5):  # Adjust number of epochs as needed
                train_loss = train(model, train_loader, criterion, optimizer, device)
                print(f"Training model {i + 1} - Epoch {epoch + 1}/{5} - Loss: {train_loss:.4f}")

            # Calculate errors and update sample weights
            all_labels, all_preds = [], []
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds.cpu().numpy())
            
            errors = np.array(all_labels) != np.array(all_preds)
            err_m = np.mean(errors)
            self.model_weights[i] = np.log((1 - err_m) / err_m)
            sample_weights = sample_weights * np.exp(self.model_weights[i] * errors)
            sample_weights /= np.sum(sample_weights)
        
    def predict(self, X_test):
        all_preds = np.zeros((len(X_test), len(self.models)))
        for i, model in enumerate(self.models):
            test_loader = torch.utils.data.DataLoader(X_test, batch_size=32)
            model.eval()
            preds = []
            with torch.no_grad():
                for inputs, _ in test_loader:
                    inputs = inputs.to(device)
                    outputs = model(inputs)
                    _, pred = torch.max(outputs, 1)
                    preds.extend(pred.cpu().numpy())
            all_preds[:, i] = preds
        
        weighted_preds = np.average(all_preds, axis=1, weights=self.model_weights)
        return np.round(weighted_preds).astype(int)

In [21]:
# Initialize models and DeepBoosting
model_names = ['shufflenet', 'mobilenet', 'resnet50']
models_list = [initialize_model(name, len(train_data.classes)).to(device) for name in model_names]

# Initialize DeepBoosting
boosting_model = DeepBoosting(models=models_list, n_estimators=len(models_list))

# Train and evaluate
boosting_model.fit(train_data)

# Predict and evaluate
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32)
all_labels = np.array([label for _, label in test_loader.dataset.samples])
preds = boosting_model.predict(test_loader)



Training model 1 - Epoch 1/5 - Loss: 0.6149
Training model 1 - Epoch 2/5 - Loss: 0.1471
Training model 1 - Epoch 3/5 - Loss: 0.1014
Training model 1 - Epoch 4/5 - Loss: 0.0885
Training model 1 - Epoch 5/5 - Loss: 0.0682
Training model 2 - Epoch 1/5 - Loss: 0.2526
Training model 2 - Epoch 2/5 - Loss: 0.1254
Training model 2 - Epoch 3/5 - Loss: 0.0845
Training model 2 - Epoch 4/5 - Loss: 0.0898
Training model 2 - Epoch 5/5 - Loss: 0.0862
Training model 3 - Epoch 1/5 - Loss: 0.3485
Training model 3 - Epoch 2/5 - Loss: 0.1877
Training model 3 - Epoch 3/5 - Loss: 0.1252
Training model 3 - Epoch 4/5 - Loss: 0.1072
Training model 3 - Epoch 5/5 - Loss: 0.0758


TypeError: 'DataLoader' object is not subscriptable

In [None]:
# Evaluate metrics
accuracy = accuracy_score(all_labels, preds)
precision = precision_score(all_labels, preds, average='weighted')
recall = recall_score(all_labels, preds, average='weighted')
f1 = f1_score(all_labels, preds, average='weighted')

print(f"Boosting Ensemble Metrics\nAccuracy: {accuracy:.4f}\nPrecision: {precision:.4f}\nRecall: {recall:.4f}\nF1 Score: {f1:.4f}")