In [5]:
import h5py
import pandas as pd
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split
import pickle
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import argparse
import json
import hashlib
from sklearn.metrics import f1_score, precision_score, recall_score, roc_curve, confusion_matrix
from netcal.metrics import ECE
import sys

import sys
import os

# Append the current working directory to sys.path so the MEDFAIR `models` package can be imported
sys.path.append(os.path.abspath(os.getcwd()))

# Import methods from the MEDFAIR project
from models.CFair import CFairModel

from models.erm import ERMModel
from models.laftr import LAFTRModel
from models.CFair import CFairModel
from models.resample import ResampleModel
from models.domainlnd import DomainLndModel
from models.lnl import LNLModel
from models.end import EnDModel
from models.odr import ODRModel
from models.groupdro import GroupDROModel
from models.swad import SWADModel

ImportError: cannot import name 'CFairModel' from 'models.CFair' (/Users/amir/PycharmProjects/Medfair/MEDFAIR/models/CFair/__init__.py)

In [3]:

# Define paths and read dataset
path = '/Users/amir/PycharmProjects/Medfair/MEDFAIR/'
demo_data = pd.read_excel(path + 'BrEaST-Lesions-USG-clinical-data-Dec-15-2023.xlsx')
# The folder name must be relative: os.path.join throws away every earlier
# component as soon as it meets one that starts with '/', so the previous
# '/MEDFAIR/...' argument silently produced a path outside the project.
images_path = os.path.join(path, 'BrEaST-Lesions_USG-images_and_masks/')
pathlist = demo_data['Image_filename'].values.tolist()
paths = [os.path.join(images_path, i) for i in pathlist]
demo_data['Path'] = paths

# Define the attribute for evaluation (choose 'age', 'sex', or 'shape')
selected_attribute = 'age'  # or 'sex' or 'shape'

# Preprocess data based on the selected attribute.
# Every branch drops rows whose attribute is missing and takes an explicit
# .copy() so the column assignments below write to an independent frame
# rather than to a view of the original (avoids SettingWithCopyWarning).
if selected_attribute == 'age':
    demo_data = demo_data[demo_data['Age'].notna()].copy()
    age_bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    # right=False makes each bin half-open, [lo, hi), so the labels are derived
    # from the edges ('0-9', '10-19', ...) to describe the bins truthfully.
    # The previous hand-written labels ('0-10', '11-20', ...) put age 10 in a
    # bin labelled '11-20'. Bin membership and codes are unchanged.
    age_labels = [f'{lo}-{hi - 1}' for lo, hi in zip(age_bins[:-1], age_bins[1:])]
    demo_data['Category'] = pd.cut(demo_data['Age'], bins=age_bins, labels=age_labels, right=False)
    attribute_mapping = {label: idx for idx, label in enumerate(age_labels)}
elif selected_attribute == 'sex':
    demo_data = demo_data[demo_data['Sex'].notna()].copy()
    demo_data['Category'] = demo_data['Sex']
    attribute_mapping = {'Male': 0, 'Female': 1}
elif selected_attribute == 'shape':
    demo_data = demo_data[demo_data['Shape'].notna()].copy()
    demo_data['Category'] = demo_data['Shape']
    attribute_mapping = {shape: idx for idx, shape in enumerate(demo_data['Shape'].unique())}
else:
    # Fail here with a clear message instead of a NameError on attribute_mapping below.
    raise ValueError(
        f"Unsupported selected_attribute {selected_attribute!r}; expected 'age', 'sex' or 'shape'"
    )

# Integer group code per row. Values of 'Category' that are not keys of
# attribute_mapping (e.g. ages outside the bins) map to NaN here.
demo_data['Category_Code'] = demo_data['Category'].map(attribute_mapping)

# Custom Dataset Class
class CustomDataset(Dataset):
    """Dataset that serves (image, label, protected attribute) triples from a DataFrame.

    Each row of ``dataframe`` must provide the columns ``'Path'`` (image file
    to open), ``'binaryLabel'`` (returned unchanged as the label) and
    ``'Category_Code'`` (returned wrapped in a ``torch.tensor``).
    """

    def __init__(self, dataframe, transform=None):
        """Store the backing frame and the optional per-image transform callable."""
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the backing frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image for positional row ``idx`` and return it with its label and group code."""
        # Positional lookup, so the frame's index labels do not matter.
        record = self.dataframe.iloc[idx]

        picture = Image.open(record['Path']).convert('RGB')
        target = record['binaryLabel']
        group_code = record['Category_Code']

        # The transform is optional; without one the PIL image is returned as-is.
        if self.transform:
            picture = self.transform(picture)

        return picture, target, torch.tensor(group_code)

# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each of the three channels with the fixed
# mean / standard deviation below.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
transform = transforms.Compose(_preprocess_steps)

# Split data
# NOTE(review): split_811 is not defined in the cells shown here; it must be
# defined or imported before this cell runs. It is called with the frame and
# the unique CaseID values and is expected to return three DataFrames
# (train, val, test) -- confirm against its definition.
sub_train, sub_val, sub_test = split_811(demo_data, np.unique(demo_data['CaseID']))

# Persist the splits under <path>/split. The folder is created first because
# DataFrame.to_csv does not create missing parent directories.
split_dir = os.path.join(path, 'split')
os.makedirs(split_dir, exist_ok=True)
sub_train.to_csv(os.path.join(split_dir, 'new_train.csv'))
sub_val.to_csv(os.path.join(split_dir, 'new_val.csv'))
sub_test.to_csv(os.path.join(split_dir, 'new_test.csv'))

# One dataset per split, all sharing the same preprocessing transform.
train_dataset = CustomDataset(dataframe=sub_train, transform=transform)
val_dataset = CustomDataset(dataframe=sub_val, transform=transform)
test_dataset = CustomDataset(dataframe=sub_test, transform=transform)

# Only the training loader shuffles; validation and test keep row order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define and initialize models
# Single table of (display name, wrapper class) pairs; both parallel lists
# below are derived from it so their order cannot drift apart.
_model_specs = [
    ('ERM', ERMModel),
    ('Resample', ResampleModel),
    ('DomainLnd', DomainLndModel),
    ('LAFTR', LAFTRModel),
    ('CFair', CFairModel),
    ('LNL', LNLModel),
    ('EnD', EnDModel),
    ('ODR', ODRModel),
    ('GroupDRO', GroupDROModel),
    ('SWAD', SWADModel),
]

# Every wrapper receives its own freshly constructed pretrained ResNet-18.
models_list = [
    wrapper_cls(base_model=models.resnet18(pretrained=True))
    for _, wrapper_cls in _model_specs
]

model_names = [display_name for display_name, _ in _model_specs]

# Training and evaluation
# NOTE(review): train_model, evaluate_model, criterion and opt are not defined
# in the cells shown here; they must exist in the kernel before this cell runs.
# evaluate_model is expected to return a mapping {group: metrics dict} --
# confirm against its definition.
result_rows = []  # one record per (model, group); turned into a frame once at the end

for model_name, model in zip(model_names, models_list):
    print(f"Training and evaluating {model_name} on {selected_attribute}...")
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    trained_model = train_model(model, {'train': train_loader, 'val': val_loader}, criterion, optimizer, opt, num_epochs=5)
    fairness_metrics = evaluate_model(trained_model, test_loader, opt)

    for group, metrics in fairness_metrics.items():
        # Copy before annotating so the dicts returned by evaluate_model stay untouched.
        row = dict(metrics)
        row['Model'] = model_name
        row['Group'] = f'Grp. {group}'
        row['Attribute'] = selected_attribute
        result_rows.append(row)

# Build the frame in one go instead of concatenating inside the loop,
# which re-copies all accumulated rows on every iteration.
final_results = pd.DataFrame(result_rows)

# Sort and display the final results
# (skipped when there are no rows: the sort columns would not exist).
if not final_results.empty:
    final_results = final_results.sort_values(by=['Model', 'Group'])
print(final_results)

# Export the results to a CSV file
# The previous target, '/path/to/save/', was an unfilled placeholder; results
# now go to <path>/results, which is created on demand.
results_dir = os.path.join(path, 'results')
os.makedirs(results_dir, exist_ok=True)
final_results.to_csv(os.path.join(results_dir, f'final_results_{selected_attribute}.csv'), index=False)

NameError: name '__file__' is not defined

In [3]:
# Use the %pip magic so the package is installed into the running kernel's
# environment, and pin the version that the recorded output below resolved.
%pip install torchio==0.19.9

Collecting torchio
  Downloading torchio-0.19.9-py2.py3-none-any.whl.metadata (49 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 49.9/49.9 kB 464.5 kB/s eta 0:00:00
Collecting Deprecated (from torchio)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB)
Collecting SimpleITK!=2.0.*,!=2.1.1.1 (from torchio)
  Downloading SimpleITK-2.3.1-cp39-cp39-macosx_11_0_arm64.whl.metadata (7.9 kB)
Collecting humanize (from torchio)
  Downloading humanize-4.10.0-py3-none-any.whl.metadata (7.9 kB)
Collecting nibabel (from torchio)
  Downloading nibabel-5.2.1-py3-none-any.whl.metadata (8.8 kB)
Collecting typer[all] (from torchio)
  Downloading typer-0.12.3-py3-none-any.whl.metadata (15 kB)
Collecting wrapt<2,>=1.10 (from Deprecated->torchio)
  Downloading wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl.metadata (6.6 kB)
Collecting click>=8.0.0 (from typer[all]->torchio)
  Downloading click-8.1.7-py3-none-any.whl.metad