In [7]:
import h5py
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from netcal.metrics import ECE

# Import models
from parse_args import collect_args

from models.CFair import CFair
from models.DomainInd import DomainInd
from models.EnD import EnD
from models.GroupDRO import GroupDRO
from models.LAFTR import LAFTR
from models.LNL import LNL
from models.ODR import ODR
from models.resampling import resampling
from models.SWAD import SWAD

In [8]:
# بارگذاری داده‌ها
path_to_data = 'BrEaST-Lesions-USG-clinical-data-Dec-15-2023.xlsx'
demo_data = pd.read_excel(path_to_data)

opt, wandb = collect_args()

usage: ipykernel_launcher.py [-h]
                             [--experiment {baseline,CFair,LAFTR,LNL,EnD,DomainInd,resampling,ODR,SWA,SWAD,SAM,GSAM,SAMSWAD,GroupDRO,BayesCNN,resamplingSWAD}]
                             [--experiment_name EXPERIMENT_NAME]
                             [--wandb_name WANDB_NAME] [--if_wandb IF_WANDB]
                             [--dataset_name {CXP,NIH,MIMIC_CXR,RadFusion,RadFusion4,HAM10000,HAM100004,Fitz17k,OCT,PAPILA,ADNI,ADNI3T,COVID_CT_MD,RadFusion_EHR,MIMIC_III,eICU}]
                             [--resume_path RESUME_PATH]
                             [--sensitive_name {Sex,Age,Race,skin_type,Insurance}]
                             [--is_3d IS_3D] [--is_tabular IS_TABULAR]
                             [--random_seed RANDOM_SEED]
                             [--batch_size BATCH_SIZE] [--no_cuda] [--lr LR]
                             [--weight_decay WEIGHT_DECAY]
                             [--lr_decay_rate LR_DECAY_RATE]
                      

SystemExit: 2

In [21]:
# Define age bins
age_bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
age_labels = ['0-10', '11-20', '21-30', '31-40', '41-50', '51-60', '61-70', '71-80', '81-90', '91-100']
demo_data['Age_Category'] = pd.cut(demo_data['Age'], bins=age_bins, labels=age_labels, right=False)

In [22]:
# تقسیم داده‌ها به بخش‌های آموزش، اعتبارسنجی و تست
train_data, test_data = train_test_split(demo_data, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)  # 0.25 * 0.8 = 0.2

# ایجاد DataFrame های مربوط به بخش‌های مختلف
sub_train = train_data.reset_index(drop=True)
sub_val = val_data.reset_index(drop=True)
sub_test = test_data.reset_index(drop=True)

In [23]:
# Custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx]['Path']
        image = Image.open(img_path).convert('RGB')
        label = self.dataframe.iloc[idx]['binaryLabel']
        protected_attr = self.dataframe.iloc[idx]['Age_Category']

        if self.transform:
            image = self.transform(image)

        protected_attr = torch.tensor(protected_attr)

        return image, label, protected_attr

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [24]:

# Load data
train_dataset = CustomDataset(dataframe=sub_train, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [25]:
# Define fairness metrics calculation
def calculate_fairness_metrics(y_true, y_pred, protected_attr):
    cm = confusion_matrix(y_true, y_pred)
    tn, fp, fn, tp = cm.ravel()

    FPR = fp / (fp + tn)
    FNR = fn / (fn + tp)
    TPR = tp / (tp + fn)
    TNR = tn / (tn + fp)
    ece = ECE().measure(y_pred, y_true)

    metrics = {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred),
        'Recall': recall_score(y_true, y_pred),
        'F1-Score': f1_score(y_true, y_pred),
        'BCE': (FPR + FNR) / 2,
        'ECE': ece,
        'TPR@80': TPR,
        'TNR': TNR,
        'FPR': FPR,
        'FNR': FNR,
        'EqOdd': (FPR + FNR) / 2  # EqOdd شبیه‌سازی با استفاده از BCE
    }
    
    return metrics

In [27]:
# Train and evaluate models
model_names = ['CFair', 'DomainInd', 'EnD', 'GroupDRO', 'LAFTR', 'LNL', 'ODR', 'Resampling', 'SWAD']
models_list = [CFair(), DomainInd(), EnD(), GroupDRO(), LAFTR(), LNL(), ODR(), resampling(), SWAD()]

final_results = pd.DataFrame()

for model_name, model in zip(model_names, models_list):
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    trained_model = train_model(model, {'train': train_loader}, criterion, optimizer, opt, num_epochs=5)
    
    for age_group in demo_data['Age_Category'].unique():
        group_data = demo_data[demo_data['Age_Category'] == age_group]
        y_true = group_data['binaryLabel']
        y_pred = trained_model.predict(group_data['Path'])  # فرض کنید مدل این تابع را داشته باشد
        metrics = calculate_fairness_metrics(y_true, y_pred, age_group)
        metrics['Model'] = model_name
        metrics['Group'] = f'Grp. {age_group}'
        metrics_df = pd.DataFrame([metrics])
        final_results = pd.concat([final_results, metrics_df], ignore_index=True)

TypeError: __init__() missing 2 required positional arguments: 'opt' and 'wandb'

In [16]:
# Display final results
final_results = final_results.pivot(index='Model', columns='Group')
print(final_results)

# Save final results to CSV
final_results.to_csv('final_results.csv')

NameError: name 'model_names' is not defined