# GPU testing

In [None]:
import torch
# Report what PyTorch can see of the CUDA stack: availability, device count
# and (when a device is present) the NCCL version.
cuda_available = torch.cuda.is_available()
print(cuda_available)
print(torch.cuda.device_count())
# Querying the current device initialises CUDA and raises on a machine
# without a usable GPU, so it is only attempted when CUDA is available.
device = torch.cuda.current_device() if cuda_available else None
# Building a torch.device handle does not touch the hardware.
cuda = torch.device("cuda:0")
if cuda_available:
    print(torch.cuda.nccl.version())
else:
    print("CUDA is not available; skipping the NCCL version query.")

# Federated learning

In [None]:
%run main.py

In [None]:
%run dataset.py

In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
# Fit a logistic-regression classifier on Iris and compute its one-vs-rest
# ROC AUC. The score is computed on the same samples the model was fitted on,
# so this is an API/shape check rather than a generalisation estimate.
X, y = load_iris(return_X_y=True)
clf = LogisticRegression(solver="liblinear").fit(X, y)
out = roc_auc_score(y, clf.predict_proba(X), multi_class='ovr') #(150,),(150,3)
# Pass the type itself to print; nesting print() inside print() would emit
# the type on its own line and then show "None" next to the label.
print("y", type(y))
print("clf_X", type(out))
print("out", out)

In [None]:
import torch
import torch.nn as nn
from torchvision import models

class Net(nn.Module):
    """ResNet-18 with frozen pretrained weights and a new linear head.

    All parameters of the loaded ResNet-18 are frozen; its final ``fc`` layer
    is then replaced by a fresh ``nn.Linear`` producing ``num_classes``
    outputs, which is created after the freeze and therefore stays trainable.
    """

    def __init__(self, num_classes: int) -> None:
        """Build the frozen backbone and attach a ``num_classes``-way head."""
        super().__init__()
        # NOTE(review): newer torchvision releases appear to prefer a
        # `weights=` argument over `pretrained=` - confirm against the
        # installed version.
        backbone = models.resnet18(pretrained=True)
        for weight in backbone.parameters():
            weight.requires_grad = False

        # Read the head's input width before the original layer is replaced.
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, num_classes)

        self.model = backbone
        self.input_features = head_inputs

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the wrapped ResNet-18 and return its output."""
        return self.model(x)
    
# Smoke test: push a random batch of four 3x224x224 inputs through a
# 3-class Net and print the shape of the result.
a = Net(3)
simu_in = torch.randn(4, 3, 224, 224)
print(a(simu_in).shape)

# Rebuild a state dict by pairing the model's own keys, in order, with an
# external sequence of arrays, then load it back with strict key matching.
# NOTE(review): `parameters` and `OrderedDict` are not defined in this cell;
# presumably they come from an earlier cell or a `%run` script - confirm.
params_dict = zip(a.state_dict(), parameters)
state_dict = OrderedDict(
    (key, torch.Tensor(value)) for key, value in params_dict
)
a.load_state_dict(state_dict, strict=True)

# Preprocessing

In [None]:
%run preprocessing/divide_class.py

In [None]:
%run preprocessing/generate_json.py

In [None]:
import os
import numpy as np
import random
import json

def allocate_data_dirichlet(labels, num_classes, num_clients, alpha, train_ratio, val_ratio, test_ratio):
    """
    Allocate data to clients using Dirichlet distribution.

    For every class a proportion vector over the clients is drawn from a
    symmetric Dirichlet distribution. The class's samples are shuffled, cut
    into one consecutive chunk per client according to those proportions, and
    each chunk is split into train / val / test parts.

    Randomness comes from both ``np.random`` and ``random``; seed both for
    reproducible results.

    :param labels: Sequence indexed by class label; element ``c`` is a dict
        whose items are the samples of class ``c``.
    :param num_classes: Number of classes; entries ``0 .. num_classes - 1``
        of ``labels`` are read.
    :param num_clients: Number of clients to distribute data across.
    :param alpha: Concentration parameter for the Dirichlet distribution.
    :param train_ratio: Fraction of each client's per-class chunk that goes
        to the train split.
    :param val_ratio: Fraction of each client's per-class chunk that goes to
        the val split.
    :param test_ratio: Not used in the computation; the test split receives
        whatever remains of the chunk after train and val.
    :return: A list with one entry per client. Each entry is
        ``[train, val, test]``, and every split is a list of ``(key, value)``
        items taken from the dicts in ``labels``.
    """
    # One row of client proportions per class, e.g. [[0.2 0.8], [0.1 0.9], [0.5 0.5]].
    class_proportions = np.random.dirichlet([alpha] * num_clients, num_classes)
    client_data_indices = [[[], [], []] for _ in range(num_clients)]
    for class_label in range(num_classes):
        samples = list(labels[class_label].items())
        random.shuffle(samples)
        num_samples = len(samples)

        # Round the cumulative chunk boundaries instead of each client's share.
        # Rounding shares independently and giving the last client "the rest"
        # can make that remainder negative when the earlier shares round up;
        # monotone boundaries always yield non-negative counts that sum to
        # num_samples.
        boundaries = np.round(np.cumsum(class_proportions[class_label]) * num_samples).astype(int)
        boundaries = np.minimum(boundaries, num_samples)
        boundaries[-1] = num_samples
        allocations = np.diff(boundaries, prepend=0)
        print("allocations", allocations)

        start = 0
        for client_id, allocation in enumerate(allocations):
            # Plain int so that round() below returns a usable slice index.
            allocation = int(allocation)
            train_end = start + round(allocation * train_ratio)
            val_end = start + round(allocation * (train_ratio + val_ratio))
            chunk_end = start + allocation

            train_split, val_split, test_split = client_data_indices[client_id]
            train_split.extend(samples[start:train_end])
            val_split.extend(samples[train_end:val_end])
            test_split.extend(samples[val_end:chunk_end])
            start = chunk_end

    return client_data_indices

# Collect the image stems of every class folder under "isic2017", split them
# across clients with allocate_data_dirichlet, and write the per-client
# train/val/test file lists to "FL_divide.json".
file_list = []
num_classes = 3
num_clients = 2
alpha = 1.5
train_ratio = 0.7
val_ratio = 0.1
test_ratio = 0.2
for cls in range(num_classes):
    class_dir = os.path.join("isic2017", str(cls))
    # Match on the actual extension and strip only that extension: a substring
    # test plus split(".")[0] would accept names such as "x.jpg.bak" and would
    # truncate stems containing dots, so the ".jpg" names written below would
    # not correspond to real files.
    file_dict = {
        os.path.splitext(s)[0]: cls
        for s in os.listdir(class_dir)
        if s.endswith(".jpg")
    }
    file_list.append(file_dict)

clients = allocate_data_dirichlet(file_list, num_classes, num_clients, alpha, train_ratio, val_ratio, test_ratio)

json_data = {}
for num in range(num_clients):
    train_items, val_items, test_items = clients[num]

    print("train_len", len(train_items))
    print("val_len", len(val_items))
    print("test_len", len(test_items))

    # Clients are keyed 1-based; each item is a (stem, class) pair and only
    # the stem is written out, with the extension restored.
    json_data["client " + str(num + 1)] = {
        "train": [str(name) + ".jpg" for name, _ in train_items],
        "val": [str(name) + ".jpg" for name, _ in val_items],
        "test": [str(name) + ".jpg" for name, _ in test_items],
    }

with open("FL_divide.json", 'w') as file:
    json.dump(json_data, file, indent=4)


In [None]:
import numpy as np 
  
# Grow a 2-D array one row at a time with np.vstack.
# The accumulator starts as an empty (0, 3) array: vstack requires every input
# to have the same number of columns, so a 1-D empty array (np.array([]))
# cannot be stacked with 3-element rows and raises ValueError.
arr = np.empty((0, 3), dtype="<U1")
#arr = np.hstack((arr, np.array(['G', 'F', 'G']))) 
print(arr.size)

arr = np.vstack((arr, np.array(['G', 'F', 'G'])))
print(arr.shape)
arr = np.vstack((arr, np.array(['G', 'F', 'G'])))
print(arr.shape)

In [None]:
# Read the ISIC 2019 ground-truth CSV as raw lines. The context manager
# closes the file handle as soon as the contents have been read.
with open("isic2019/ISIC_2019_Training_GroundTruth.csv") as csv_file:
    read_csv = csv_file.read().splitlines()
file_dict = {}
# The first line is skipped - presumably the header row.
for line in read_csv[1:]:
    # Each row must have exactly ten comma-separated fields, otherwise the
    # unpacking below raises ValueError.
    filename,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK = line.strip().split(",") 
    if UNK == "0.0":
        print("!")