## Imports

In [104]:
import pandas as pd
from pandas import DataFrame, Series

import numpy as np

# import ast
from typing import Literal

## AHP

In [105]:
# 6x6 relation matrix used when the stem cell source is "bone marrow".
# get_criteria_weights_AHP labels both its rows and its columns with
# `ordered_columns`, so entry [i][j] relates criterion i to criterion j.
# NOTE(review): if this is meant to be a reciprocal pairwise-comparison
# matrix, entries [1][3] = 3 vs [3][1] = 1/9 and [1][4] = 3 vs [4][1] = 1/7
# are not reciprocals of each other - confirm with the domain expert.
relations_BM = [
    [1,     3,     3/2,   9,     7,     9],
    [1/3,   1,     1/3,   3,     3,     5],
    [2/3,   3,     1,     7,     5,     7],
    [1/9,   1/9,   1/7,   1,     1,     3],
    [1/7,   1/7,   1/5,   1,     1,     3],
    [1/9,   1/5,   1/7,   1/3,   1/3,   1]
]

# 6x6 relation matrix used for every stem cell source other than
# "bone marrow". It contains the same rows as relations_BM, but with the
# last two rows swapped.
# NOTE(review): only the rows are swapped, not the matching columns, so the
# diagonal holds [4][4] = 1/3 and [5][5] = 3 instead of 1. Presumably the
# columns should have been permuted as well - confirm before relying on
# these weights.
relations_PB = [
    [1,     3,     3/2,   9,     7,     9],
    [1/3,   1,     1/3,   3,     3,     5],
    [2/3,   3,     1,     7,     5,     7],
    [1/9,   1/9,   1/7,   1,     1,     3],
    [1/9,   1/5,   1/7,   1/3,   1/3,   1], 
    [1/7,   1/7,   1/5,   1,     1,     3]    
]

# Criterion names, in the order used to label the rows and columns of the
# relation matrices above.
ordered_columns = [
    'HLA_match',
    'CMV_serostatus',
    'donor_age_group',
    'gender_match',
    'ABO_match',
    'expected_survival_time'
]

In [106]:
def compute_weights(data_AHP: DataFrame):
    """Derive one priority weight per row of a comparison table.

    Every column is rescaled so that it sums to 1; the weight of a row is
    then the mean of its rescaled entries.

    Returns:
        A NumPy array of weights, ordered like the rows of ``data_AHP``.
    """
    column_normalized = data_AHP.div(data_AHP.sum(axis=0), axis="columns")
    row_means = column_normalized.mean(axis=1)
    return np.array(row_means)

In [107]:
def get_criteria_weights_AHP(stem_cell_source: Literal["bone marrow", "pheripheral blood"]):
    """Return the AHP criterion weights for the given stem cell source.

    ``"bone marrow"`` selects ``relations_BM``; every other value selects
    ``relations_PB``. The chosen matrix is labelled with ``ordered_columns``
    on both axes and handed to ``compute_weights``.

    Returns:
        A NumPy array with one weight per entry of ``ordered_columns``.
    """
    relation_matrix = (
        relations_BM if stem_cell_source == "bone marrow" else relations_PB
    )
    return compute_weights(
        DataFrame(relation_matrix, index=ordered_columns, columns=ordered_columns)
    )

## TOPSIS

In [108]:
def normalize_matrix(matrix):
    """Vector-normalize every column of a decision matrix.

    Each entry is divided by the Euclidean norm of its column (the square
    root of the sum of the squared column entries).

    Args:
        matrix: Row-major 2-D sequence of numbers (list of lists or a 2-D
            NumPy array). The column count is taken from the first row.

    Returns:
        A list of lists with the same shape as ``matrix``. An empty
        ``matrix`` yields ``[]``. A column whose norm is 0 (all entries are
        zero) is returned as all ``0.0``.
    """
    if len(matrix) == 0:
        return []

    column_count = len(matrix[0])

    # Explicit accumulation (rather than sum()) keeps the floating-point
    # addition order independent of the interpreter's sum() implementation.
    column_norms = []
    for j in range(column_count):
        squared_total = 0
        for row in matrix:
            squared_total += row[j] ** 2
        column_norms.append(squared_total ** 0.5)

    normalized_matrix = []
    for row in matrix:
        normalized_matrix.append([
            # A zero norm means the whole column is zero; dividing would
            # raise (plain numbers) or produce NaN (NumPy scalars) and
            # contaminate every downstream score.
            row[j] / column_norms[j] if column_norms[j] != 0 else 0.0
            for j in range(column_count)
        ])

    return normalized_matrix

In [109]:
def weight_matrix(weights, matrix_normalized):
    """Scale each column of a normalized matrix by its criterion weight.

    Args:
        weights: Indexable sequence; ``weights[j]`` multiplies column ``j``.
        matrix_normalized: Row-major 2-D sequence. The column count is taken
            from its first row.

    Returns:
        A list of lists where entry ``[i][j]`` equals
        ``matrix_normalized[i][j] * weights[j]``.
    """
    criteria = range(len(matrix_normalized[0]))
    return [
        [row[j] * weights[j] for j in criteria]
        for row in matrix_normalized
    ]

In [110]:
def ideal_best_worst(matrix_weighted, impact):
    """Build the positive and negative ideal solutions column by column.

    For a column whose ``impact`` entry equals 1, the positive ideal is the
    column maximum and the negative ideal is the column minimum. For any
    other ``impact`` value the two are exchanged.

    Args:
        matrix_weighted: Row-major 2-D sequence. The column count is taken
            from its first row.
        impact: Indexable sequence with one entry per column.

    Returns:
        A ``(positive_ideal, negative_ideal)`` tuple of lists.
    """
    positive_ideal = []
    negative_ideal = []

    for j in range(len(matrix_weighted[0])):
        column = [row[j] for row in matrix_weighted]
        highest, lowest = max(column), min(column)

        if impact[j] == 1:
            best, worst = highest, lowest
        else:
            best, worst = lowest, highest

        positive_ideal.append(best)
        negative_ideal.append(worst)

    return positive_ideal, negative_ideal

In [111]:
def deviation_from_ideal(matrix_weighted, positive_ideal, negative_ideal):
    """Measure how far each row lies from the two ideal solutions.

    The distance is Euclidean and is taken over the first
    ``len(positive_ideal)`` columns.

    Returns:
        A ``(positive_deviation, negative_deviation)`` tuple of lists with
        one distance per row of ``matrix_weighted``.
    """
    criteria = range(len(positive_ideal))

    def euclidean_gap(row, reference):
        # Accumulate the squared differences in column order, then take the root.
        squared_total = 0
        for j in criteria:
            squared_total += (row[j] - reference[j]) ** 2
        return squared_total ** 0.5

    positive_deviation = [
        euclidean_gap(row, positive_ideal) for row in matrix_weighted
    ]
    negative_deviation = [
        euclidean_gap(row, negative_ideal) for row in matrix_weighted
    ]
    return positive_deviation, negative_deviation

In [112]:
def similarity_to_PIS(positive_separation, negative_separation):
    """Compute the relative closeness of each alternative to the positive ideal.

    For every position ``i`` the score is
    ``negative_separation[i] / (positive_separation[i] + negative_separation[i])``,
    so 1 means the alternative coincides with the positive ideal and 0 means
    it coincides with the negative ideal.

    Args:
        positive_separation: Distances to the positive ideal, one per
            alternative.
        negative_separation: Distances to the negative ideal, indexed like
            ``positive_separation``.

    Returns:
        A list with one score per entry of ``positive_separation``. When both
        distances are 0 (for example a single alternative, or alternatives
        that are all identical) the score is ``1.0``.
    """
    relative_similarity = []

    for i, pos_sep in enumerate(positive_separation):
        neg_sep = negative_separation[i]
        total_sep = pos_sep + neg_sep

        if total_sep != 0:
            similarity = neg_sep / total_sep
        else:
            # 0/0: the alternative sits on both ideals at once. Its distance
            # to the positive ideal is zero, so treat it like every other
            # zero-distance case instead of raising or emitting NaN.
            similarity = 1.0

        relative_similarity.append(similarity)

    return relative_similarity

In [113]:
def get_deviation_from_ideal_col_TOPSIS(data_criteria_encoded: DataFrame, stem_cell_source: Literal["bone marrow", "pheripheral blood"]):
    """Score every row of an encoded criteria table with TOPSIS.

    The criteria columns are selected in ``ordered_columns`` order, vector
    normalized, weighted with the AHP weights for ``stem_cell_source``, and
    compared against the positive and negative ideal solutions.

    Args:
        data_criteria_encoded: Table that contains (at least) every column
            named in ``ordered_columns``, holding numeric values.
        stem_cell_source: Forwarded to ``get_criteria_weights_AHP``, where
            ``"bone marrow"`` selects one relation matrix and any other value
            selects the other.

    Returns:
        A Series named ``"deviation_from_ideal"`` that shares the index of
        ``data_criteria_encoded``. Despite the name, each value is the
        relative closeness computed by ``similarity_to_PIS``
        (negative separation divided by the sum of both separations), so a
        larger value means the row is closer to the positive ideal.
    """
    # Use the module-level ordering: it is the same list that labels the AHP
    # relation matrices, so weights[j] is guaranteed to belong to column j.
    criteria = data_criteria_encoded.loc[:, ordered_columns]

    # One entry per criterion in `ordered_columns`: 1 = larger is better,
    # anything else = smaller is better (only CMV_serostatus here).
    impact = np.array([1, -1, 1, 1, 1, 1])

    weights = get_criteria_weights_AHP(stem_cell_source)

    criteria_matrix = criteria.to_numpy()

    matrix_normalized = normalize_matrix(criteria_matrix)
    matrix_weighted = weight_matrix(weights, matrix_normalized)

    pos_ideal, neg_ideal = ideal_best_worst(matrix_weighted, impact)
    pos_sep, neg_sep = deviation_from_ideal(matrix_weighted, pos_ideal, neg_ideal)
    deviations = similarity_to_PIS(pos_sep, neg_sep)

    # Carry the caller's index: a default RangeIndex would be aligned by
    # label when the Series is assigned back as a column, silently producing
    # NaN or mismatched rows for any frame that is not indexed 0..n-1.
    return Series(
        deviations,
        index=data_criteria_encoded.index,
        name="deviation_from_ideal",
    )

## Testing

In [118]:
# Manual smoke test: score the donors for one recipient and show the result.

# Execute the data-utils notebook in this namespace.
# NOTE(review): read_df, aggregate_data and encode_data are not defined in
# this file; presumably they come from that notebook - confirm.
%run ../dataset-manipulation/data-utils.ipynb

donors_CSV_PATH = "../../../datasets/raw/donor_list_raw.csv"
recipient_CSV_PATH = "../../../datasets/raw/recipient_waiting_list_raw.csv"

df_recipients = read_df(recipient_CSV_PATH)
df_donors = read_df(donors_CSV_PATH)

# Build the criteria table for recipient "IR001" and encode it.
data_aggregated = aggregate_data("IR001", df_recipients, df_donors)
data_criteria_encoded = encode_data(data_aggregated)
# Constant value for every row.
# NOTE(review): looks like a placeholder until a real survival estimate is
# available - confirm.
data_criteria_encoded["expected_survival_time"] = 1
# Column assignment aligns the returned Series with the frame by index label.
data_criteria_encoded["deviation_from_ideal"] = get_deviation_from_ideal_col_TOPSIS(data_criteria_encoded, "bone marrow")

# Last expression of the cell: its value is what the notebook displays.
encode_data(data_criteria_encoded, mode="decode")

Unnamed: 0,donor_id,donor_age,donor_ABO,donor_CMV,donor_gender,donor_tissue_type,donor_name,recipient_id,recipient_age,recipient_ABO,...,antigen,CMV_serostatus,gender_match,ABO_match,disease_group,donor_age_below_35,recipient_age_below_10,HLA_mismatch,expected_survival_time,deviation_from_ideal
0,ID001,32,A,absent,male,"[['A*01:01','A*02:01'],['B*08:01','B*35:01'],[...",Afonso Miguel Torres Lima,IR001,8.2,A,...,0,2,matched,matched,malignant,yes,yes,matched,1,1.0
1,ID002,45,B,present,female,"[['A*24:02','A*03:01'],['B*07:02','B*44:02'],[...",Ana Margarida Lousada Pinto,IR001,8.2,A,...,5,3,matched,mismatched,malignant,no,yes,mismatched,1,0.411115
2,ID003,28,O,absent,male,"[['A*02:01','A*11:01'],['B*15:01','B*51:01'],[...",Gonï¿½alo Tiago Miguï¿½is Pï¿½voas,IR001,8.2,A,...,2,2,matched,matched,malignant,yes,yes,mismatched,1,0.281867
3,ID004,51,AB,present,female,"[['A*29:02','A*30:01'],['B*44:03','B*18:01'],[...",Maria Joana Calheiros Rocha,IR001,8.2,A,...,7,3,matched,mismatched,malignant,no,yes,mismatched,1,0.0
4,ID005,39,A,absent,female,"[['A*01:01','A*26:01'],['B*57:01','B*38:01'],[...",Beatriz Inï¿½s Valenï¿½a Ribeiro,IR001,8.2,A,...,4,2,matched,matched,malignant,no,yes,mismatched,1,0.26022
5,ID006,22,O,present,male,"[['A*24:02','A*02:01'],['B*07:02','B*44:02'],[...",Daniel Rui Espinheira Barroso,IR001,8.2,A,...,3,3,matched,matched,malignant,yes,yes,mismatched,1,0.639499
6,ID007,48,B,absent,male,"[['A*03:01','A*68:01'],['B*35:01','B*53:01'],[...",Miguel Afonso Norberto Veiga,IR001,8.2,A,...,3,2,matched,mismatched,malignant,no,yes,mismatched,1,0.194015
7,ID008,35,A,present,female,"[['A*11:01','A*23:01'],['B*40:01','B*49:01'],[...",Rita Sofia Castanheira Lopes,IR001,8.2,A,...,5,3,matched,matched,malignant,no,yes,mismatched,1,0.279925
8,ID009,41,O,absent,male,"[['A*02:01','A*32:01'],['B*13:02','B*27:05'],[...",Luï¿½s Filipe Antï¿½o Barata,IR001,8.2,A,...,5,2,matched,matched,malignant,no,yes,mismatched,1,0.170395
9,ID010,30,AB,present,female,"[['A*01:01','A*29:02'],['B*08:01','B*44:02'],[...",Liliana Inï¿½s Gouveia Monteiro,IR001,8.2,A,...,6,3,matched,mismatched,malignant,yes,yes,mismatched,1,0.408533
