In [3]:
import pandas as pd
import numpy as np

In [4]:
# Load both CSV inputs, using the first column of each file as the row index.
# 'labels.csv' is read into df1 and 'data.csv' into df2, in that order.
df1, df2 = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ('labels.csv', 'data.csv')
)

In [5]:
# Inner-join the two frames on their row index (the sample names), so that
# each remaining row carries the columns of df1 followed by those of df2.
merged_df = df1.merge(
    df2,
    how='inner',
    left_index=True,
    right_index=True,
)

In [33]:
def euclidean_distance(df1, df2):
    """Return the Euclidean (L2) distance between ``df1`` and ``df2``.

    The inputs are subtracted element-wise, squared, summed with the
    object's own ``.sum()`` method and square-rooted, so they must support
    arithmetic and expose ``.sum()`` (numpy arrays, pandas Series, ...).
    """
    delta = df1 - df2
    sum_of_squares = (delta ** 2).sum()
    return np.sqrt(sum_of_squares)

In [34]:
def mahalanobis_distance(df1, df2, cov_matrix=None):
    """Return the Mahalanobis distance between ``df1`` and ``df2``.

    Parameters
    ----------
    df1 : array-like
        Either a single feature vector (1-D) or a sample matrix with one
        row per sample and one column per feature (2-D).
    df2 : array-like
        Reference point(s); must be subtractable from ``df1``.
    cov_matrix : array-like, optional
        Covariance matrix (features x features) defining the metric. When
        omitted, it is estimated from ``df1``, which is only possible if
        ``df1`` is a 2-D sample matrix with at least two rows.

    Returns
    -------
    A scalar for 1-D input, or a 1-D array holding one distance per row for
    2-D input. ``0`` is returned as a "not computable" sentinel when no
    covariance is available or when the covariance matrix is not positive
    definite.

    Notes
    -----
    A covariance cannot be estimated from a single 1-D vector, so calling
    this function with two vectors and no ``cov_matrix`` yields ``0``.
    Pass the covariance of the relevant class explicitly to get a
    meaningful vector-to-vector distance.
    """
    if cov_matrix is None:
        samples = np.asarray(df1, dtype=float)
        # Only a matrix with at least two samples carries covariance information.
        if samples.ndim != 2 or samples.shape[0] < 2:
            return 0
        cov_matrix = np.cov(samples, rowvar=False)

    cov = np.asarray(cov_matrix, dtype=float)
    if cov.ndim == 0:
        # A single feature gives a scalar variance; treat it as a 1x1 matrix.
        cov = cov.reshape(1, 1)

    try:
        # The factorisation doubles as the positive-definiteness check.
        chol = np.linalg.cholesky(cov)
    except np.linalg.LinAlgError:
        return 0  # covariance matrix is not positive definite

    # Subtract before converting so that pandas label alignment still applies.
    diff = np.asarray(df1 - df2, dtype=float)

    # With cov = L L^T, diff^T cov^-1 diff equals ||L^-1 diff||^2, so solve
    # against the factor instead of forming the explicit inverse.
    whitened = np.linalg.solve(chol, diff.T)
    return np.sqrt((whitened ** 2).sum(axis=0))


In [35]:
def cosine_distance(df1, df2):
    """Return the cosine distance ``1 - cos(theta)`` between two vectors.

    The result is 0 for vectors pointing in the same direction, 1 for
    orthogonal vectors and 2 for opposite vectors, so that, like the other
    metrics, a larger value means "further apart".

    Parameters
    ----------
    df1, df2 : array-like
        1-D vectors of equal length.

    Returns
    -------
    float
        The cosine distance in ``[0, 2]``, or ``nan`` when either vector has
        zero norm (its direction is undefined).
    """
    norm_product = np.linalg.norm(df1) * np.linalg.norm(df2)
    if norm_product == 0:
        return np.nan

    similarity = np.dot(df1, df2) / norm_product
    # Rounding can push the similarity marginally outside [-1, 1]; clip it so
    # the distance never comes out slightly negative.
    return 1.0 - np.clip(similarity, -1.0, 1.0)


In [36]:
def distance_intra(Class, type_de_distance='euclidienne'):
    """Return the largest distance between a class's samples and its centre.

    The rows of the global ``merged_df`` whose 'Class' column equals
    ``Class`` are selected, the first column is dropped (presumably the
    'Class' label itself -- TODO confirm against the CSV layout), and the
    column-wise mean of what remains is used as the class centre.

    Parameters
    ----------
    Class : label looked up in ``merged_df['Class']``.
    type_de_distance : one of 'euclidienne', 'mahalanobis', 'cosinus'.

    Returns
    -------
    The maximum per-sample distance to the centre, or an error message
    string when the class or the distance type is not recognised.
    """
    known_types = ('euclidienne', 'mahalanobis', 'cosinus')

    if Class not in merged_df['Class'].unique():
        return "Class not found"
    if type_de_distance not in known_types:
        return "Type de distance non reconnu"

    samples = merged_df.loc[merged_df['Class'] == Class].iloc[:, 1:]
    centroid = samples.mean()

    # Pick the metric once, then apply it to every sample row.
    # NOTE(review): mahalanobis_distance receives two 1-D vectors and no
    # covariance here, a case for which it returns 0.
    if type_de_distance == 'euclidienne':
        metric = euclidean_distance
    elif type_de_distance == 'mahalanobis':
        metric = mahalanobis_distance
    else:
        metric = cosine_distance

    per_sample = samples.apply(lambda row: metric(row, centroid), axis=1)
    return per_sample.max()


In [37]:
def distance_inter(Class1, Class2, type_de_distance='euclidienne'):
    """Return the distance between the centres of two classes.

    For each class, the rows of the global ``merged_df`` whose 'Class'
    column matches are selected, the first column is dropped (presumably the
    'Class' label itself -- TODO confirm against the CSV layout), and the
    column-wise mean of what remains is used as the class centre.

    Parameters
    ----------
    Class1, Class2 : labels looked up in ``merged_df['Class']``.
    type_de_distance : one of 'euclidienne', 'mahalanobis', 'cosinus'.

    Returns
    -------
    The distance between the two centres, or an error message string when
    a class or the distance type is not recognised.
    """
    known_types = ('euclidienne', 'mahalanobis', 'cosinus')

    if Class1 not in merged_df['Class'].unique() or Class2 not in merged_df['Class'].unique():
        return "Class not found"
    if type_de_distance not in known_types:
        return "Type de distance non reconnu"

    def centre_of(label):
        # Column-wise mean of the class's rows, without the first column.
        return merged_df.loc[merged_df['Class'] == label].iloc[:, 1:].mean()

    centroid1 = centre_of(Class1)
    centroid2 = centre_of(Class2)

    # NOTE(review): mahalanobis_distance receives two 1-D vectors and no
    # covariance here, a case for which it returns 0.
    if type_de_distance == 'euclidienne':
        return euclidean_distance(centroid1, centroid2)
    if type_de_distance == 'mahalanobis':
        return mahalanobis_distance(centroid1, centroid2)
    return cosine_distance(centroid1, centroid2)


In [40]:
def calculate_overlap(Class1, Class2, type_de_distance='euclidienne'):
    """Compute an overlap ratio between two classes.

    The ratio is ``(intra1 + intra2) / (2 * inter)``, where ``intra1`` and
    ``intra2`` come from ``distance_intra`` for each class and ``inter``
    comes from ``distance_inter`` for the pair.

    Returns
    -------
    tuple
        ``(intra1, inter, overlap)``. Only the first class's intra-class
        distance is returned. ``overlap`` is ``nan`` when ``inter`` is 0,
        which avoids a division by zero.
    """
    spread1 = distance_intra(Class1, type_de_distance)
    spread2 = distance_intra(Class2, type_de_distance)
    separation = distance_inter(Class1, Class2, type_de_distance)

    overlap = np.nan if separation == 0 else (spread1 + spread2) / (2 * separation)

    return spread1, separation, overlap


In [42]:
# Distinct class labels, in order of first appearance in merged_df
classes = merged_df['Class'].unique()

# Distance types to report on
types_de_distance = ['euclidienne', 'mahalanobis', 'cosinus']

# For every distance type, print the overlap figures of each unordered pair
# of classes. The intra-class distance shown is that of the pair's first class.
for distance_type in types_de_distance:
    print(f"Type de distance : {distance_type}")
    for i, class1 in enumerate(classes):
        for class2 in classes[i + 1:]:
            dist_intra1, dist_inter, overlap_value = calculate_overlap(class1, class2, distance_type)
            print(
                f'Classes ({class1} et {class2})',
                f'Distance intra-classe : {dist_intra1}',
                f'Distance inter-classe : {dist_inter}',
                f'Overlap : {overlap_value}',
                '-' * 30,
                sep='\n',
            )


Type de distance : euclidienne
Classes (PRAD et LUAD)
Distance intra-classe : 239.5735886229157
Distance inter-classe : 189.2993080817103
Overlap : 1.3153543601019686
------------------------------
Classes (PRAD et BRCA)
Distance intra-classe : 239.5735886229157
Distance inter-classe : 169.03709953165458
Overlap : 1.468707767016454
------------------------------
Classes (PRAD et KIRC)
Distance intra-classe : 239.5735886229157
Distance inter-classe : 233.49057187997073
Overlap : 1.0914940588221385
------------------------------
Classes (PRAD et COAD)
Distance intra-classe : 239.5735886229157
Distance inter-classe : 225.6811423230833
Overlap : 1.0968992447257282
------------------------------
Classes (LUAD et BRCA)
Distance intra-classe : 258.41775187621124
Distance inter-classe : 142.87497834593188
Overlap : 1.8035921027319581
------------------------------
Classes (LUAD et KIRC)
Distance intra-classe : 258.41775187621124
Distance inter-classe : 197.57728310670484
Overlap : 1.3375811706