Notebook giving a quick introduction to the pairwise and higher-order colocalization scores

In [None]:
import itertools
import os

import numpy as np
import pandas as pd
from scipy.stats import spearmanr
from upsetplot import UpSet

from colocalization_measures.higher_order_similarity import calculate_higher_order_similarity

# column names of the markers that are combined into pairs / higher-order groups
marker_names = [
    "CD37",
    "CD43",
    "CD44",
    "CD50",
    "CD54",
    "CD82",
    "CD102",
    "CD162",
    "CD11a",
    "CD18",
    "mIgG1",
]

# directory that holds the per-component score tables read by the functions below
path_data = "PXL00018/single_cells/code_paper/paper_data/S3"

Plotting function for the colocalization scores

In [None]:
def create_upset_plot(marker_names, similarity_measure_df):
    """Build an UpSet plot object from a table of colocalization scores.

    :param marker_names: marker names; each one becomes a boolean membership
        level of the UpSet index
    :param similarity_measure_df: DataFrame whose index holds the combination
        names (marker names joined by "_") and whose columns hold the scores
    :return: the ``UpSet`` object (call ``.plot()`` on it to draw the figure)
    """
    # combination names, e.g. "CD37_CD43", in row order
    combination_labels = similarity_measure_df.index.values.tolist()

    # one boolean column per marker: is the marker part of the combination?
    membership = {}
    for marker in marker_names:
        membership[marker] = [
            marker in set(label.split("_")) for label in combination_labels
        ]
    membership_df = pd.DataFrame(membership)

    # the row-wise mean over all score columns is the value UpSet sums up
    membership_df["scores"] = similarity_measure_df.mean(axis=1).values

    # UpSet expects the boolean membership columns as a (multi-)index
    membership_df = membership_df.set_index(marker_names)

    return UpSet(membership_df, sum_over="scores", sort_categories_by="input")

Function for pairwise colocalization scores 

In [None]:
def create_colocalization_scores(marker_names, path_data):
    """Compute pairwise Spearman colocalization scores for every component.

    One CSV file is read per entry found in ``path_data``; the files are
    addressed by their zero-based position (``<path_data>/0``,
    ``<path_data>/1``, ...). For every pair of markers the Spearman
    correlation of the two (NaN filled with 0) columns is calculated. A pair
    gets a score of 0 whenever the mean of one of its columns is 0.

    :param marker_names: column names of the markers to pair up
    :param path_data: directory containing the per-component CSV files
    :return: DataFrame indexed by "Marker_combinations" (``"<a>_<b>"``) with
        one ``CMP_<index>`` column per component; it has no columns when
        ``path_data`` is empty
    """
    marker_names = list(marker_names)

    # the pairs (and therefore the row labels) do not depend on the component,
    # so they are built once; this also keeps them defined for an empty directory
    marker_pairs = list(itertools.combinations(marker_names, 2))
    names = [str(first) + "_" + str(second) for first, second in marker_pairs]

    # NOTE(review): the files are opened by index, not by the listed file
    # name, so the directory is assumed to contain files named 0..n-1 - confirm
    n_components = len(os.listdir(path_data))

    # collect the columns first and build the DataFrame once at the end;
    # inserting columns one by one fragments the DataFrame
    component_scores = {}
    for comp_index in range(n_components):
        local_assort_score_df = pd.read_csv(os.path.join(path_data, str(comp_index)))

        # prepare every marker column once instead of once per pair
        # (no column is touched when there is no pair to evaluate)
        markers_in_use = marker_names if marker_pairs else []
        marker_values = {
            marker: local_assort_score_df[marker].fillna(0).values
            for marker in markers_in_use
        }
        has_nonzero_mean = {
            marker: np.mean(values) != 0 for marker, values in marker_values.items()
        }

        comp_values = []
        for first, second in marker_pairs:
            if has_nonzero_mean[first] and has_nonzero_mean[second]:
                score = spearmanr(marker_values[first], marker_values[second])[0]
            else:
                # float zero keeps the column dtype consistent
                score = 0.0
            comp_values.append(score)

        component_scores["CMP_" + str(comp_index)] = comp_values

    similarity_measure_df = pd.DataFrame(
        component_scores, index=pd.Index(names, name="Marker_combinations")
    )

    return similarity_measure_df

Calculate pairwise colocalization scores 

In [None]:
# calculate pairwise colocalization scores
similarity_measure_df = create_colocalization_scores(marker_names, path_data)

# plot scores (create_upset_plot takes the marker names as its first argument)
create_upset_plot(marker_names, similarity_measure_df).plot()

Function for higher order similarity colocalization

In [None]:
def create_higher_order_similarity_df(marker_names, path_data, order):
    """Compute higher-order similarity scores for every component.

    One CSV file is read per entry found in ``path_data``; the files are
    addressed by their zero-based position (``<path_data>/0``,
    ``<path_data>/1``, ...). For every combination of ``order`` markers the
    (NaN filled with 0) columns are passed as a list to
    ``calculate_higher_order_similarity``.

    :param marker_names: column names of the markers to combine
    :param path_data: directory containing the per-component CSV files
    :param order: number of markers per combination
    :return: DataFrame indexed by "Marker_combinations" (marker names joined
        by "_") with one ``CMP_<index>`` column per component; it has no
        columns when ``path_data`` is empty
    """
    marker_names = list(marker_names)

    # the combinations (and therefore the row labels) do not depend on the
    # component, so they are built once; this also keeps them defined for an
    # empty directory
    marker_combinations = list(itertools.combinations(marker_names, order))
    names = [
        "_".join(str(column_name) for column_name in combination)
        for combination in marker_combinations
    ]

    # NOTE(review): the files are opened by index, not by the listed file
    # name, so the directory is assumed to contain files named 0..n-1 - confirm
    n_components = len(os.listdir(path_data))

    # collect the columns first and build the DataFrame once at the end;
    # inserting columns one by one fragments the DataFrame
    component_scores = {}
    for comp_index in range(n_components):
        local_assort_score_df = pd.read_csv(os.path.join(path_data, str(comp_index)))

        comp_values = []
        for combination in marker_combinations:
            # fresh arrays for every call: whether the similarity helper
            # modifies its inputs is not visible here, so nothing is shared
            list_combination_columns = [
                local_assort_score_df[column_name].fillna(0).values
                for column_name in combination
            ]
            comp_values.append(calculate_higher_order_similarity(list_combination_columns))

        component_scores["CMP_" + str(comp_index)] = comp_values

    similarity_measure_df = pd.DataFrame(
        component_scores, index=pd.Index(names, name="Marker_combinations")
    )

    return similarity_measure_df

Higher order similarity order 3

In [None]:
marker_names = ["CD37", "CD43", "CD44", "CD50", "CD54", "CD82", "CD102", "CD162", "CD11a", "CD18","mIgG1"]
path_data = "PXL00018/single_cells/code_paper/paper_data/S3"

# calculate higher order (order 3) similarity scores
similarity_measure_df = create_higher_order_similarity_df(marker_names, path_data, 3)

# plot scores (create_upset_plot takes the marker names as its first argument)
create_upset_plot(marker_names, similarity_measure_df).plot()

Higher order similarity order 4

In [None]:
marker_names = ["CD37", "CD43", "CD44", "CD50", "CD54", "CD82", "CD102", "CD162", "mIgG1"]
path_data = "PXL00018/single_cells/code_paper/paper_data/S3"

# calculate higher order (order 4) similarity scores
similarity_measure_df = create_higher_order_similarity_df(marker_names, path_data, 4)

# plot scores (create_upset_plot takes the marker names as its first argument)
create_upset_plot(marker_names, similarity_measure_df).plot()