# Esophageal Cancer Research - Association Rule Mining
* By Sangwon Baek
* Samsung Medical Center
* September 6th, 2023

### Import necessary packages and read data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.frequent_patterns import fpgrowth
from matplotlib.colors import LinearSegmentedColormap
from itertools import combinations

from statsmodels.graphics.mosaicplot import mosaic
import itertools
from ..src.utils.arm_utils import *

# Notebook-wide pandas settings: render up to 200 columns and 200 rows.
pd.options.display.max_columns = 200
pd.options.display.max_rows = 200
# None switches off the chained-assignment check entirely.
pd.set_option('mode.chained_assignment', None)

In [2]:
# Load the preprocessed dataset and discard the "Unnamed: 0" column.
df = pd.read_csv("../Data/Preprocessed/ECA_Dataset.csv")
df = df.drop(columns="Unnamed: 0")

# Replace the "/" separators in the grouped-station column names with "-".
df = df.rename(
    columns={
        'pos_105/108/110': 'pos_105-108-110',
        'total_105/108/110': 'total_105-108-110',
        'pos_1/2/7': 'pos_1-2-7',
        'total_1/2/7': 'total_1-2-7',
    }
)

In [48]:
def N_categorize(x):
    """Map a positive lymph-node count to an N-category label.

    Parameters
    ----------
    x : number
        Positive-node count (the notebook applies this to ``total_pos_LN``).

    Returns
    -------
    str or None
        ``'0'`` for a count of 0, ``'1'`` for 1-2, ``'2'`` for 3-6, ``'3'``
        for any other value (in practice 7 or more), and ``None`` when the
        count is missing.
    """
    # A missing count fails every comparison below, so without this guard it
    # would fall through to the catch-all and be silently labelled '3'.
    if pd.isna(x):
        return None
    if x == 0:
        return '0'
    if 1 <= x <= 2:
        return '1'
    if 3 <= x <= 6:
        return '2'
    # Catch-all: counts of 7 or more (7 itself included) end up here.
    return '3'

# Column names carrying the "pos_" and "total_" prefixes
pos_columns = [name for name in df.columns if name.startswith("pos_")]
total_columns = [name for name in df.columns if name.startswith("total_")]

# T and M tokens are pulled out of the pTNM7_1 string and stripped of their
# leading letter; N is derived from the positive-node count instead.
tnm_string = df['pTNM7_1']
t_token = tnm_string.str.extract('(TX|T0|Tis|T1a|T1b|T2|T3|T4a|T4b)')
m_token = tnm_string.str.extract('(M0|M1)')

df['T_category'] = t_token.replace('T', '', regex=True)
df['N_category'] = df['total_pos_LN'].apply(N_categorize)
df['M_category'] = m_token.replace('M', '', regex=True)

def _split_by_site(frame):
    """Return the (upper, mid, lower) Primary_Site subsets of *frame*."""
    return tuple(
        frame.loc[frame.Primary_Site == site]
        for site in ('upper', 'mid', 'lower')
    )


# Whole dataset (as an independent copy) and its per-site subsets
Whole_df = df.copy()
Whole_upper_df, Whole_mid_df, Whole_lower_df = _split_by_site(df)

# T-stage groups. Only the listed T_category values are selected, so rows with
# any other value (e.g. 'is') belong to none of these groups.
T1_df = df[df['T_category'].isin(['1a', '1b'])]
T24_df = df[df['T_category'].isin(['2', '3', '4a', '4b'])]
T2_df = df[df['T_category'].isin(['2'])]
T3_df = df[df['T_category'].isin(['3'])]
T4_df = df[df['T_category'].isin(['4a', '4b'])]

# Per-site subsets of every T-stage group
T1_upper_df, T1_mid_df, T1_lower_df = _split_by_site(T1_df)
T24_upper_df, T24_mid_df, T24_lower_df = _split_by_site(T24_df)
T2_upper_df, T2_mid_df, T2_lower_df = _split_by_site(T2_df)
T3_upper_df, T3_mid_df, T3_lower_df = _split_by_site(T3_df)
T4_upper_df, T4_mid_df, T4_lower_df = _split_by_site(T4_df)

In [49]:
# Dataset descriptors: every group combined with every site suffix
# ("" stands for the group as a whole).
dataset_descriptors = [
    f"{group}{suffix}"
    for group in ("Whole", "T1", "T24", "T2", "T3", "T4")
    for suffix in ("", "_upper", "_mid", "_lower")
]

# Values to test; 44 is the value the mining cells label as 'all'.
numbers = [*range(1, 11), 44]
N_categories = [str(level) for level in range(4)]

# Predefined order of the lymph nodes
predefined_order = [
    '101R', '101L',
    '102R', '102L',
    '104R', '104L',
    '106preR', '106preL',
    '106recR', '106recL',
    '107',
    '105-108-110',
    '112pulR', '112pulL',
    '1-2-7',
    '8',
    '9',
]

# (label, frame) pairs for every dataset, later unpacked into
# patient_counts(*dfs) and patient_counts_by_category(*dfs).
# The order differs from dataset_descriptors: the T24 entries come last here.
# The labels use spaces, and only the "Whole ..." site labels are capitalised.
dfs = [
    ("Whole", Whole_df), 
    ("Whole Upper", Whole_upper_df), 
    ("Whole Mid", Whole_mid_df), 
    ("Whole Lower", Whole_lower_df), 
    ("T1", T1_df), 
    ("T1 upper", T1_upper_df), 
    ("T1 mid", T1_mid_df), 
    ("T1 lower", T1_lower_df), 
    ("T2", T2_df), 
    ("T2 upper", T2_upper_df), 
    ("T2 mid", T2_mid_df), 
    ("T2 lower", T2_lower_df), 
    ("T3", T3_df), 
    ("T3 upper", T3_upper_df), 
    ("T3 mid", T3_mid_df), 
    ("T3 lower", T3_lower_df), 
    ("T4", T4_df), 
    ("T4 upper", T4_upper_df), 
    ("T4 mid", T4_mid_df), 
    ("T4 lower", T4_lower_df), 
    ("T24", T24_df), 
    ("T24 upper", T24_upper_df),
    ("T24 mid", T24_mid_df),
    ("T24 lower", T24_lower_df)
]

In [None]:
# Run patient_counts over every (label, frame) pair in `dfs`, write the result
# to ../Results/data_lengths.csv, and show it as the cell output.
result_df = patient_counts(*dfs)
result_df.to_csv("../Results/data_lengths.csv")
result_df

In [None]:
# Same as the previous cell but through patient_counts_by_category; the result
# is written to ../Results/N_category_lengths.csv and shown as the cell output.
result2_df = patient_counts_by_category(*dfs)
result2_df.to_csv("../Results/N_category_lengths.csv")
result2_df

### Association Rule Mining PS

In [None]:
# Run association_rule_mining_PS for every dataset and every entry of
# `numbers`, once with 1 and once with 2 as the last argument. Each result is
# stored under a generated global name (associationRule_PS[_2]_<label>_<dataset>).
for descriptor, num in itertools.product(dataset_descriptors, numbers):
    label = 'all' if num == 44 else str(num)
    frame = globals()[f"{descriptor}_df"]
    globals()[f"associationRule_PS_{label}_{descriptor}"] = association_rule_mining_PS(frame, num, None, 0.001, 0.001, 1)
    globals()[f"associationRule_PS_2_{label}_{descriptor}"] = association_rule_mining_PS(frame, num, None, 0.001, 0.001, 2)

In [None]:
# PS association rules per N category: same sweep as the count-based cell, but
# the category is passed in place of the number. Results are stored under
# associationRule_PS[_2]_N<category>_<dataset>.
for descriptor in dataset_descriptors:
    frame = globals()[f"{descriptor}_df"]
    for category in N_categories:
        label = f'N{category}'
        for variant, prefix in ((1, "associationRule_PS"), (2, "associationRule_PS_2")):
            globals()[f"{prefix}_{label}_{descriptor}"] = association_rule_mining_PS(frame, None, category, 0.001, 0.001, variant)

In [57]:
# 1. Combine the PS rule tables per dataset, keyed "combined_PS_<dataset>"
sections_PS = ['Whole', 'T1', 'T24', 'T2', 'T3', 'T4']
subsections_PS = ['', '_upper', '_mid', '_lower']

combined_data_PS = {}
combined_data_PS_N = {}

for section, sub in itertools.product(sections_PS, subsections_PS):
    dataset = f"{section}{sub}"
    key = f"combined_PS_{dataset}"
    combined_data_PS[key] = combine_association_rules_PS(dataset)
    combined_data_PS_N[key] = combine_AR_PS_N_category(dataset)

# 2. Turn each dictionary into (name, table) pairs, dropping the key prefix
association_rules_PS_dfs = [
    (key.replace("combined_PS_", ""), table)
    for key, table in combined_data_PS.items()
]
association_rules_PS_N_dfs = [
    (key.replace("combined_PS_", ""), table)
    for key, table in combined_data_PS_N.items()
]

# 3. One Excel workbook per list, one sheet per dataset
save_to_excel('../Results/association_rules_PS.xlsx', association_rules_PS_dfs)
save_to_excel('../Results/association_rules_PS_Ncategory.xlsx', association_rules_PS_N_dfs)

In [58]:
# Same combine-and-save steps as the previous cell, using the "_2" combiners.
# Relies on sections_PS / subsections_PS defined in the previous cell.
combined_data_PS_2 = {}
combined_data_PS_2_N = {}

for section, sub in itertools.product(sections_PS, subsections_PS):
    dataset = f"{section}{sub}"
    key = f"combined_PS_{dataset}"
    combined_data_PS_2[key] = combine_association_rules_PS_2(dataset)
    combined_data_PS_2_N[key] = combine_AR_PS_N_category_2(dataset)

# (name, table) pairs with the key prefix removed
association_rules_PS_2_dfs = [
    (key.replace("combined_PS_", ""), table)
    for key, table in combined_data_PS_2.items()
]
association_rules_PS_2_N_dfs = [
    (key.replace("combined_PS_", ""), table)
    for key, table in combined_data_PS_2_N.items()
]

# One Excel workbook per list, one sheet per dataset
save_to_excel('../Results/association_rules_PS_2.xlsx', association_rules_PS_2_dfs)
save_to_excel('../Results/association_rules_PS_Ncategory_2.xlsx', association_rules_PS_2_N_dfs)

### Modify the existing function from the mlxtend library to serve my purpose

### Association Rule Mining TN

In [None]:
# Count-based TN association rules for every dataset / number combination.
# NOTE(review): the second variant is called with (0.015, 0.01) here, while
# the N-category cell passes (0.01, 0.01) to its second variant. Confirm that
# the difference is intended.
for descriptor, num in itertools.product(dataset_descriptors, numbers):
    label = 'all' if num == 44 else str(num)
    frame = globals()[f"{descriptor}_df"]
    globals()[f"associationRule_TN_{label}_{descriptor}"] = association_rule_mining_TN(frame, num, None, 0.01, 0.001, 1)
    globals()[f"associationRule_TN_2_{label}_{descriptor}"] = association_rule_mining_TN(frame, num, None, 0.015, 0.01, 2)

In [None]:
# Ignore NumPy's divide-by-zero and invalid-value floating-point errors from
# here on.
np.seterr(divide='ignore', invalid='ignore')

# TN association rules per N category, stored under
# associationRule_TN[_2]_N<category>_<dataset>.
for descriptor, category in itertools.product(dataset_descriptors, N_categories):
    label = f'N{category}'
    frame = globals()[f"{descriptor}_df"]
    globals()[f"associationRule_TN_{label}_{descriptor}"] = association_rule_mining_TN(frame, None, category, 0.01, 0.001, 1)
    globals()[f"associationRule_TN_2_{label}_{descriptor}"] = association_rule_mining_TN(frame, None, category, 0.01, 0.01, 2)

In [63]:
# 1. Combine the TN rule tables per dataset, keyed "combined_<dataset>"
sections = ['Whole', 'T1', 'T24', 'T2', 'T3', 'T4']
subsections = ['', '_upper', '_mid', '_lower']

combined_data = {}
combined_data_N = {}
for section, sub in itertools.product(sections, subsections):
    dataset = f"{section}{sub}"
    key = f"combined_{dataset}"
    combined_data[key] = combine_association_rules(dataset)
    combined_data_N[key] = combine_AR_TN_N_Category(dataset)

# 2. Turn each dictionary into (name, table) pairs, dropping the key prefix
association_rules_TN_dfs = [
    (key.replace("combined_", ""), table)
    for key, table in combined_data.items()
]
association_rules_TN_N_dfs = [
    (key.replace("combined_", ""), table)
    for key, table in combined_data_N.items()
]

# 3. One Excel workbook per list, one sheet per dataset
save_to_excel('../Results/association_rules_TN.xlsx', association_rules_TN_dfs)
save_to_excel('../Results/association_rules_TN_Ncategory.xlsx', association_rules_TN_N_dfs)

In [64]:
# Same combine-and-save steps as the previous cell, using the "_2" combiners.
# Relies on sections / subsections defined in the previous cell.
combined_data_2 = {}
combined_data_2_N = {}
for section, sub in itertools.product(sections, subsections):
    dataset = f"{section}{sub}"
    key = f"combined_{dataset}"
    combined_data_2[key] = combine_association_rules_2(dataset)
    combined_data_2_N[key] = combine_AR_TN_N_Category_2(dataset)

# (name, table) pairs with the key prefix removed
association_rules_TN_2_dfs = [
    (key.replace("combined_", ""), table)
    for key, table in combined_data_2.items()
]
association_rules_TN_2_N_dfs = [
    (key.replace("combined_", ""), table)
    for key, table in combined_data_2_N.items()
]

# One Excel workbook per list, one sheet per dataset
save_to_excel('../Results/association_rules_TN_2.xlsx', association_rules_TN_2_dfs)
save_to_excel('../Results/association_rules_TN_Ncategory_2.xlsx', association_rules_TN_2_N_dfs)

#### 2D Heatmap for PS LN 

In [68]:
# Edge frequencies per dataset and number, stored as EF_PS_<label>_<dataset>
for descriptor, num in itertools.product(dataset_descriptors, numbers):
    label = 'all' if num == 44 else str(num)
    globals()[f"EF_PS_{label}_{descriptor}"] = get_frequency_PS_LN(
        globals()[f"{descriptor}_df"], num, None
    )

# Edge frequencies per dataset and N category, stored as EF_PS_N<category>_<dataset>
for descriptor, category in itertools.product(dataset_descriptors, N_categories):
    label = f'N{category}'
    globals()[f"EF_PS_{label}_{descriptor}"] = get_frequency_PS_LN(
        globals()[f"{descriptor}_df"], None, category
    )

# Heatmap input tables for the two T groups
transformed_df_T1, transformed_df_T24 = (
    transform_for_heatmap(group) for group in ('T1', 'T24')
)

In [69]:
# Plot the table produced by transform_for_heatmap('T1'), passing 'T1' as its descriptor.
plot_transformed_heatmap(transformed_df_T1, 'T1')

In [70]:
# Plot the table produced by transform_for_heatmap('T24'), passing 'T24' as its descriptor.
plot_transformed_heatmap(transformed_df_T24, 'T24')

In [71]:
# Heatmap tables per site for the T1 group, then for the T24 group
transformed_df_T1_upper, transformed_df_T1_mid, transformed_df_T1_lower = (
    transform_for_heatmap(f'T1_{site}') for site in ('upper', 'mid', 'lower')
)
transformed_df_T24_upper, transformed_df_T24_mid, transformed_df_T24_lower = (
    transform_for_heatmap(f'T24_{site}') for site in ('upper', 'mid', 'lower')
)

# Both lists follow the same order: T1 then T24 for each site.
transformed_descriptors = ['T1_upper', 'T24_upper', 'T1_mid', 'T24_mid', 'T1_lower', 'T24_lower']
transformed_dfs = [
    transformed_df_T1_upper,
    transformed_df_T24_upper,
    transformed_df_T1_mid,
    transformed_df_T24_mid,
    transformed_df_T1_lower,
    transformed_df_T24_lower,
]

In [None]:
# Plot all six per-site tables together with their descriptors.
# NOTE(review): 'lower_confidence' is passed although the tables cover the
# upper, mid and lower sites. Presumably an output name; confirm against the helper.
plot_multiple_transformed_heatmaps(transformed_dfs, transformed_descriptors, 'lower_confidence')

#### 2D Heatmap for Hub Nodes

In [76]:
# Edge frequencies per dataset and number, stored as EF_TN_<label>_<dataset>
for descriptor, num in itertools.product(dataset_descriptors, numbers):
    label = 'all' if num == 44 else str(num)
    globals()[f"EF_TN_{label}_{descriptor}"] = get_frequency_TN(
        globals()[f"{descriptor}_df"], num, None
    )

# Edge frequencies per dataset and N category, stored as EF_TN_N<category>_<dataset>
for descriptor, category in itertools.product(dataset_descriptors, N_categories):
    label = f'N{category}'
    globals()[f"EF_TN_{label}_{descriptor}"] = get_frequency_TN(
        globals()[f"{descriptor}_df"], None, category
    )

In [None]:
# transform_for_heatmap_TN returns a pair, unpacked here into the T1 and T24
# tables for the 'all' label and the 'support' metric.
transformed_TN_df_T1_all, transformed_TN_df_T24_all = transform_for_heatmap_TN('T1', 'T24', 'all', 'support')

In [None]:
def _tn_pairs_by_site(metric):
    """Call transform_for_heatmap_TN for the upper, mid and lower sites (in that
    order) with the 'all' label and *metric*; return the three results."""
    return [
        transform_for_heatmap_TN(f'T1_{site}', f'T24_{site}', 'all', metric)
        for site in ('upper', 'mid', 'lower')
    ]


# Each result is unpacked as a (T1 table, T24 table) pair.

#Support
(
    (transformed_TN_df_T1_upper_all_Support, transformed_TN_df_T24_upper_all_Support),
    (transformed_TN_df_T1_mid_all_Support, transformed_TN_df_T24_mid_all_Support),
    (transformed_TN_df_T1_lower_all_Support, transformed_TN_df_T24_lower_all_Support),
) = _tn_pairs_by_site('support')

#Net Confidence
(
    (transformed_TN_df_T1_upper_all_NC, transformed_TN_df_T24_upper_all_NC),
    (transformed_TN_df_T1_mid_all_NC, transformed_TN_df_T24_mid_all_NC),
    (transformed_TN_df_T1_lower_all_NC, transformed_TN_df_T24_lower_all_NC),
) = _tn_pairs_by_site('net confidence')

#Attributable Confidence
(
    (transformed_TN_df_T1_upper_all_AC, transformed_TN_df_T24_upper_all_AC),
    (transformed_TN_df_T1_mid_all_AC, transformed_TN_df_T24_mid_all_AC),
    (transformed_TN_df_T1_lower_all_AC, transformed_TN_df_T24_lower_all_AC),
) = _tn_pairs_by_site('attributable confidence')

#Zhangs Metric
(
    (transformed_TN_df_T1_upper_all_ZM, transformed_TN_df_T24_upper_all_ZM),
    (transformed_TN_df_T1_mid_all_ZM, transformed_TN_df_T24_mid_all_ZM),
    (transformed_TN_df_T1_lower_all_ZM, transformed_TN_df_T24_lower_all_ZM),
) = _tn_pairs_by_site('zhangs_metric')

#Conviction
(
    (transformed_TN_df_T1_upper_all_Conviction, transformed_TN_df_T24_upper_all_Conviction),
    (transformed_TN_df_T1_mid_all_Conviction, transformed_TN_df_T24_mid_all_Conviction),
    (transformed_TN_df_T1_lower_all_Conviction, transformed_TN_df_T24_lower_all_Conviction),
) = _tn_pairs_by_site('conviction')

#Certain Factor
(
    (transformed_TN_df_T1_upper_all_CF, transformed_TN_df_T24_upper_all_CF),
    (transformed_TN_df_T1_mid_all_CF, transformed_TN_df_T24_mid_all_CF),
    (transformed_TN_df_T1_lower_all_CF, transformed_TN_df_T24_lower_all_CF),
) = _tn_pairs_by_site('certain factor')

#Interestingness
(
    (transformed_TN_df_T1_upper_all_Interestingness, transformed_TN_df_T24_upper_all_Interestingness),
    (transformed_TN_df_T1_mid_all_Interestingness, transformed_TN_df_T24_mid_all_Interestingness),
    (transformed_TN_df_T1_lower_all_Interestingness, transformed_TN_df_T24_lower_all_Interestingness),
) = _tn_pairs_by_site('interestingness')

In [79]:
# Per-metric table lists, each in the order of transformed_descriptors:
# T1 then T24 for the upper, mid and lower sites.
transformed_support_dfs = [
    transformed_TN_df_T1_upper_all_Support,
    transformed_TN_df_T24_upper_all_Support,
    transformed_TN_df_T1_mid_all_Support,
    transformed_TN_df_T24_mid_all_Support,
    transformed_TN_df_T1_lower_all_Support,
    transformed_TN_df_T24_lower_all_Support,
]
transformed_ZM_dfs = [
    transformed_TN_df_T1_upper_all_ZM,
    transformed_TN_df_T24_upper_all_ZM,
    transformed_TN_df_T1_mid_all_ZM,
    transformed_TN_df_T24_mid_all_ZM,
    transformed_TN_df_T1_lower_all_ZM,
    transformed_TN_df_T24_lower_all_ZM,
]
transformed_Interestingness_dfs = [
    transformed_TN_df_T1_upper_all_Interestingness,
    transformed_TN_df_T24_upper_all_Interestingness,
    transformed_TN_df_T1_mid_all_Interestingness,
    transformed_TN_df_T24_mid_all_Interestingness,
    transformed_TN_df_T1_lower_all_Interestingness,
    transformed_TN_df_T24_lower_all_Interestingness,
]

In [None]:
# Create the multiple heatmaps (support)
plot_multiple_transformed_heatmaps_TN(transformed_support_dfs,  transformed_descriptors, 'Support', 'support')

In [None]:
# Create the multiple heatmaps (Zhang's metric) from the six per-site tables
plot_multiple_transformed_heatmaps_TN(transformed_ZM_dfs, transformed_descriptors, 'Zhangs_Metric', 'zhangs_metric')

In [None]:
# Create the multiple heatmaps (interestingness) from the six per-site tables
plot_multiple_transformed_heatmaps_TN(transformed_Interestingness_dfs, transformed_descriptors, 'Interestingness', 'interestingness')

#### Network of Zhang's metric and Support values 

In [113]:
# Custom (x, y) positions of the lymph nodes (referring to the diagram).
# ON = other neck; OM = other medial; OA = other abdominal; REG = regional
# Some keys share coordinates: 'upper'/'neckLN', 'mid'/'mediaLN', and
# 'lower'/'abdoLN'/'9'.
Lymphnode_Positions = {
    'upper': (0, 3),
    'mid': (0, 0),
    'lower': (0, -3),
    "neckLN": (0, 3),
    "mediaLN": (0, 0),
    "abdoLN": (0, -3),
    '101R': (-0.2, 4),
    '101L': (0.2, 4),
    '102R': (-0.4, 4.3),
    '102L': (0.4, 4.3),
    '104R': (-0.6, 4),
    '104L': (0.6, 4),
    '106recR': (-0.3, 3),
    '106recL': (0.3, 3),
    '106preR': (-0.05, 2.5),
    '106preL': (0.5, 2.5),
    '107': (0.1, 1.5), 
    '105-108-110': (-0.4, 0.5), #*105
    '112pulR': (0, -0.3),
    '112pulL': (0.3, -0.3),
    '1-2-7': (0.4, -2.5),
    '8': (-0.4, -2.5),
    '9': (0, -3)
}

# Label positions: same keys and x values as Lymphnode_Positions, with every
# y value 0.1 lower.
Label_Positions = {
    'upper': (0, 2.9),
    'mid': (0, -0.1),
    'lower': (0, -3.1),
    "neckLN": (0, 2.9),
    "mediaLN": (0, -0.1),
    "abdoLN": (0, -3.1),
    '101R': (-0.2, 3.9),
    '101L': (0.2, 3.9),
    '102R': (-0.4, 4.2),
    '102L': (0.4, 4.2),
    '104R': (-0.6, 3.9),
    '104L': (0.6, 3.9),
    '106recR': (-0.3, 2.9),
    '106recL': (0.3, 2.9),
    '106preR': (-0.05, 2.4),
    '106preL': (0.5, 2.4),
    '107': (0.1, 1.4), 
    '105-108-110': (-0.4, 0.4), #*105
    '112pulR': (0, -0.4),
    '112pulL': (0.3, -0.4),
    '1-2-7': (0.4, -2.6),
    '8': (-0.4, -2.6),
    '9': (0, -3.1)
}

# Create the subgroup dfs for the subgroup analysis.
# These three statements repeat the per-site split done when the subgroup
# frames were first built and rebind the same names.
Whole_upper_df = df.loc[df.Primary_Site=='upper']
Whole_mid_df = df.loc[df.Primary_Site=='mid']
Whole_lower_df = df.loc[df.Primary_Site=='lower']

In [None]:
# Zhang's metric tables for the whole dataset, one per site
(
    transformed_TN_df_Whole_upper_all_ZM,
    transformed_TN_df_Whole_mid_all_ZM,
    transformed_TN_df_Whole_lower_all_ZM,
) = (
    transform_for_heatmap_TN_Whole(f'Whole_{site}', 'all', 'zhangs_metric')
    for site in ('upper', 'mid', 'lower')
)

In [122]:
def _rows_for_hub(table, hub):
    """Return the rows of *table* whose 'HubNodes' value equals *hub*."""
    return table.loc[table['HubNodes'] == hub]


# Rows for the two 106rec hub nodes, per site (106recL is not extracted for
# the lower site).
df_106recR_Upper = _rows_for_hub(transformed_TN_df_Whole_upper_all_ZM, '106recR')
df_106recL_Upper = _rows_for_hub(transformed_TN_df_Whole_upper_all_ZM, '106recL')
df_106recR_Mid = _rows_for_hub(transformed_TN_df_Whole_mid_all_ZM, '106recR')
df_106recL_Mid = _rows_for_hub(transformed_TN_df_Whole_mid_all_ZM, '106recL')
df_106recR_Lower = _rows_for_hub(transformed_TN_df_Whole_lower_all_ZM, '106recR')

In [None]:
# Columns kept from each hub-node table
table_columns = ["HubNodes", "LymphNode", "zhangs_metric"]

# Dictionary of the four hub-node tables, keyed by the desired column-group
# name. Deliberately not named `dfs`: that name holds the (label, frame) list
# unpacked by patient_counts(*dfs) in earlier cells, and rebinding it to this
# dict would break those cells when they are re-run.
figure5_tables = {
    '106recR_Upper': df_106recR_Upper[table_columns],
    '106recL_Upper': df_106recL_Upper[table_columns],
    '106recR_Mid': df_106recR_Mid[table_columns],
    '106recL_Mid': df_106recL_Mid[table_columns],
}

# Combine the tables horizontally; the dict keys become the outer level of the
# multi-level columns, and rows are aligned on the frames' index.
combined_df = pd.concat(figure5_tables, axis=1)

# Save and display the combined dataframe
combined_df.to_csv("../Results/figure5_table.csv")
combined_df

In [None]:
# Network for hub node 106recR, upper-site rows, using the custom node positions
visualize_Network_TN_whole(df_106recR_Upper, '106recR', Lymphnode_Positions)

In [None]:
# Network for hub node 106recL, upper-site rows, using the custom node positions
visualize_Network_TN_whole(df_106recL_Upper, '106recL', Lymphnode_Positions)

In [None]:
# Network for hub node 106recR, mid-site rows, using the custom node positions
visualize_Network_TN_whole(df_106recR_Mid, '106recR', Lymphnode_Positions)

In [None]:
# Network for hub node 106recL, mid-site rows, using the custom node positions
visualize_Network_TN_whole(df_106recL_Mid, '106recL', Lymphnode_Positions)

In [None]:
# Network for hub node 106recR, lower-site rows, using the custom node positions
visualize_Network_TN_whole(df_106recR_Lower, '106recR', Lymphnode_Positions)

In [None]:
# Inputs for the 2x2 network figure: two hub nodes by two sites.
hub_nodes = ['106recR', '106recL']
descriptors = ['Upper', 'Mid']
# Keys follow the pattern df_<hub node>_<descriptor>.
# presumably the plotting helper looks tables up by that pattern; verify against it
transformed_dataframes = {
    'df_106recR_Upper': df_106recR_Upper,
    'df_106recR_Mid': df_106recR_Mid,
    'df_106recL_Upper': df_106recL_Upper,
    'df_106recL_Mid': df_106recL_Mid,
}

plot_TN_specific_networks_2x2(hub_nodes, descriptors, Lymphnode_Positions, transformed_dataframes)

In [96]:
# Ad-hoc export of the upper-site Zhang's-metric table to 'abc.csv' in the
# current working directory (the other outputs go under ../Results).
# NOTE(review): looks like a debugging leftover; confirm it is still needed.
transformed_TN_df_Whole_upper_all_ZM.to_csv('abc.csv')

### Plot multiple 2D Heatmaps with different confidence

In [98]:
# Confidence tables per site for the T1 / T24 pair. The trailing positional
# argument is 4 for upper and lower and 5 for mid; its meaning is defined by
# the helper and is not visible here.
# NOTE(review): the metric is spelled 'Confidence' (capitalised) while other
# cells pass lower-case metric names such as 'support'. Confirm the helper
# expects this spelling.
transformed_TN_df_T1_upper_C_all, transformed_TN_df_T24_upper_C_all = transform_for_heatmap_TN('T1_upper', 'T24_upper', 'all', 'Confidence', 4)
transformed_TN_df_T1_mid_C_all, transformed_TN_df_T24_mid_C_all = transform_for_heatmap_TN('T1_mid', 'T24_mid', 'all', 'Confidence', 5)
transformed_TN_df_T1_lower_C_all, transformed_TN_df_T24_lower_C_all = transform_for_heatmap_TN('T1_lower', 'T24_lower', 'all', 'Confidence', 4)

# Same site / argument combinations through transform_for_heatmap_opposite_TN;
# the results are passed as the second table list to the confidence plots.
transformed_TN_df_T1_upper_O_C_all, transformed_TN_df_T24_upper_O_C_all = transform_for_heatmap_opposite_TN('T1_upper', 'T24_upper', 'all', 'Confidence', 4)
transformed_TN_df_T1_mid_O_C_all, transformed_TN_df_T24_mid_O_C_all = transform_for_heatmap_opposite_TN('T1_mid', 'T24_mid', 'all', 'Confidence', 5)
transformed_TN_df_T1_lower_O_C_all, transformed_TN_df_T24_lower_O_C_all = transform_for_heatmap_opposite_TN('T1_lower', 'T24_lower', 'all', 'Confidence', 4)

In [None]:
# Create the multiple heatmaps (Difference in Confidence) for the upper site:
# the regular and the "opposite" T1/T24 tables are passed as two lists.
plot_multiple_transformed_heatmaps_TN_confidence([transformed_TN_df_T1_upper_C_all, transformed_TN_df_T24_upper_C_all], [transformed_TN_df_T1_upper_O_C_all, transformed_TN_df_T24_upper_O_C_all], ['T1_upper', 'T24_upper'], 'upper_Confidence')

In [None]:
# Create the multiple heatmaps (Difference in Confidence) for the mid site:
# the regular and the "opposite" T1/T24 tables are passed as two lists.
plot_multiple_transformed_heatmaps_TN_confidence([transformed_TN_df_T1_mid_C_all, transformed_TN_df_T24_mid_C_all], [transformed_TN_df_T1_mid_O_C_all, transformed_TN_df_T24_mid_O_C_all], ['T1_mid', 'T24_mid'], 'mid_Confidence')

In [None]:
# Create the multiple heatmaps (Difference in Confidence) for the lower site:
# the regular and the "opposite" T1/T24 tables are passed as two lists.
plot_multiple_transformed_heatmaps_TN_confidence([transformed_TN_df_T1_lower_C_all, transformed_TN_df_T24_lower_C_all], [transformed_TN_df_T1_lower_O_C_all, transformed_TN_df_T24_lower_O_C_all], ['T1_lower', 'T24_lower'], 'lower_Confidence')