# Load the stLearn results

In [None]:
import pandas as pd
import stlearn as st
import pickle
import scanpy as sc
import random
import math
import seaborn as sns
from pyvis.network import Network
from bokeh.io import output_notebook
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu

In [None]:
# Pickled stLearn result file for every Visium sample, grouped by cohort.
# Every file is named "<sample_id>_stlearn.pkl", so the mapping is derived from
# the sample IDs. Dict insertion order (cohort order, then sample order) is kept.
samples = {
    cohort: {sample_id: f"{sample_id}_stlearn.pkl" for sample_id in sample_ids}
    for cohort, sample_ids in (
        ("visium_scc", ("B18_SCC", "E15_SCC", "F21_SCC", "P13_SCC", "P30_SCC")),
        ("visium_bcc", ("B18_BCC", "E15_BCC", "F21_BCC")),
        ("visium_mela", ("6767_Mel", "21031_Mel", "48974_Mel", "66487_Mel")),
    )
}

# Directory that holds the pickled stLearn cell-cell interaction results.
data_dir = "/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/SkinCancerAtlas/cci_results/"

# Replace every file name stored in `samples` with the object unpickled from
# that file (presumably AnnData objects, given how they are used afterwards).
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted location.
for tech, tech_samples in samples.items():
    for sample_name, file_name in tech_samples.items():
        with open(data_dir + file_name, 'rb') as f:
            # Re-assigning an existing key does not resize the dict, so this
            # is safe while iterating over its items.
            tech_samples[sample_name] = pickle.load(f)

# Flatten to a single list: cohort order first, then sample order within each
# cohort (dict insertion order).
anndata = [
    loaded
    for tech_samples in samples.values()
    for loaded in tech_samples.values()
]

In [None]:
# Library IDs, listed in the same order as the objects held in `anndata`.
keys = ["B18_SCC", "E15_SCC", "F21_SCC", "P13_SCC", "P30_SCC",
        "B18_BCC", "E15_BCC", "F21_BCC",
        "6767_Mel", "21031_Mel", "48974_Mel", "66487_Mel"]

# Tag every loaded object with its library ID (position i in `anndata`
# receives keys[i]).
for position, library_id in enumerate(keys):
    anndata[position].obs["library_id"] = library_id

# Merge all samples into one object; the batch column is "library_id" and its
# categories are the IDs above.
combat_adata = anndata[0].concatenate(
    anndata[1:],
    batch_key="library_id",
    uns_merge="unique",
    batch_categories=keys,
)

# The cancer type is the part of the library ID after the underscore
# ("B18_SCC" -> "SCC", "6767_Mel" -> "Mel").
combat_adata.obs["cancer_type"] = combat_adata.obs["library_id"].str.split("_").str[1]

# Load Melanoma up-reg LRs

In [None]:
# Genes taking part in melanoma up-regulated ligand-receptor interactions.
# The entries read as consecutive (ligand, receptor) pairs -- presumably; confirm
# against the analysis that produced them. Only the unique gene symbols are
# kept, and the order of the resulting list is arbitrary (it comes from a set).
_mel_upreg_lr_pairs = (
    ("COL1A1", "DDR2"), ("COL1A1", "ITGB1"), ("COL1A1", "CD44"), ("COL1A1", "CD36"),
    ("COL1A1", "ITGA5"), ("COL1A1", "ITGA2"), ("COL1A1", "DDR1"), ("COL3A1", "DDR2"),
    ("COL3A1", "DDR1"), ("COL1A2", "ITGB1"), ("COL1A2", "ITGA2"), ("COL1A2", "CD44"),
    ("CCL19", "CXCR3"), ("CCL19", "CCRL2"), ("CCL5", "ACKR1"), ("AZGP1", "ITGAV"),
    ("CCL5", "CCR1"), ("FGF18", "FGFR1"), ("FGF2", "FGFR1"), ("VCAN", "CD44"),
    ("FGF1", "FGFR1"), ("FGF9", "FGFR1"), ("LTB", "CD40"), ("IL34", "CSF1R"),
    ("ICAM3", "ITGB2"), ("FGF1", "CD44"), ("LTB", "LTBR"), ("IL15", "IL2RG"),
    ("EGF", "ERBB3"), ("ICAM2", "ITGB2"), ("TSLP", "IL7R"), ("FGF2", "CD44"),
    ("ADM2", "RAMP3"), ("IL15", "IL15RA"), ("CLCF1", "LIFR"), ("PDCD1LG2", "PDCD1"),
    ("TNFSF14", "TNFRSF14"),
)
Mel_upreg_LRs = list({gene for pair in _mel_upreg_lr_pairs for gene in pair})

# Tab-separated table of melanoma up-regulated genes; two of its columns are
# pooled below.
mel_upreg_table = pd.read_csv(
    "/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/mel_vs_norm_genes_Mel_Upregulated.txt",
    sep="\t",
)

# Unique entries across both comparison columns.
pooled_entries = set(
    list(mel_upreg_table['Xen_N Vs X_Mel'])
    + list(mel_upreg_table['scRNA Melanoma Vs Normal melanocytes'])
)

# Drop NaN placeholders (the float check targets missing cells) and the two
# genes that are excluded explicitly.
Mel_upreg_genes = [
    gene
    for gene in pooled_entries
    if not (isinstance(gene, float) and math.isnan(gene))
    and gene not in ['CCL19', 'CXCR3']
]

In [None]:
# Score every observation for the melanoma up-regulated gene set; the code
# below reads the result back from obs["score"].
sc.tl.score_genes(combat_adata, Mel_upreg_genes)

# Split the scores by cancer type
score_per_obs = combat_adata.obs["score"]
cancer_type_per_obs = combat_adata.obs["cancer_type"]
group1 = score_per_obs[cancer_type_per_obs == "BCC"]
group2 = score_per_obs[cancer_type_per_obs == "SCC"]
group3 = score_per_obs[cancer_type_per_obs == "Mel"]

# One-sided Mann-Whitney U tests: are melanoma scores (group3) greater than
# BCC scores (group1) and than SCC scores (group2)?
stat1, pval1 = mannwhitneyu(group3, group1, alternative="greater")
stat2, pval2 = mannwhitneyu(group3, group2, alternative="greater")

print(f"Category 3 vs 1: p-value = {pval1}")
print(f"Category 3 vs 2: p-value = {pval2}")

In [None]:
# Violin plot of the gene-set score for each cancer type
sns.violinplot(x="cancer_type", y="score", data=combat_adata.obs, inner="point", palette="Set2")

# Annotate with the p-values of the one-sided Mann-Whitney tests.
# pval1 comes from Mel vs BCC (group3 vs group1) and pval2 from Mel vs SCC
# (group3 vs group2), so each label has to carry the matching value; the two
# values were previously printed under each other's label.
top_score = combat_adata.obs["score"].max()
plt.text(2, top_score, f"p(Mel vs Scc) = {pval2:.3f}", ha="center", fontsize=10, color="blue")
plt.text(2, top_score - 0.5, f"p(Mel vs Bcc) = {pval1:.3f}", ha="center", fontsize=10, color="blue")

# Customize plot
plt.title("Violin Plot with Wilcoxon Rank-Sum Test p-values")
plt.xlabel("Category")
plt.ylabel("Value")
plt.show()

# Fisher's exact test

In [None]:
# Load each drug resource and keep only the rows whose "Indication" mentions
# melanoma (case-insensitive; rows with a missing indication are dropped).
drug_db = pd.read_csv(
    "/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/Onkar_DrugDb/DrugBank_5112_and_Drug_Repurpose_Hub.csv",
    index_col=0,
)
drug_db_is_melanoma = drug_db["Indication"].str.contains("melanoma", case=False, na=False)
drug_db = drug_db[drug_db_is_melanoma]

TxGNN = pd.read_csv(
    "/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/Onkar_DrugDb/SkinCancer_TxGNN.csv",
    index_col=0,
)
txgnn_is_melanoma = TxGNN["Indication"].str.contains("melanoma", case=False, na=False)
TxGNN = TxGNN[txgnn_is_melanoma]

Hetionet = pd.read_csv(
    "/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/Onkar_DrugDb/SkinCancer_Hetionet.csv",
    index_col=0,
)
hetionet_is_melanoma = Hetionet["Indication"].str.contains("melanoma", case=False, na=False)
Hetionet = Hetionet[hetionet_is_melanoma]

In [None]:
# Unique "Target_Gene" entries of the three melanoma-filtered drug tables,
# in the order drug_db, TxGNN, Hetionet.
mela_drug_genes = [
    entry
    for drug_table in (drug_db, TxGNN, Hetionet)
    for entry in drug_table["Target_Gene"].unique()
]

# Skip empty/NaN entries, split comma-separated entries into single symbols
# and strip the surrounding whitespace.
druggable = {
    symbol.strip()
    for entry in mela_drug_genes
    if entry and not (isinstance(entry, float) and math.isnan(entry))
    for symbol in entry.split(',')
}

In [None]:
connectomeDB = pd.read_csv("/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/Onkar_DrugDb/connectomedb2020.txt",sep="\t",encoding='unicode_escape')

# Universe of the test: every ligand or receptor gene symbol in connectomeDB.
# Missing symbols are dropped so that NaN is never counted as a gene.
All_LRs = (
    set(connectomeDB["Ligand gene symbol"].dropna())
    | set(connectomeDB["Receptor gene symbol"].dropna())
)
mel_upreg_set = set(Mel_upreg_genes)

# Complements are taken inside the universe.
no_druggable = All_LRs.difference(druggable)
mela_non_up = All_LRs.difference(mel_upreg_set)

# The four cells of the 2x2 table. All four must be subsets of the same
# universe (All_LRs) for the table to be a valid partition. The first cell was
# previously `druggable & Mel_upreg_genes` without the All_LRs restriction, so
# it could count genes that are not ligands/receptors while the other three
# cells could not.
druggable_mela_up = druggable.intersection(mel_upreg_set, All_LRs)
druggable_mela_non_up = mela_non_up.intersection(druggable)
no_druggable_mel_up = mel_upreg_set.intersection(no_druggable)
no_druggable_mel_no_up = no_druggable.intersection(mela_non_up)

In [None]:
# 2x2 table of gene counts: rows are melanoma up-regulated vs not,
# columns are druggable vs not.
contingency_table = pd.DataFrame(
    {
        "Druggable": [len(druggable_mela_up), len(druggable_mela_non_up)],
        "Not-Drugaable": [len(no_druggable_mel_up), len(no_druggable_mel_no_up)],
    },
    index=["Mel-Up", "Mel-Const"],
)

In [None]:
# Show the 2x2 count table before it is passed to Fisher's exact test.
print(contingency_table)

In [None]:
from scipy.stats import fisher_exact

# One-sided Fisher's exact test on the 2x2 table (alternative "greater").
fisher_outcome = fisher_exact(contingency_table, alternative='greater')
odds_ratio, p_value = fisher_outcome

print(f"Odds Ratio: {odds_ratio}")
print(f"p-value: {p_value}")

In [None]:
# Display the genes that are both druggable and melanoma up-regulated
# (the set counted in the first cell of the contingency table).
druggable_mela_up

## Plotting FGF2 and CD44 on Knowledge Graph

In [None]:
# Edge table of the knowledge graph; the cells below filter it on its
# "source", "target" and "metaedge" columns.
hetionet_complete = pd.read_csv("/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/Onkar_DrugDb/Hetionet_complete.csv",index_col=0)

In [None]:
# Genes of interest: melanoma up-regulated genes plus the ligand/receptor genes.
mel_genes_of_interest = Mel_upreg_genes + Mel_upreg_LRs
edge_kind = hetionet_complete["metaedge"]

# "CtD" edges that point at melanoma (in Hetionet's abbreviations this is
# Compound-treats-Disease).
mel_treat_drugs = hetionet_complete[
    (hetionet_complete["target"] == "melanoma") & (edge_kind == "CtD")
]

# "CdG" edges from those drugs to genes of interest
# (Hetionet: Compound-downregulates-Gene).
melanoma_drug_names = list(mel_treat_drugs['source'].unique())
mel_treat_drugs_gene_connections = hetionet_complete[
    hetionet_complete['source'].isin(melanoma_drug_names)
    & hetionet_complete["target"].isin(mel_genes_of_interest)
    & (edge_kind == "CdG")
]

# "DaG" edges from melanoma to genes of interest
# (Hetionet: Disease-associates-Gene).
mel_gene_connections = hetionet_complete[
    (hetionet_complete["source"] == "melanoma")
    & (edge_kind == "DaG")
    & hetionet_complete["target"].isin(mel_genes_of_interest)
]

# "CbG" edges that target any of the melanoma-associated genes found above
# (Hetionet: Compound-binds-Gene).
melanoma_associated_genes = list(mel_gene_connections['target'].unique())
mel_up_genes_drugs = hetionet_complete[
    hetionet_complete['target'].isin(melanoma_associated_genes)
    & (edge_kind == "CbG")
]

In [None]:
import copy
def filter_common_connections(df, source1, source2):
    """Return the edges of two sources that lead to targets they share.

    Args:
        df: edge table with at least a 'source' and a 'target' column.
        source1: first source value to compare.
        source2: second source value to compare.

    Returns:
        The rows of ``df`` whose source is ``source1`` or ``source2`` and
        whose target is reached from both of them. Row order, index and
        columns are those of ``df``.
    """
    sources = df['source']
    targets = df['target']

    # Targets reached from both sources
    shared_targets = set(targets[sources == source1]) & set(targets[sources == source2])

    keep = targets.isin(shared_targets) & sources.isin([source1, source2])
    return df[keep]
# Edges of FGF2 and CD44 that lead to a target both genes share.
fgf2_cd44_edges = filter_common_connections(hetionet_complete, "FGF2", "CD44")

# One row per distinct (metaedge, target, source_type, target_type).
shared_edges = fgf2_cd44_edges[["metaedge","target","source_type","target_type"]].drop_duplicates(keep="first")

# Targets written as "<prefix>::<id>" are reduced to the part after "::".
shared_edges["target"] = np.where(
    shared_edges["target"].str.contains("::"),
    shared_edges["target"].str.split("::").str[1],
    shared_edges["target"],
)

# Attach every shared edge once to FGF2 and once to CD44, then stack them.
common_df_FGF2 = shared_edges.assign(source="FGF2")
common_df_CD44 = shared_edges.assign(source="CD44")
common_df = pd.concat([common_df_FGF2, common_df_CD44])

In [None]:
from goatools.base import get_godag
from goatools.semantic import TermCounts, semantic_similarity
from sklearn.cluster import KMeans
import numpy as np

# Load the Gene Ontology graph (a directed acyclic graph) from the OBO file.
godag = get_godag("/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/Onkar_DrugDb/go-basic.obo")
go_terms = list(common_df["target"].unique())

# Keep only the targets that are known to the GO graph.
go_terms_updated = [term for term in go_terms if term in godag.keys()]

# Pairwise semantic similarity between the retained GO terms
# (row i / column j compares term i with term j).
raw_similarities = [
    [semantic_similarity(term_a, term_b, godag) for term_b in go_terms_updated]
    for term_a in go_terms_updated
]

# A missing similarity (None) is treated as 0.
similarities = [
    [0 if value is None else value for value in row]
    for row in raw_similarities
]

# Cluster the terms, using each term's similarity row as its feature vector.
num_clusters = 3  # Define the number of clusters
kmeans = KMeans(n_clusters=num_clusters,random_state=0)
kmeans.fit(similarities)

# Collect and print the GO terms assigned to each cluster.
term_array = np.array(go_terms_updated)
clusters = {}
for cluster_num in range(num_clusters):
    print(f"Cluster {cluster_num + 1}:")
    cluster_terms = term_array[kmeans.labels_ == cluster_num]
    clusters[cluster_num] = cluster_terms
    print(cluster_terms)

In [None]:
# Replace the target type of clustered GO terms by their cluster label; every
# other row keeps its target type. Labels are applied from Cluster3 to
# Cluster1 so that Cluster1 takes precedence, then Cluster2, then Cluster3.
relabelled_type = common_df["target_type"]
for cluster_index, cluster_label in ((2, "Cluster3"), (1, "Cluster2"), (0, "Cluster1")):
    in_cluster = common_df["target"].isin(clusters[cluster_index])
    relabelled_type = np.where(in_cluster, cluster_label, relabelled_type)
common_df["target_type"] = relabelled_type

# Bring the edge tables to one common column layout.
edge_columns = ["source","metaedge","target","source_type","target_type"]
common_df = common_df[edge_columns]
mel_treat_drugs = mel_treat_drugs[edge_columns]
mel_gene_connections = mel_gene_connections[edge_columns]

In [None]:
# These frames were produced by filtering/selecting from other frames. Take
# explicit copies so the column assignments below act on independent data and
# do not trigger pandas' SettingWithCopyWarning.
mel_treat_drugs = mel_treat_drugs.copy()
mel_treat_drugs_gene_connections = mel_treat_drugs_gene_connections.copy()
mel_gene_connections = mel_gene_connections.copy()
mel_up_genes_drugs = mel_up_genes_drugs.copy()

# Node-type labels used for colouring the graph.
mel_treat_drugs['source_type'] = 'Mel_treat_drugs'
mel_treat_drugs_gene_connections["target_type"] = "Mel_drug_target_gene"
mel_gene_connections['target_type'] = "Mel_connected_genes"
mel_up_genes_drugs['source_type'] = "Drug_for_Mel_genes"

# Reverse the direction of these edges: source/target and their type columns
# swap places, so the "Drug_for_Mel_genes" label set above ends up in
# target_type.
# NOTE: the swap happens in place on the notebook variable, so running this
# cell a second time swaps the columns back.
mel_up_genes_drugs['source'], mel_up_genes_drugs['target'] = mel_up_genes_drugs['target'], mel_up_genes_drugs['source']
mel_up_genes_drugs['source_type'], mel_up_genes_drugs['target_type'] = mel_up_genes_drugs['target_type'],mel_up_genes_drugs['source_type']

mel_treat_drugs_gene_connections['source_type']="Mel_treat_drugs"
mel_up_genes_drugs["source_type"] = "Mel_connected_genes"


graph_to_plot = pd.concat([common_df,mel_treat_drugs,mel_gene_connections,
                           mel_treat_drugs_gene_connections, mel_up_genes_drugs])

# Keep an edge if its target is not typed 'Gene', or if the target is one of
# the melanoma genes of interest.
graph_to_plot = graph_to_plot[
    (graph_to_plot['target_type'] != 'Gene')
    | (graph_to_plot['target'].isin(Mel_upreg_genes+Mel_upreg_LRs))
].copy()

# Collapse GO-term targets onto their type/cluster label. Match GO identifiers
# ("GO:" followed by a digit) rather than the bare substring "GO", which would
# also hit gene symbols such as AGO2 or GOLM1 and replace their node name.
is_go_term = graph_to_plot['target'].str.contains(r"GO:\d", regex=True)
graph_to_plot['target'] = np.where(is_go_term,graph_to_plot['target_type'],graph_to_plot['target'])
graph_to_plot["source_type"] = np.where(graph_to_plot["source_type"]=="Gene","Main_targets",graph_to_plot["source_type"])

In [None]:
# List the distinct target and source type labels present in the edge table
# (the graph cell below looks these labels up in its colour map).
print(graph_to_plot["target_type"].unique())
print("===")
print(graph_to_plot["source_type"].unique())

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

# Edge table to draw (one row per edge).
df = graph_to_plot

# Directed graph that receives one edge per row.
G = nx.DiGraph()

# Colour per node type; types without an entry are drawn in black.
node_colors = {
    'Disease': '#FF3131',
    'Main_targets': "#E4D00A",
    'Mel_connected_genes': '#4169E1',
    'Mel_treat_drugs': '#008000',
    'Mel_drug_target_gene': '#87CEEB',
    'Drug_for_Mel_genes': '#9FE2BF',
    'Cluster3': '#FFDEAD',
    'Cluster2': '#997950',
    'Cluster1': '#8F7AEC',
    'BiologicalProcess': '#FFC0CB',
    'Pathway': '#A9A9A9'
}

# Walk the rows in order. A node that appears in several rows keeps the
# colour of the last row that mentions it, because add_node overwrites the
# attribute each time.
edge_rows = zip(df['source'], df['target'], df['metaedge'],
                df['source_type'], df['target_type'])
for source, target, metaedge, source_type, target_type in edge_rows:
    G.add_node(source, color=node_colors.get(source_type, '#000000'))
    G.add_node(target, color=node_colors.get(target_type, '#000000'))
    G.add_edge(source, target, metaedge=metaedge)

# Custom layout function for distinct circular layouts
def _ring_positions(nodes, radius, flip_y=False):
    """Spread ``nodes`` evenly on a circle of ``radius`` around the origin.

    The first node sits at angle 0 and the rest follow at equal angular steps.
    With ``flip_y`` the y coordinate is negated. An empty ``nodes`` list
    yields an empty dict.
    """
    if not nodes:
        return {}
    angle_step = 2 * np.pi / len(nodes)
    positions = {}
    for i, node in enumerate(nodes):
        theta = i * angle_step
        y = radius * np.sin(theta)
        positions[node] = (radius * np.cos(theta), -y if flip_y else y)
    return positions


def custom_circular_layout(G, radius_increment=2):
    """Place the disease node at the centre and all other nodes on three rings.

    Nodes are assigned to a ring by the 'metaedge' attributes of the edges
    returned by ``G.edges(node)``:

    * any of 'CtD', 'CdG'                        -> ring of radius 3 * radius_increment
    * else any of 'DaG', 'CbG', 'GpBP', 'GpMF',
      'GpPW'                                     -> ring of radius 5 * radius_increment
                                                    (y coordinates negated)
    * otherwise                                  -> ring of radius 7 * radius_increment

    NOTE(review): for a directed graph such as the ``nx.DiGraph`` built in this
    notebook, ``G.edges(node)`` reports outgoing edges only, so a node that has
    only incoming edges lands on the outermost ring -- confirm this is intended.

    The centre node is the node whose 'color' attribute equals the module-level
    ``node_colors['Disease']``. If several nodes carry that colour, the last
    one in node order is the centre and the others are placed on a ring like
    any other node, so that every node receives a position.

    Args:
        G: networkx graph whose nodes have a 'color' attribute and whose
            edges have a 'metaedge' attribute.
        radius_increment: base unit for the three ring radii.

    Returns:
        dict mapping every node of ``G`` to an ``(x, y)`` tuple.
    """
    # Define upper and lower metaedge categories
    upper_metaedges = {'CtD', 'CdG'}
    lower_metaedges = {'DaG', 'CbG', 'GpBP', 'GpMF', 'GpPW'}
    disease_color = node_colors['Disease']

    # Pick the centre first so that any further disease-coloured node can be
    # routed through the normal ring assignment instead of being dropped.
    disease_nodes = [
        node for node in G.nodes()
        if G.nodes[node].get('color') == disease_color
    ]
    center_node = disease_nodes[-1] if disease_nodes else None

    upper_nodes = []
    lower_nodes = []
    other_nodes = []
    for node in G.nodes():
        if disease_nodes and node == center_node:
            continue
        connected_metaedges = {
            G.edges[edge]['metaedge']
            for edge in G.edges(node)
            if 'metaedge' in G.edges[edge]
        }
        if connected_metaedges & upper_metaedges:
            upper_nodes.append(node)
        elif connected_metaedges & lower_metaedges:
            lower_nodes.append(node)
        else:
            other_nodes.append(node)

    pos = {}
    # Explicit None test: a falsy node name (e.g. "" or 0) is still a valid centre.
    if center_node is not None:
        pos[center_node] = (0, 0)

    pos.update(_ring_positions(upper_nodes, radius_increment * 3))
    # Flip y-axis for the second ring
    pos.update(_ring_positions(lower_nodes, radius_increment * 5, flip_y=True))
    pos.update(_ring_positions(other_nodes, radius_increment * 7))

    return pos

# Node coordinates from the custom ring layout
pos = custom_circular_layout(G, radius_increment=2)

# One colour per node, in node order; nodes without a colour are black.
node_colors_list = [attrs.get('color', '#000000') for _, attrs in G.nodes(data=True)]

plt.figure(figsize=(16, 16))

# Nodes first, then labels, then edges (same drawing order as before).
nx.draw_networkx_nodes(
    G, pos,
    node_color=node_colors_list,
    node_size=700,
    alpha=0.8,
)
nx.draw_networkx_labels(
    G, pos,
    font_size=16.5,
    font_color='black',
    font_weight='bold',
)
nx.draw_networkx_edges(
    G, pos,
    width=2,
    alpha=0.6,
    edge_color='gray',
    arrows=True,
    arrowsize=15,
)

# Title, no axes
plt.title("Circular Graph with Center and Upper/Lower Layouts", fontsize=16)
plt.axis('off')

# Write the figure to a PDF in the working directory, then display it.
plt.savefig("custom_circular_layout_graph.pdf", format="pdf")
plt.show()
