# Overview

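This notebook is designed to be used alongside the [ProteinCartography pipeline](https://github.com/Arcadia-Science/ProteinCartography). It reuses the plotting functions from the pipeline's `plot_interactive.py` script and extends their plotting rules so that custom overlays can be added to the interactive map.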


## Setup

Import dependencies.


In [35]:
import os
import pandas as pd
import sys
import plotly.express as px
import arcadia_pycolor as apc

# Add the ProteinCartography script directory to the module search path so that
# `plot_interactive.py` can be imported below. The path is relative to the current
# working directory, so this assumes the notebook is run from its own folder with
# the ProteinCartography repository checked out two levels up.
sys.path.append("./../../ProteinCartography/ProteinCartography/")
from plot_interactive import (
    apply_coordinates,
    plot_interactive,
    generate_plotting_rules,
)


## Combine ProteinCartography results with results from the Actin Prediction pipeline

ProteinCartography produces an aggregated features file containing the information needed for plotting as well as the relevant metadata. The Actin Prediction pipeline also produces a summary file. The first step is to combine these two files.


In [36]:
# Load the Actin Prediction summary and copy its `query` column into a new
# `protid` column (placed at column position 13). The original `query` column
# is kept; `protid` is the key used to merge with the ProteinCartography results.
actin_summary_path = "../input/all_outputs_summarized.tsv"
all_outputs = pd.read_csv(actin_summary_path, sep="\t")
all_outputs.insert(loc=13, column="protid", value=all_outputs["query"].values)

In [39]:
# Merge outputs from each pipeline

# ProteinCartography aggregated features: plotting information plus metadata.
aggregated_features = pd.read_csv(
    "../../ProteinCartography/Actin/output/clusteringresults/actin_aggregated_features_pca_umap.tsv",
    sep="\t",
)

# Outer join on `protid`: proteins present in only one of the two tables are
# kept, with the columns from the other table left empty (NaN).
combined_aggregated_features = aggregated_features.merge(
    all_outputs, how="outer", on="protid"
)

# Save file
# Create the output folder first so the cell also runs on a fresh checkout,
# and pass the documented boolean `index=False` to leave the row index out.
os.makedirs("../output", exist_ok=True)
combined_aggregated_features.to_csv(
    "../output/combined_data.tsv", sep="\t", index=False
)

display(combined_aggregated_features)


Unnamed: 0,protid,UMAP1,UMAP2,StruCluster,LeidenCluster,pdb_origin,pdb_confidence,pdb_chains,TMscore_v_P60709,Entry,...,best_domain_bis,domain_number_exp,domain_number_reg,domain_number_clu,domain_number_ov,domain_number_env,domain_number_dom,domain_number_rep,domain_number_inc,description
0,A0A010QW85,5.711124,-0.868634,SC31,LC10,AlphaFold,69.878344,['A'],0.0,A0A010QW85,...,0.0,1.1,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin
1,A0A010RHW1,6.842225,-5.490983,SC39,LC15,AlphaFold,90.340917,['A'],0.0,A0A010RHW1,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin
2,A0A010SEV7,15.114400,-1.790521,SC22,LC21,AlphaFold,88.601931,['A'],0.0,A0A010SEV7,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin
3,A0A015ICG5,-4.460266,13.477738,SC10,LC14,AlphaFold,87.167435,['A'],0.0,A0A015ICG5,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin
4,A0A015J9J4,0.146874,1.547979,SC10,LC14,AlphaFold,87.762448,['A'],0.0,A0A015J9J4,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52545,Q8I8C3,,,,,,,,,,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin
52546,X6LKS7,,,,,,,,,,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin
52547,H7CD47,,,,,,,,,,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin
52548,A0A7G5CEI4,,,,,,,,,,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,Actin


## Create plotting rules


In [40]:
# Standard Arcadia plotting colors


def _arcadia_gradient(name):
    """Return the `grad_nested_list` of the gradient registered as `arcadia:<name>`."""
    return apc.Gradients[f"arcadia:{name}"].grad_nested_list


# Each gradient is exposed in its forward and reversed (`_r`) form.
arcadia_viridis = _arcadia_gradient("viridis")
arcadia_viridis_r = _arcadia_gradient("viridis_r")

arcadia_magma = _arcadia_gradient("magma")
arcadia_magma_r = _arcadia_gradient("magma_r")

arcadia_cividis = _arcadia_gradient("cividis")
arcadia_cividis_r = _arcadia_gradient("cividis_r")

arcadia_poppies = _arcadia_gradient("poppies")
arcadia_poppies_r = _arcadia_gradient("poppies_r")

arcadia_pansies = _arcadia_gradient("pansies")
arcadia_pansies_r = _arcadia_gradient("pansies_r")

arcadia_dahlias = _arcadia_gradient("dahlias")
arcadia_dahlias_r = _arcadia_gradient("dahlias_r")

In [41]:
# Set up annotation colors
from arcadia_pycolor import adjust_lightness

# One entry per Leiden cluster label: (Arcadia color name, lightness factor).
# A factor of None means the named color is used as-is; otherwise the color is
# passed through `adjust_lightness` with that factor. Clusters sharing an
# annotation (trailing comment) share a base color and differ only in lightness.
# NOTE(review): LC22 is labelled ARP6 but uses seaweed, whereas the other ARP6
# clusters (LC10, LC15) use brightgrey; confirm this is intentional.
_cluster_shades = {
    "LC00": ("periwinkle", None),  # actin
    "LC01": ("periwinkle", 0.9),  # actin
    "LC02": ("periwinkle", 1.1),  # actin
    "LC03": ("periwinkle", 1.2),  # actin
    "LC04": ("aegean", None),  # ARP4
    "LC05": ("periwinkle", 0.8),  # actin
    "LC06": ("canary", None),  # ARP7
    "LC07": ("mars", None),  # ARP2
    "LC08": ("canary", 1.2),  # ARP7
    "LC09": ("mars", 1.1),  # ARP2
    "LC10": ("brightgrey", None),  # ARP6
    "LC11": ("mint", None),  # ARP3
    "LC12": ("mint", 1.1),  # ARP3
    "LC13": ("aegean", 1.2),  # ARP4
    "LC14": ("rose", None),  # actin-like
    "LC15": ("brightgrey", 0.9),  # ARP6
    "LC16": ("periwinkle", 0.95),  # actin
    "LC17": ("mint", 0.9),  # ARP3
    "LC18": ("periwinkle", 1.15),  # actin
    "LC19": ("lime", None),  # ARP1
    "LC20": ("mars", 0.9),  # ARP2
    "LC21": ("mint", 0.8),  # ARP3
    "LC22": ("seaweed", 0.9),  # ARP6
    "": ("seaweed", None),  # empty label (presumably rows without a cluster; confirm)
}


def _resolve_shade(color_name, factor):
    """Look up `arcadia:<color_name>` and, if a factor is given, adjust its lightness."""
    base_color = apc.All[f"arcadia:{color_name}"]
    if factor is None:
        return base_color
    return adjust_lightness(base_color, factor)


annotation_colors = {
    cluster_label: _resolve_shade(color_name, factor)
    for cluster_label, (color_name, factor) in _cluster_shades.items()
}


In [42]:
# Build the default plotting rules with ProteinCartography's
# `generate_plotting_rules`, using P60709 as the key protein ID.
default_rule_options = {
    "taxon_focus": "euk",
    "keyids": ["P60709"],
    "version": "current",
}
plotting_rules = generate_plotting_rules(**default_rule_options)


In [43]:
# Define plotting rules


def _conservation_rule(label):
    """Build a continuous rule on a 0-1 scale, colored with arcadia_viridis."""
    return {
        "type": "continuous",
        "color_scale": arcadia_viridis,
        "cmin": 0,
        "cmax": 1,
        "textlabel": label,
    }


# Continuous overlays: column name -> label shown in the plot.
custom_plotting_rules = {
    column_name: _conservation_rule(label)
    for column_name, label in (
        ("lon_fraction_matching", "Longitudinal contact conservation"),
        ("lat_fraction_matching", "Lateral contact conservation"),
        ("atp_fraction_matching", "ATP contact conservation"),
        ("w_avg_contacts", "Total polymerization conservation"),
    )
}

# Categorical overlay: color each Leiden cluster with the annotation palette.
custom_plotting_rules["LeidenCluster"] = {
    "type": "categorical",
    "color_dict": annotation_colors,
    "apply": lambda x: str(x),
    "textlabel": "Top Annotation",
    "fillna": "",
}


In [44]:
# Merge plotting rules: start from the default rules, then add the custom ones.
# Where both define the same key (e.g. "LeidenCluster"), the custom rule wins.
combined_plotting_rules = {**plotting_rules, **custom_plotting_rules}
combined_plotting_rules

{'Protein names': {'type': 'hovertext',
  'fillna': '',
  'textlabel': 'Protein name'},
 'Gene Names (primary)': {'type': 'hovertext',
  'fillna': '',
  'textlabel': 'Gene name'},
 'Organism': {'type': 'hovertext', 'fillna': '', 'textlabel': 'Organism'},
 'LeidenCluster': {'type': 'categorical',
  'color_dict': {'LC00': '#DCBFFC',
   'LC01': '#DCBFFC',
   'LC02': '#DCBFFC',
   'LC03': '#DCBFFC',
   'LC04': '#5088C5',
   'LC05': '#DCBFFC',
   'LC06': '#F7B846',
   'LC07': '#DA9085',
   'LC08': '#F7B846',
   'LC09': '#DA9085',
   'LC10': '#3B9886',
   'LC11': '#D1EADF',
   'LC12': '#D1EADF',
   'LC13': '#5088C5',
   'LC14': '#F898AE',
   'LC15': '#3B9886',
   'LC16': '#DCBFFC',
   'LC17': '#D1EADF',
   'LC18': '#DCBFFC',
   'LC19': '#73B5E3',
   'LC20': '#DA9085',
   'LC21': '#D1EADF',
   'LC22': '#3B9886',
   '': '#3B9886'},
  'apply': <function __main__.<lambda>(x)>,
  'textlabel': 'Top Annotation',
  'fillna': ''},
 'Annotation': {'type': 'categorical',
  'fillna': 0,
  'apply': <func

## Plotting

We create the interactive plot using the `plot_interactive` function from the `plot_interactive.py` script within ProteinCartography.


In [46]:
# Draw the interactive map from the combined table using the merged rules and
# write it to an HTML file; `show=False` is passed so only the file is requested.
interactive_plot_options = {
    "coordinates_file": "../output/combined_data.tsv",
    "plotting_rules": combined_plotting_rules,
    "output_file": "../output/aggregated_maps.html",
    "keyids": ["P60709"],
    "show": False,
}
plot_interactive(**interactive_plot_options)


Columns (0,3,4,5,7,9,10,11,12,14,15,18,19,20,21,23,24,25,26,27,28,29,30,44,45,56,57,58,59,60,63) have mixed types. Specify dtype option on import or set low_memory=False.

