<a href="https://colab.research.google.com/github/Palaeoprot/ProteoParc_Colab/blob/main/ProteoParc_in_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## License

The [ProteoParc](https://github.com/guillecarrillo/proteoparc) pipeline was written by [Guillermo Carrillo Martín](https://github.com/guillecarrillo), and this is his license from GitHub: "*[ProteoParc] is the result of a lot of collective work, so its citation in any study in which it is used would be appreciated.*


*Every user is free to change any section of the code so it can be adjusted to individual necessities; if you do so, please indicate it in your manuscripts, describing the changes, to avoid confusion. For more information, read the License. This pipeline has been possible thanks to a lot of collective work, so its citation in any study in which it is used would be appreciated. Every user is free to change any section of the code so it can be adjusted to individual necessities; if you do so, please indicate it in your manuscripts to avoid confusion.*"

Here is my take on it in Colab.

<img src='https://drive.google.com/uc?export=view&id=1FqSr3E9UQKtRO4KwR6LjAU8TPrD8Oq6q' width="800" align="center">





In [None]:
!pip install biopython
import requests, sys, json, os, time  #data handling
import argparse  # command-line arguments
import shlex  # parsing shell commands
import subprocess  # For system commands (already imported twice)
import ipywidgets as widgets  # For interactive elements
from IPython.display import display, HTML, clear_output  # For interactive output
import shutil
import re
import numpy as np
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from requests.adapters import HTTPAdapter, Retry
from datetime import datetime
# Mount Google Drive so results can be written under /content/drive.
# Optional: comment out the two lines below when Drive storage is not wanted.
# NOTE(review): create_output_directory() and run_proteoparc() build paths
# under /content/drive/MyDrive — confirm the notebook is meant to work
# without the mount.
from google.colab import drive
drive.mount('/content/drive')

# Global constants
# Base URL of the UniProt REST API. Request URLs are built by appending an
# endpoint path (e.g. "uniparc/search") directly, so the trailing slash matters.
WEBSITE_API = "https://rest.uniprot.org/"

In [None]:
#@title ##Load the dictionaries
# Gene lists for the different tissues / sample types offered in the UI.
# Every entry is a (gene symbol, human-readable description) tuple: the symbol
# is what gets written to the gene list and used in the UniParc "gene:" query,
# the description is only shown next to the checkbox.

# Genes offered under the "Bone" tissue.
bone_protein_genes = [
    ("ALB", "serum albumin"),
    ("AEBP1", "AE binding protein 1"),
    ("AHSG", "alpha-2-HS-glycoprotein (fetuin-A)"),
    ("ALPL", "alkaline phosphatase, liver/bone/kidney"),
    ("APOA1", "apolipoprotein A-I"),
    ("APOA4", "apolipoprotein A-IV"),
    ("APOE", "apolipoprotein E"),
    ("APOC1", "apolipoprotein C-I"),
    ("APP", "amyloid beta precursor protein"),
    ("ASPN", "asporin"),
    ("BGLAP", "bone gamma-carboxyglutamate protein (osteocalcin)"),
    ("BGN", "biglycan"),
    ("C3", "complement component 3"),
    ("C8B", "complement component 8, beta polypeptide"),
    ("C9", "complement component 9"),
    ("CFH", "complement factor H"),
    ("CHAD", "chondroadherin"),
    ("CHGA", "chromogranin A"),
    ("CLEC11A", "C-type lectin domain family 11, member A"),
    ("CLEC3B", "C-type lectin domain family 3, member B"),
    ("CLU", "clusterin"),
    ("COL10A1", "collagen type X alpha 1 chain"),
    ("COL11A1", "collagen type XI alpha 1 chain"),
    ("COL11A2", "collagen type XI alpha 2 chain"),
    ("COL12A1", "collagen type XII alpha 1 chain"),
    ("COL16A1", "collagen type XVI alpha 1 chain"),
    ("COL1A1", "collagen type I alpha 1 chain"),
    ("COL1A2", "collagen type I alpha 2 chain"),
    ("COL21A1", "collagen type XXI alpha 1 chain"),
    ("COL22A1", "collagen type XXII alpha 1 chain"),
    ("COL2A1", "collagen type II alpha 1 chain"),
    ("COL3A1", "collagen type III alpha 1 chain"),
    ("COL4A3", "collagen type IV alpha 3 chain"),
    ("COL4A4", "collagen type IV alpha 4 chain"),
    ("COL4A5", "collagen type IV alpha 5 chain"),
    ("COL5A1", "collagen type V alpha 1 chain"),
    ("COL5A2", "collagen type V alpha 2 chain"),
    ("COL5A3", "collagen type V alpha 3 chain"),
    ("COL6A1", "collagen type VI alpha 1 chain"),
    ("COL6A3", "collagen type VI alpha 3 chain"),
    ("COL8A1", "collagen type VIII alpha 1 chain"),
    ("CRP", "C-reactive protein"),
    ("DCN", "decorin"),
    ("DPT", "dermatopontin"),
    ("EEF1A1", "eukaryotic translation elongation factor 1 alpha 1"),
    ("EMILIN1", "elastin microfibril interfacer 1"),
    ("EZR", "ezrin"),
    ("F10", "coagulation factor X"),
    ("F2", "coagulation factor II (thrombin)"),
    ("F7", "coagulation factor VII"),
    ("F9", "coagulation factor IX"),
    ("FGL2", "fibrinogen-like 2"),
    ("FMOD", "fibromodulin"),
    ("FN1", "fibronectin 1"),
    ("GAPDH", "glyceraldehyde-3-phosphate dehydrogenase"),
    ("GAS6", "growth arrest-specific 6"),
    ("GC", "group-specific component (vitamin D binding protein)"),
    ("HAPLN3", "hyaluronan and proteoglycan link protein 3"),
    ("HSP90B1", "heat shock protein 90 beta family member 1"),
    ("HSPA5", "heat shock protein family A (Hsp70) member 5"),
    ("HTRA1", "HtrA serine peptidase 1"),
    ("IBSP", "integrin binding sialoprotein"),
    ("IGF1", "insulin-like growth factor 1"),
    ("IGF2", "insulin-like growth factor 2"),
    ("IGFALS", "insulin-like growth factor binding protein, acid labile subunit"),
    ("IGFBP1", "insulin-like growth factor binding protein 1"),
    ("KNG1", "kininogen 1"),
    ("KRT2", "keratin 2"),
    ("LOX", "lysyl oxidase"),
    ("LRRC15", "leucine rich repeat containing 15"),
    ("LUM", "lumican"),
    ("MGP", "matrix Gla protein"),
    ("MMP2", "matrix metallopeptidase 2"),
    ("MSN", "moesin"),
    ("MYO1B", "myosin IB"),
    ("NUCB1", "nucleobindin 1"),
    ("NUCB2", "nucleobindin 2"),
    ("OGN", "osteoglycin"),
    ("OLFML1", "olfactomedin-like 1"),
    ("OLFML3", "olfactomedin-like 3"),
    ("OMD", "osteomodulin"),
    ("P4HB", "prolyl 4-hydroxylase subunit beta"),
    ("PAM", "peptidylglycine alpha-amidating monooxygenase"),
    ("PCOLCE", "procollagen C-endopeptidase enhancer"),
    ("PHOSPHO1", "phosphatase, orphan 1"),
    ("POSTN", "periostin"),
    ("PROC", "protein C, inactivator of coagulation factors Va and VIIIa"),
    ("PROS1", "protein S, vitamin K-dependent"),
    ("PRSS2", "protease, serine 2"),
    ("SERPINC1", "serpin family C member 1 (antithrombin)"),
    ("SERPIND1", "serpin family D member 1 (heparin cofactor II)"),
    ("SERPINF1", "serpin family F member 1"),
    ("SLC8A3", "solute carrier family 8 member A3"),
    ("SPARC", "secreted protein acidic and rich in cysteine (osteonectin)"),
    ("SPARCL1", "SPARC-like 1 (hevin)"),
    ("SPP1", "secreted phosphoprotein 1 (osteopontin)"),
    ("SPP2", "secreted phosphoprotein 2"),
    ("TGFB1", "transforming growth factor beta 1"),
    ("THBS1", "thrombospondin 1"),
    ("TNC", "tenascin C"),
    ("TPP1", "tripeptidyl peptidase I"),
    ("TUBA1B", "tubulin alpha 1b"),
    ("VCAN", "versican"),
    ("VIT", "vitrin"),
    ("VTN", "vitronectin")
]

# Genes offered under the "Enamel" tissue, as (gene symbol, description) tuples.
# Some symbols (e.g. ALB, COL1A1) also appear in other tissue lists.
enamel_protein_genes = [
    ("ALB", "serum albumin"),
    ("COL1A2", "collagen type I alpha 2 chain"),
    ("COL1A1", "collagen type I alpha 1 chain"),
    ("COL2A1", "collagen type II alpha 1 chain"),
    ("AMBN", "ameloblastin"),
    ("AMELX", "amelogenin X-linked"),
    ("ENAM", "enamelin"),
    ("AMELY", "amelogenin Y-linked"),
    ("AMTN", "amelotin"),
    ("MMP20", "matrix metallopeptidase 20"),
    ("KLK4", "kallikrein related peptidase 4"),
    ("TUFT1", "tuftelin 1"),
    ("SERPINC1", "serpin family C member 1 (antithrombin)"),
    ("ODAM", "odontogenic, ameloblast associated"),
    ("COL17A1", "collagen type XVII alpha 1 chain")
]

# Genes offered under the "Food Crust" tissue, as (gene symbol, description)
# tuples.
# NOTE(review): the "IGH@"/"IGK@"/"IGL@" symbols are sent verbatim in the
# UniParc "gene:" query — confirm they return hits.
food_crust_protein_genes = [
    ("CSN1S1", "alpha-S1 casein"),
    ("CSN1S2", "alpha-S2 casein"),
    ("CSN2", "beta-casein"),
    ("CSN3", "kappa-casein"),
    ("LALBA", "alpha-lactalbumin"),
    ("LGB", "beta-lactoglobulin"),
    ("IGH@", "immunoglobulin heavy chains"),
    ("IGK@", "immunoglobulin kappa light chains"),
    ("IGL@", "immunoglobulin lambda light chains"),
    ("LTF", "lactoferrin"),
    ("LYZ", "lysozyme"),
    ("ALB", "serum albumin"),
    ("MUC1", "mucin 1"),
    ("BTN1A1", "butyrophilin subfamily 1 member A1"),
    ("XDH", "xanthine dehydrogenase/oxidase"),
    ("MFGE8", "milk fat globule-EGF factor 8 protein")
]

# Genes offered under the "Zooms" entry, as (gene symbol, description) tuples.
# All three symbols also appear in bone_protein_genes.
zooms_genes = [
    ("COL1A2", "collagen type I alpha 2 chain"),
    ("COL1A1", "collagen type I alpha 1 chain"),
    ("COL3A1", "collagen type III alpha 1 chain")
]

# Define a combined dictionary of all genes
# Maps a tissue label to its list of (gene symbol, description) tuples.
# The labels are shown in the UI ("Select all <label> genes") and are used as
# the "<label>_" prefix of the checkbox keys built in create_tissue_selection(),
# so any code that looks a tissue up by name must use these exact spellings.
all_genes = {
    "Bone": bone_protein_genes,
    "Enamel": enamel_protein_genes,
    "Food Crust": food_crust_protein_genes,
    "Zooms": zooms_genes
}

# Define the taxonomy IDs and their names
# Maps a numeric taxon ID to the display name used for the taxonomy buttons
# and for building project/output directory names. The ID is what is sent to
# UniProt ("taxonomy_id:" and "ancestor:" queries) and linked to the NCBI
# Taxonomy Browser.
# NOTE(review): the IDs have not been checked against NCBI here. Some look
# inconsistent with the names next to them — e.g. Struthio camelus is
# presumably 8801 rather than 8839, and 33208 is presumably labelled
# "Metazoa" at NCBI — TODO confirm every entry before relying on the results.
taxonomy_dict = {
    # Carnivora
    33554: "Carnivora", 379584: "Caniformia", 379583: "Feliformia", 9632: "Ursidae", 9615: "Canis lupus familiaris",

    # Lagomorpha
    9975: "Lagomorpha",

    # Humans
    9606: "Homo sapiens", 9605: "Homo", 9604: "Hominidae", 9443: "Primates", 40674: "Mammalia", 7711: "Chordata", 33208: "Animalia",

    # Ostriches
    8839: "Struthio camelus", 8838: "Struthio", 8837: "Struthionidae", 30498: "Struthioniformes", 8782: "Aves",

    # Cod
    8049: "Gadus morhua", 8048: "Gadus", 27502: "Gadidae", 27682: "Gadiformes", 7898: "Actinopterygii",

    # Geckos
    103697: "Gekko gecko", 8489: "Gekko", 8483: "Gekkonidae", 8509: "Squamata", 8457: "Reptilia"
}

# print("all_genes:")
# for tissue, genes in all_genes.items():
#     print(f"  {tissue}: {len(genes)} genes")

# print("\ntaxonomy_dict:")
# for tax_id, tax_name in taxonomy_dict.items():
#     print(f"  {tax_id}: {tax_name}")

In [None]:
#@title ##Functions
import os
import re
import shutil
import pandas as pd
import requests
from requests.adapters import HTTPAdapter, Retry
from datetime import datetime
from Bio import SeqIO
import glob

# Global variables
# Base URL of the UniProt REST API (the same value is also assigned in the
# first code cell). Endpoint paths are appended directly, so the trailing
# slash is required.
WEBSITE_API = "https://rest.uniprot.org/"

def setup_api_session():
    """Create a ``requests`` session that retries transient server errors.

    Returns:
        requests.Session: a session whose ``https://`` adapter retries up to
        five times (backoff factor 0.25) on HTTP 500, 502, 503 and 504.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=0.25,
        status_forcelist=[500, 502, 503, 504],
    )
    https_adapter = HTTPAdapter(max_retries=retry_policy)

    api_session = requests.Session()
    api_session.mount("https://", https_adapter)
    return api_session

def get_next_link(headers):
    """Return the URL of the next result page from a response's ``Link`` header.

    Args:
        headers: mapping of response headers (for example ``response.headers``).

    Returns:
        str | None: the URL tagged ``rel="next"``, or ``None`` when there is no
        ``Link`` header or it carries no "next" relation.
    """
    if "Link" not in headers:
        return None

    # ``search`` with a bracket-bounded group picks out the "next" entry even
    # when the header lists several relations; anchoring at the start with a
    # greedy ``.+`` would swallow every entry that precedes it.
    match = re.search(r'<([^<>]+)>;\s*rel="next"', headers["Link"])
    return match.group(1) if match else None

def get_batch(session, batch_url):
    """Yield every page of a paginated query together with the total hit count.

    Args:
        session: object with a ``get(url)`` method returning a response
            (for example the session from ``setup_api_session``).
        batch_url: URL of the first page; a falsy value yields nothing.

    Yields:
        tuple: ``(response, total)`` where ``total`` is the value of the
        response's ``x-total-results`` header.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a failed request.
    """
    page_url = batch_url
    while True:
        if not page_url:
            return
        page = session.get(page_url)
        page.raise_for_status()
        yield page, page.headers["x-total-results"]
        # Follow the pagination link; a missing link ends the loop.
        page_url = get_next_link(page.headers)

def json_to_fasta(record, tax_id_list, gene_name=None):
    """Build a FASTA header and sequence from one UniParc JSON record.

    The first cross-reference whose organism taxon ID is in ``tax_id_list``
    supplies the header fields.

    Args:
        record: one entry of the ``results`` list of a UniParc JSON response.
        tax_id_list: collection of accepted taxon IDs.
        gene_name: gene symbol to write in the ``GN=`` field; when omitted the
            cross-reference's own ``geneName`` is used if it has one.

    Returns:
        tuple: ``(header, sequence)``. The header starts with ``>`` and has
        every ``,``, ``;`` and ``:`` removed. ``(None, None)`` is returned when
        no cross-reference matches (or the record has none).
    """
    # A record without cross-references simply has no match.
    cross_references = record.get("uniParcCrossReferences", [])

    for database in cross_references:
        organism = database.get("organism")
        if not organism or organism.get("taxonId") not in tax_id_list:
            continue

        header = f">{database['database']}|{record['uniParcId']}|{database['lastUpdated']} "
        if "proteinName" in database:
            header += f"{database['proteinName']} "
        header += f"OS={organism['scientificName']} OX={organism['taxonId']}"
        if gene_name:
            header += f" GN={gene_name}"
        elif "geneName" in database:
            header += f" GN={database['geneName']}"

        # Take the version from the cross-reference that supplied the rest of
        # the header; fall back to the first entry's value, and omit the field
        # rather than fail when neither carries one.
        version = database.get("versionI", cross_references[0].get("versionI"))
        if version is not None:
            header += f" SV={version}"

        sequence = record["sequence"]["value"]
        return header.replace(',', '').replace(';', '').replace(':', ''), sequence

    return None, None

def download_proteins(tax_id, gene_list, output_file):
    """Download UniParc sequences for a taxon into a FASTA file.

    Args:
        tax_id: taxon ID used in the ``taxonomy_id:`` query and to look up the
            accepted descendant taxa.
        gene_list: gene symbols to query one by one; when empty, a single
            query without a gene filter is run instead.
        output_file: path of the FASTA file to (over)write.
    """
    api_session = setup_api_session()
    accepted_tax_ids = get_taxid_descendants(tax_id)

    with open(output_file, "w") as fasta_handle:
        if not gene_list:
            # No gene filter: fetch everything recorded for the taxon.
            url = f"{WEBSITE_API}uniparc/search?compressed=false&format=json&query=((taxonomy_id:{tax_id}))&size=500"
            process_batch(api_session, url, fasta_handle, accepted_tax_ids)
            return

        # One query per gene so each record can be tagged with its symbol.
        for gene in gene_list:
            url = f"{WEBSITE_API}uniparc/search?compressed=false&format=json&query=((gene:{gene}) AND (taxonomy_id:{tax_id}))&size=500"
            process_batch(api_session, url, fasta_handle, accepted_tax_ids, gene)

def process_batch(session, url, file, tax_id_list, gene=None):
    """Download every page of a UniParc query and append the matches as FASTA.

    Args:
        session: session object handed to ``get_batch``.
        url: URL of the first result page.
        file: writable text file object; records are appended to it.
        tax_id_list: iterable of accepted taxon IDs.
        gene: optional gene symbol forwarded to ``json_to_fasta`` for the
            ``GN=`` header field.
    """
    # Membership is tested for every cross-reference of every record, so build
    # a set once instead of scanning a (possibly very long) list each time.
    accepted_tax_ids = frozenset(tax_id_list)

    for batch, _total in get_batch(session, url):
        for protein in batch.json()["results"]:
            header, sequence = json_to_fasta(protein, accepted_tax_ids, gene)
            if header and sequence:
                file.write(f"{header}\n")
                # Wrap the sequence at 60 residues per line.
                for start in range(0, len(sequence), 60):
                    file.write(f"{sequence[start:start+60]}\n")

def get_taxid_descendants(tax_id):
    """Return the taxon IDs accepted for ``tax_id``: itself plus its descendants.

    Args:
        tax_id: taxon ID (int or numeric string) whose lineage is wanted.

    Returns:
        list[int]: IDs returned by UniProt's ``ancestor:`` taxonomy query, with
        ``tax_id`` itself appended when the query did not list it.

    Raises:
        requests.HTTPError: if UniProt answers with an error status.
    """
    url = f"{WEBSITE_API}taxonomy/stream?format=list&query=((ancestor:{tax_id}))"

    # Use the retrying session like the other API calls, and fail loudly on an
    # HTTP error instead of trying to parse the error body as taxon IDs.
    response = setup_api_session().get(url)
    response.raise_for_status()
    tax_id_list = [int(tid) for tid in response.text.split()]

    # The "ancestor:" query may list only the taxa below ``tax_id``; entries
    # annotated directly with ``tax_id`` (e.g. a species with no subspecies in
    # the result) must be accepted by the downstream filter as well.
    root_id = int(tax_id)
    if root_id not in tax_id_list:
        tax_id_list.append(root_id)
    return tax_id_list

def remove_duplicates(input_file, output_folder):
    """Split a FASTA file into first-seen and repeated sequences.

    Records are compared by their sequence text only; the first record with a
    given sequence is kept and every later one is treated as a duplicate.

    Args:
        input_file: path of the FASTA file to read.
        output_folder: folder receiving ``filtered_mf.fasta`` (kept records)
            and ``duplicate_records.fasta`` (repeats).

    Returns:
        int: number of duplicate records.
    """
    kept_records, repeated_records = [], []
    seen_sequences = set()

    with open(input_file, "r") as fasta_handle:
        for entry in SeqIO.parse(fasta_handle, "fasta"):
            residues = str(entry.seq)
            if residues in seen_sequences:
                repeated_records.append(entry)
                continue
            seen_sequences.add(residues)
            kept_records.append(entry)

    SeqIO.write(kept_records, os.path.join(output_folder, "filtered_mf.fasta"), "fasta")
    SeqIO.write(repeated_records, os.path.join(output_folder, "duplicate_records.fasta"), "fasta")

    return len(repeated_records)

import os
import re
import shutil
import pandas as pd
import requests
from requests.adapters import HTTPAdapter, Retry
from datetime import datetime
from Bio import SeqIO
import glob
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# Global variables
# Base URL of the UniProt REST API; re-assigned here with the same value that
# was set earlier in the notebook.
WEBSITE_API = "https://rest.uniprot.org/"

def create_taxonomy_buttons(taxonomy_dict):
    """Build a three-column grid with one button per taxonomy entry.

    Clicking a button records the chosen taxon (module global
    ``selected_taxonomy_id`` plus function attributes on
    ``create_gene_list_file`` and ``run_proteoparc``), creates the output
    directory and redraws the whole UI with the selection shown on top.

    Args:
        taxonomy_dict: mapping of taxon ID to display name.

    Returns:
        widgets.GridBox: the grid holding all taxonomy buttons.
    """

    def _make_click_handler(tax_id, name):
        # A factory gives every button its own tax_id/name pair.
        def _on_click(_clicked):
            global selected_taxonomy_id
            selected_taxonomy_id = tax_id
            output_dir = create_output_directory(taxonomy_dict, tax_id)
            create_gene_list_file.output_dir = output_dir  # read later when saving the gene list
            run_proteoparc.taxonomy_id = tax_id  # read later when running the pipeline
            url = f"https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id={tax_id}"

            clear_output()
            # The remaining widgets are module-level names created by the UI
            # cell; they are looked up when the click happens.
            redraw = [
                HTML("<h2>Selected Taxonomy</h2>"),
                HTML(f"<h3>{name} (ID: {tax_id})</h3>"),
                HTML(f"<a href='{url}' target='_blank'>Open NCBI Taxonomy Browser</a>"),
                HTML(f"<p>Output directory: {output_dir}</p>"),
                HTML("<h2>Select Taxonomy</h2>"),
                button_grid,
                HTML("<h2>Gene Selection by Tissue</h2>"),
                widgets.HBox([select_all_button, deselect_all_button]),
                tissue_selection,
                widgets.HBox([print_button, save_button, run_proteoparc_button]),
            ]
            for element in redraw:
                display(element)

        return _on_click

    taxon_buttons = []
    for tax_id, name in taxonomy_dict.items():
        taxon_button = widgets.Button(
            description=name,
            tooltip=f"{name} (ID: {tax_id})",
            layout=widgets.Layout(width='auto', height='auto'),
            style={'button_color': '#e7e7e7', 'font_weight': 'normal'}
        )
        taxon_button.on_click(_make_click_handler(tax_id, name))
        taxon_buttons.append(taxon_button)

    grid_layout = widgets.Layout(
        grid_template_columns='repeat(3, auto)',
        grid_gap='10px',
        width='100%'
    )
    button_grid = widgets.GridBox(taxon_buttons, layout=grid_layout)

    return button_grid

def create_gene_checkboxes(gene_list):
    """Create one unticked checkbox per gene.

    Args:
        gene_list: iterable of ``(gene symbol, description)`` tuples.

    Returns:
        list: checkboxes labelled ``"<symbol> (<description>)"``, in the same
        order as ``gene_list``.
    """
    gene_boxes = []
    for gene, desc in gene_list:
        gene_boxes.append(
            widgets.Checkbox(
                value=False,
                description=f"{gene} ({desc})",
                indent=False,
                layout=widgets.Layout(width='auto')
            )
        )
    return gene_boxes

def _link_tissue_to_genes(tissue_checkbox, checkboxes):
    """Keep one "select all" checkbox and its gene checkboxes in sync.

    Doing the wiring in its own function gives every tissue its own
    ``tissue_checkbox``/handler pair. Handlers defined directly inside the
    loop over tissues would all see the variables of the *last* iteration.

    Args:
        tissue_checkbox: the "select all" checkbox of one tissue.
        checkboxes: the gene checkboxes belonging to that tissue.
    """
    def on_tissue_change(change):
        # Ticking/unticking the tissue box propagates to all of its genes.
        if change['name'] == 'value':
            for checkbox in checkboxes:
                checkbox.value = change['new']

    def on_gene_change(change):
        # Detach the tissue handler while updating the tissue box so that the
        # update does not cascade back onto the gene checkboxes.
        tissue_checkbox.unobserve(on_tissue_change)
        try:
            tissue_checkbox.value = all(checkbox.value for checkbox in checkboxes)
        finally:
            tissue_checkbox.observe(on_tissue_change)

    tissue_checkbox.observe(on_tissue_change)
    for checkbox in checkboxes:
        checkbox.observe(on_gene_change)


def create_tissue_selection(all_genes):
    """Build the per-tissue gene selection widgets.

    For every tissue a "select all" checkbox is placed next to a collapsed
    accordion holding one checkbox per gene in a three-column grid.

    Args:
        all_genes: mapping of tissue label to a list of
            ``(gene symbol, description)`` tuples.

    Returns:
        tuple: ``(tissue_vbox, gene_checkboxes)`` — the widget to display and
        a dict mapping ``"<tissue>_<gene symbol>"`` to each gene checkbox.
    """
    tissue_checkboxes = {}
    gene_checkboxes = {}
    tissue_accordions = {}

    for tissue, genes in all_genes.items():
        tissue_checkbox = widgets.Checkbox(
            value=False,
            description=f"Select all {tissue} genes",
            indent=False,
            layout=widgets.Layout(width='auto')
        )
        tissue_checkboxes[tissue] = tissue_checkbox

        checkboxes = create_gene_checkboxes(genes)
        gene_checkboxes.update({f"{tissue}_{gene}": checkbox for (gene, _), checkbox in zip(genes, checkboxes)})

        grid = widgets.GridBox(
            checkboxes,
            layout=widgets.Layout(
                grid_template_columns='repeat(3, minmax(200px, 1fr))',
                grid_gap='10px',
                width='100%'
            )
        )

        accordion = widgets.Accordion([grid], titles=[f"{tissue} Genes"])
        accordion.selected_index = None  # start collapsed
        tissue_accordions[tissue] = accordion

        _link_tissue_to_genes(tissue_checkbox, checkboxes)

    tissue_vbox = widgets.VBox([
        widgets.HBox([tissue_checkboxes[tissue], tissue_accordions[tissue]])
        for tissue in all_genes.keys()
    ])

    return tissue_vbox, gene_checkboxes

def select_all(b):
    """Tick every gene checkbox (button click handler; ``b`` is unused)."""
    for key in all_gene_checkboxes:
        all_gene_checkboxes[key].value = True

def deselect_all(b):
    """Untick every gene checkbox (button click handler; ``b`` is unused)."""
    for key in all_gene_checkboxes:
        all_gene_checkboxes[key].value = False

def get_selected_genes():
    """Return the symbols of all ticked genes, each listed once.

    Reads the module-level ``all_gene_checkboxes`` dict, whose keys have the
    form ``"<tissue>_<gene symbol>"``.

    Returns:
        list[str]: gene symbols in the order their checkboxes appear. A gene
        ticked under several tissues (e.g. ALB) is reported only once, so it
        is not downloaded and written repeatedly.
    """
    selected = {}  # dict used as an insertion-ordered set
    for key, checkbox in all_gene_checkboxes.items():
        if checkbox.value:
            # Split on the last underscore so a tissue label that itself
            # contains an underscore does not truncate the gene symbol.
            selected[key.rsplit('_', 1)[-1]] = None
    return list(selected)

def create_gene_list_file(b, custom_path=None):
    """Write the currently selected gene symbols to a text file, one per line.

    Used both as the "Save Gene List" click handler and as a helper.

    Args:
        b: the clicked button (unused).
        custom_path: file to write; when omitted the file is ``genelist.txt``
            inside ``create_gene_list_file.output_dir``, the attribute set
            when a taxonomy button is clicked.

    Returns:
        str | None: path of the written file, or ``None`` (after printing the
        reason) when no gene is selected or no destination is known yet.
    """
    selected_genes = get_selected_genes()
    if not selected_genes:
        print("No genes selected. Please select at least one gene.")
        return None

    if custom_path:
        file_path = custom_path
    else:
        # The attribute only exists once a taxonomy has been chosen; report
        # that instead of failing with an AttributeError.
        output_dir = getattr(create_gene_list_file, "output_dir", None)
        if output_dir is None:
            print("Please select a taxonomy first.")
            return None
        file_path = os.path.join(output_dir, "genelist.txt")

    with open(file_path, 'w') as f:
        f.writelines(f"{gene}\n" for gene in selected_genes)
    print(f"Gene list has been saved to {file_path}")
    return file_path

def create_output_directory(taxonomy_dict, taxonomy_id,
                            base_dir="/content/drive/MyDrive/Colab Notebooks/proteoparc"):
    """Create (if needed) and return the project directory for a taxon.

    The gene type is not known when a taxonomy is picked, so the project name
    is built with the placeholder gene type ``"Unknown"``.

    Args:
        taxonomy_dict: mapping of taxon ID to display name.
        taxonomy_id: the selected taxon ID.
        base_dir: folder under which the project directory is created;
            defaults to the notebook's Google Drive location.

    Returns:
        str: path of the existing or newly created directory.
    """
    project_name = get_project_name(taxonomy_id, taxonomy_dict, "Unknown")
    output_dir = f"{base_dir}/{project_name}"
    os.makedirs(output_dir, exist_ok=True)
    return output_dir

def get_project_name(taxonomy_id, taxonomy_dict, gene_type):
    """Build the project folder name ``<taxon>_<gene type>_genes``.

    Args:
        taxonomy_id: taxon ID to look up.
        taxonomy_dict: mapping of taxon ID to display name; an unknown ID is
            named ``"Unknown"``.
        gene_type: label describing the gene selection.

    Returns:
        str: lower-cased name with the spaces of the taxon name replaced by
        underscores.
    """
    taxon_label = taxonomy_dict.get(taxonomy_id, "Unknown")
    taxon_slug = "_".join(taxon_label.lower().split(" "))
    return "{}_{}_genes".format(taxon_slug, gene_type.lower())

def run_proteoparc(b):
    """Click handler: run the pipeline for the chosen taxonomy and genes.

    Steps: save the selected genes to ``genelist.txt`` in the directory set
    when the taxonomy was picked, run ``run_integrated_proteoparc`` into
    ``/content/proteoparc/<project>``, list the FASTA files produced, report
    per gene whether any record was retrieved, and copy the results to the
    Google Drive project folder.

    Args:
        b: the clicked button (unused).
    """
    if not hasattr(create_gene_list_file, 'output_dir') or not hasattr(run_proteoparc, 'taxonomy_id'):
        print("Please select a taxonomy first.")
        return

    selected_genes = get_selected_genes()
    gene_list_path = os.path.join(create_gene_list_file.output_dir, "genelist.txt")

    # create_gene_list_file returns None (after printing why) when nothing is
    # selected. Stop here so a genelist.txt left over from an earlier run is
    # not silently reused.
    if create_gene_list_file(None, gene_list_path) is None:
        return

    if not os.path.exists(gene_list_path):
        print(f"Error: Gene list file not created at {gene_list_path}")
        return

    print("Gene list contents:", selected_genes)

    # Tissue labels are matched case-insensitively and only the gene symbols
    # (first tuple element) are compared with the selection.
    # NOTE(review): any selected ZooMS marker labels the project "ZooMS" —
    # confirm whether it should require that *only* ZooMS markers are selected.
    zooms_symbols = {
        symbol
        for tissue, genes in all_genes.items() if tissue.lower() == "zooms"
        for symbol, _description in genes
    }
    gene_type = "ZooMS" if zooms_symbols.intersection(selected_genes) else "mixed"

    project_name = get_project_name(run_proteoparc.taxonomy_id, taxonomy_dict, gene_type)
    output_dir = f"/content/proteoparc/{project_name}"
    os.makedirs(output_dir, exist_ok=True)

    print("Running ProteoParc pipeline...")

    run_integrated_proteoparc(run_proteoparc.taxonomy_id, gene_list_path, output_dir)

    fasta_files = []
    if os.path.exists(output_dir):
        print(f"Output directory: {output_dir}")
        fasta_files = glob.glob(os.path.join(output_dir, "**", "*.fasta"), recursive=True)
        if fasta_files:
            print(f"Found {len(fasta_files)} FASTA file(s):")
            for fasta_file in fasta_files:
                print(f"  - {os.path.basename(fasta_file)}")
        else:
            print("No FASTA files found in the output directory.")
    else:
        print(f"Output directory not found: {output_dir}")

    # Read every file once and collect the gene symbols from the "GN=" field
    # of the header lines. Searching the whole file text for the symbol would
    # also hit sequence residues (e.g. "VIT" inside a protein sequence).
    genes_in_output = set()
    for fasta_file in fasta_files:
        with open(fasta_file) as handle:
            for line in handle:
                if line.startswith(">"):
                    genes_in_output.update(re.findall(r"GN=(\S+)", line))

    print("\nSummary of gene search results:")
    for gene in selected_genes:
        found = gene in genes_in_output
        print(f"  - {gene}: {'Found' if found else 'Not found'}")

    colab_output_dir = f"/content/drive/MyDrive/Colab Notebooks/proteoparc/{project_name}"
    if os.path.exists(output_dir):
        shutil.copytree(output_dir, colab_output_dir, dirs_exist_ok=True)
        print(f"\nResults copied to Colab environment: {colab_output_dir}")
    else:
        print("\nNo results to copy to Colab environment")

def run_integrated_proteoparc(tax_id, gene_list_path, output_folder):
    """Run the download, de-duplication and metadata steps for one taxon.

    Args:
        tax_id: taxon ID passed to ``download_proteins``.
        gene_list_path: text file with one gene symbol per line; a falsy value
            means "no gene filter".
        output_folder: folder receiving ``raw_proteins.fasta``,
            ``filtered_mf.fasta``, ``duplicate_records.fasta`` and the
            ``metadata`` sub-folder path handed to ``generate_metadata``.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Step 1: Download proteins
    raw_fasta = os.path.join(output_folder, "raw_proteins.fasta")
    gene_list = []
    if gene_list_path:
        with open(gene_list_path, "r") as f:
            # Skip blank lines: an empty symbol would be sent to the API as
            # the malformed query "(gene:)".
            gene_list = [line.strip() for line in f if line.strip()]
    download_proteins(tax_id, gene_list, raw_fasta)

    # Step 2: Remove duplicates
    duplicates_removed = remove_duplicates(raw_fasta, output_folder)
    filtered_fasta = os.path.join(output_folder, "filtered_mf.fasta")

    # Step 3: Generate metadata
    # NOTE(review): generate_metadata is not defined in the part of the
    # notebook reviewed here — confirm another cell defines it before this runs.
    metadata_folder = os.path.join(output_folder, "metadata")
    generate_metadata(filtered_fasta, metadata_folder, gene_list_path)

    print(f"ProteoParc pipeline completed. Output folder: {output_folder}")
    print(f"Duplicates removed: {duplicates_removed}")


In [None]:
#@title ##Select genes and run the analysis

# Global variables for UI
# Placeholder so the name exists at module level; it is replaced just below by
# the dict returned from create_tissue_selection(). select_all(),
# deselect_all() and get_selected_genes() read this global.
all_gene_checkboxes = {}

# Create UI elements
# tissue_selection is the widget tree to display; all_gene_checkboxes maps
# "<tissue>_<gene symbol>" keys to the individual gene checkboxes.
tissue_selection, all_gene_checkboxes = create_tissue_selection(all_genes)
select_all_button = widgets.Button(description="Select All Genes")
deselect_all_button = widgets.Button(description="Deselect All Genes")
print_button = widgets.Button(description="Print Selected Genes")
save_button = widgets.Button(description="Save Gene List")
run_proteoparc_button = widgets.Button(description="Run ProteoParc")

# Attach functions to buttons
# Each handler receives the clicked button as its only argument.
select_all_button.on_click(select_all)
deselect_all_button.on_click(deselect_all)
print_button.on_click(lambda b: print("Selected genes:", get_selected_genes()))
save_button.on_click(create_gene_list_file)
run_proteoparc_button.on_click(run_proteoparc)


# Create the button grid
# The click handlers built here redraw the UI using the module-level widgets
# defined above, so this must run after those names exist.
button_grid = create_taxonomy_buttons(taxonomy_dict)

# Initial display
# Same layout that the taxonomy click handler redraws, minus the
# "Selected Taxonomy" header block.
display(HTML("<h2>Select Taxonomy</h2>"))
display(button_grid)
display(HTML("<h2>Gene Selection by Tissue</h2>"))
display(widgets.HBox([select_all_button, deselect_all_button]))
display(tissue_selection)
display(widgets.HBox([print_button, save_button, run_proteoparc_button]))