# GTEx Aging Signatures
This notebook contains the processing scripts for the GTEx Tissue-Specific Aging Signatures dataset. A gene count matrix was downloaded from the [GTEx Data Portal](https://gtexportal.org/home/downloads/adult-gtex). The limma-voom algorithm was then applied to generate aging signatures for each tissue. The final edgelist contains 67,500 associations between 135 aging signatures and 16,047 genes.

In [None]:
import pandas as pd
import datetime
import math
import numpy as np
import os
import scipy.spatial.distance as dist
import seaborn as sns
import sys
import json
import scanpy as sc
from tqdm import tqdm

# UMAP
from sklearn.feature_extraction.text import TfidfVectorizer
import anndata
from collections import OrderedDict

# Bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, save, output_file
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.palettes import Category20
output_notebook()

from IPython.display import display, HTML, Markdown
sys.setrecursionlimit(100000)

In [None]:
# Load the GTEx v8 gene read-count table. The file is tab-delimited and the
# first two lines are skipped, so the third line supplies the column header.
gct_path = '../GTExTissue/bulk-gex_v8_rna-seq_GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_reads.gct'
gtexagingsigs = pd.read_csv(gct_path, skiprows=2, sep='\t')
gtexagingsigs

In [None]:
# Build an Ensembl-ID -> gene-symbol lookup from the gene_info table,
# restricted to rows whose '#tax_id' is 9606.
gene_info = pd.read_csv('../../tables/gene_info', sep='\t')
is_human = gene_info['#tax_id'] == 9606
gene_info = gene_info.loc[is_human, ['Symbol', 'dbXrefs']]
# Keep whatever follows the first "Ensembl:" tag; rows without one become NaN.
gene_info['ensembl'] = [
    xrefs.split('Ensembl:')[1] if 'Ensembl' in xrefs else np.nan
    for xrefs in gene_info['dbXrefs']
]
gene_info = gene_info.dropna(subset=['ensembl']).set_index('ensembl')[['Symbol']]
# Anything after the first '|' belongs to a later cross-reference; drop it.
gene_info.index = gene_info.index.map(lambda xref: xref.split('|')[0])
gene_info

In [None]:
# Keep the versioned gene ids whose unversioned part (text before the first
# '.') is present in the Ensembl -> symbol lookup.
to_keep = [
    g for g in gtexagingsigs['Name']
    if g.split('.')[0] in gene_info.index
]

len(to_keep)

In [None]:
# Index the count table by gene id, drop the free-text description column and
# keep only the genes selected in `to_keep`.
gtexagingsigs = gtexagingsigs.set_index('Name').drop(columns=['Description'])
# Select rows by label directly; transposing the whole matrix twice just to
# use column selection copies the full table for no benefit.
gtexagingsigs = gtexagingsigs.loc[to_keep]
gtexagingsigs

In [None]:
# Per-gene variance across all samples, together with the unversioned gene id
# (text before the first '.') used to group duplicate versions.
var_df = pd.DataFrame({'Var': gtexagingsigs.var(axis=1)})
var_df['Ens'] = [versioned.split('.')[0] for versioned in var_df.index]
var_df

In [None]:
# For every unversioned gene id keep the row with the largest variance:
# after an ascending sort on ('Ens', 'Var') the last row of each 'Ens' group
# is the most variable one.
keep = var_df.sort_values(by=['Ens', 'Var'], ascending=True).drop_duplicates(subset=['Ens'], keep='last').index
# Select rows by label directly instead of transposing the matrix twice.
gtexagingsigs = gtexagingsigs.loc[keep]
gtexagingsigs

In [None]:
# Keep the first symbol recorded for each Ensembl id, then relabel the count
# table rows with that symbol (looked up by the unversioned gene id).
gene_info = gene_info[~gene_info.index.duplicated(keep='first')]
symbol_by_ensembl = gene_info['Symbol'].to_dict()
gtexagingsigs.index = gtexagingsigs.index.map(
    lambda versioned: symbol_by_ensembl[versioned.split('.')[0]]
)
gtexagingsigs

In [None]:
# Sample annotations: keep the tissue column (SMTS) and derive the subject id
# from the first two '-'-separated fields of the sample id.
sample_meta = pd.read_csv('../GTExTissue/GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt', sep='\t')
sample_meta['sub'] = ['-'.join(sampid.split('-')[:2]) for sampid in sample_meta['SAMPID']]
sample_meta = sample_meta.set_index('SAMPID')[['SMTS', 'sub']]
sample_meta

In [None]:
# Attach each sample's subject-level AGE and SEX, then keep only the samples
# that have a column in the count table.
meta = pd.read_csv('GTEx_Analysis_v8_Annotations_SubjectPhenotypesDS.txt', sep='\t')
meta = meta.set_index('SUBJID')

# Plain dict lookups raise KeyError for an unknown subject, like .loc would.
age_by_subject = meta['AGE'].to_dict()
sex_by_subject = meta['SEX'].to_dict()
sample_meta['age'] = [age_by_subject[subject] for subject in sample_meta['sub']]
sample_meta['sex'] = [sex_by_subject[subject] for subject in sample_meta['sub']]
sample_meta = sample_meta[sample_meta.index.isin(gtexagingsigs.columns)]
sample_meta

In [None]:
from maayanlab_bioinformatics.dge import limma_voom_differential_expression
from maayanlab_bioinformatics.normalization.filter import filter_by_expr

In [None]:
# For every tissue, compare the '20-29' age group against each other age group
# with limma-voom and write one result table per comparison. Both groups are
# down-sampled (random_state=1) to the size of the smaller one, and a group
# needs at least 3 samples to be used.
comparisons = {}
for tissue in sample_meta['SMTS'].unique():
    sub_meta = sample_meta[sample_meta['SMTS'] == tissue]
    data_df = filter_by_expr(gtexagingsigs[sub_meta.index.tolist()])

    young = sub_meta[sub_meta['age'] == '20-29']
    if young.shape[0] < 3:
        print(tissue, "not enough healthy samples")
        continue

    for agegrp in sub_meta['age'].unique():
        if agegrp == '20-29':
            continue
        older = sub_meta[sub_meta['age'] == agegrp]
        if older.shape[0] < 3:
            continue

        min_samp = min(young.shape[0], older.shape[0])
        ctl_ids = young.sample(n=min_samp, random_state=1).index.tolist()
        pert_ids = older.sample(n=min_samp, random_state=1).index.tolist()

        signature = limma_voom_differential_expression(data_df[ctl_ids], data_df[pert_ids])
        signature.sort_index().to_csv(
            f"GTEx_AgeComparison_Tissue_unfiltered/GTEx_{tissue.replace(' ', '')}_20-29_vs_{agegrp}.tsv",
            sep='\t',
        )
        comparisons[f"GTEx_{tissue.replace(' ', '')}_20-29_vs_{agegrp}"] = {
            'controls': ctl_ids,
            'cases': pert_ids,
        }

In [None]:
# Collect, for every signature table in `sigsdir`, the 250 genes with the
# smallest adjusted p-value among positive-logFC genes (threshold 1) and among
# negative-logFC genes (threshold -1).
sig_columns = ['gene_symbol', 'adj.P.Val', 'Aging Signature', 'threshold']
sigsdir = 'GTEx_AgeComparison_Tissue_filtered_all/'
sig_frames = []
# os.listdir returns entries in arbitrary order; sorting makes the ids that
# later cells assign per signature reproducible between runs.
for sig in sorted(os.listdir(sigsdir)):
    # Skip stray entries (hidden files, checkpoints) that are not result tables.
    if not sig.endswith('.tsv'):
        continue
    sigframe = pd.read_csv(sigsdir+sig, sep='\t', index_col='gene_symbol')
    sig = sig.replace('.tsv', '')
    for threshold, selected in ((1, sigframe['logFC'] > 0), (-1, sigframe['logFC'] < 0)):
        top = sigframe.loc[selected, 'adj.P.Val'].sort_values()[:250].reset_index()
        top['Aging Signature'] = sig.replace('_', ' ')
        top['threshold'] = threshold
        sig_frames.append(top)
# Concatenate once instead of growing a frame inside the loop; fall back to an
# empty frame with the expected columns when no table was found.
if sig_frames:
    gtexagingsigs = pd.concat(sig_frames)[sig_columns]
else:
    gtexagingsigs = pd.DataFrame(columns=sig_columns)
gtexagingsigs

In [None]:
# (number of distinct genes, number of distinct signatures, number of rows)
(
    len(pd.unique(gtexagingsigs['gene_symbol'])),
    len(pd.unique(gtexagingsigs['Aging Signature'])),
    gtexagingsigs.shape[0],
)

## Process Data for SQL

### Dataset

In [None]:
#(id, name, name_without_resource, description, association, gene_set_description, gene_sets_description, attribute_set_description, positive_association, negative_association, is_signed, is_continuous_valued, last_updated, directory, num_page_views, resource_fk, measurement_fk, dataset_group_fk, attribute_type_fk, attribute_group_fk, evidence_type, evidence_group, measurement_bias, attribute_type_plural, is_archived)
# Dataset row, in the field order listed above. Two typos in the emitted text
# were corrected: "createed" -> "created" and "high throughout" -> "high throughput".
(
    152,
    'GTEx Tissue-Specific Aging Signatures',
    'Tissue-Specific Aging Signatures',
    'tissue-specific aging signatures created from GTEx RNA-seq gene expression profiles',
    'gene-tissue sample associations by differential expression of gene across tissue samples',
    'genes with high or low expression in {0} relative to other tissue samples from the GTEx Tissue-Specific Aging Signatures dataset.',
    'sets of genes with high or low expression in each tissue sample relative to other tissue samples from the GTEx Tissue-Specific Aging Signatures dataset.',
    'tissue samples with high or low expression of {0} gene relative to other tissue samples from the GTEx Tissue-Specific Aging Signatures dataset.',
    'increased expression',
    'decreased expression',
    1,
    1,
    '2023-11-14',
    'gtexagingsignatures',
    0,
    29,
    16,
    7,
    5,
    1,
    'gene expression by RNA-seq',
    'primary experimental data',
    'high throughput, data driven',
    'tissue samples',
    0,
)

### Publication

### Genes

In [None]:
# Gene id and description for rows with '#tax_id' 9606, indexed by the
# upper-cased symbol.
geneinfo = pd.read_csv('../../tables/gene_info', sep='\t')
geneinfo['Symbol'] = geneinfo['Symbol'].map(str.upper)
tax_9606 = geneinfo['#tax_id'] == 9606
geneinfo = geneinfo.loc[tax_9606].set_index('Symbol')[['GeneID', 'description']]

In [None]:
# Print one tuple per gene that is not yet in the gene table, assigning
# consecutive ids starting at `index`, and record every id in `genefks`
# (keyed by upper-cased symbol).
index = 57851

genes = pd.read_csv('../../tables/gene.csv')
genes['symbol'] = genes['symbol'].apply(str.upper)
geneslist = genes['symbol'].tolist()
genefks = genes.set_index('symbol')['id'].to_dict()
# Membership tests against a list scan it on every iteration; a set makes
# each test constant-time.
known_symbols = set(geneslist)
for gene in gtexagingsigs['gene_symbol'].apply(str.upper).unique():
    if gene in known_symbols:
        continue
    gene_id = geneinfo.loc[gene, 'GeneID']
    print((index, gene, gene_id, geneinfo.loc[gene, 'description'], 'https://ncbi.nlm.nih.gov/gene/'+str(gene_id)), end=',\n')
    genefks[gene] = index
    index += 1

### Attributes

In [None]:
# Print one attribute tuple per aging signature with consecutive ids starting
# at 392612, and remember the id of each signature in `attributefks`.
attributefks = {}
index = 392612

for agingsig in pd.unique(gtexagingsigs['Aging Signature']):
    attribute_row = (index, agingsig, 'aging signature described by "GTEx [Tissue] [Background Age] vs [Sample Age]"', 53)
    print(attribute_row, end=',\n')
    attributefks[agingsig] = index
    index = index + 1

### Gene Sets

In [None]:
# Print one gene-set tuple per aging signature with consecutive ids starting
# at 135300000, and remember the id of each signature in `genesetfks`.
# NOTE(review): rows end with '\n' here, whereas the gene and attribute cells
# end rows with ',\n' -- confirm which form the consumer expects.
genesetfks = {}
index = 135300000

for agingsig in pd.unique(gtexagingsigs['Aging Signature']):
    geneset_row = (index, agingsig, 'aging signature described by "GTEx [Tissue] [Background Age] vs [Sample Age]"', 152, 5, attributefks[agingsig])
    print(geneset_row, end='\n')
    genesetfks[agingsig] = index
    index = index + 1

### Associations

In [None]:
# Build the association table: foreign keys for gene and gene set, a signed
# -log(adjusted p-value) as standardized value, and the +/-1 threshold.
associations = gtexagingsigs.get(['gene_symbol', 'Aging Signature', 'adj.P.Val', 'threshold']).reset_index(drop=True)
associations.columns = ['gene_fk', 'gene_set_fk', 'standardized_value', 'threshold_value']
# `genefks` is keyed by upper-cased symbols, so upper-case before the lookup;
# otherwise symbols containing lower-case letters silently map to NaN.
associations['gene_fk'] = associations['gene_fk'].apply(str.upper).map(genefks)
associations['gene_set_fk'] = associations['gene_set_fk'].map(genesetfks)
associations['standardized_value'] = associations['standardized_value'].apply(np.log)*-1*associations['threshold_value']
# Offset the row ids written to the CSV.
associations.index += 39000000
associations.to_csv('../../harmonizome-update/gtexagingsignatures.csv')
associations

## Create Downloads

In [None]:
# Reshape the signature table for the download files: rename the gene and
# p-value columns and replace the adjusted p-value by -log(p) * threshold.
output_path = 'downloads/'
gtexagingsigs = gtexagingsigs.get(['gene_symbol', 'Aging Signature', 'adj.P.Val', 'threshold'])
gtexagingsigs = gtexagingsigs.rename(columns={'gene_symbol': 'Gene', 'adj.P.Val': '-logP'})
log_p = gtexagingsigs['-logP'].apply(np.log)
gtexagingsigs['-logP'] = log_p*-1*gtexagingsigs['threshold']
gtexagingsigs = gtexagingsigs.reset_index(drop=True)
gtexagingsigs

In [None]:
# Display the reshaped signature table.
gtexagingsigs

### Gene Attribute Ternary Matrix

In [None]:
# Gene x signature matrix holding the threshold (+1 / -1) of each association
# and 0 where a gene is not part of a signature.
# Pass the aggregation by name: handing pandas the builtin `max` is deprecated
# and will stop being translated to the groupby max.
ternarymatrix = pd.crosstab(
    gtexagingsigs['Gene'],
    gtexagingsigs['Aging Signature'],
    gtexagingsigs['threshold'],
    aggfunc='max',
).fillna(0)
ternarymatrixT = ternarymatrix.T
ternarymatrix.to_csv(output_path+'gene_attribute_matrix.txt.gz', sep='\t', compression='gzip')
ternarymatrix

### Gene Attribute Edge List

In [None]:
# Edge list for download: one row per gene/signature association with the
# gene id, the signed -log p-value and the +/-1 threshold.
geneids = geneinfo['GeneID'].to_dict()
edgelist = gtexagingsigs.copy()
# `geneinfo` is indexed by upper-cased symbols, so upper-case the lookup key;
# a raw symbol containing lower-case letters would raise KeyError.
edgelist['Gene ID'] = edgelist['Gene'].apply(lambda x: geneids[x.upper()])
edgelist = edgelist.get(['Gene', 'Gene ID', 'Aging Signature', '-logP', 'threshold'])
edgelist.columns = ['Gene', 'Gene ID', 'Aging Signature', 'Standardized Value', 'Threshold Value']
edgelist.to_csv(output_path+'gene_attribute_edges.txt.gz', sep='\t', compression='gzip')
edgelist

### Gene List

In [None]:
# Distinct (gene, gene id) pairs, renumbered from 0, written as the gene list.
geneslist = edgelist[['Gene', 'Gene ID']].drop_duplicates(ignore_index=True)
geneslist.to_csv(output_path+'gene_list_terms.txt.gz', sep='\t', compression='gzip')
geneslist

### Attribute List

In [None]:
# Distinct aging signatures, renumbered from 0, written as the attribute list.
attributeslist = edgelist[['Aging Signature']].drop_duplicates(ignore_index=True)
attributeslist.to_csv(output_path+'attribute_list_entries.txt.gz', sep='\t', compression='gzip')
attributeslist

### Up Gene Set Library

In [None]:
# One GMT line per aging signature: the signature name followed by the genes
# whose ternary value is 1. Signatures with fewer than 5 such genes are skipped.
with open(output_path+'gene_set_library_up_crisp.gmt', 'w') as f:
    arr = ternarymatrix.reset_index(drop=True).to_numpy(dtype=np.int_)
    attributes = ternarymatrix.columns

    n_rows, n_cols = arr.shape
    for i in tqdm(range(n_cols)):
        members = ternarymatrix.index[arr[:, i] == 1]
        if len(members) < 5:
            continue
        f.write('\t'.join(map(str, [attributes[i], *members])) + '\n')

### Down Gene Set Library

In [None]:
# One GMT line per aging signature: the signature name followed by the genes
# whose ternary value is -1. Signatures with fewer than 5 such genes are skipped.
with open(output_path+'gene_set_library_dn_crisp.gmt', 'w') as f:
    arr = ternarymatrix.reset_index(drop=True).to_numpy(dtype=np.int_)
    attributes = ternarymatrix.columns

    n_rows, n_cols = arr.shape
    for i in tqdm(range(n_cols)):
        members = ternarymatrix.index[arr[:, i] == -1]
        if len(members) < 5:
            continue
        f.write('\t'.join(map(str, [attributes[i], *members])) + '\n')

### Up Attribute Set Library

In [None]:
# One GMT line per gene: the gene followed by the aging signatures in which
# its ternary value is 1. Genes with fewer than 5 such signatures are skipped.
with open(output_path+'attribute_set_library_up_crisp.gmt', 'w') as f:
    arr = ternarymatrixT.reset_index(drop=True).to_numpy(dtype=np.int_)
    genes = ternarymatrixT.columns

    n_rows, n_cols = arr.shape
    for i in tqdm(range(n_cols)):
        members = ternarymatrixT.index[arr[:, i] == 1]
        if len(members) < 5:
            continue
        f.write('\t'.join(map(str, [genes[i], *members])) + '\n')

### Down Attribute Set Library

In [None]:
# One GMT line per gene: the gene followed by the aging signatures in which
# its ternary value is -1. Genes with fewer than 5 such signatures are skipped.
with open(output_path+'attribute_set_library_dn_crisp.gmt', 'w') as f:
    arr = ternarymatrixT.reset_index(drop=True).to_numpy(dtype=np.int_)
    genes = ternarymatrixT.columns

    n_rows, n_cols = arr.shape
    for i in tqdm(range(n_cols)):
        members = ternarymatrixT.index[arr[:, i] == -1]
        if len(members) < 5:
            continue
        f.write('\t'.join(map(str, [genes[i], *members])) + '\n')

### Gene Similarity Matrix

In [None]:
# Pairwise cosine similarity between genes (rows of the ternary matrix),
# computed as 1 - cosine distance and written as a labelled square table.
gene_cosine_distance = dist.squareform(
    dist.pdist(ternarymatrix.to_numpy(dtype=np.int_), 'cosine')
)
gene_similarity_matrix = 1 - gene_cosine_distance

gene_similarity_matrix = pd.DataFrame(data=gene_similarity_matrix, index=ternarymatrix.index, columns=ternarymatrix.index)
# Clear the axis names on the new frame.
gene_similarity_matrix.index.name = None
gene_similarity_matrix.columns.name = None
gene_similarity_matrix.to_csv(output_path+'gene_similarity_matrix_cosine.txt.gz', sep='\t', compression='gzip')
gene_similarity_matrix

### Attribute Similarity Matrix

In [None]:
# Pairwise cosine similarity between aging signatures (rows of the transposed
# ternary matrix), computed as 1 - cosine distance.
attribute_cosine_distance = dist.squareform(
    dist.pdist(ternarymatrixT.to_numpy(dtype=np.int_), 'cosine')
)
attribute_similarity_matrix = 1 - attribute_cosine_distance

attribute_similarity_matrix = pd.DataFrame(data=attribute_similarity_matrix, index=ternarymatrixT.index, columns=ternarymatrixT.index)
# Clear the axis names on the new frame.
attribute_similarity_matrix.index.name = None
attribute_similarity_matrix.columns.name = None
attribute_similarity_matrix.to_csv(output_path+'attribute_similarity_matrix_cosine.txt.gz', sep='\t', compression='gzip')
attribute_similarity_matrix

### Gene Attribute Standardized Matrix

In [None]:
# Gene x signature matrix of the signed -log p-values, with 0 where a gene is
# not part of a signature.
# Pass the aggregation by name: handing pandas the builtin `max` is deprecated
# and will stop being translated to the groupby max.
standardizedmatrix = pd.crosstab(
    gtexagingsigs['Gene'],
    gtexagingsigs['Aging Signature'],
    gtexagingsigs['-logP'],
    aggfunc='max',
).fillna(0)
standardizedmatrix.to_csv(output_path+'gene_attribute_matrix_standardized.txt.gz', sep='\t', compression='gzip')
standardizedmatrix

### Knowledge Graph Serialization

In [None]:
# Build the knowledge-graph records used by the serializers below:
#   nodes: dict keyed by gene id (int) or aging-signature name (str)
#   edges: list of gene -> aging-signature edges, one per edge-list row
nodes = {}
edges = []

# One node per gene, keyed by its integer gene id.
for _, gene in geneslist.iterrows():
    gene_id = int(gene['Gene ID'])
    nodes[gene_id] = {
        "type":"gene",
        "properties": {
            "id":gene_id,
            "label":gene['Gene']
        }}

# One node per aging signature, keyed by its name
# (the node type previously carried a typo: "aging signatureg").
for agingsig in attributeslist['Aging Signature']:
    nodes[agingsig] = {
        "type":"aging signature",
        "properties": {
            "label":agingsig,
            "id":agingsig
        }}

# One edge per association. A threshold of 1 is an "over-expressed in" edge;
# any other value is treated as "under-expressed in" with threshold -1.
for _, edge in edgelist.iterrows():
    is_up = edge['Threshold Value']==1
    edges.append({
        "source": int(edge['Gene ID']),
        "relation": "over-expressed in" if is_up else "under-expressed in",
        "target": edge['Aging Signature'],
        "properties":{
            "id":str(edge['Gene ID'])+":"+edge['Aging Signature'],
            "source_id":int(edge['Gene ID']),
            "source_label":edge['Gene'],
            "target_id":edge['Aging Signature'],
            "target_label":edge['Aging Signature'],
            "directed":True,
            "standardized_value":edge['Standardized Value'],
            "threshold":1 if is_up else -1
        }})

#### RDF

In [None]:
# Write one "subject predicate object ." line per edge: RO:0002245 when the
# edge threshold is 1, RO:0002246 otherwise.
with open(output_path+'kg_serializations/gtexagingsignatures.rdf', 'w') as f:
    print('@prefix gene: ncbi.nlm.nih.gov/gene/', file=f)
    print('@prefix RO: purl.obolibrary.org/RO_', file=f)

    print('', file=f)
    for edge in edges:
        props = edge["properties"]
        # The trailing space in the second predicate is kept as-is so the
        # emitted text does not change.
        predicate = 'RO:0002245' if props["threshold"]==1 else 'RO:0002246 '
        print('gene:'+str(props['source_id']), predicate, props['target_id'], end=' .\n', file=f)

#### JSON

In [None]:
# Serialize the node and edge records as indented JSON.
with open(output_path+'kg_serializations/gtexagingsignatures.json', 'w') as f:
    # json.dump writes to the file and returns None, so there is nothing
    # worth binding to a name here.
    json.dump(
        {
            "Version":"1", 
            "nodes": nodes,
            "edges": edges
        }, indent=4, fp=f)

#### TSV

In [None]:
# Node table for the TSV serialization: namespace, id and label per node.
nodeframe = pd.DataFrame(nodes).T
nodeframe['id'] = nodeframe['properties'].apply(lambda x: x['id'])
nodeframe['label'] = nodeframe['properties'].apply(lambda x: x['label'])
# Gene nodes use NCBI Entrez ids. Every other node built in this notebook is
# an aging signature derived from GTEx, so fall back to 'GTEx' for them
# instead of raising KeyError on a node type missing from the mapping.
namespace = {'gene':'NCBI Entrez'}
nodeframe['namespace'] = nodeframe['type'].apply(lambda x: namespace.get(x, 'GTEx'))
nodeframe = nodeframe.get(['namespace', 'id', 'label']).reset_index(drop=True)
nodeframe.to_csv(output_path+'kg_serializations/gtexagingsignatures_tsv/nodes.tsv', sep='\t')
nodeframe

In [None]:
# Edge table for the TSV serialization: source, relation, target, threshold.
edgeframe = pd.DataFrame(edges)
# NOTE(review): 'standardized' is extracted but is not among the columns
# written below -- confirm whether it should be exported.
edgeframe['standardized'] = [props['standardized_value'] for props in edgeframe['properties']]
edgeframe['threshold'] = [props['threshold'] for props in edgeframe['properties']]
edgeframe = edgeframe[['source', 'relation', 'target', 'threshold']]
edgeframe.to_csv(output_path+'kg_serializations/gtexagingsignatures_tsv/edges.tsv', sep='\t')
edgeframe

## Create Visualizations

### Gene Attribute Clustered Heatmap

In [None]:
# Clustered heatmap of the gene x signature ternary matrix, colours centred at 0.
sns.clustermap(data=ternarymatrix, center=0, cmap='seismic')

### Gene Similarity Clustered Heatmap

In [None]:
# Clustered heatmap of the gene-gene cosine similarities, colours centred at 0.
sns.clustermap(data=gene_similarity_matrix, center=0, cmap='seismic')

### Attribute Similarity Clustered Heatmap

In [None]:
# Clustered heatmap of the signature-signature cosine similarities, colours
# centred at 0.
sns.clustermap(data=attribute_similarity_matrix, center=0, cmap='seismic')

### UMAP

In [None]:
def load_gmt(file, suffix='_down'):
    """Parse a GMT file into an ordered ``{term + suffix: "gene gene ..."}`` map.

    Each non-blank line is split on tabs: the first field is the term and the
    remaining fields are its genes. Duplicate genes within a line are dropped
    and the genes are joined with single spaces in sorted order, so the
    resulting strings are identical from run to run.

    Args:
        file: Iterable of text lines (for example an open file handle).
        suffix: Text appended to every term. Defaults to ``'_down'``, which is
            what a one-argument call produced once this cell had run.

    Returns:
        OrderedDict mapping the suffixed term to its space-joined gene string.
    """
    gmt = OrderedDict()
    for line in file:
        line = line.strip()
        if not line:
            # A blank line would otherwise register an empty term.
            continue
        term, *geneset = line.split('\t')
        gmt[term+suffix] = ' '.join(sorted(set(geneset)))
    return gmt


# Load the up and down libraries into one dict; the suffix keeps the up and
# down set of the same signature apart. The files are closed after reading.
with open('downloads/gene_set_library_up_crisp.gmt', 'r') as gmt_file:
    libdict = load_gmt(gmt_file, suffix='_up')
with open('downloads/gene_set_library_dn_crisp.gmt', 'r') as gmt_file:
    downlibdict = load_gmt(gmt_file, suffix='_down')
libdict.update(downlibdict)
scatterdir = 'images/'

In [None]:
def process_scatterplot(libdict, nneighbors=30, mindist=0.1, spread=1.0, maxdf=1.0, mindf=1):
    """Embed gene sets in 2-D with TF-IDF + UMAP and label them by Leiden cluster.

    Args:
        libdict: Mapping of term -> space-separated gene string.
        nneighbors: Passed to ``sc.pp.neighbors`` as ``n_neighbors``.
        mindist: Passed to ``sc.tl.umap`` as ``min_dist``.
        spread: Passed to ``sc.tl.umap`` as ``spread``.
        maxdf: Passed to ``TfidfVectorizer`` as ``max_df``.
        mindf: Passed to ``TfidfVectorizer`` as ``min_df``.

    Returns:
        DataFrame with columns ``x``, ``y``, ``cluster``, ``term`` and
        ``genes``, with rows ordered by Leiden cluster.
    """
    print("\tTF-IDF vectorizing gene set data...")
    vectorizer = TfidfVectorizer(max_df=maxdf, min_df=mindf)
    tfidf = vectorizer.fit_transform(libdict.values())
    print(tfidf.shape)
    adata = anndata.AnnData(tfidf)
    adata.obs.index = libdict.keys()

    print("\tPerforming Leiden clustering...")
    # n_neighbors and min_dist are the knobs exposed for tuning the layout.
    sc.pp.neighbors(adata, n_neighbors=nneighbors)
    sc.tl.leiden(adata, resolution=1.0)
    sc.tl.umap(adata, min_dist=mindist, spread=spread, random_state=42)

    # Reorder the observations so that members of a cluster are contiguous.
    terms_by_cluster = adata.obs.sort_values(by='leiden').index.tolist()
    adata = adata[terms_by_cluster, :]
    adata.obs['leiden'] = 'Cluster ' + adata.obs['leiden'].astype('object')

    df = pd.DataFrame(adata.obsm['X_umap'], columns=['x', 'y'])
    df['cluster'] = adata.obs['leiden'].values
    df['term'] = adata.obs.index
    df['genes'] = [libdict[term] for term in df['term']]

    return df

In [None]:
def get_scatter_colors(df):
    """Map each distinct value of ``df['cluster']`` to a Category20 colour.

    Colours are taken from the 20-colour palette with the even-position
    entries first and the odd-position entries after them, and are reused
    cyclically when there are more than 20 clusters.
    """
    palette = list(Category20[20])
    ordered_colors = palette[::2] + palette[1::2]
    return {
        cluster: ordered_colors[position % 20]
        for position, cluster in enumerate(pd.unique(df['cluster']).tolist())
    }

def get_scatterplot(scatterdf):
    """Build the interactive Bokeh scatter plot of the embedded gene sets.

    Args:
        scatterdf: DataFrame with at least the columns ``x``, ``y``, ``term``
            and ``cluster``. It is copied, not modified.

    Returns:
        The Bokeh figure, with one point per row coloured by cluster and a
        hover tooltip showing the gene set, its coordinates and its cluster.
    """
    df = scatterdf.copy()
    color_mapper = get_scatter_colors(df)
    df['color'] = df['cluster'].apply(lambda x: color_mapper[x])

    # Each tooltip row is its own closed <div>; the previous markup left two
    # of the three row containers unclosed.
    hover_emb = HoverTool(name="df", tooltips="""
        <div style="margin: 10">
            <div style="margin: 0 auto; width:300px;">
                <span style="font-size: 12px; font-weight: bold;">Gene Set:</span>
                <span style="font-size: 12px">@gene_set</span>
            </div>
            <div style="margin: 0 auto; width:300px;">
                <span style="font-size: 12px; font-weight: bold;">Coordinates:</span>
                <span style="font-size: 12px">(@x,@y)</span>
            </div>
            <div style="margin: 0 auto; width:300px;">
                <span style="font-size: 12px; font-weight: bold;">Cluster:</span>
                <span style="font-size: 12px">@cluster</span>
            </div>
        </div>
    """)
    tools_emb = [hover_emb, 'pan', 'wheel_zoom', 'reset', 'save']

    plot_emb = figure(
        width=1000, 
        height=700, 
        tools=tools_emb
    )

    source = ColumnDataSource(
        data=dict(
            x = df['x'],
            y = df['y'],
            gene_set = df['term'],
            cluster = df['cluster'],
            colors = df['color'],
            label = df['cluster']
        )
    )

    # Hide the tick marks and shrink the tick labels to nothing.
    plot_emb.xaxis.major_tick_line_color = None
    plot_emb.xaxis.minor_tick_line_color = None
    plot_emb.yaxis.major_tick_line_color = None
    plot_emb.yaxis.minor_tick_line_color = None
    plot_emb.xaxis.major_label_text_font_size = '0pt'
    plot_emb.yaxis.major_label_text_font_size = '0pt' 

    plot_emb.output_backend = "svg"    
    
    plot_emb.title = 'Gene Sets in the GTEx Tissue-Specific Aging Signatures Library'
    plot_emb.xaxis.axis_label = "UMAP_1"
    plot_emb.yaxis.axis_label = "UMAP_2"
    plot_emb.xaxis.axis_label_text_font_style = 'normal'
    plot_emb.xaxis.axis_label_text_font_size = '18px'
    plot_emb.yaxis.axis_label_text_font_size = '18px'
    plot_emb.yaxis.axis_label_text_font_style = 'normal'
    plot_emb.title.align = 'center'
    plot_emb.title.text_font_size = '18px'
    
    # The renderer returned by scatter() is not needed afterwards.
    plot_emb.scatter(
        'x', 
        'y', 
        size = 4, 
        source = source, 
        color = 'colors'
    )
    
    return plot_emb

In [None]:
# NOTE(review): scatter_df is assigned in the cell that follows this one;
# running this cell first in a fresh session raises NameError.
scatter_df

In [None]:
## defaults: nneighbors=30, mindist=0.1, spread=1.0, maxdf=1.0, mindf=1
# maxdf is deliberately not passed and so keeps its default.
scatter_df = process_scatterplot(
    libdict,
    nneighbors=10,
    mindist=0.1,
    spread=1.5,
    mindf=10,
)

# Display Scatter Plot
plot = get_scatterplot(scatter_df)
show(plot)

In [None]:
# Write the scatter plot to an HTML file under `scatterdir`.
scatter_html = f"{scatterdir}/gtexagingsignatures.html"
output_file(
    filename=scatter_html,
    title='Gene Sets in the GTEx Tissue-Specific Aging Signatures Library',
)
save(plot)