Takes an RSAP log2-normalised quantitated expression matrix, a list of gene names and, optionally, a list of sample names, and makes a heatmap of the selected genes across the selected samples.

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import os

In [None]:
# Setup: input locations and output settings used by the cells below

# Gzipped, tab-separated expression matrix
expression_file = (
    '../rsap_results/expression_data_pipeline_format/'
    'log2_normalised_expression_data__plus_1_pipeline_format.tsv.gz'
)

# Headerless list files; only their first column is used
genes_of_interest_file = 'genes_of_interest.txt'
# Set samples_of_interest_file to None to keep every sample column of the matrix
samples_of_interest_file = 'samples_of_interest.txt'

# Destination directory and the file formats the heatmap is saved in
outdir = 'selected_genes_heatmaps'
image_formats = ('png', 'svg', 'eps')

In [None]:
# Load the expression matrix and the gene / sample selection lists
print('Reading in expression data: ' + expression_file)
expression_data = pd.read_csv(expression_file, sep='\t')
# The first two columns are not counted as samples
print(f'{len(expression_data.columns) - 2} samples with {len(expression_data)} genes')

print('Reading in genes of interest: ' + genes_of_interest_file)
genes_of_interest = pd.read_csv(genes_of_interest_file, sep='\t', header=None)
print(f'{len(genes_of_interest)} genes of interest')

if samples_of_interest_file is not None:
    print('Reading in samples of interest: ' + samples_of_interest_file)
    # Headerless file: the sample names are in the first column (label 0)
    samples_of_interest = pd.read_csv(samples_of_interest_file, sep='\t', header=None)[0].to_list()
else:
    # No sample list supplied: keep every column after the first two
    samples_of_interest = expression_data.columns[2:].to_list()

print(f'{len(samples_of_interest)} samples of interest')

In [None]:
# Keep only the rows whose gene_name appears in the genes-of-interest list
expression_data = expression_data.loc[expression_data['gene_name'].isin(genes_of_interest[0])]
print(f'{len(expression_data)} genes of interest retrieved from the expression matrix')

In [None]:
# Keep gene_name as the leading column, followed by the requested samples in list order
expression_data = expression_data.loc[:, ['gene_name', *samples_of_interest]]

In [None]:
# Format for heatmap
# Optional explicit column order; leave empty to keep the current column order
columns_order = []

# Gene names become the row labels, with no index title
expression_data = expression_data.set_index('gene_name')
expression_data.index.name = None

if not columns_order:
    columns_order = expression_data.columns.to_list()

expression_data = expression_data.loc[:, columns_order]

In [None]:
# Make output directory
# makedirs(..., exist_ok=True) creates any missing parent directories, is a
# no-op when the directory already exists, and avoids the check-then-create
# race of testing os.path.exists() first. It raises FileExistsError if outdir
# exists but is not a directory, instead of failing later when saving images.
os.makedirs(outdir, exist_ok=True)

In [None]:
# Make heatmap
# Rows are z-scored (z_score=0); clustering is disabled on both axes, so rows
# and columns keep the order they have in expression_data
sns.clustermap(
    data=expression_data,
    z_score=0,
    row_cluster=False,
    col_cluster=False,
    xticklabels=True,
    yticklabels=True,
    center=0,
    cmap="RdBu_r",
)

# Output files are named after the expression file, inside outdir
_, expression_basename = os.path.split(expression_file)
outfile = f'{outdir}/{expression_basename}.selected_genes_heatmap'

# Save the current figure once per requested format, then display it
for extension in image_formats:
    plt.savefig(fname=f'{outfile}.{extension}', bbox_inches='tight', pad_inches=0.5)
plt.show()

In [None]:
# Final cell: print a completion marker
print('Done')