#### Takes Panther output and creates a barplot showing fold enrichment, with bars coloured by -log10(FDR)

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
import os

In [None]:
# # # # # # # # # # # # # #
# Setup

# Output directory (created later if it does not exist)
outdir = './enrichment_plots'

# Panther input file, and how many leading lines to skip when reading it
#panther_file = 'analysis-panther-GOBP-down-in good.txt'
panther_file = 'analysis-panther-GOBP-up-in-good.txt'
panther_file_lines_to_skip = 8

# File listing the GO terms to keep in the plot
#selected_go_terms_file = 'go_terms_to_select.txt'
selected_go_terms_file = 'go_terms_to_select_magda.txt'

# Colour scheme: blue -> red colormap, with the -log10(FDR) values that map
# to the two ends of the colour scale
fdr_color_min, fdr_color_max = 1, 15
my_palette = sns.blend_palette(colors=['blue', 'red'], as_cmap=True)

# Column used to order the bars, and the sort direction
graph_sort_category = 'minus_log10_FDR'
#graph_sort_category = 'Enrichment'

sort_low_to_high = False
#sort_low_to_high = True

In [None]:
# Import data
# The Panther export is tab-separated; the leading lines before the table
# are skipped according to panther_file_lines_to_skip.
print(f'Importing Panther file {panther_file}')
panther_data = pd.read_csv(
    panther_file,
    sep='\t',
    skiprows=panther_file_lines_to_skip,
)

# The GO term file has no header row; only its first column is used.
# NOTE(review): read_csv splits on ',' by default, so a GO term containing a
# comma would be broken across columns - confirm the term file has none.
print(f'Importing GO term to select file {selected_go_terms_file}')
go_term_table = pd.read_csv(selected_go_terms_file, header=None)
selected_go_terms = go_term_table.iloc[:, 0].tolist()

In [None]:
# Display the first two rows of the imported Panther table
panther_data.head(2)

In [None]:
# Display the list of GO terms that will be kept
selected_go_terms

In [None]:
# Format
# Keep three columns by position: the GO term label (first), the value used
# as enrichment (third from last) and the FDR (last). Take an explicit copy so
# the assignments below modify an independent frame rather than a slice of the
# imported table (avoids SettingWithCopy warnings).
panther_data = panther_data.iloc[:, [0, -3, -1]].copy()
panther_data.columns = ['GO', 'Enrichment', 'FDR']

# Remove GO term ID: keep only the text before the first ' (' in the label.
# Raw string so that '\(' is a regex escape, not an invalid string escape.
panther_data['GO'] = panther_data['GO'].str.split(r' \(', expand=True)[0]

# Filter for GO terms of interest
panther_data = panther_data[panther_data['GO'].isin(selected_go_terms)]

# Filter out enrichment values which are '< 0.01' (not convertible to float)
panther_data = panther_data[panther_data['Enrichment'] != '< 0.01']

# Convert with astype on the frame itself: assigning converted values back
# through .loc can leave an object-dtype column as object.
panther_data = panther_data.astype({'Enrichment': float, 'FDR': float})

# Replace the raw FDR by -log10(FDR); the resulting columns are
# 'GO', 'Enrichment', 'minus_log10_FDR', as the later cells expect.
panther_data['minus_log10_FDR'] = -np.log10(panther_data['FDR'])
panther_data = panther_data.drop(columns='FDR')

panther_data = panther_data.sort_values(graph_sort_category, ascending=sort_low_to_high)

In [None]:
# Create an output directory
# makedirs with exist_ok=True also creates missing parent directories and
# does not fail if the directory already exists.
os.makedirs(outdir, exist_ok=True)

# Output file stem: <outdir>/<input file name>.enrichment_barplot
# (the plotting cell appends the image extension).
outfile = os.path.basename(panther_file)
outfile = os.path.join(outdir, f'{outfile}.enrichment_barplot')

In [None]:
# Create a custom palette to match the data
# Reset the index so that row labels are 0..n-1; the plot uses the index as
# hue, so the lookup keys below must match it exactly.
panther_data = panther_data.reset_index(drop=True)

# Normalisation of -log10(FDR) onto the colormap range, plus the mappable
# object that the colorbar is drawn from.
norm = plt.Normalize(fdr_color_min, fdr_color_max)
sm = plt.cm.ScalarMappable(cmap=my_palette, norm=norm)

# One colour per row: row position -> colour for that row's -log10(FDR)
custom_palette_lookup = {
    row_id: my_palette(norm(fdr_score))
    for row_id, fdr_score in enumerate(panther_data['minus_log10_FDR'].to_numpy())
}

In [None]:
# Plot graph
sns.set_style("whitegrid")

# Horizontal bars: one per GO term, bar length = enrichment. Each row is its
# own hue level so it picks up its individual colour from the lookup.
barplot_options = dict(
    data=panther_data,
    x='Enrichment',
    y='GO',
    hue=panther_data.index,
    palette=custom_palette_lookup,
    dodge=False,
)
ax = sns.barplot(**barplot_options)

# The per-row legend is replaced by a colorbar
ax.get_legend().remove()
ax.set_xlabel('Fold Enrichment')
ax.set_ylabel(None)

cb = ax.figure.colorbar(sm, ax=ax)
cb.ax.set_title('-log10(FDR)')

# Write out file in each format (dpi is only passed for png and eps)
output_formats = (
    ('svg', {}),
    ('png', {'dpi': 1200}),
    ('eps', {'dpi': 1200}),
)
for extension, extra_options in output_formats:
    plt.savefig(fname=f'{outfile}.{extension}', bbox_inches='tight', pad_inches=0.5, **extra_options)

plt.show()

In [None]:
# Final cell: print a completion message
print('Done')