In [None]:
import os
import sys
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

sys.path.append('../../..')
from data.constants import BASE_PATH_EXPERIMENTS

In [None]:
# Directory with the histotype-correlation results; the figures produced below are written here as well.
base_path = os.path.join(
    BASE_PATH_EXPERIMENTS,
    'EMT_signature_scoring_case_study/results/correlation_with_histotype',
)
# Tab-separated results table; later cells read its `cancer` and `padjusted` columns.
histotype_table = os.path.join(base_path, 'EMT_histotype.txt')
df = pd.read_csv(histotype_table, sep='\t')

In [None]:
# Put the adjusted p-values on a -log10 scale (larger value = smaller p-value).
# NOTE(review): a `padjusted` of exactly 0 turns into +inf here — confirm the input has none.
df['-log10_adj_pval'] = np.negative(np.log10(df['padjusted']))

In [None]:
alpha = 0.05  # significance threshold compared against the adjusted p-values

In [None]:
# Order the rows from the largest to the smallest -log10 adjusted p-value.
df = df.sort_values(by='-log10_adj_pval', ascending=False)

In [None]:
# The significance threshold expressed on the same -log10 scale as the plotted values.
cutoff = -np.log10(alpha)

In [None]:
# Render the full table as the cell output.
df

In [None]:
# Use seaborn's 'ticks' style for the figures that follow.
sns.set_style(style='ticks')

In [None]:
matplotlib.rcParams['font.family'] = 'Arial'

# Dot plot: one marker per row of `df`, cancer type on x, -log10(adjusted p-value) on y.
# `catplot` returns a FacetGrid (not an Axes), hence the name `grid`.
# `hue` mirrors `x` with the legend switched off: this is the form seaborn asks for when a
# `palette` is supplied (passing `palette` without `hue` is deprecated) and it keeps the
# same per-category colouring.
grid = sns.catplot(
    data=df,
    x='cancer',
    y='-log10_adj_pval',
    hue='cancer',
    legend=False,
    height=4,
    aspect=1.75,
    palette='tab10',
    s=9,
)

# Work on the grid's single Axes explicitly instead of relying on pyplot's "current axes".
dots_ax = grid.ax
cutoff_line = dots_ax.axhline(y=cutoff, color='black', linestyle=':', label=f'- log10({alpha})')
grid.set_xlabels('TCGA cancer type', fontsize=16)
grid.set_ylabels('-log10(q-value)', fontsize=16)
grid.set_xticklabels(rotation=90, fontsize=14)
grid.set_yticklabels(fontsize=14)
# Build the legend from the cutoff line only, so it never picks up per-category entries.
dots_ax.legend(handles=[cutoff_line], fontsize=14)
dots_ax.set_title('Association with histological type', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(base_path, 'emt_histotype_dots.pdf'), format='pdf')


In [None]:
# Re-order the rows by the `cancer` column (ascending).
df = df.sort_values(by='cancer')

In [None]:
# Copy of the -log10 scores in which every entry with padjusted > alpha is replaced by 0;
# all other entries keep their score.
not_significant = df['padjusted'] > alpha
df['-log10_adj_pval_zeroed'] = df['-log10_adj_pval'].mask(not_significant, 0)

In [None]:
# Make `cancer` the row index (it becomes the column labels of the transposed heatmap input below).
df = df.set_index('cancer')

In [None]:
# Range of the non-zero (i.e. not zeroed-out) scores; used as the heatmap colour limits.
# Select the single column first and reduce only that: reducing the whole frame and then
# picking the column does needless work and fails if any other column is not orderable.
nonzero_scores = df.loc[df['-log10_adj_pval_zeroed'] != 0, '-log10_adj_pval_zeroed']
val_min = nonzero_scores.min()
val_max = nonzero_scores.max()
val_min, val_max

In [None]:
# Split the table by row position into two halves, one per heatmap strip.
# With an odd number of rows the second half gets the extra row.
half = len(df) // 2
df1 = df.iloc[:half].copy()
df2 = df.iloc[half:].copy()

In [None]:
sns.set(font_scale=1.0, font='Arial', rc={"axes.labelsize": 16, "axes.titlesize": 18})

# Two single-row heatmap strips stacked vertically: df1 on top, df2 below.
f, ax = plt.subplots(nrows=2, ncols=1, figsize=(8, 4))
cmap = sns.color_palette("YlOrBr_r", as_cmap=True)
cmap.set_bad("white")  # NaN cells (the zeroed entries, see replace(0, np.nan) below) are drawn white

# Both strips share the same colour limits so that one colorbar describes them both.
heatmap_kws = dict(square=True, linewidths=0.5, linecolor='black',
                   vmin=val_min, vmax=val_max, cmap=cmap, cbar=False)
heatmap1 = sns.heatmap(df1[['-log10_adj_pval_zeroed']].T.replace(0, np.nan), ax=ax[0], **heatmap_kws)
heatmap2 = sns.heatmap(df2[['-log10_adj_pval_zeroed']].T.replace(0, np.nan), ax=ax[1], **heatmap_kws)

# Top strip is labelled above, bottom strip below.
ax[0].tick_params(axis='x', which='major', top=True, labeltop=True, bottom=False, labelbottom=False)
# Re-apply ALL existing tick labels: slicing them to a fixed count breaks as soon as the
# strip has a different number of columns than that count.
ax[0].set_xticklabels(ax[0].get_xticklabels(), rotation=45, ha='left', fontsize=16)
ax[1].tick_params(axis='x', which='major', top=False, labeltop=False, bottom=True, labelbottom=True)
ax[1].set_xticklabels(ax[1].get_xticklabels(), rotation=45, ha='right', fontsize=16)

ax[0].set_xlabel('')
ax[1].set_xlabel('TCGA cancer type', fontsize=18)
ax[0].set_ylabel('Associations with\nhistological type', fontsize=18)
ax[1].set_ylabel('')

ax[0].set_yticks([])
ax[1].set_yticks([])

# Settle the subplot layout BEFORE adding the manually placed colorbar axis and before
# saving, so the saved files actually get it. The right edge of `rect` keeps the strips
# clear of the colorbar; adjust together with the colorbar coordinates below if needed.
f.tight_layout(rect=[0, 0, 0.93, 1])

# Create a shared colorbar axis
cbar_ax = f.add_axes([0.95, 0.25, 0.03, 0.5])  # [left, bottom, width, height] in figure fractions
cbar = f.colorbar(heatmap2.collections[0], cax=cbar_ax)  # collections[0] is the heatmap mesh
cbar.ax.tick_params(labelsize=16)
cbar.set_label('-log10(q-value)', fontsize=18)

# bbox_inches='tight' grows the saved canvas to include the colorbar label, which would
# otherwise extend past the right edge of the figure and be cut off in the files.
for ext in ('pdf', 'svg'):
    f.savefig(os.path.join(base_path, f'emt_histotype_heatmap.{ext}'), format=ext, bbox_inches='tight')