-
Notifications
You must be signed in to change notification settings - Fork 0
/
figures_3.py
117 lines (104 loc) · 6.25 KB
/
figures_3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
helper = __import__('helpers')
df_tables_means = pd.read_csv('figures/tables_numbers/pca_link_strength_means.csv')
df_tables_sds = pd.read_csv('figures/tables_numbers/pca_link_strength_sds.csv')
df_significance = pd.read_csv('figures/tables_numbers/stat_significance.csv')
psfnames = ['base', 'grhl2full', 'grhl2partial', 'ovol', 'nrf2']
oe = ['de10', 'oe0', 'oe10']
paths = ['collated_data/base/oe0/base1_clusData.csv', 'collated_data/GRHL2/full/oe0/grhl2full1_clusData.csv',
'collated_data/GRHL2/partial/oe0/grhl2partial1_clusData.csv', 'collated_data/OVOL/oe0/ovol1_clusData.csv',
'collated_data/NRF2/oe0/nrf21_clusData.csv']
dfs = [pd.read_csv(i) for i in paths]
sc1 = ['#7a5195', '#003f5c', '#ef5675', '#ffa600']
sc10 = ['#011627', '#ff9f1c', '#e71d36', '#2ec486']
gr1 = ['#1e3d58', '#057dcd', '#43b0f1']
plt.rc('axes', labelsize=15)
plt.rc('xtick', labelsize=20)
plt.rc('ytick', labelsize=20)
plt.rc('legend', fontsize=18)
# link strength
fig, ax = plt.subplots(figsize=(7, 7))
ax.bar([0, 1, 2, 3], np.array(df_tables_means['baseoe0'].to_list()[4:8], dtype=float),
yerr=df_tables_sds['baseoe0'].to_list()[4:8], capsize=5, ecolor='black', color=gr1[1],
error_kw={'elinewidth': 2, 'capthick': 2})
helper.significance_annotate((0, 1.5), (1, 1.5), df_significance['p'].to_numpy()[df_significance['params'] == 'base_zeb/u200_asym_e_he'], ax, h=0.2, h2=0.1, h3=0.05)
helper.significance_annotate((2, 5.2), (3, 5.2), df_significance['p'].to_numpy()[df_significance['params'] == 'base_zeb/u200_asym_hm_m'], ax, h=0.2, h2=0.1, h3=0.05)
helper.significance_annotate((0, 5.6), (3, 5.6), df_significance['p'].to_numpy()[df_significance['params'] == 'base_zeb/u200_asym_e_m'], ax, h=0.2, h2=0.1, h3=0.05)
ax.set_xticks([0, 1, 2, 3])
ax.set_xticklabels(['e', 'he', 'hm', 'm'])
ax.set_xlabel('ZEB/u200 Asymmetry')
fig.savefig('figures/plots/zeb_u200_asym.png', dpi=500, bbox_inches='tight')
plt.close()
fig, ax = plt.subplots(figsize=(7, 7))
ax.bar([0, 1, 2, 3], np.array(df_tables_means['baseoe0'].to_list()[8:12], dtype=float),
yerr=df_tables_sds['baseoe0'].to_list()[8:12], capsize=5, ecolor='black', color=gr1[1],
error_kw={'elinewidth': 2, 'capthick': 2})
helper.significance_annotate_inverted((0, -1.2), (2, -1.2), df_significance['p'].to_numpy()[df_significance['params'] == 'base_lin28/let7_asym_e_hm'], ax, h=0.2, h2=0.1, h3=0.05)
helper.significance_annotate((1, 4.2), (3, 4.1), df_significance['p'].to_numpy()[df_significance['params'] == 'base_lin28/let7_asym_he_m'], ax, h=0.2, h2=0.1, h3=0.05)
helper.significance_annotate((0, 4.6), (3, 4.5), df_significance['p'].to_numpy()[df_significance['params'] == 'base_lin28/let7_asym_e_m'], ax, h=0.2, h2=0.1, h3=0.05)
ax.set_xticks([0, 1, 2, 3])
ax.set_xticklabels(['e', 'he', 'hm', 'm'])
ax.set_xlabel('LIN28/let7 Asymmetry')
fig.savefig('figures/plots/lin28_let7_asym.png', dpi=500, bbox_inches='tight')
plt.close()
fig, ax = plt.subplots(figsize=(7, 7))
ax.bar([0, 1, 2, 3], np.array(df_tables_means['baseoe0'].to_list()[12:16], dtype=float),
yerr=df_tables_sds['baseoe0'].to_list()[12:16], capsize=5, ecolor='black', color=gr1[1],
error_kw={'elinewidth': 2, 'capthick': 2})
helper.significance_annotate((0, 20.5), (1, 20.2), df_significance['p'].to_numpy()[df_significance['params'] == 'base_coupling_e_he'], ax, h=0.1, h2=0.05, h3=0.01)
helper.significance_annotate((2, 19.7), (3, 18.8), df_significance['p'].to_numpy()[df_significance['params'] == 'base_coupling_hm_m'], ax, h=0.1, h2=0.05, h3=0.01)
helper.significance_annotate((1, 19.2), (2, 17.5), df_significance['p'].to_numpy()[df_significance['params'] == 'base_coupling_he_hm'], ax, h=0.1, h2=0.05, h3=0.01)
ax.set_xticks([0, 1, 2, 3])
ax.set_xticklabels(['e', 'he', 'hm', 'm'])
ax.set_xlabel('Total Coupling Strength')
ax.set_ylim(18, 21)
fig.savefig('figures/plots/total_coupling_strength.png', dpi=500, bbox_inches='tight')
plt.close()
for psf, df in zip(psfnames, dfs):
if not (psf == 'base'):
continue
pca_coeff_1 = [float(x) for x in df_tables_means[psf + 'oe0'].to_numpy()[0].split(' ')]
pca_coeff_2 = [float(x) for x in df_tables_means[psf + 'oe0'].to_numpy()[1].split(' ')]
pca1 = np.zeros(df.shape[0])
count = 0
for c in pca_coeff_1:
pca1 += c * df[df.columns[3 + count]].to_numpy()
count += 1
pca2 = np.zeros(df.shape[0])
count = 0
for c in pca_coeff_2:
pca2 += c * df[df.columns[3 + count]].to_numpy()
count += 1
xlims = np.percentile(pca1, [0.1, 99.9]) # filter the outliers for better visualization
ylims = np.percentile(pca2, [0.1, 99.9])
fig, ax = plt.subplots(figsize=(8, 7))
count = 0
for p in ['e', 'he', 'hm', 'm']:
ax.scatter(pca1[df['phenotype'] == p], pca2[df['phenotype'] == p], color=sc1[count], s=5, alpha=0.5)
count += 1
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_xlim(xlims)
ax.set_ylim(ylims)
ax.set_xticks([round(x, 2) for x in np.linspace(xlims[0], xlims[1], 5)])
ax.set_yticks([round(x, 2) for x in np.linspace(ylims[0], ylims[1], 5)])
fig.savefig('figures/plots/' + psf + '_run1_oe0_pca_scatter.png', dpi=500, bbox_inches='tight')
plt.close()
# transform dataset to make visualization better (capping all values > +2.5 and < -2.5)
genes = ['u200', 'ZEB', 'LIN28', 'let7', 'SNAIL', 'NFkB', 'GRHL2', 'OVOL', 'KEAP1', 'ECad', 'NRF2']
for i in [x for x in df.columns[3:] if x in genes]:
df.loc[df[i] <= -2.5, i] = -2.5
df.loc[df[i] >= 2.5, i] = 2.5
col_vec = np.zeros(df.shape[0], dtype=np.int8) # to color the rows according to the assigned clusters (KMeans)
col_vec[df['phenotype'] == 'he'] = 1
col_vec[df['phenotype'] == 'hm'] = 2
col_vec[df['phenotype'] == 'm'] = 3
col_vec = np.array(sc1)[col_vec]
genes = ['u200', 'ZEB', 'LIN28', 'let7']
fig = sns.clustermap(df.loc[:, genes], method='ward', col_cluster=False, cmap='mako', figsize=(8, 7),
dendrogram_ratio=0.1, tree_kws={'linewidths': 2}, cbar_pos=None, row_colors=col_vec)
fig.savefig('figures/plots/' + psf + '_run1_heatmap_coreGenes.png', dpi=500, bbox_inches='tight')
plt.close()