In [None]:
import pandas as pd 
import numpy as np 
import os
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt

import utils.DE_plotting_tools as vol

#### Reading in the DE CSV files

In [None]:
data_path = "../DE_out/Blobel-15045/"

# Every DE result file in the data directory (file names start with 'DE').
DE_files = []
for entry in os.listdir(data_path):
    if entry.startswith('DE'):
        DE_files.append(entry)

# Separate the file names into the all-gene results and the Olfr-only results.
DE_allgene_files = list(filter(lambda name: 'allgene' in name, DE_files))
DE_Olfr_files = list(filter(lambda name: 'Olfr' in name, DE_files))

In [None]:
# One DataFrame per DE csv, keyed by the file name with '.csv' removed.
# The first csv column is read as the index and then dropped by reset_index,
# leaving a fresh 0..n-1 index.
DE_allgene_df_dict = {
    fname.replace('.csv', ''): pd.read_csv(os.path.join(data_path, fname),
                                           index_col=0).reset_index(drop=True)
    for fname in DE_allgene_files
}

DE_Olfr_df_dict = {
    fname.replace('.csv', ''): pd.read_csv(os.path.join(data_path, fname),
                                           index_col=0).reset_index(drop=True)
    for fname in DE_Olfr_files
}

print(DE_allgene_df_dict.keys())
print(DE_Olfr_df_dict.keys())

#### Volcano comparison plots

In [None]:
# Olfr symbols that are significant (FDR < 0.05) in at least one of the two
# WT-vs-KO comparisons.
# Each table is filtered with its OWN FDR column: the previous version masked
# the 14357 table with the 15045 table's FDR values, so rows were selected by
# index alignment between two unrelated tables.
olfr_de_14357 = DE_Olfr_df_dict['DE_Olfr_WTvsKO_14357']
olfr_de_15045 = DE_Olfr_df_dict['DE_Olfr_WTvsKO_15045']

sig_14357 = set(olfr_de_14357.loc[olfr_de_14357.FDR < 0.05, 'symbol'].values)
sig_15045 = set(olfr_de_15045.loc[olfr_de_15045.FDR < 0.05, 'symbol'].values)
diff_OR = sig_14357.union(sig_15045)

In [None]:

# Side-by-side table of the two WT-vs-KO Olfr comparisons for the symbols in
# diff_OR: the *_x columns come from 15045, the *_y columns from 14357.
merge_cols = ['symbol', 'logFC', 'FDR']
olfr_15045 = DE_Olfr_df_dict['DE_Olfr_WTvsKO_15045']
olfr_14357 = DE_Olfr_df_dict['DE_Olfr_WTvsKO_14357']

left_part = olfr_15045.loc[olfr_15045.symbol.isin(diff_OR), merge_cols]
right_part = olfr_14357.loc[olfr_14357.symbol.isin(diff_OR), merge_cols]
temp = left_part.merge(right_part, on='symbol', how='outer').sort_values('logFC_x')

# Narrow diff_OR down to the symbols that are significant in BOTH comparisons.
significant_in_both = temp.FDR_x.lt(0.05) & temp.FDR_y.lt(0.05)
diff_OR = temp.loc[significant_in_both, 'symbol'].values
# temp['diff_logFC'] = temp.logFC_x - temp.logFC_y

In [None]:
# Compare the two WT-vs-KO Olfr volcano plots, restricted to the symbols in
# diff_OR.
# The frames are built from the name list so both are in the same order; the
# previous version passed the frames as [15045, 14357] but the names as
# [14357, 15045].
# NOTE(review): assumes compare_vol_plot pairs each frame with the name at the
# same position, as the all-gene comparison call in this notebook does - confirm.
olfr_compare_names = ['DE_Olfr_WTvsKO_14357', 'DE_Olfr_WTvsKO_15045']
olfr_compare_frames = [
    DE_Olfr_df_dict[de_name][DE_Olfr_df_dict[de_name].symbol.isin(diff_OR)]
    for de_name in olfr_compare_names
]

fig = vol.compare_vol_plot(olfr_compare_frames,
                           DE_df_name = olfr_compare_names,
                           fig_dimension = [800,500])
fig.show()
fig.write_html('../output/Blobel_15045/WTvsKO_14357vs15045_FDR.html')

In [None]:
# All-gene volcano comparison of the two WT-vs-KO DE tables; frames and names
# are listed in the same order.
allgene_frames = [
    DE_allgene_df_dict['DE_allgene_WTvsKO_14357'],
    DE_allgene_df_dict['DE_allgene_WTvsKO_15045'],
]
allgene_names = ['DE_allgene_WTvsKO_14357', 'DE_allgene_WTvsKO_15045']

fig = vol.compare_vol_plot(
    allgene_frames,
    DE_df_name=allgene_names,
    fig_dimension=[800, 500],
)
fig.show()
# fig.to_html('../output/Blobel_15045/WTvsKO_14357vs15045.html')

In [None]:
# Volcano plot of the single Olfr DE table 'DE_Olfr_WT_14357vs15045'.
wt_batches_frames = [DE_Olfr_df_dict['DE_Olfr_WT_14357vs15045']]
wt_batches_names = ['DE_Olfr_WT_14357vs15045']

fig = vol.compare_vol_plot(
    wt_batches_frames,
    DE_df_name=wt_batches_names,
    fig_dimension=[800, 500],
)
fig.show()
# fig.to_html('../output/Blobel_15045/WTvsKO_n6.html')

#### Single volcano plots

In [None]:
"""
Volcano plot
"""

# plot_df = DE_allgene_df_dict['DE_allgene_WTvsKO_n6'].copy()
plot_df = DE_Olfr_df_dict['DE_Olfr_WTvsKO_n6'].copy()

fig = go.Figure()
# One trace per significance class; the query string doubles as the legend name.
for fdr_query, marker_colour in [('FDR > 0.05', 'grey'), ('FDR <= 0.05', 'red')]:
    temp = plot_df.query(fdr_query)
    fig.add_trace(go.Scatter(x=temp['logFC'],
                             y=-np.log10(temp['FDR']),
                             text=temp['symbol'],
                             mode='markers',
                             name=fdr_query,
                             marker=dict(size=10, color=marker_colour, opacity=0.3)))

# Dashed horizontal line at the FDR = 0.05 threshold, i.e. y = -log10(0.05).
fig.add_shape(type='line', x0=-10, x1=10,
              y0=-np.log10(0.05), y1=-np.log10(0.05),
              line=dict(color='violet', width=3, dash='dash'))

# The y coordinate is -log10(FDR), not the raw FDR, so the hover text is
# labelled accordingly.
fig.update_traces(
    textposition='top center',
    hovertemplate=
    '<b>%{text}</b>' +
    '<br>LogFC: %{x}' +
    '<br>-log10(FDR): %{y}<br>')

fig.update_layout(
    title='Rhbdf2 DE',
    xaxis_title='logFC',
    yaxis_title='-log10(FDR)',
    autosize=True,
    width=800,
    height=500,
    template='simple_white'
)
fig.show()

# fig.write_html("../output/Blobel_15045//allgene_WTvsKO.html")
# fig.write_html("../output/Blobel_15045//Olfr_WTvsKO.html")

#### WT vs KO Olfr Rhbdf2 

In [None]:
# Rhbdf2 expression table; the code below reads its 'Olfr', 'Rhbdf2_avg' and
# 'Rhbdf2_sum' columns.
Rhbdf2_df = pd.read_csv('../output/Rhbdf2_expression.csv', index_col = 0)
# grouped = Rhbdf2_df.groupby('Olfr')['Rhbdf2_counts'].agg(['mean', 'sum']).reset_index()
# Rhbdf2_df = pd.DataFrame({'Olfr': grouped['Olfr'],
#                        'Rhbdf2_avg': grouped['mean'],
#                        'Rhbdf2_sum': grouped['sum']})

# merge the original dataframe with the new dataframe
# merged_df = pd.merge(df, new_df, on='Group')

# Group sizes: the highest 20% and the lowest 80% of the rows (rounded down).
n_top = int(len(Rhbdf2_df) * 0.2)
n_bottom = int(len(Rhbdf2_df) * 0.8)

# Ranked by the average Rhbdf2 value. The previous version ranked the "avg"
# top list by 'Rhbdf2_sum', which made it identical to top_Rhbdf2_sum_Olfr.
top_Rhbdf2_avg_Olfr = list(Rhbdf2_df.sort_values('Rhbdf2_avg', ascending=False)
                           .head(n_top)['Olfr'])
bottom_Rhbdf2_avg_Olfr = list(Rhbdf2_df.sort_values('Rhbdf2_avg', ascending=True)
                              .head(n_bottom)['Olfr'])

# Ranked by the summed Rhbdf2 value.
top_Rhbdf2_sum_Olfr = list(Rhbdf2_df.sort_values('Rhbdf2_sum', ascending=False)
                           .head(n_top)['Olfr'])
bottom_Rhbdf2_sum_Olfr = list(Rhbdf2_df.sort_values('Rhbdf2_sum', ascending=True)
                              .head(n_bottom)['Olfr'])


In [None]:
"""
Olfr DE only Volcano plot
"""
"""
Volcano plot
"""

plot_df = DE_Olfr_df_dict['DE_Olfr_WTvsKO_n6'].copy()

# Row membership in the top / bottom Rhbdf2 (by average) Olfr lists.
in_top_avg = plot_df.symbol.isin(top_Rhbdf2_avg_Olfr)
in_bottom_avg = plot_df.symbol.isin(bottom_Rhbdf2_avg_Olfr)

fig = go.Figure()
# One trace per group: (row mask, legend name, marker colour). Rows in neither
# list are drawn first so the two coloured groups sit on top of them.
for group_mask, group_name, group_colour in [
        (~in_top_avg & ~in_bottom_avg, 'NA', 'grey'),
        (in_top_avg, 'Rhbdf2+ Olfr', '#990011'),
        (in_bottom_avg, 'Rhbdf2- Olfr', '#317773')]:
    temp = plot_df[group_mask]
    fig.add_trace(go.Scatter(x=temp['logFC'],
                             y=-np.log10(temp['FDR']),
                             text=temp['symbol'],
                             mode='markers',
                             name=group_name,
                             marker=dict(size=10, color=group_colour, opacity=0.3)))

# Dashed horizontal line at the FDR = 0.05 threshold, i.e. y = -log10(0.05).
fig.add_shape(type='line', x0=-10, x1=10,
              y0=-np.log10(0.05), y1=-np.log10(0.05),
              line=dict(color='violet', width=3, dash='dash'))

# The y coordinate is -log10(FDR), not the raw FDR, so the hover text is
# labelled accordingly.
fig.update_traces(
    textposition='top center',
    hovertemplate=
    '<b>%{text}</b>' +
    '<br>LogFC: %{x}' +
    '<br>-log10(FDR): %{y}<br>')

fig.update_layout(
    title='Rhbdf2 DE',
    xaxis_title='logFC',
    yaxis_title='-log10(FDR)',
    autosize=True,
    template='simple_white'
)
fig.show()

# fig.write_html("../output/Blobel_15045/Rhbdf2_Olfr/WTvsKO_topRhbdf_avg.html")
# fig.write_html("../output/Blobel_15045/Rhbdf2_Olfr/WTvsKO_topRhbdf_sum.html")


##### count comparison 

In [None]:
# Cutoffs are defined here so this cell runs on a fresh kernel; previously it
# relied on values assigned by a later cell (NameError under Run All).
logfc_cutoff = 0
fdr_cutoff = 1

# Preview of the No1-No6 sample columns for the top-Rhbdf2 (by average) Olfr
# rows with logFC below -logfc_cutoff and FDR below fdr_cutoff.
plot_df[(plot_df.symbol.isin(top_Rhbdf2_avg_Olfr)) &
        (plot_df.logFC < -logfc_cutoff) &
        (plot_df.FDR < fdr_cutoff)][['No1', 'No2', 'No3', 'No4', 'No5', 'No6']]

In [None]:
logfc_cutoff = 0
fdr_cutoff = 1

# Sample columns summed for the wt_* and ko_* values respectively.
wt_sample_cols = ['No1', 'No2', 'No3', 'No4', 'No5', 'No6']
ko_sample_cols = ['St1', 'St2', 'St3', 'St4', 'St5', 'St6']

# Row groups: in the top list, in the bottom list, or in neither ("NA").
is_top = plot_df.symbol.isin(top_Rhbdf2_avg_Olfr)
is_bottom = plot_df.symbol.isin(bottom_Rhbdf2_avg_Olfr)
is_na = ~is_top & ~is_bottom

# wt_* values use rows with logFC below -cutoff, ko_* values rows with logFC
# above +cutoff; both require FDR below fdr_cutoff.
passes_fdr = plot_df.FDR < fdr_cutoff
wt_side = (plot_df.logFC < -logfc_cutoff) & passes_fdr
ko_side = (plot_df.logFC > logfc_cutoff) & passes_fdr


def _mean_sample_total(row_mask, sample_cols):
    """Sum each sample column over the selected rows, then average those per-sample totals."""
    return plot_df.loc[row_mask, sample_cols].sum().mean()


wt_topRhbdf2_count = _mean_sample_total(is_top & wt_side, wt_sample_cols)
ko_topRhbdf2_count = _mean_sample_total(is_top & ko_side, ko_sample_cols)
wt_botRhbdf2_count = _mean_sample_total(is_bottom & wt_side, wt_sample_cols)
# Previously this value was stored in ko_topRhbdf2_count, overwriting the top
# group's value and leaving ko_botRhbdf2_count undefined for the bar plot.
ko_botRhbdf2_count = _mean_sample_total(is_bottom & ko_side, ko_sample_cols)
# Previously the WT "NA" group used the KO-side condition (logFC > cutoff).
wt_na_count = _mean_sample_total(is_na & wt_side, wt_sample_cols)
ko_na_count = _mean_sample_total(is_na & ko_side, ko_sample_cols)


plt.figure(figsize=(8, 6))
plt.bar(['WT_Rhbdf2+', 'KO_Rhbdf2+',
         'WT_Rhbdf2-', 'KO_Rhbdf2-',
         'WT_Rhbdf2NA', 'KO_Rhbdf2NA'],
        [wt_topRhbdf2_count, ko_topRhbdf2_count,
         wt_botRhbdf2_count, ko_botRhbdf2_count,
         wt_na_count, ko_na_count],
        color=['#990011', '#990011', '#317773', '#317773', 'grey', 'grey'],
        width=0.4)
# plt.title('Normalized by num Olfr')
# The x axis holds the six groups, not logFC values.
plt.xlabel('Group')
plt.ylabel('Count')
plt.show()

In [None]:
logfc_cutoff, fdr_cutoff = 0, 1
under_fdr = plot_df.FDR < fdr_cutoff

# WT value: No* columns of the rows with logFC below -cutoff; each column is
# summed, the totals are averaged, and the result is divided by the row count.
temp_df = plot_df.loc[(plot_df.logFC < -logfc_cutoff) & under_fdr,
                      ['No1', 'No2', 'No3', 'No4', 'No5', 'No6']]
wt_count = temp_df.sum().mean() / len(temp_df)

# KO value: the same computation on the St* columns of the rows with logFC
# above +cutoff.
temp_df = plot_df.loc[(plot_df.logFC > logfc_cutoff) & under_fdr,
                      ['St1', 'St2', 'St3', 'St4', 'St5', 'St6']]
ko_count = temp_df.sum().mean() / len(temp_df)


bar_labels = ['WT', 'KO']
bar_heights = [wt_count, ko_count]
bar_colours = ['#990011', '#317773']

plt.figure(figsize=(8, 6))
plt.bar(bar_labels, bar_heights, color=bar_colours, width=0.4)
plt.title('Normalized by num Olfr')
plt.xlabel('logFC Values')
plt.ylabel('Count')
plt.show()

In [None]:
logfc_cutoff = 0.5
fdr_cutoff = 1

# Row groups taken from the Rhbdf2-sum lists: top, bottom, or neither.
top_hit = plot_df.symbol.isin(top_Rhbdf2_sum_Olfr)
bottom_hit = plot_df.symbol.isin(bottom_Rhbdf2_sum_Olfr)
neither_hit = ~top_hit & ~bottom_hit

# Direction filters (both also require FDR below the cutoff).
fdr_ok = plot_df.FDR < fdr_cutoff
below_cutoff = (plot_df.logFC < -logfc_cutoff) & fdr_ok
above_cutoff = (plot_df.logFC > logfc_cutoff) & fdr_ok

# Number of rows in each group / direction combination.
wt_topRhbdf2_count = (top_hit & below_cutoff).sum()
ko_topRhbdf2_count = (top_hit & above_cutoff).sum()
wt_botRhbdf2_count = (bottom_hit & below_cutoff).sum()
ko_botRhbdf2_count = (bottom_hit & above_cutoff).sum()
wt_na_count = (neither_hit & below_cutoff).sum()
ko_na_count = (neither_hit & above_cutoff).sum()

group_labels = ['WT_Rhbdf2+', 'KO_Rhbdf2+',
                'WT_Rhbdf2-', 'KO_Rhbdf2-',
                'WT_Rhbdf2NA', 'KO_Rhbdf2NA']
group_counts = [wt_topRhbdf2_count, ko_topRhbdf2_count,
                wt_botRhbdf2_count, ko_botRhbdf2_count,
                wt_na_count, ko_na_count]
group_colours = ['#990011', '#990011', '#317773', '#317773', 'grey', 'grey']

plt.figure(figsize=(8, 6))
plt.bar(group_labels, group_counts, color=group_colours, width=0.4)
plt.xlabel('logFC Values')
plt.ylabel('Count')
plt.show()

##### logFC sum comparison 

In [None]:
# See the effect of Rhbdf2 in Olfr down or up regulated in WT or KO
# (groups taken from the Rhbdf2-average lists).
logfc_cutoff = 0.5
fdr_cutoff = 1

in_top = plot_df.symbol.isin(top_Rhbdf2_avg_Olfr)
in_bottom = plot_df.symbol.isin(bottom_Rhbdf2_avg_Olfr)
in_neither = ~in_top & ~in_bottom

# The FDR filter is applied to every group. Previously only the two "NA"
# groups were filtered, so rows with FDR >= fdr_cutoff were counted in the
# top / bottom groups but not in the NA groups.
passes_fdr = plot_df.FDR < fdr_cutoff
wt_side = (plot_df.logFC < -logfc_cutoff) & passes_fdr
ko_side = (plot_df.logFC > logfc_cutoff) & passes_fdr

# wt_* sums are over negative logFC values, so abs() makes them comparable to
# the (positive) ko_* sums.
wt_topRhbdf2_logfc_sum = abs(plot_df.loc[in_top & wt_side, 'logFC'].sum())
ko_topRhbdf2_logfc_sum = plot_df.loc[in_top & ko_side, 'logFC'].sum()
wt_botRhbdf2_logfc_sum = abs(plot_df.loc[in_bottom & wt_side, 'logFC'].sum())
ko_botRhbdf2_logfc_sum = plot_df.loc[in_bottom & ko_side, 'logFC'].sum()
wt_na_logfc_sum = abs(plot_df.loc[in_neither & wt_side, 'logFC'].sum())
ko_na_logfc_sum = plot_df.loc[in_neither & ko_side, 'logFC'].sum()

plt.figure(figsize=(8, 6))
plt.bar(['WT_Rhbdf2+', 'KO_Rhbdf2+',
         'WT_Rhbdf2-', 'KO_Rhbdf2-',
         'WT_Rhbdf2NA', 'KO_Rhbdf2NA'],
        [wt_topRhbdf2_logfc_sum, ko_topRhbdf2_logfc_sum,
         wt_botRhbdf2_logfc_sum, ko_botRhbdf2_logfc_sum,
         wt_na_logfc_sum, ko_na_logfc_sum],
        color=['#990011', '#990011', '#317773', '#317773', 'grey', 'grey'],
        width=0.4)
# The bars are summed |logFC| per group, not counts.
plt.xlabel('Group')
plt.ylabel('Sum of |logFC|')
plt.show()

In [None]:
# See the effect of Rhbdf2 in Olfr down or up regulated in WT or KO
# (groups taken from the Rhbdf2-sum lists).
logfc_cutoff = 0.5
fdr_cutoff = 1

in_top = plot_df.symbol.isin(top_Rhbdf2_sum_Olfr)
in_bottom = plot_df.symbol.isin(bottom_Rhbdf2_sum_Olfr)
in_neither = ~in_top & ~in_bottom

# The FDR filter is applied to every group. Previously only the two "NA"
# groups were filtered, so rows with FDR >= fdr_cutoff were counted in the
# top / bottom groups but not in the NA groups.
passes_fdr = plot_df.FDR < fdr_cutoff
wt_side = (plot_df.logFC < -logfc_cutoff) & passes_fdr
ko_side = (plot_df.logFC > logfc_cutoff) & passes_fdr

# wt_* sums are over negative logFC values, so abs() makes them comparable to
# the (positive) ko_* sums.
wt_topRhbdf2_logfc_sum = abs(plot_df.loc[in_top & wt_side, 'logFC'].sum())
ko_topRhbdf2_logfc_sum = plot_df.loc[in_top & ko_side, 'logFC'].sum()
wt_botRhbdf2_logfc_sum = abs(plot_df.loc[in_bottom & wt_side, 'logFC'].sum())
ko_botRhbdf2_logfc_sum = plot_df.loc[in_bottom & ko_side, 'logFC'].sum()
wt_na_logfc_sum = abs(plot_df.loc[in_neither & wt_side, 'logFC'].sum())
ko_na_logfc_sum = plot_df.loc[in_neither & ko_side, 'logFC'].sum()

plt.figure(figsize=(8, 6))
plt.bar(['WT_Rhbdf2+', 'KO_Rhbdf2+',
         'WT_Rhbdf2-', 'KO_Rhbdf2-',
         'WT_Rhbdf2NA', 'KO_Rhbdf2NA'],
        [wt_topRhbdf2_logfc_sum, ko_topRhbdf2_logfc_sum,
         wt_botRhbdf2_logfc_sum, ko_botRhbdf2_logfc_sum,
         wt_na_logfc_sum, ko_na_logfc_sum],
        color=['#990011', '#990011', '#317773', '#317773', 'grey', 'grey'],
        width=0.4)
# The bars are summed |logFC| per group, not counts.
plt.xlabel('Group')
plt.ylabel('Sum of |logFC|')
plt.show()

#### Cell type volcano 

In [None]:
# Marker-gene table, read with its first csv column as the index; only the
# first 50 rows are kept.
cell_marker_csv = '../files/CELL_top_markers.csv'
cell_markers = pd.read_csv(cell_marker_csv, index_col=0)[0:50]

In [None]:
"""
Volcano plot of the cell-type marker genes in the all-gene WT vs KO DE table
"""

plot_df = DE_allgene_df_dict['DE_allgene_WTvsKO_n6']

fig = go.Figure()

# temp = Blobel_Olfr_de[Blobel_Olfr_de['symbol'].isin(top_Rhbdf2_avg_Olfr)]
# One trace per column of cell_markers: the column name is the legend entry and
# the column values are matched against the DE table's 'symbol' column.
for cell in cell_markers:
    temp = plot_df[plot_df.symbol.isin(cell_markers[cell])]
    fig.add_trace(go.Scatter(x=temp['logFC'],
                             y=-np.log10(temp['FDR']),
                             text=temp['symbol'],
                             mode='markers',
                             name=cell,
                             marker=dict(size=10, opacity=0.3)))

# Dashed horizontal line at the FDR = 0.05 threshold, i.e. y = -log10(0.05).
fig.add_shape(type='line', x0=-10, x1=10,
              y0=-np.log10(0.05), y1=-np.log10(0.05),
              line=dict(color='violet', width=3, dash='dash'))

# The y coordinate is -log10(FDR), not the raw FDR, so the hover text is
# labelled accordingly.
fig.update_traces(
    textposition='top center',
    hovertemplate=
    '<b>%{text}</b>' +
    '<br>LogFC: %{x}' +
    '<br>-log10(FDR): %{y}<br>')

fig.update_layout(
    title='Rhbdf2 cell markers only',
    xaxis_title='logFC',
    yaxis_title='-log10(FDR)',
    autosize=True,
#     width=500,
#     height=500,
    template='simple_white'
)
fig.show()
# write_html saves the figure to a file; to_html only returns the HTML string.
# fig.write_html('../output/Blobel_15045/DE_allgene_WTvsKO_n6_cellmarkers.html')

#### GO analysis 

In [None]:
import utils.go_utils as go_utils

In [None]:
"""
Conduct GO analysis on each gene set from the DE analysis
and store the results in the go_dict dictionary
"""
# Gene sets for GO: rows with FDR < 0.1, split by the sign of logFC
# (negative -> wt_genes, positive -> ko_genes).
allgene_de = DE_allgene_df_dict['DE_allgene_WTvsKO_n6']
go_significant = allgene_de.FDR < 0.1
wt_genes = allgene_de[go_significant & (allgene_de.logFC < 0)].symbol.values
ko_genes = allgene_de[go_significant & (allgene_de.logFC > 0)].symbol.values
go_list = [wt_genes, ko_genes]

go_dict = {}
for i, genes in enumerate(go_list):
    # go_it was called unqualified, but the notebook only imports the module as
    # `go_utils`, so the bare name raises NameError on a fresh kernel.
    # NOTE(review): presumably go_it lives in utils.go_utils - confirm.
    go_result = go_utils.go_it(genes)
    # Ratio columns derived from the n_genes / n_go / n_study columns of the result.
    go_result['n_genes/n_go'] = go_result.n_genes/go_result.n_go
    go_result['n_genes/n_study'] = go_result.n_genes/go_result.n_study
    go_dict[i] = go_result

In [None]:
# Tag each GO result with its gene-set group, stack them and save the table.
go_dict[0]['group'] = 'WT'
go_dict[1]['group'] = 'KO'
go_df = pd.concat([go_dict[0], go_dict[1]])
go_df.to_csv('../output/Blobel_15045/GO/GO_terms.csv')

# Horizontal bar chart: one bar per row of go_df, coloured by group.
fig = px.bar(go_df,
             x='n_genes',
             y='term',
             orientation='h',
             color='group',
             hover_data=['study_genes']).update_layout(
                 plot_bgcolor='rgba(0, 0, 0, 0)'
                 )

# manually assign color
# zip stops at the shorter sequence, so an unexpected extra trace keeps its
# default colour instead of raising IndexError.
manual_color = ['lightgrey','pink']
for trace, trace_colour in zip(fig.data, manual_color):
    trace['marker']['color'] = trace_colour

# Axis titles name the plotted columns; the previous titles
# ('logFC (KO/WT)' / 'FDR') were copied from a volcano plot.
fig.update_layout(
    title='Rhbdf2 DE',
    xaxis_title='n_genes',
    yaxis_title='GO term',
    autosize=True,
    template='simple_white'
)
fig.show()
fig.write_html('../output/Blobel_15045/GO/WTvsKO_n6_GO.html')


In [None]:
"""
Quick GO to look at WT vs ALL data 
"""
# TODO(review): the original cell called pd.read_csv('../'), i.e. it tried to
# read a directory, and stored the result in DE_allgene_df_dict, replacing the
# dict of DE tables that earlier cells read from. The intended file is not
# recoverable from this notebook: set the path below to the WT-vs-ALL DE csv
# before running this cell.
wt_vs_all_csv = '../'
# Loaded under its own name so DE_allgene_df_dict stays usable on re-runs.
wt_vs_all_de = pd.read_csv(wt_vs_all_csv)

# Gene sets for GO: rows with FDR < 0.1, split by the sign of logFC
# (negative -> wt_genes, positive -> ko_genes).
# NOTE(review): assumes this table has symbol / logFC / FDR columns like the
# other DE tables used in this notebook - confirm.
go_significant = wt_vs_all_de.FDR < 0.1
wt_genes = wt_vs_all_de[go_significant & (wt_vs_all_de.logFC < 0)].symbol.values
ko_genes = wt_vs_all_de[go_significant & (wt_vs_all_de.logFC > 0)].symbol.values
go_list = [wt_genes, ko_genes]

go_dict = {}
for i, genes in enumerate(go_list):
    # go_it was called unqualified, but the notebook only imports the module as
    # `go_utils`. NOTE(review): presumably go_it lives in utils.go_utils - confirm.
    go_result = go_utils.go_it(genes)
    go_result['n_genes/n_go'] = go_result.n_genes/go_result.n_go
    go_result['n_genes/n_study'] = go_result.n_genes/go_result.n_study
    go_dict[i] = go_result

go_dict[0]['group'] = 'WT'
go_dict[1]['group'] = 'KO'
go_df = pd.concat([go_dict[0], go_dict[1]])
go_df.to_csv('../output/WTvKO_ALL/GO/GO_terms.csv')

# Horizontal bar chart: one bar per row of go_df, coloured by group.
fig = px.bar(go_df,
             x='n_genes',
             y='term',
             orientation='h',
             color='group',
             hover_data=['study_genes']).update_layout(
                 plot_bgcolor='rgba(0, 0, 0, 0)'
                 )

# manually assign color
# zip stops at the shorter sequence, so an unexpected extra trace keeps its
# default colour instead of raising IndexError.
manual_color = ['lightgrey','pink']
for trace, trace_colour in zip(fig.data, manual_color):
    trace['marker']['color'] = trace_colour

# Axis titles name the plotted columns; the previous titles
# ('logFC (KO/WT)' / 'FDR') were copied from a volcano plot.
fig.update_layout(
    title='Rhbdf2 DE',
    xaxis_title='n_genes',
    yaxis_title='GO term',
    autosize=True,
    template='simple_white'
)
fig.show()
# Saved next to this cell's csv. The previous target,
# '../output/Blobel_15045/GO/WTvsKO_n6_GO.html', is the file written by the
# WT-vs-KO GO cell and would have been overwritten.
fig.write_html('../output/WTvKO_ALL/GO/GO_terms.html')
