In [None]:
import pandas as pd
import numpy as np 
import os 

import plotly.express as px 
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [None]:
import DE_plotting_tools as plot_tl

The goal of this notebook is to compare the results of the RNA-seq run Blobel-14357. <br>
The experimental design compares WT vs KO, each exposed to no odor (NO), Eugenol (EUG) or Octanal (OCT). <br>
We explore the questions described below:
- Gene expression changes between WT_NO and WT_exposed, and whether a similar trend is seen between KO_NO and KO_exposed <br>
- For the ORs that are stimulated in WT_NO vs WT_exposed, whether they are similarly regulated in KO_NO vs KO_exposed <br>
- How the changes of the ORs stimulated by the odor compare with those of the other ORs
- PCA on all samples

In [None]:
# Directory holding the differential-expression (DE) result tables of this run.
data_path = "../DE_out/Blobel-14357/"

# Collect the DE result files to read.
# os.listdir() returns entries in arbitrary order, so the names are sorted to keep
# the dictionaries built from these lists (and their printed keys) reproducible.
# Only '.csv' files are kept: the loading cell parses every listed file with
# pd.read_csv and strips '.csv' from the name to build the dictionary key.
DE_files = sorted(file for file in os.listdir(data_path)
                  if file.startswith('DE') and file.endswith('.csv'))
# Separate the all-gene DE tables from the Olfr-only DE tables
DE_allgene_files = [file for file in DE_files if 'allgene' in file]
DE_Olfr_files = [file for file in DE_files if 'Olfr' in file]

In [None]:
# Read every all-gene DE table into a dict keyed by the file name without '.csv'.
# index_col=0 consumes the first CSV column as the index and reset_index(drop=True)
# then discards it, leaving a plain 0..n-1 index.
DE_allgene_df_dict = {
    file.replace('.csv', ''): pd.read_csv(os.path.join(data_path, file), index_col=0).reset_index(drop=True)
    for file in DE_allgene_files
}

# Same loading scheme for the Olfr-only DE tables.
DE_Olfr_df_dict = {
    file.replace('.csv', ''): pd.read_csv(os.path.join(data_path, file), index_col=0).reset_index(drop=True)
    for file in DE_Olfr_files
}

# Show which Olfr comparisons were loaded
print(DE_Olfr_df_dict.keys())

#### Comparison of WTvsKO NO of Old and new data. 

##### allgene comparison

In [None]:
# Compare allgene 
# Load the all-gene WT vs KO (no odor) DE table of the old run, Blobel-14025.
# The first CSV column is read as the index and then dropped by reset_index(drop=True).
DE_allgene_old_df = pd.read_csv('../DE_out/Blobel-14025/DE_allgene_WTvsKO_NO.csv', index_col=0).reset_index(drop=True)

In [None]:
# Compare the old (Blobel-14025) and new all-gene WT vs KO no-odor DE tables with
# the project helper plot_tl.compare_vol_plot (presumably a volcano plot per table).
# NOTE(review): the label and the output path use 'Blobel-14375', while the data
# is read from '../DE_out/Blobel-14357/' — confirm which run ID is correct.
fig = plot_tl.compare_vol_plot(DE_df_list = [DE_allgene_old_df, 
                                             DE_allgene_df_dict['DE_allgene_WTvsKO_NO']], 
                               DE_df_name = ['Blobel-14025', 'Blobel-14375'],
                             fig_fixed_range = True)
# fig.write_html('../output/Blobel-14375/vplots/DE_allgene/DE_allgene_WTvsKO_NO_14025vs14375.html')
fig.show()

In [None]:
# Genes with FDR < 0.05 in either the old or the new run.
# The variable keeps the name sig_Olfr although this cell works on the all-gene tables.
sig_Olfr = set().union(*(
    de_table.query('FDR < 0.05').symbol
    for de_table in (DE_allgene_old_df, DE_allgene_df_dict['DE_allgene_WTvsKO_NO'])
))

# Restrict both tables to those genes before plotting them together.
fig = plot_tl.compare_vol_plot(
    DE_df_list=[
        de_table[de_table['symbol'].isin(sig_Olfr)]
        for de_table in (DE_allgene_old_df, DE_allgene_df_dict['DE_allgene_WTvsKO_NO'])
    ],
    DE_df_name=['Blobel-14025', 'Blobel-14375'],
    fig_fixed_range=True,
)
# fig.write_html('../output/Blobel-14375/vplots/DE_allgene/DE_allgene_WTvsKO_NO_14025vs14375_sigallgene.html')
fig.show()

In [None]:
# Number of genes passing FDR < 0.05 in WTvsKO_NO: old run first, then the new run
print(DE_allgene_old_df.query('FDR<0.05').shape[0])
print(DE_allgene_df_dict['DE_allgene_WTvsKO_NO'].query('FDR<0.05').shape[0])

In [None]:
# Columns taken from each run: gene symbol, DE statistics and the No*/St* columns.
compare_cols = ['symbol', 'logFC', 'FDR', 'No1','No2','No3','St1','St2','St3']

# Inner-join old (suffix _x) and new (suffix _y) on the gene symbol after dropping
# rows with missing values, then add two agreement measures:
#   logFC_diff      - absolute difference of the two logFC values
#   logFC_direction - 1 when both logFC values have the same sign, else 0
compare_df = (
    DE_allgene_old_df[compare_cols].dropna()
    .merge(DE_allgene_df_dict['DE_allgene_WTvsKO_NO'][compare_cols].dropna(), on='symbol')
    .assign(
        logFC_diff=lambda df: (df['logFC_x'] - df['logFC_y']).abs(),
        logFC_direction=lambda df: (np.sign(df['logFC_x']) == np.sign(df['logFC_y'])).astype(int),
    )
)

In [None]:
# Histogram of |logFC_old - logFC_new| for genes significant (FDR <= 0.05) in at
# least one run and with |logFC| > 0.1 in both runs.
compare_df.loc[
    lambda df: ((df['FDR_x'] <= 0.05) | (df['FDR_y'] <= 0.05))
    & (df['logFC_x'].abs() > 0.1)
    & (df['logFC_y'].abs() > 0.1)
].hist('logFC_diff', bins=100)

In [None]:
# Histogram of the direction flag (1 = same logFC sign in both runs, 0 = different)
# for genes significant (FDR <= 0.05) in at least one run and with |logFC| > 0.1 in both.
compare_df.loc[
    lambda df: ((df['FDR_x'] <= 0.05) | (df['FDR_y'] <= 0.05))
    & (df['logFC_x'].abs() > 0.1)
    & (df['logFC_y'].abs() > 0.1)
].hist('logFC_direction', bins=3)

In [None]:
# Load the cell marker table and keep only its first 10 rows.
# Each column is used below as a list of gene symbols
# (presumably the top markers of one cell type — TODO confirm).
Cell_top_markers = pd.read_csv('../../Chaperone_Analysis/output/brann/CELL_top_markers.csv', index_col = 0)[0:10]

In [None]:
# Plot the full new WTvsKO_NO all-gene table together with one subset per column
# of Cell_top_markers (the genes listed in that column).
fig = plot_tl.compare_vol_plot(
    DE_df_list=[DE_allgene_df_dict['DE_allgene_WTvsKO_NO']] + [
        DE_allgene_df_dict['DE_allgene_WTvsKO_NO'].loc[
            DE_allgene_df_dict['DE_allgene_WTvsKO_NO']['symbol'].isin(Cell_top_markers[marker_col])
        ]
        for marker_col in Cell_top_markers.columns
    ],
    DE_df_name=['allgene'] + Cell_top_markers.columns.to_list(),
)
fig.show()
# fig.write_html('../output/Blobel-14375/vplots/DE_allgene/DE_allgene_WTvsKO_NO_celltype.html')

##### Olfr only comparison

In [None]:
# Load the Olfr-only WT vs KO (no odor) DE table of the old run, Blobel-14025.
# The first CSV column is read as the index and then dropped by reset_index(drop=True).
DE_Olfr_old_df = pd.read_csv('../DE_out/Blobel-14025/DE_Olfr_WTvsKO_NO.csv', index_col=0).reset_index(drop=True)

In [None]:
# Compare the old (Blobel-14025) and new Olfr-only WT vs KO no-odor DE tables
# with the project helper, save the figure as HTML and display it.
# NOTE(review): the label and the output path use 'Blobel-14375', while the data
# is read from '../DE_out/Blobel-14357/' — confirm which run ID is correct.
fig = plot_tl.compare_vol_plot(DE_df_list = [DE_Olfr_old_df, 
                                             DE_Olfr_df_dict['DE_Olfr_WTvsKO_NO']], 
                               DE_df_name = ['Blobel-14025', 'Blobel-14375'], 
                               fig_fixed_range = True
                               )
fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_WTvsKO_NO_14025vs14375.html')
fig.show()

In [None]:
# Olfr with FDR < 0.05 in either the old or the new WTvsKO_NO Olfr-only table.
sig_Olfr = set().union(*(
    de_table.query('FDR < 0.05').symbol
    for de_table in (DE_Olfr_old_df, DE_Olfr_df_dict['DE_Olfr_WTvsKO_NO'])
))

# Build the comparison figure for those Olfr only; it is neither shown nor saved
# here (both calls are left commented out).
fig = plot_tl.compare_vol_plot(
    DE_df_list=[
        de_table[de_table['symbol'].isin(sig_Olfr)]
        for de_table in (DE_Olfr_old_df, DE_Olfr_df_dict['DE_Olfr_WTvsKO_NO'])
    ],
    DE_df_name=['Blobel-14025', 'Blobel-14375'],
)
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_WTvsKO_NO_14025vs14375_sigOlfr.html')
# fig.show()

In [None]:
# Rows of the old Olfr table whose symbol is in sig_Olfr, lowest FDR first.
DE_Olfr_old_df[DE_Olfr_old_df['symbol'].isin(sig_Olfr)].sort_values('FDR')

In [None]:
# First rows (head) of the new WTvsKO_NO Olfr table restricted to the symbols in sig_Olfr.
DE_Olfr_df_dict['DE_Olfr_WTvsKO_NO'][DE_Olfr_df_dict['DE_Olfr_WTvsKO_NO']['symbol'].isin(sig_Olfr)].head()

In [None]:
# The old and the new WTvsKO_NO results are inconsistent, so line the two Olfr
# tables up per symbol to inspect the significant genes and their directions.
compare_cols = ['symbol', 'logFC', 'FDR', 'No1','No2','No3','St1','St2','St3']

# Inner join on symbol: old values get suffix _x, new values suffix _y.
#   logFC_diff      - absolute difference of the two logFC values
#   logFC_direction - 1 when both logFC values have the same sign, else 0
compare_df = (
    DE_Olfr_old_df[compare_cols]
    .merge(DE_Olfr_df_dict['DE_Olfr_WTvsKO_NO'][compare_cols], on='symbol')
    .assign(
        logFC_diff=lambda df: (df['logFC_x'] - df['logFC_y']).abs(),
        logFC_direction=lambda df: (np.sign(df['logFC_x']) == np.sign(df['logFC_y'])).astype(int),
    )
)

In [None]:
# Histogram of |logFC_old - logFC_new| for Olfr significant (FDR <= 0.05) in at
# least one run and with |logFC| > 0.1 in both runs.
compare_df.loc[
    lambda df: ((df['FDR_x'] <= 0.05) | (df['FDR_y'] <= 0.05))
    & (df['logFC_x'].abs() > 0.1)
    & (df['logFC_y'].abs() > 0.1)
].hist('logFC_diff', bins=50)

In [None]:
# Histogram of the direction flag (1 = same logFC sign in both runs, 0 = different)
# for Olfr significant (FDR <= 0.05) in at least one run and with |logFC| > 0.1 in both.
compare_df.loc[
    lambda df: ((df['FDR_x'] <= 0.05) | (df['FDR_y'] <= 0.05))
    & (df['logFC_x'].abs() > 0.1)
    & (df['logFC_y'].abs() > 0.1)
].hist('logFC_direction', bins=3)

#### Investigation of WT vs KO with odor stimulation

In [None]:
# TODO: for the ORs with FDR < 0.05 in WT, check where those ORs end up in KO
# TODO: within these WT DE ORs, check the expression level of Rhbdf2

# TODO: check whether the significant ORs of the old data are consistent with this run

##### Quick overall volcano plot 

In [None]:
# One entry per figure: the DE tables to plot, their trace names, the figure
# title and the HTML file the figure is written to.
# The first two compare WT and KO within an odor exposure; the last three are WTvsKO.
vplot_specs = [
    (['DE_Olfr_NOvsEUG_WT', 'DE_Olfr_NOvsEUG_KO'], ['WT', 'KO'],
     'NOvsEUG WT and KO volcano plot',
     '../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsEUG_WT_KO.html'),
    (['DE_Olfr_NOvsOCT_WT', 'DE_Olfr_NOvsOCT_KO'], ['WT', 'KO'],
     'NOvsOCT WT and KO volcano plot',
     '../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsOCT_WT_KO.html'),
    (['DE_Olfr_WTvsKO_NO'], ['WTvsKO'],
     'WTvsKO_NO volcano plot',
     '../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_WTvsKO_NO.html'),
    (['DE_Olfr_WTvsKO_EUG'], ['WTvsKO'],
     'WTvsKO_EUG volcano plot',
     '../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_WTvsKO_EUG.html'),
    (['DE_Olfr_WTvsKO_OCT'], ['WTvsKO'],
     'WTvsKO_OCT volcano plot',
     '../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_WTvsKO_OCT.html'),
]

# Build and save each figure in turn; the figures are written to disk, not shown.
for table_keys, trace_names, title, html_path in vplot_specs:
    fig = plot_tl.compare_vol_plot(
        DE_df_list=[DE_Olfr_df_dict[table_key] for table_key in table_keys],
        DE_df_name=trace_names,
        fig_title=title,
        fig_fixed_range=True,
    )
    fig.write_html(html_path)





##### Rhbdf2 counts distribution

In [None]:
# Load the Rhbdf2 expression table obtained from single-cell data: it lists
# Rhbdf2 (and S100a5) counts together with the associated Olfr.
rhbdf2_df_raw = pd.read_csv('../output/Rhbdf2_expression.csv', index_col = 0)

# Per-Olfr totals, broadcast back onto every row of the raw table.
olfr_count_sums = rhbdf2_df_raw.groupby('Olfr')[['Rhbdf2_counts', 'S100a5_counts']].transform('sum')
olfr_row_counts = rhbdf2_df_raw.groupby('Olfr')['Olfr'].transform('count')
rhbdf2_df_raw['Rhbdf2_sum'] = olfr_count_sums['Rhbdf2_counts']
rhbdf2_df_raw['S100a5_sum'] = olfr_count_sums['S100a5_counts']
rhbdf2_df_raw['Olfr_count'] = olfr_row_counts

# Collapse to one row per Olfr and derive the mean counts per row of that Olfr
# (presumably one row per cell — TODO confirm), keeping only the summary columns.
rhbdf2_df = (
    rhbdf2_df_raw
    .drop_duplicates('Olfr')
    .reset_index(drop = True)
    .assign(
        Rhbdf2_per_cell=lambda df: df['Rhbdf2_sum'] / df['Olfr_count'],
        S100a5_per_cell=lambda df: df['S100a5_sum'] / df['Olfr_count'],
    )
    [['Olfr', 'Rhbdf2_sum', 'S100a5_sum', 'Olfr_count', 'Rhbdf2_per_cell', 'S100a5_per_cell']]
)

In [None]:
# Distribution of Rhbdf2_per_cell, restricted to Olfr with an Olfr_count above 5.
rhbdf2_df[rhbdf2_df['Olfr_count'] > 5].hist('Rhbdf2_per_cell', bins = 50)

##### No Odor vs Eugenol

In [None]:
# Olfr with FDR <= 0.05 in NO vs EUG in either WT or KO.
sig_Olfr = set().union(*(
    DE_Olfr_df_dict[table_key].query('FDR <= 0.05').symbol
    for table_key in ('DE_Olfr_NOvsEUG_WT', 'DE_Olfr_NOvsEUG_KO')
))

# Plot WT and KO restricted to those Olfr.
fig = plot_tl.compare_vol_plot(
    DE_df_list=[
        DE_Olfr_df_dict[table_key][DE_Olfr_df_dict[table_key].symbol.isin(sig_Olfr)]
        for table_key in ('DE_Olfr_NOvsEUG_WT', 'DE_Olfr_NOvsEUG_KO')
    ],
    DE_df_name=['WT', 'KO'],
    fig_title='NOvsEUG WT and KO volcano plot',
)
fig.show()
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsEUG_WT_KO_sigOlfr.html')


In [None]:
# NO vs EUG statistics in WT for the symbols in sig_Olfr (selected columns only).
DE_Olfr_df_dict['DE_Olfr_NOvsEUG_WT'][DE_Olfr_df_dict['DE_Olfr_NOvsEUG_WT'].symbol.isin(sig_Olfr)][['symbol', 'logFC', 'FDR', 'No1','No2','No3','St1','St2','St3']]


In [None]:
# NO vs EUG statistics in KO for the symbols in sig_Olfr (selected columns only).
DE_Olfr_df_dict['DE_Olfr_NOvsEUG_KO'][DE_Olfr_df_dict['DE_Olfr_NOvsEUG_KO'].symbol.isin(sig_Olfr)][['symbol', 'logFC', 'FDR', 'No1','No2','No3','St1','St2','St3']]

In [None]:
# EUG DE Olfr 
# Rhbdf2 / S100a5 summary rows for the Olfr in sig_Olfr.
# NOTE(review): sig_Olfr is reassigned in several cells; this expects the NO vs EUG set.
rhbdf2_df[rhbdf2_df['Olfr'].isin(sig_Olfr)]

In [None]:
# Eugenol-positive Olfr, detected via GFP+_S100a5 FACS-sorted expression
# (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4236398/)
Eugenol_pos_Olfr = ['Olfr961', 'Olfr958', 'Olfr960']

In [None]:
# Plot the NO vs EUG results of WT and KO for the Eugenol-positive Olfr only.
fig = plot_tl.compare_vol_plot(
    DE_df_list=[
        DE_Olfr_df_dict[table_key][DE_Olfr_df_dict[table_key].symbol.isin(Eugenol_pos_Olfr)]
        for table_key in ('DE_Olfr_NOvsEUG_WT', 'DE_Olfr_NOvsEUG_KO')
    ],
    DE_df_name=['WT', 'KO'],
    fig_title='NOvsEUG WT and KO volcano plot',
)
fig.show()
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsEUG_WT_KO_EugOlfr.html')


In [None]:
# NO vs EUG rows in WT for the Eugenol-positive Olfr.
DE_Olfr_df_dict['DE_Olfr_NOvsEUG_WT'][DE_Olfr_df_dict['DE_Olfr_NOvsEUG_WT'].symbol.isin(Eugenol_pos_Olfr)]


In [None]:
# NO vs EUG rows in KO for the Eugenol-positive Olfr.
DE_Olfr_df_dict['DE_Olfr_NOvsEUG_KO'][DE_Olfr_df_dict['DE_Olfr_NOvsEUG_KO'].symbol.isin(Eugenol_pos_Olfr)]

In [None]:
# Rhbdf2 / S100a5 summary rows for the Eugenol-responding ORs
rhbdf2_df[rhbdf2_df['Olfr'].isin(Eugenol_pos_Olfr)]

##### No Odor vs Octanal 

In [None]:
# Olfr with FDR < 0.05 in NO vs OCT in either WT or KO.
sig_Olfr = set().union(*(
    DE_Olfr_df_dict[table_key].query('FDR < 0.05').symbol
    for table_key in ('DE_Olfr_NOvsOCT_WT', 'DE_Olfr_NOvsOCT_KO')
))

# Plot WT and KO restricted to those Olfr.
fig = plot_tl.compare_vol_plot(
    DE_df_list=[
        DE_Olfr_df_dict[table_key][DE_Olfr_df_dict[table_key].symbol.isin(sig_Olfr)]
        for table_key in ('DE_Olfr_NOvsOCT_WT', 'DE_Olfr_NOvsOCT_KO')
    ],
    DE_df_name=['WT', 'KO'],
    fig_title='NOvsOCT WT and KO volcano plot',
)
fig.show()
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsOCT_WT_KO_sigOlfr.html')


In [None]:
# NO vs OCT statistics in WT for the symbols in sig_Olfr (selected columns only).
DE_Olfr_df_dict['DE_Olfr_NOvsOCT_WT'][DE_Olfr_df_dict['DE_Olfr_NOvsOCT_WT'].symbol.isin(sig_Olfr)][['symbol', 'logFC', 'FDR', 'No1','No2','No3','St1','St2','St3']]


In [None]:
# NO vs OCT statistics in KO for the symbols in sig_Olfr (selected columns only).
DE_Olfr_df_dict['DE_Olfr_NOvsOCT_KO'][DE_Olfr_df_dict['DE_Olfr_NOvsOCT_KO'].symbol.isin(sig_Olfr)][['symbol', 'logFC', 'FDR', 'No1','No2','No3','St1','St2','St3']]

In [None]:
# Load the annotated pS6-IP master table and rename its 'id' column to 'symbol'
# so it can be matched against the DE tables, which use 'symbol'.
pS6IP_df = (
    pd.read_csv('../../pS6-IP-Analysis/pS6IP_MASTER_HL_Annotated_2022.csv', index_col=0)
    .rename(columns={'id': 'symbol'})
)

In [None]:
# DE ORs are NOT DE in pS6 stimulation
# pS6-IP rows for odor 'Octanal' restricted to the symbols in sig_Olfr.
# NOTE(review): sig_Olfr is reassigned in several cells; this expects the NO vs OCT set.
pS6IP_df[(pS6IP_df['odor'] == 'Octanal') & (pS6IP_df['symbol'].isin(sig_Olfr))]

In [None]:
# DE OR's Rhbdf2
# Rhbdf2 / S100a5 summary rows for the ORs with FDR < 0.05 in NO vs OCT in WT.
rhbdf2_df[rhbdf2_df['Olfr'].isin(DE_Olfr_df_dict['DE_Olfr_NOvsOCT_WT'].query('FDR < 0.05').symbol)]

In [None]:
# Octanal-positive Olfr: symbols with FDR < 0.05 for odor 'Octanal' in the pS6-IP table.
Octanal_positive_Olfr = pS6IP_df.loc[
    (pS6IP_df['odor'] == 'Octanal') & (pS6IP_df['FDR'] < 0.05), 'symbol'
].values

# Plot the NO vs OCT results of WT and KO for those Olfr, show the figure and save it.
fig = plot_tl.compare_vol_plot(
    DE_df_list=[
        DE_Olfr_df_dict[table_key][DE_Olfr_df_dict[table_key].symbol.isin(Octanal_positive_Olfr)]
        for table_key in ('DE_Olfr_NOvsOCT_WT', 'DE_Olfr_NOvsOCT_KO')
    ],
    DE_df_name=['WT', 'KO'],
    fig_title='NOvsOCT WT and KO volcano plot',
)
fig.show()
fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsOCT_WT_KO_OctOlfr.html')


In [None]:
# NO vs OCT rows in WT for the Octanal-positive Olfr.
DE_Olfr_df_dict['DE_Olfr_NOvsOCT_WT'][DE_Olfr_df_dict['DE_Olfr_NOvsOCT_WT'].symbol.isin(Octanal_positive_Olfr)]

In [None]:
# NO vs OCT rows in KO for the Octanal-positive Olfr.
DE_Olfr_df_dict['DE_Olfr_NOvsOCT_KO'][DE_Olfr_df_dict['DE_Olfr_NOvsOCT_KO'].symbol.isin(Octanal_positive_Olfr)]

In [None]:
# Rhbdf2 / S100a5 summary rows for the Octanal-responding ORs
rhbdf2_df[rhbdf2_df['Olfr'].isin(Octanal_positive_Olfr)]

In [None]:
# Preview the first rows of the pS6-IP table.
pS6IP_df.head()

In [44]:

# Significant (FDR < 0.05) pS6-IP entries for Olfr44 and Olfr1507 across all odors.
# Note that the stimulated-sample columns are 'ST1'..'ST3' here, not 'St1'..'St3'
# as in the DE tables.
pS6IP_df[(pS6IP_df['symbol'].isin(['Olfr44', 'Olfr1507'])) & 
         (pS6IP_df['FDR'] < 0.05)][['symbol','odor_and_conc', 'logFC', 'FDR', 'No1','No2','No3','ST1','ST2','ST3', 'odor_category']]

Unnamed: 0_level_0,symbol,odor_and_conc,logFC,FDR,No1,No2,No3,ST1,ST2,ST3,odor_category
ensembl_gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ENSMUSG00000059887,Olfr1507,100% TMT,-0.773792,0.023646,294,318,191,267,212,168,Thiazole
ENSMUSG00000062649,Olfr44,100% TMT,-1.276026,0.038452,38,19,56,10,20,37,Thiazole
ENSMUSG00000059887,Olfr1507,1% Citronellol,0.595607,0.025253,218,658,335,681,1262,304,Alcohols
ENSMUSG00000062649,Olfr44,1% 2-Methyl-2-thiazoline,-1.509817,0.040583,42,43,53,19,8,24,Thiazole
ENSMUSG00000059887,Olfr1507,100% 2-Methyl-2-thiazoline,-0.994439,5.3e-05,302,380,396,108,182,226,Thiazole
ENSMUSG00000062649,Olfr44,100% 2-Methyl-2-thiazoline,-2.774555,7.3e-05,42,43,53,1,12,8,Thiazole
ENSMUSG00000059887,Olfr1507,1% Diacetyl,0.739105,0.023805,684,408,407,924,737,688,Others
ENSMUSG00000059887,Olfr1507,1% Dimethyl trisulfide,1.092693,0.001984,321,643,660,975,1630,883,Sulfurous
ENSMUSG00000059887,Olfr1507,10mM MTMT,1.07637,0.030858,229,305,326,284,1376,522,Others


In [None]:
# Different ORs are differentially expressed in WT and in KO in the NOvsEUG condition.
# Investigate whether those ORs are stimulated by EUG (via pS6) and whether they express Rhbdf2.

In [None]:
"""
Visualize the effect of Rhbdf2 / S100a5 expression on logFC 
"""
# Join the NO vs OCT DE tables (WT, KO) with the per-Olfr Rhbdf2 / S100a5 summary.
# The merge is an inner join, so only Olfr present in both tables are kept.
plot_df1 = DE_Olfr_df_dict['DE_Olfr_NOvsOCT_WT'].merge(rhbdf2_df, left_on='symbol', right_on='Olfr')
plot_df2 = DE_Olfr_df_dict['DE_Olfr_NOvsOCT_KO'].merge(rhbdf2_df, left_on='symbol', right_on='Olfr')

DE_df_list = [plot_df1, plot_df2]
DE_df_name = ['NOvsOCT WT', 'NOvsOCT KO']

# Axis limits start at 0 and only grow; ymin is never updated below.
xmin, xmax, ymin, ymax = 0, 0, 0, 0

fig = go.Figure()
# One trace per table: x = logFC, y = summed Rhbdf2 counts, hover text = Olfr symbol.
for merged_df, trace_label in zip(DE_df_list, DE_df_name):
    plot_df = merged_df.copy()
    fig.add_trace(
        go.Scatter(
            x=plot_df['logFC'],
            y=plot_df['Rhbdf2_sum'],
            text=plot_df['symbol'],
            mode='markers',
            name=trace_label + '_Rhbdf2',
            marker=dict(size=10, opacity=0.1),
        )
    )
    # Widen a limit only when this table exceeds it; the new extreme is padded by 10 %.
    logFC_low = min(plot_df['logFC'])
    if logFC_low < xmin:
        xmin = logFC_low * 1.10
    logFC_high = max(plot_df['logFC'])
    if logFC_high > xmax:
        xmax = logFC_high * 1.10
    rhbdf2_high = max(plot_df['Rhbdf2_sum'])
    if rhbdf2_high > ymax:
        ymax = rhbdf2_high * 1.10

# To plot S100a5 instead, use 'S100a5_sum' for y (and for the ymax update), name the
# traces with '_S100a5' and write to the ..._S100a5_logFC.html file noted at the end.

fig.update_traces(
    textposition='top center',
    hovertemplate='<b>%{text}</b><br>LogFC: %{x}<br>sum: %{y}<br>',
)
fig.update_layout(title='', autosize=True, template='simple_white')

# Make the x axis symmetric around 0 using the larger of |xmin| and |xmax|;
# the y axis starts slightly below 0 (5 % of ymax).
xmax = max(abs(xmin), abs(xmax))
fig.update_xaxes(range=[-xmax, xmax])
fig.update_yaxes(range=[-ymax*0.05, ymax])

fig.show()
fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsOCT_WT_KO_Rhbdf2_logFC.html')
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsOCT_WT_KO_S100a5_logFC.html')

In [None]:
"""
Visualize the effect of Rhbdf2 / S100a5 expression on logFC 
"""
# Join the NO vs EUG DE tables (WT, KO) with the per-Olfr Rhbdf2 / S100a5 summary.
# The merge is an inner join, so only Olfr present in both tables are kept.
plot_df1 = DE_Olfr_df_dict['DE_Olfr_NOvsEUG_WT'].merge(rhbdf2_df, left_on='symbol', right_on='Olfr')
plot_df2 = DE_Olfr_df_dict['DE_Olfr_NOvsEUG_KO'].merge(rhbdf2_df, left_on='symbol', right_on='Olfr')

DE_df_list = [plot_df1, plot_df2]
DE_df_name = ['NOvsEUG WT', 'NOvsEUG KO']

# Axis limits start at 0 and only grow; ymin is never updated below.
xmin, xmax, ymin, ymax = 0, 0, 0, 0

fig = go.Figure()
# One trace per table: x = logFC, y = summed Rhbdf2 counts, hover text = Olfr symbol.
for merged_df, trace_label in zip(DE_df_list, DE_df_name):
    plot_df = merged_df.copy()
    fig.add_trace(
        go.Scatter(
            x=plot_df['logFC'],
            y=plot_df['Rhbdf2_sum'],
            text=plot_df['symbol'],
            mode='markers',
            name=trace_label + '_Rhbdf2',
            marker=dict(size=10, opacity=0.1),
        )
    )
    # Widen a limit only when this table exceeds it; the new extreme is padded by 10 %.
    logFC_low = min(plot_df['logFC'])
    if logFC_low < xmin:
        xmin = logFC_low * 1.10
    logFC_high = max(plot_df['logFC'])
    if logFC_high > xmax:
        xmax = logFC_high * 1.10
    rhbdf2_high = max(plot_df['Rhbdf2_sum'])
    if rhbdf2_high > ymax:
        ymax = rhbdf2_high * 1.10

# To plot S100a5 instead, use 'S100a5_sum' for y (and for the ymax update), name the
# traces with '_S100a5' and write to the ..._S100a5_logFC.html file noted at the end.

fig.update_traces(
    textposition='top center',
    hovertemplate='<b>%{text}</b><br>LogFC: %{x}<br>sum: %{y}<br>',
)
fig.update_layout(title='', autosize=True, template='simple_white')

# Make the x axis symmetric around 0 using the larger of |xmin| and |xmax|;
# the y axis starts slightly below 0 (5 % of ymax).
xmax = max(abs(xmin), abs(xmax))
fig.update_xaxes(range=[-xmax, xmax])
fig.update_yaxes(range=[-ymax*0.05, ymax])

fig.show()
fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsEUG_WT_KO_Rhbdf2_logFC.html')
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsEUG_WT_KO_S100a5_logFC.html')

In [None]:
"""
Visualize the effect of Rhbdf2 / S100a5 expression on logFC 
"""
# Join the NO vs EUG DE tables (WT, KO) with the per-Olfr Rhbdf2 / S100a5 summary.
# The merge is an inner join, so only Olfr present in both tables are kept.
plot_df1 = DE_Olfr_df_dict['DE_Olfr_NOvsEUG_WT'].merge(rhbdf2_df, left_on='symbol', right_on='Olfr')
plot_df2 = DE_Olfr_df_dict['DE_Olfr_NOvsEUG_KO'].merge(rhbdf2_df, left_on='symbol', right_on='Olfr')

DE_df_list = [plot_df1, plot_df2]
DE_df_name = ['NOvsEUG WT', 'NOvsEUG KO']

fig = go.Figure()

# Add the Rhbdf2 traces first (WT, KO), then the S100a5 traces (WT, KO).
# x = logFC, y = summed counts of the gene, hover text = Olfr symbol.
for count_col, name_suffix in (('Rhbdf2_sum', '_Rhbdf2'), ('S100a5_sum', '_S100a5')):
    for merged_df, trace_label in zip(DE_df_list, DE_df_name):
        plot_df = merged_df.copy()
        fig.add_trace(
            go.Scatter(
                x=plot_df['logFC'],
                y=plot_df[count_col],
                text=plot_df['symbol'],
                mode='markers',
                name=trace_label + name_suffix,
                marker=dict(size=10, opacity=0.1),
            )
        )

fig.update_traces(
    textposition='top center',
    hovertemplate='<b>%{text}</b><br>LogFC: %{x}<br>sum: %{y}<br>',
)
fig.update_layout(title='', autosize=True, template='simple_white')

fig.show()
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsEUG_WT_KO_Rhbdf2_S100a5_logFC.html')


In [None]:
"""
Visualize Rhbdf2 / S100a5 expression against the significance, -log10(FDR), of NO vs OCT
"""
# Join the NO vs OCT DE tables (WT, KO) with the per-Olfr Rhbdf2 / S100a5 summary.
# pd.merge defaults to an inner join, so only Olfr present in both tables are kept.
plot_df1 = DE_Olfr_df_dict['DE_Olfr_NOvsOCT_WT'] 
plot_df1 = pd.merge(plot_df1, rhbdf2_df, left_on = 'symbol', right_on = 'Olfr')
plot_df2 = DE_Olfr_df_dict['DE_Olfr_NOvsOCT_KO']
plot_df2 = pd.merge(plot_df2, rhbdf2_df, left_on = 'symbol', right_on = 'Olfr')

DE_df_list = [plot_df1, plot_df2]
DE_df_name = ['NOvsOCT WT', 'NOvsOCT KO']


fig = go.Figure()

# Add the Rhbdf2 traces first (WT, KO), then the S100a5 traces (WT, KO).
# x = -log10(FDR) (significance, not logFC), y = summed counts, hover text = Olfr symbol.
for count_col, name_suffix in (('Rhbdf2_sum', '_Rhbdf2'), ('S100a5_sum', '_S100a5')):
    for DE_df, DE_name in zip(DE_df_list, DE_df_name): 
        plot_df = DE_df.copy()
        fig.add_trace(go.Scatter(x = -np.log10(plot_df['FDR']), 
                                 y = plot_df[count_col],
                                 text = plot_df['symbol'],
                                 mode = 'markers', 
                                 name = DE_name + name_suffix,
                                 marker = dict(size = 10, 
                                               opacity=0.1)
                                )
                     )
# Add a line for FDR = 0.05
# fig.add_shape(type='line', y0=0, y1=1e5, x0=-np.log10(0.05), x1=-np.log10(0.05), line=dict(color='violet', width=3, dash='dash'))

# The hover label names the quantity that is actually on the x axis: the original
# label said 'LogFC' although x holds -log10(FDR).
fig.update_traces( 
    textposition='top center',
    hovertemplate =
    '<b>%{text}</b>' + 
    '<br>-log10(FDR): %{x}'+
    '<br>sum: %{y}<br>')

fig.update_layout(
    title='',
    autosize=True,
    template='simple_white'
)

fig.show()
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsOCT_WT_KO_Rhbdf2_S100a5_FDR.html')


In [None]:
"""
Visualize Rhbdf2 / S100a5 expression against the significance, -log10(FDR), of NO vs EUG
"""
# Join the NO vs EUG DE tables (WT, KO) with the per-Olfr Rhbdf2 / S100a5 summary.
# pd.merge defaults to an inner join, so only Olfr present in both tables are kept.
plot_df1 = DE_Olfr_df_dict['DE_Olfr_NOvsEUG_WT'] 
plot_df1 = pd.merge(plot_df1, rhbdf2_df, left_on = 'symbol', right_on = 'Olfr')
plot_df2 = DE_Olfr_df_dict['DE_Olfr_NOvsEUG_KO']
plot_df2 = pd.merge(plot_df2, rhbdf2_df, left_on = 'symbol', right_on = 'Olfr')

DE_df_list = [plot_df1, plot_df2]
DE_df_name = ['NOvsEUG WT', 'NOvsEUG KO']


fig = go.Figure()

# Add the Rhbdf2 traces first (WT, KO), then the S100a5 traces (WT, KO).
# x = -log10(FDR) (significance, not logFC), y = summed counts, hover text = Olfr symbol.
for count_col, name_suffix in (('Rhbdf2_sum', '_Rhbdf2'), ('S100a5_sum', '_S100a5')):
    for DE_df, DE_name in zip(DE_df_list, DE_df_name): 
        plot_df = DE_df.copy()
        fig.add_trace(go.Scatter(x = -np.log10(plot_df['FDR']), 
                                 y = plot_df[count_col],
                                 text = plot_df['symbol'],
                                 mode = 'markers', 
                                 name = DE_name + name_suffix,
                                 marker = dict(size = 10, 
                                               opacity=0.1)
                                )
                     )
# Add a line for FDR = 0.05
# fig.add_shape(type='line', y0=0, y1=1e5, x0=-np.log10(0.05), x1=-np.log10(0.05), line=dict(color='violet', width=3, dash='dash'))

# The hover label names the quantity that is actually on the x axis: the original
# label said 'LogFC' although x holds -log10(FDR).
fig.update_traces( 
    textposition='top center',
    hovertemplate =
    '<b>%{text}</b>' + 
    '<br>-log10(FDR): %{x}'+
    '<br>sum: %{y}<br>')

fig.update_layout(
    title='',
    autosize=True,
    template='simple_white'
)

fig.show()
# fig.write_html('../output/Blobel-14375/vplots/DE_Olfr/DE_Olfr_NOvsEUG_WT_KO_Rhbdf2_S100a5_FDR.html')


### Reduced dimensional comparison 

In [None]:
# Compiled expected counts of all genes for this run; the first CSV column is used
# as the index (matched against ensembl_gene_id below).
all_counts = pd.read_csv('../DE_out/Blobel-14357/Blobel-14357_counts_CompiledExpectedCounts_allGene.csv', index_col = 0)

# Lookup table ensembl_gene_id -> symbol; rows without a symbol are dropped.
geneID_info = (
    pd.read_csv('../DE_out/preprocessing/eID_sym_name.csv', index_col = 0)
    [['ensembl_gene_id', 'symbol']]
    .set_index('ensembl_gene_id')
    .dropna()
)

# Re-index the counts by gene symbol (inner join on the index), then keep the rows
# whose symbol contains 'Olfr' as the Olfr-only count table.
all_counts = all_counts.merge(geneID_info, left_index=True, right_index=True).set_index('symbol')
Olfr_counts = all_counts.loc[all_counts.index.str.contains('Olfr')]

In [None]:
# Reduced-dimension plots of the all-gene counts: UMAP first, then PCA.
# NOTE(review): UMAP is normally stochastic — check whether plot_tl fixes a seed.
# Saving is disabled; the targets were '../output/Blobel-14375/allgene_umap.html'
# and '../output/Blobel-14375/allgene_pca.html'.
for reduction in ('umap', 'pca'):
    fig = plot_tl.reduced_dimension_plot(count_df=all_counts, reduction_method=reduction)
    fig.update_layout(title=reduction + ' allgene counts')
    fig.show()

In [None]:
# Reduced-dimension plots of the Olfr-only counts: UMAP first, then PCA.
# NOTE(review): UMAP is normally stochastic — check whether plot_tl fixes a seed.
# Saving is disabled; the targets were '../output/Blobel-14375/Olfr_umap.html'
# and '../output/Blobel-14375/Olfr_pca.html'.
for reduction in ('umap', 'pca'):
    fig = plot_tl.reduced_dimension_plot(count_df=Olfr_counts, reduction_method=reduction)
    fig.update_layout(title=reduction + ' Olfr counts')
    fig.show()