In [1]:
import pandas as pd
import numpy as np 
import os 

import plotly.express as px 
import plotly.graph_objects as go


In [2]:
import DE_plotting_tools as plot_tl

The goal of this notebook is to compare the RNA-seq results from run Blobel-14357.

The experimental design compares WT vs KO, each exposed to no odor (NO), eugenol (EUG), or octanol (OCT).

We explore the categories described below:
- Gene expression changes between WT_NO and WT_exposed, and whether a similar trend holds in KO_NO vs KO_exposed
- Among the ORs that are stimulated in WT_NO vs WT_exposed, whether those ORs are similarly regulated in KO_NO vs KO_exposed
- Changes in the OR population between ORs stimulated by odor and the other ORs
- PCA on all samples

In [8]:
data_path = "../DE_out/Blobel-14357/"

# Collect the differential-expression (DE) result tables to read.
# os.listdir returns entries in arbitrary order, so sort them for a reproducible
# load order, and keep only CSV files because every entry is later handed to
# pd.read_csv.
DE_files = sorted(
    fname for fname in os.listdir(data_path)
    if fname.startswith('DE') and fname.endswith('.csv')
)
# Separate the all-gene tables from the Olfr-only tables by file name.
DE_allgene_files = [fname for fname in DE_files if 'allgene' in fname]
DE_Olfr_files = [fname for fname in DE_files if 'Olfr' in fname]

In [9]:
def load_DE_tables(directory, filenames):
    """Read DE result CSVs into a dict keyed by file name without extension.

    Parameters
    ----------
    directory : str
        Folder that contains the files.
    filenames : list of str
        File names (not full paths) inside ``directory``.

    Returns
    -------
    dict of str -> pandas.DataFrame
        One frame per file. The first CSV column is read as the index and then
        discarded, so each frame ends up with a fresh 0..n-1 index.
    """
    tables = {}
    for fname in filenames:
        # splitext removes only the trailing extension; str.replace('.csv', '')
        # would also delete a '.csv' that happens to occur inside the name.
        key = os.path.splitext(fname)[0]
        tables[key] = (
            pd.read_csv(os.path.join(directory, fname), index_col=0)
            .reset_index(drop=True)
        )
    return tables


# Same loader for both file groups instead of two copy-pasted loops.
DE_allgene_df_dict = load_DE_tables(data_path, DE_allgene_files)
DE_Olfr_df_dict = load_DE_tables(data_path, DE_Olfr_files)

In [None]:
# Show the keys of the Olfr-only DE table dict (derived from the file names);
# these are the comparison names used for the lookups in the plotting cells.
DE_Olfr_df_dict.keys()

In [12]:
# Volcano plot for the NOvsEUG comparison: the WT and KO Olfr tables are passed
# together so they can be compared in a single call.
novseug_tables = [
    DE_Olfr_df_dict[key]
    for key in ('DE_Olfr_NOvsEUG_WT', 'DE_Olfr_NOvsEUG_KO')
]
fig = plot_tl.compare_vol_plot(
    DE_df_list=novseug_tables,
    DE_df_name=['WT', 'KO'],
    fig_title='NOvsEUG WT and KO volcano plot',
)
fig.show()
# fig.write_html('../output/Blobel-14375/vplots/NOvsEUG_WT_KO.html')

# Volcano plot for WT vs KO under the no-odor (NO) condition.
wtvsko_no_table = DE_Olfr_df_dict['DE_Olfr_WTvsKO_NO']
fig = plot_tl.compare_vol_plot(
    DE_df_list=[wtvsko_no_table],
    DE_df_name=['WTvsKO'],
    fig_title='WTvsKO_NO volcano plot',
)
fig.show()

# Disabled: the same WT vs KO plot for the EUG and OCT conditions.
# (The plotting module is imported as `plot_tl` in this notebook.)
# fig = plot_tl.compare_vol_plot(DE_df_list = [DE_Olfr_df_dict['DE_Olfr_WTvsKO_EUG']],
#                              DE_df_name = ['WTvsKO'],
#                              fig_title = 'WTvsKO_EUG volcano plot')
# fig.show()

# fig = plot_tl.compare_vol_plot(DE_df_list = [DE_Olfr_df_dict['DE_Olfr_WTvsKO_OCT']],
#                              DE_df_name = ['WTvsKO'],
#                              fig_title = 'WTvsKO_OCT volcano plot')
# fig.show()


In [None]:
# Since different ORs are differentially expressed in WT vs KO in the NOvsEUG condition,
# investigate whether those ORs are stimulated by EUG via pS6 or are ORs expressing Rhbdf2.

### Reduced dimensional comparison 

In [3]:
# Load the compiled expected-count matrix for all genes; its first CSV column
# becomes the row index, which is matched against ensembl_gene_id below.
raw_counts = pd.read_csv(
    '../DE_out/Blobel-14357/Blobel-14357_counts_CompiledExpectedCounts_allGene.csv',
    index_col=0,
)

# Lookup table: ensembl_gene_id -> symbol, with rows lacking a symbol removed.
geneID_info = (
    pd.read_csv('../DE_out/preprocessing/eID_sym_name.csv', index_col=0)
    [['ensembl_gene_id', 'symbol']]
    .set_index('ensembl_gene_id')
    .dropna()
)

# Re-key the count matrix by gene symbol. The default inner merge on the two
# indexes keeps only genes that are present in the lookup table.
all_counts = (
    raw_counts
    .merge(geneID_info, left_index=True, right_index=True)
    .set_index('symbol')
)

# Subset of rows whose symbol contains 'Olfr'.
is_olfr = all_counts.index.str.contains('Olfr')
Olfr_counts = all_counts.loc[is_olfr]

In [13]:
# Folder the interactive HTML figures are written to.
# NOTE(review): the run ID here (14375) differs from the input data folder
# (Blobel-14357) — confirm which one is intended.
output_dir = '../output/Blobel-14375/'
# Make sure the target folder exists so write_html does not fail on a fresh
# checkout.
os.makedirs(output_dir, exist_ok=True)

# Same plot/save sequence for each reduction method; only the method name,
# the title and the output file name differ.
# NOTE(review): UMAP is stochastic; if reduced_dimension_plot accepts a seed,
# pass one for reproducible embeddings — its signature is not visible here.
for method in ('umap', 'pca'):
    fig = plot_tl.reduced_dimension_plot(count_df=all_counts,
                                         reduction_method=method)
    fig.update_layout(title=f'{method} allgene counts')
    fig.show()
    fig.write_html(os.path.join(output_dir, f'allgene_{method}.html'))

In [15]:
# Folder the interactive HTML figures are written to.
# NOTE(review): the run ID here (14375) differs from the input data folder
# (Blobel-14357) — confirm which one is intended.
output_dir = '../output/Blobel-14375/'
# Make sure the target folder exists so write_html does not fail on a fresh
# checkout.
os.makedirs(output_dir, exist_ok=True)

# Same plot/save sequence for each reduction method, restricted to the Olfr
# rows; only the method name, the title and the output file name differ.
# NOTE(review): UMAP is stochastic; if reduced_dimension_plot accepts a seed,
# pass one for reproducible embeddings — its signature is not visible here.
for method in ('umap', 'pca'):
    fig = plot_tl.reduced_dimension_plot(count_df=Olfr_counts,
                                         reduction_method=method)
    fig.update_layout(title=f'{method} Olfr counts')
    fig.show()
    fig.write_html(os.path.join(output_dir, f'Olfr_{method}.html'))