# Fold Change
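This notebook looks for transcripts whose expression changes two-fold or more, in either direction (up or down), between experimental conditions.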

In [None]:
import json, pickle

import pandas as pd

## 1. Load Data and Experimental Groups

In [None]:
# Expression table; the first CSV column is used as the row index.
present_transcripts_df = pd.read_csv('../../data/expression_by_probe.csv', index_col=0)

# Each input file is opened in a `with` block so its handle is closed
# deterministically instead of being left to the garbage collector.

# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# were produced by a trusted step of this pipeline.
with open('../../data/probe_key.p', 'rb') as probe_key_file:
    probe_key = pickle.load(probe_key_file)

with open('../../data/experimental_groups.json') as groups_file:
    experimental_groups = json.load(groups_file)

with open('../../results/post_hoc_diff_dict.json') as post_hoc_file:
    post_hoc_diff_dict = json.load(post_hoc_file)

## 2. Calculate Fold Change for All

In [None]:
# Maps each comparison key to the per-row fold change (second condition's
# mean divided by the first condition's mean).
post_hoc_fold_change_dict = {}

for comparison in post_hoc_diff_dict:
    # Only the first two '_'-separated fields of the key are used as the
    # condition names.
    # NOTE(review): presumably keys look like "<condA>_<condB>" and condition
    # names contain no underscore themselves -- confirm.
    name_parts = comparison.split('_')
    baseline_name = name_parts[0]
    target_name = name_parts[1]

    # Select each group's columns, then average across them for every row.
    baseline_columns = present_transcripts_df.loc[:, experimental_groups[baseline_name]]
    target_columns = present_transcripts_df.loc[:, experimental_groups[target_name]]
    baseline_mean = baseline_columns.T.mean()
    target_mean = target_columns.T.mean()

    # Ratio > 1 means the second condition is higher than the first.
    post_hoc_fold_change_dict[comparison] = target_mean / baseline_mean

Convert the fold-change dictionary into a DataFrame (one column per comparison).

In [None]:
# Assemble the per-comparison fold-change Series into one table; each
# dictionary key becomes a column.
post_hoc_fold_change_df = pd.DataFrame(data=post_hoc_fold_change_dict)

# Bare expression on the last line so the notebook renders the table.
post_hoc_fold_change_df

In [None]:
# For every comparison, split its transcripts from post_hoc_diff_dict into
# those whose fold change is above 2 ('up') and those below 0.5 ('dn').
# Transcripts in between fall into neither list.
diffexp_fold_change_ph = {}

for comparison, transcripts in post_hoc_diff_dict.items():
    up_transcripts = []
    dn_transcripts = []

    for probe in transcripts:
        # For a comparison named "1_2", the ratio is condition 2 relative to
        # condition 1.
        ratio = post_hoc_fold_change_df.loc[probe, comparison]

        # NOTE(review): both comparisons are strict, so a ratio of exactly 2
        # or exactly 0.5 is not selected -- confirm this is intended.
        if ratio > 2:
            up_transcripts.append(probe)
        elif ratio < 0.5:
            dn_transcripts.append(probe)

    diffexp_fold_change_ph[comparison] = {
        'up': up_transcripts,
        'dn': dn_transcripts,
    }


In [None]:
# Translate probe names into gene names, keeping the same
# comparison -> direction layout as diffexp_fold_change_ph.
diffexp_genes_fold_change_ph = {}

for comparison, probes_by_direction in diffexp_fold_change_ph.items():
    genes_by_direction = {}

    for direction, probes in probes_by_direction.items():
        # A probe can map to several gene symbols, so every entry of
        # probe_key[probe] is flattened into one collection; building a set
        # removes duplicates.
        genes_by_direction[direction] = {
            gene
            for probe in probes
            for gene in probe_key[probe]
        }

    diffexp_genes_fold_change_ph[comparison] = genes_by_direction

In [None]:
# Report how many distinct genes were found per comparison and direction.
for comparison, genes_by_direction in diffexp_genes_fold_change_ph.items():
    for direction, genes in genes_by_direction.items():
        gene_count = len(genes)
        print(f'{comparison}_{direction}: {gene_count} genes')

## 3. Save Results for Further Use

In [None]:
# Persist the full fold-change table as CSV.
post_hoc_fold_change_df.to_csv('../../results/post_hoc_fold_change_df.csv')

# Persist the per-comparison up/down gene sets. The `with` block guarantees
# the pickle is flushed and the file handle closed once writing finishes.
with open('../../results/diffexp_genes_fold_change_ph.p', 'wb') as genes_file:
    pickle.dump(diffexp_genes_fold_change_ph, genes_file)