# Serializing data to JSON for visualization

In [8]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style("whitegrid")

## I. Serialize LogFC to JSON

### Human

In [9]:
human = pd.read_csv('DESeq2/Yamuna/Human_NT_vs_ABCD1_DESeq2_results.tsv', sep='\t', header=None, names=['GeneID','Base mean','log2(FC)','StdErr','Wald-Stats','P-value','P-adj']).set_index('GeneID')
human['log2(FC)'] = human['log2(FC)'] * -1
human.head()

Unnamed: 0_level_0,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj
GeneID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
TAGLN2,3702.951794,-2.954344,0.056286,52.48818,0.0,0.0
UHMK1,3543.839403,-3.225018,0.067672,47.656533,0.0,0.0
CSRP1,4592.587776,1.856967,0.046232,-40.165842,0.0,0.0
CAPN2,10417.715365,2.116233,0.045564,-46.445277,0.0,0.0
RHOB,10401.154292,-1.758295,0.045384,38.742385,0.0,0.0


In [10]:
human['log2(FC)'].to_json('./web/human_logFC.json')

### Mouse

In [11]:
mouse = pd.read_csv('DESeq2/Yamuna/Mouse_NT_vs_ABCD1_DESeq2_results.tsv', sep='\t', header=None, names=['GeneID','Base mean','log2(FC)','StdErr','Wald-Stats','P-value','P-adj']).set_index('GeneID')
mouse['log2(FC)'] = mouse['log2(FC)'] * -1
mouse.head()

Unnamed: 0_level_0,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj
GeneID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Timp2,6898.429574,3.955563,0.096145,-41.141552,0.0,0.0
Tmbim6,6188.554131,1.990989,0.063462,-31.372878,4.744136e-216,4.415842e-212
Cdc34,1881.032867,-2.176991,0.075121,28.979662,1.1872919999999998e-184,7.367543000000001e-181
Lipa,3256.752104,2.064111,0.072635,-28.417525,1.2283530000000001e-177,5.716754e-174
Selp,3124.381122,-2.682026,0.102079,26.273974,3.804801e-152,1.4166029999999999e-148


In [12]:
mouse['log2(FC)'].to_json('./web/mouse_logFC.json')

## II. Serialize LogFC & P-Adj (q-val) to JSON

### Human

In [13]:
human['q'] = -np.log10(human['P-adj'])
human_max_finite_log_qVal = np.max(human['q'][np.isfinite(human['q'])])
human['q'] = np.around(np.clip(human['q'], 0, human_max_finite_log_qVal), decimals=3)
print("max q-val: " + str(human_max_finite_log_qVal))

human['logFC'] = np.around(human['log2(FC)'], decimals=3)

max q-val: 302.671025009


  """Entry point for launching an IPython kernel.


In [14]:
human[['logFC', 'q']].to_json('./web/human_logFC_qVal.json', orient='index')

### Mouse

In [15]:
mouse['q'] = -np.log10(mouse['P-adj'])
mouse_max_finite_log_qVal = np.max(mouse['q'][np.isfinite(mouse['q'])])
mouse['q'] = np.around(np.clip(mouse['q'], 0, mouse_max_finite_log_qVal), decimals=3)
print("max q-val: " + str(mouse_max_finite_log_qVal))

mouse['logFC'] = np.around(mouse['log2(FC)'], decimals=3)

max q-val: 211.354986486


  """Entry point for launching an IPython kernel.


In [16]:
mouse[['logFC', 'q']].to_json('./web/mouse_logFC_qVal.json', orient='index')

## III. Serialize Gene Ontology to JSON

In [17]:
go = pd.read_csv('../GONN/GO/biological_process.csv')
go.head()

Unnamed: 0,GeneSymbol,GO_ID,GO_term,Evidence
0,A1BG,GO:0002576,platelet degranulation,TAS
1,A1BG,GO:0008150,biological_process,ND
2,A1BG,GO:0043312,neutrophil degranulation,TAS
3,A2M,GO:0001869,"negative regulation of complement activation, ...",IDA
4,A2M,GO:0002576,platelet degranulation,TAS


In [18]:
go = (go.groupby(['GO_term', 'GO_ID'])['GeneSymbol'].apply(list)).to_frame()
go.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,GeneSymbol
GO_term,GO_ID,Unnamed: 2_level_1
'de novo' AMP biosynthetic process,GO:0044208,"[ADSL, ADSL, ADSS, ADSS, ADSSL1, ADSSL1]"
'de novo' CTP biosynthetic process,GO:0044210,"[CTPS1, CTPS2]"
'de novo' GDP-L-fucose biosynthetic process,GO:0042351,"[GMDS, GMDS, TSTA3, TSTA3, TSTA3]"
'de novo' IMP biosynthetic process,GO:0006189,"[ADSL, ATIC, GART, PFAS, PFAS, PPAT, PAICS]"
'de novo' L-methionine biosynthetic process,GO:0071266,[CTH]


In [19]:
go.index = go.reset_index()['GO_ID'].map(str) + ': ' + go.reset_index()['GO_term']
go.head()

Unnamed: 0,GeneSymbol
GO:0044208: 'de novo' AMP biosynthetic process,"[ADSL, ADSL, ADSS, ADSS, ADSSL1, ADSSL1]"
GO:0044210: 'de novo' CTP biosynthetic process,"[CTPS1, CTPS2]"
GO:0042351: 'de novo' GDP-L-fucose biosynthetic process,"[GMDS, GMDS, TSTA3, TSTA3, TSTA3]"
GO:0006189: 'de novo' IMP biosynthetic process,"[ADSL, ATIC, GART, PFAS, PFAS, PPAT, PAICS]"
GO:0071266: 'de novo' L-methionine biosynthetic process,[CTH]


In [20]:
go.to_json('/Users/alex/Documents/abcd1/go_biological_process_genes.json')

## IV. Serialize Normalized Counts to JSON

### Human

In [24]:
human = pd.read_csv('DESeq2/Yamuna/Normalized_counts_Yamuna_human.tsv', sep='\t', index_col=0).round(3)
human.head()

Unnamed: 0,Human_ABCD1_1,Human_ABCD1_2,Human_ABCD1_3,Human_NT_1,Human_NT_2,Human_NT_3
DDX11L1,0.0,1.217,0.92,0.0,1.988,0.867
WASH7P,25.774,38.939,18.398,40.293,56.661,28.625
MIR6859-1,9.205,8.518,7.359,11.664,2.982,6.939
RP11-34P13.3,0.0,0.0,0.0,0.0,0.0,0.0
MIR1302-2,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
human.to_json('./web/human_counts.json')

### Mouse

In [25]:
mouse = pd.read_csv('DESeq2/Yamuna/Normalized_counts_Yamuna_mouse.tsv', sep='\t', index_col=0).round(3)
mouse.head()

Unnamed: 0,Mouse_ABCD1_1,Mouse_ABCD1_2,Mouse_ABCD1_3,Mouse_NT_1,Mouse_NT_2,Mouse_NT_3
RP23-271O17.1,0.0,0.0,0.0,0.0,0.0,0.0
Gm26206,0.0,0.0,0.0,0.0,0.0,0.0
Xkr4,0.0,0.0,0.0,0.0,0.0,0.0
RP23-317L18.1,0.0,0.0,0.0,0.0,0.0,0.0
RP23-317L18.4,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
mouse.to_json('./web/mouse_counts.json')