# Serializing data to JSON for visualization

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style("whitegrid")

In [2]:
def serialize(df, name, how='columns'):
    """Write ``df`` to ``<name>.js`` as a JavaScript variable assignment.

    The file is created in the current working directory and holds a single
    line of the form ``var <name> = <json>;`` where ``<json>`` is the result
    of ``df.to_json(orient=how)``.

    Parameters
    ----------
    df : object exposing ``to_json(orient=...)``
        In this notebook, always a pandas DataFrame.
    name : str
        Used both as the output file stem and as the JavaScript variable name.
    how : str, default 'columns'
        Forwarded unchanged to ``to_json`` as ``orient``.

    Returns
    -------
    None
    """
    # The context manager guarantees the handle is flushed and closed even if
    # JSON encoding raises, instead of relying on garbage collection.
    with open(f"{name}.js", 'w') as js_file:
        js_file.write(f"var {name} = {df.to_json(orient=how)};\n")

## I. Serialize LogFC & P-Adj (q-val) to JSON

### Human

In [3]:
# Read the headerless human DESeq2 results table and key it by gene.
human = pd.read_csv(
    'DESeq2_data/Yamuna/Human_NT_vs_ABCD1_DESeq2_results.tsv',
    sep='\t',
    header=None,
    names=['GeneID', 'Base mean', 'log2(FC)', 'StdErr', 'Wald-Stats', 'P-value', 'P-adj'],
)
human = human.set_index('GeneID')
# Flip the sign of the fold change (presumably to reverse the contrast
# direction of the DESeq2 run; confirm against the analysis design).
human['log2(FC)'] = human['log2(FC)'].mul(-1)
human.head()

Unnamed: 0_level_0,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj
GeneID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
TAGLN2,3702.951794,-2.954344,0.056286,52.48818,0.0,0.0
UHMK1,3543.839403,-3.225018,0.067672,47.656533,0.0,0.0
CSRP1,4592.587776,1.856967,0.046232,-40.165842,0.0,0.0
CAPN2,10417.715365,2.116233,0.045564,-46.445277,0.0,0.0
RHOB,10401.154292,-1.758295,0.045384,38.742385,0.0,0.0


In [4]:
# q = -log10(adjusted p-value). Rows with P-adj == 0 evaluate to +inf and make
# NumPy emit a divide-by-zero RuntimeWarning; that case is expected and is
# handled by the clipping below, so the warning is silenced explicitly.
with np.errstate(divide='ignore'):
    human['q'] = -np.log10(human['P-adj'])

# Largest finite q; infinite values are capped at this ceiling.
human_max_finite_log_qVal = human['q'][np.isfinite(human['q'])].max()
human['q'] = human['q'].clip(0, human_max_finite_log_qVal).round(3)
print("max q-val: " + str(human_max_finite_log_qVal))

# Rounded copy of the fold change for the serialized output.
human['logFC'] = human['log2(FC)'].round(3)

max q-val: 302.6710250087188


  """Entry point for launching an IPython kernel.


In [5]:
# Keep only genes that have both a fold change and a q value.
human = human[human[['logFC', 'q']].notna().all(axis=1)]

In [6]:
# One JSON record per gene (orient='index') holding logFC and q.
serialize(df=human.loc[:, ['logFC', 'q']], name='human_differential', how='index')

### Mouse

In [7]:
# Read the headerless mouse DESeq2 results table and key it by gene.
mouse = pd.read_csv(
    'DESeq2_data/Yamuna/Mouse_NT_vs_ABCD1_DESeq2_results.tsv',
    sep='\t',
    header=None,
    names=['GeneID', 'Base mean', 'log2(FC)', 'StdErr', 'Wald-Stats', 'P-value', 'P-adj'],
)
mouse = mouse.set_index('GeneID')
# Flip the sign of the fold change (presumably to reverse the contrast
# direction of the DESeq2 run; confirm against the analysis design).
mouse['log2(FC)'] = mouse['log2(FC)'].mul(-1)
mouse.head()

Unnamed: 0_level_0,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj
GeneID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Timp2,6898.429574,3.955563,0.096145,-41.141552,0.0,0.0
Tmbim6,6188.554131,1.990989,0.063462,-31.372878,4.744136e-216,4.415842e-212
Cdc34,1881.032867,-2.176991,0.075121,28.979662,1.1872919999999998e-184,7.367543000000001e-181
Lipa,3256.752104,2.064111,0.072635,-28.417525,1.2283530000000001e-177,5.716754e-174
Selp,3124.381122,-2.682026,0.102079,26.273974,3.804801e-152,1.4166029999999999e-148


In [8]:
# q = -log10(adjusted p-value). Rows with P-adj == 0 evaluate to +inf and make
# NumPy emit a divide-by-zero RuntimeWarning; that case is expected and is
# handled by the clipping below, so the warning is silenced explicitly.
with np.errstate(divide='ignore'):
    mouse['q'] = -np.log10(mouse['P-adj'])

# Largest finite q; infinite values are capped at this ceiling.
mouse_max_finite_log_qVal = mouse['q'][np.isfinite(mouse['q'])].max()
mouse['q'] = mouse['q'].clip(0, mouse_max_finite_log_qVal).round(3)
print("max q-val: " + str(mouse_max_finite_log_qVal))

# Rounded copy of the fold change for the serialized output.
mouse['logFC'] = mouse['log2(FC)'].round(3)

max q-val: 211.3549864860441


  """Entry point for launching an IPython kernel.


In [9]:
# Keep only genes that have both a fold change and a q value.
mouse = mouse[mouse[['logFC', 'q']].notna().all(axis=1)]

In [10]:
# One JSON record per gene (orient='index') holding logFC and q.
serialize(df=mouse.loc[:, ['logFC', 'q']], name='mouse_differential', how='index')

### Lee

In [11]:
# Read the headerless Lee DESeq2 results table and key it by gene.
# Unlike the human/mouse tables, the fold-change sign is left as-is here.
lee = pd.read_csv(
    'DESeq2_data/Lee/Lee_DESeq2.tsv',
    sep='\t',
    header=None,
    names=['GeneID', 'Base mean', 'log2(FC)', 'StdErr', 'Wald-Stats', 'P-value', 'P-adj'],
)
lee = lee.set_index('GeneID')
lee.head()

Unnamed: 0_level_0,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj
GeneID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CTSF,530.623969,7.625389,0.305383,24.969883,1.2989579999999999e-137,3.4586059999999998e-133
AC006039.4,69.056857,2.395232,0.188938,12.677335,7.897114e-37,1.0513430000000001e-32
PRSS12,5631.132313,2.155079,0.171747,12.54802,4.0757449999999995e-36,3.61736e-32
SOX7,612.479651,2.390252,0.203622,11.738663,8.075288e-32,5.375316e-28
SLC1A6,345.256285,2.108443,0.181737,11.601616,4.043717e-31,2.15336e-27


In [12]:
# q = -log10(adjusted p-value). Any row with P-adj == 0 would evaluate to +inf
# and make NumPy emit a divide-by-zero RuntimeWarning; that case is handled by
# the clipping below, so the warning is silenced explicitly.
with np.errstate(divide='ignore'):
    lee['q'] = -np.log10(lee['P-adj'])

# Largest finite q; infinite values are capped at this ceiling.
lee_max_finite_log_qVal = lee['q'][np.isfinite(lee['q'])].max()
lee['q'] = lee['q'].clip(0, lee_max_finite_log_qVal).round(3)
print("max q-val: " + str(lee_max_finite_log_qVal))

# Rounded copy of the fold change for the serialized output.
lee['logFC'] = lee['log2(FC)'].round(3)

max q-val: 132.461098939291


In [13]:
# Keep only genes that have both a fold change and a q value.
lee = lee[lee[['logFC', 'q']].notna().all(axis=1)]

In [14]:
# One JSON record per gene (orient='index') holding logFC and q.
serialize(df=lee.loc[:, ['logFC', 'q']], name='lee_differential', how='index')

## II. Serialize Normalized Counts to JSON

### Human

In [15]:
# Normalized counts; the first column becomes the row index.
human = pd.read_csv(
    'DESeq2_data/Yamuna/Normalized_counts_Yamuna_human.tsv',
    sep='\t',
    index_col=0,
)
# Three decimals are enough for the serialized output.
human = human.round(3)
human.head()

Unnamed: 0,Human_ABCD1_1,Human_ABCD1_2,Human_ABCD1_3,Human_NT_1,Human_NT_2,Human_NT_3
DDX11L1,0.0,1.217,0.92,0.0,1.988,0.867
WASH7P,25.774,38.939,18.398,40.293,56.661,28.625
MIR6859-1,9.205,8.518,7.359,11.664,2.982,6.939
RP11-34P13.3,0.0,0.0,0.0,0.0,0.0,0.0
MIR1302-2,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Discard rows whose counts sum to zero across all samples.
human = human.loc[human.sum(axis=1).ne(0)]
human.head()

Unnamed: 0,Human_ABCD1_1,Human_ABCD1_2,Human_ABCD1_3,Human_NT_1,Human_NT_2,Human_NT_3
DDX11L1,0.0,1.217,0.92,0.0,1.988,0.867
WASH7P,25.774,38.939,18.398,40.293,56.661,28.625
MIR6859-1,9.205,8.518,7.359,11.664,2.982,6.939
RP11-34P13.7,0.0,0.0,0.0,0.0,0.994,0.0
RP11-34P13.15,1.841,0.0,0.0,0.0,0.0,0.867


In [17]:
# Column-oriented JSON (the default orient of serialize).
serialize(df=human, name='human_counts', how='columns')

### Lee

In [18]:
# Normalized counts; the first column becomes the row index.
lee = pd.read_csv(
    'DESeq2_data/Lee/Normalized_counts_Lee.tsv',
    sep='\t',
    index_col=0,
)
# Three decimals are enough for the serialized output.
lee = lee.round(3)
lee.head()

Unnamed: 0,ccALD3: counts,ccALD2: counts,ccALD1: counts,WT3: counts,WT2: counts,WT1: counts
DDX11L1,1.021,0.997,0.0,3.085,0.0,0.0
WASH7P,634.996,480.679,394.198,483.241,452.629,499.03
MIR6859-1,57.17,39.89,43.381,46.268,57.477,64.052
RP11-34P13.3,1.021,0.0,5.658,2.056,0.0,2.868
MIR1302-2,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
# Drop everything from the first ':' onward (e.g. "ccALD3: counts" -> "ccALD3")
# and tag each sample with the dataset name.
lee.columns = ['Lee_' + label.split(':')[0] for label in lee.columns]
lee.head()

Unnamed: 0,Lee_ccALD3,Lee_ccALD2,Lee_ccALD1,Lee_WT3,Lee_WT2,Lee_WT1
DDX11L1,1.021,0.997,0.0,3.085,0.0,0.0
WASH7P,634.996,480.679,394.198,483.241,452.629,499.03
MIR6859-1,57.17,39.89,43.381,46.268,57.477,64.052
RP11-34P13.3,1.021,0.0,5.658,2.056,0.0,2.868
MIR1302-2,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Discard rows whose counts sum to zero across all samples.
lee = lee.loc[lee.sum(axis=1).ne(0)]
# Insert "_" before the final character (the single-digit replicate number),
# e.g. "Lee_ccALD3" -> "Lee_ccALD_3".
lee.columns = [f"{label[:-1]}_{label[-1]}" for label in lee.columns]
lee.head()

Unnamed: 0,Lee_ccALD_3,Lee_ccALD_2,Lee_ccALD_1,Lee_WT_3,Lee_WT_2,Lee_WT_1
DDX11L1,1.021,0.997,0.0,3.085,0.0,0.0
WASH7P,634.996,480.679,394.198,483.241,452.629,499.03
MIR6859-1,57.17,39.89,43.381,46.268,57.477,64.052
RP11-34P13.3,1.021,0.0,5.658,2.056,0.0,2.868
RP11-34P13.7,1.021,0.997,4.715,1.028,3.079,4.78


In [21]:
# Column-oriented JSON (the default orient of serialize).
serialize(df=lee, name='lee_counts', how='columns')

### Mouse

In [22]:
# Normalized counts; the first column becomes the row index.
mouse = pd.read_csv(
    'DESeq2_data/Yamuna/Normalized_counts_Yamuna_mouse.tsv',
    sep='\t',
    index_col=0,
)
# Three decimals are enough for the serialized output.
mouse = mouse.round(3)
mouse.head()

Unnamed: 0,Mouse_ABCD1_1,Mouse_ABCD1_2,Mouse_ABCD1_3,Mouse_NT_1,Mouse_NT_2,Mouse_NT_3
RP23-271O17.1,0.0,0.0,0.0,0.0,0.0,0.0
Gm26206,0.0,0.0,0.0,0.0,0.0,0.0
Xkr4,0.0,0.0,0.0,0.0,0.0,0.0
RP23-317L18.1,0.0,0.0,0.0,0.0,0.0,0.0
RP23-317L18.4,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
# Discard rows whose counts sum to zero across all samples.
mouse = mouse.loc[mouse.sum(axis=1).ne(0)]

In [24]:
# Column-oriented JSON (the default orient of serialize).
serialize(df=mouse, name='mouse_counts', how='columns')

### Human & Lee Merged

In [25]:
# Place the human and Lee samples side by side, aligned on the row labels;
# sort=True orders the combined row index.
merged = pd.concat(
    [human, lee],
    axis='columns',
    sort=True,
)
merged.head()

Unnamed: 0,Human_ABCD1_1,Human_ABCD1_2,Human_ABCD1_3,Human_NT_1,Human_NT_2,Human_NT_3,Lee_ccALD_3,Lee_ccALD_2,Lee_ccALD_1,Lee_WT_3,Lee_WT_2,Lee_WT_1
A1BG,3.682,1.217,1.84,1.06,0.994,0.0,3.063,9.973,2.829,0.0,6.158,12.428
A1BG-AS1,7.364,23.12,7.359,19.086,16.899,13.011,88.818,107.704,89.59,94.592,132.402,133.839
A1CF,1.841,3.65,1.84,0.0,0.0,0.0,0.0,7.978,5.658,0.0,4.105,5.736
A2M,135.314,120.466,122.348,201.463,283.305,233.339,32.669,25.929,34.893,30.845,23.606,70.744
A2ML1,2.762,0.0,0.92,2.121,1.988,0.0,987.204,2428.328,1041.135,605.593,1265.513,582.202


In [26]:
# Column-oriented JSON (the default orient of serialize).
serialize(df=merged, name='merged_counts', how='columns')

### Mouse, Human, and Lee Merged

In [27]:
# Upper-case the mouse row labels so they can align with the human/Lee row
# labels when the frames are concatenated column-wise below.
# NOTE(review): presumably this matches orthologs purely by symbol name, and
# upper-casing could in principle produce duplicate labels; confirm.
mouse.index = mouse.index.str.upper()

In [28]:
merged = (
    # Align all three count tables on their row labels.
    pd.concat([human, mouse, lee], axis='columns', sort=True)
    # Drop rows that have no value in any of the human sample columns.
    .dropna(
        how='all',
        subset=[
            "Human_ABCD1_1", "Human_ABCD1_2", "Human_ABCD1_3",
            "Human_NT_1", "Human_NT_2", "Human_NT_3",
        ],
    )
)
merged.head()


Unnamed: 0,Human_ABCD1_1,Human_ABCD1_2,Human_ABCD1_3,Human_NT_1,Human_NT_2,Human_NT_3,Mouse_ABCD1_1,Mouse_ABCD1_2,Mouse_ABCD1_3,Mouse_NT_1,Mouse_NT_2,Mouse_NT_3,Lee_ccALD_3,Lee_ccALD_2,Lee_ccALD_1,Lee_WT_3,Lee_WT_2,Lee_WT_1
A1BG,3.682,1.217,1.84,1.06,0.994,0.0,0.0,1.857,0.948,0.0,0.0,0.0,3.063,9.973,2.829,0.0,6.158,12.428
A1BG-AS1,7.364,23.12,7.359,19.086,16.899,13.011,,,,,,,88.818,107.704,89.59,94.592,132.402,133.839
A1CF,1.841,3.65,1.84,0.0,0.0,0.0,3.092,14.854,4.74,1.888,7.838,5.674,0.0,7.978,5.658,0.0,4.105,5.736
A2M,135.314,120.466,122.348,201.463,283.305,233.339,,,,,,,32.669,25.929,34.893,30.845,23.606,70.744
A2ML1,2.762,0.0,0.92,2.121,1.988,0.0,,,,,,,987.204,2428.328,1041.135,605.593,1265.513,582.202


In [29]:
# Column-oriented JSON (the default orient of serialize).
serialize(df=merged, name='mouse_human_lee_merged_counts', how='columns')

### Mouse, Human, Lee, Jang Merged

In [60]:
# Jang expression table; the first column becomes the row index.
jang = pd.read_csv(
    './Jang/Jang.csv',
    index_col=0,
)
jang.head()

Unnamed: 0,Jang-hESCs_Control_71,Jang-hESCs_Control_72,Jang-hESCs_Control_73,Jang-iPSCs_Control_74,Jang-iPSCs_Control_75,Jang-iPSCs_Control_76,Jang-iPSCs_AMN_77,Jang-iPSCs_AMN_78,Jang-iPSCs_AMN_79,Jang-iPSCs_CCALD_80,Jang-iPSCs_CCALD_81,Jang-iPSCs_CCALD_82
A1BG,6.783346,6.776958,6.795812,6.728081,6.822618,6.733021,6.802505,6.777705,7.035105,6.718123,6.806478,6.807935
A1BG-AS1,6.576109,6.533098,6.452472,6.395807,6.646602,6.512293,6.548372,6.485149,6.42736,6.250021,6.582812,6.435398
A1CF,6.410723,6.460727,6.647604,6.474361,6.461917,6.553724,6.569535,6.380732,6.583363,6.459121,6.50203,6.564174
A2M,6.296271,6.198797,6.267357,6.22699,6.397614,6.078723,6.265612,6.303778,6.102758,6.176963,6.293784,6.317991
A2ML1,7.57093,7.473169,7.149522,7.324126,7.020197,7.128969,7.336106,7.137465,6.722119,6.671977,6.959798,6.954089


In [61]:
merged = (
    # Align all four tables on their row labels.
    pd.concat([human, mouse, lee, jang], axis='columns', sort=True)
    # Drop rows that have no value in any of the human sample columns.
    .dropna(
        how='all',
        subset=[
            "Human_ABCD1_1", "Human_ABCD1_2", "Human_ABCD1_3",
            "Human_NT_1", "Human_NT_2", "Human_NT_3",
        ],
    )
)
merged.head()

Unnamed: 0,Human_ABCD1_1,Human_ABCD1_2,Human_ABCD1_3,Human_NT_1,Human_NT_2,Human_NT_3,Mouse_ABCD1_1,Mouse_ABCD1_2,Mouse_ABCD1_3,Mouse_NT_1,...,Jang-hESCs_Control_73,Jang-iPSCs_Control_74,Jang-iPSCs_Control_75,Jang-iPSCs_Control_76,Jang-iPSCs_AMN_77,Jang-iPSCs_AMN_78,Jang-iPSCs_AMN_79,Jang-iPSCs_CCALD_80,Jang-iPSCs_CCALD_81,Jang-iPSCs_CCALD_82
A1BG,3.682,1.217,1.84,1.06,0.994,0.0,0.0,1.857,0.948,0.0,...,6.795812,6.728081,6.822618,6.733021,6.802505,6.777705,7.035105,6.718123,6.806478,6.807935
A1BG-AS1,7.364,23.12,7.359,19.086,16.899,13.011,,,,,...,6.452472,6.395807,6.646602,6.512293,6.548372,6.485149,6.42736,6.250021,6.582812,6.435398
A1CF,1.841,3.65,1.84,0.0,0.0,0.0,3.092,14.854,4.74,1.888,...,6.647604,6.474361,6.461917,6.553724,6.569535,6.380732,6.583363,6.459121,6.50203,6.564174
A2M,135.314,120.466,122.348,201.463,283.305,233.339,,,,,...,6.267357,6.22699,6.397614,6.078723,6.265612,6.303778,6.102758,6.176963,6.293784,6.317991
A2ML1,2.762,0.0,0.92,2.121,1.988,0.0,,,,,...,7.149522,7.324126,7.020197,7.128969,7.336106,7.137465,6.722119,6.671977,6.959798,6.954089


In [62]:
# Column-oriented JSON (the default orient of serialize).
serialize(df=merged, name='mouse_human_lee_jang_merged_counts', how='columns')

### Classes

In [63]:
# One row per sample column of `merged`; the labels land in a column named 0.
classes = merged.columns.to_frame()

# Split each "<system>_<condition>_<replicate>" label on "_" and pick the
# pieces by position. Tuple-unpacking the `.str` accessor (iterating over it)
# is deprecated in pandas and no longer works in newer releases, whereas
# positional `.str[i]` access is stable.
label_parts = classes[0].str.split('_')
classes['system'] = label_parts.str[0]
classes['condition'] = label_parts.str[1]
classes['replicate'] = label_parts.str[2]
classes

Unnamed: 0,0,system,condition,replicate
Human_ABCD1_1,Human_ABCD1_1,Human,ABCD1,1
Human_ABCD1_2,Human_ABCD1_2,Human,ABCD1,2
Human_ABCD1_3,Human_ABCD1_3,Human,ABCD1,3
Human_NT_1,Human_NT_1,Human,NT,1
Human_NT_2,Human_NT_2,Human,NT,2
Human_NT_3,Human_NT_3,Human,NT,3
Mouse_ABCD1_1,Mouse_ABCD1_1,Mouse,ABCD1,1
Mouse_ABCD1_2,Mouse_ABCD1_2,Mouse,ABCD1,2
Mouse_ABCD1_3,Mouse_ABCD1_3,Mouse,ABCD1,3
Mouse_NT_1,Mouse_NT_1,Mouse,NT,1


In [64]:
# Collapse the dataset-specific system names onto shared labels.
classes['system'] = classes['system'].replace({
    'Lee': 'iPSC',
    'Jang-iPSCs': 'iPSC',
    'Jang-hESCs': 'hESC',
})
# Harmonize condition names. Matching is on whole cell values, so Lee's
# 'ccALD' becomes 'ABCD1 KO' while Jang's 'CCALD' becomes 'ccALD'.
classes['condition'] = classes['condition'].replace({
    'ABCD1': 'ABCD1 KO',
    'ccALD': 'ABCD1 KO',
    'CCALD': 'ccALD',
    'NT': 'WT',
    'Control': 'WT',
})
classes[['system', 'condition']]

Unnamed: 0,system,condition
Human_ABCD1_1,Human,ABCD1 KO
Human_ABCD1_2,Human,ABCD1 KO
Human_ABCD1_3,Human,ABCD1 KO
Human_NT_1,Human,WT
Human_NT_2,Human,WT
Human_NT_3,Human,WT
Mouse_ABCD1_1,Mouse,ABCD1 KO
Mouse_ABCD1_2,Mouse,ABCD1 KO
Mouse_ABCD1_3,Mouse,ABCD1 KO
Mouse_NT_1,Mouse,WT


In [65]:
# One JSON record per sample (orient='index') holding system and condition.
serialize(df=classes.loc[:, ['system', 'condition']], name='mouse_human_lee_jang_merged_classes', how='index')