In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from scipy.sparse import csr_matrix
from matplotlib.lines import Line2D

In [47]:
from sklearn import cluster, metrics
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr
from itertools import combinations_with_replacement


In [3]:
# Data was filtered to preserve one sample type per experimental condition 
data = pd.read_csv("../data/filtered_data.csv",index_col=0)

In [4]:
# Non-standard normalization in order to preserve sparsity, as z-scores fail to execute
# due to mathematical error of dividing by zero
scaler = StandardScaler(with_mean=False)
scaler.fit(data.transpose())
norm_data = scaler.transform(data.transpose())

In [49]:
from sklearn.decomposition import FastICA
ica_transformer_1 = FastICA(n_components=30,random_state=0).fit(norm_data[0:300].transpose())
M1 = pd.DataFrame(ica_transformer_1.transform(norm_data[0:300].transpose()))
ica_transformer_2 = FastICA(n_components=30,random_state=1).fit(norm_data[0:300].transpose())
M2 = pd.DataFrame(ica_transformer_2.transform(norm_data[0:300].transpose()))

In [111]:
A1 = pd.DataFrame(ica_transformer_1.mixing_)
A1[1]

0     -0.135568
1     -0.401332
2     -0.063816
3     -1.499432
4     -0.728284
         ...   
295   -0.591462
296   -0.091589
297   -0.442536
298   -0.953881
299    0.151585
Name: 1, Length: 300, dtype: float64

In [105]:
A2 = pd.DataFrame(ica_transformer_2.mixing_)
A2[10]

0      5.931141
1     -0.615379
2     -4.480602
3     -5.861077
4     -5.555133
         ...   
295   -6.079356
296   -2.358306
297   -3.245262
298   -1.228660
299   -2.003154
Name: 10, Length: 300, dtype: float64

In [57]:
metrics = pd.DataFrame(index = range(0,len(M1.columns)),columns= range(0,len(M1.columns)))
for i in range(0,len(M1.columns)):
    for j in range(0,len(M2.columns)):
        metrics.loc[i][j] = abs(pearsonr(M1[i],M2[j])[0])
        metrics.loc[j][i] = abs(pearsonr(M1[i],M2[j])[0])
metrics= metrics.fillna(0)
metrics

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.001517,0.709877,0.0005243677,0.000663,0.001238,0.0004869593,0.0001662366,0.0003959634,0.001202122,0.00128514,...,0.0005200364,0.001778,0.0009151442,0.001442124,0.02471506,0.0001335657,0.001214525,0.000609552,0.0004214724,0.0002205878
1,0.709877,0.001626,4.24622e-05,0.000116,0.000616,3.663106e-05,6.944763e-05,3.26837e-05,5.264793e-05,4.10735e-06,...,6.057984e-05,5e-05,0.0001624373,2.525921e-05,0.009853058,2.374769e-05,7.632154e-05,1.613995e-05,2.897069e-05,3.480131e-05
2,0.000524,4.2e-05,2.609923e-05,4e-06,0.00016,1.334915e-07,1.125539e-05,9.892056e-06,2.035418e-05,8.184562e-06,...,2.868039e-06,3e-05,5.556858e-05,1.753094e-05,8.294961e-05,4.064873e-06,0.9999976,1.09854e-05,4.729903e-06,3.804989e-06
3,0.000663,0.000116,4.4756e-06,0.003291,0.248728,0.0003605387,0.002374167,0.0006686783,0.007143264,0.00174266,...,0.001663896,0.013805,0.01122348,0.0009824951,0.385373,0.0005386638,0.001263857,0.002886846,9.147541e-05,0.001116694
4,0.001238,0.000616,0.0001597105,0.248728,0.051639,0.0007782031,0.001954912,0.0004154666,0.003295827,0.002584327,...,0.002428465,0.003277,0.001358626,0.001495476,0.9201263,0.0001686574,0.0005536483,8.050691e-05,0.0005594895,0.0004664695
5,0.000487,3.7e-05,1.334915e-07,0.000361,0.000778,2.927574e-06,5.634875e-07,1.403956e-05,4.491249e-05,6.805686e-05,...,1.838587e-05,0.000129,8.383431e-05,9.421479e-06,0.003482365,4.443463e-06,5.934715e-06,7.397431e-06,1.945641e-05,5.190291e-06
6,0.000166,6.9e-05,1.125539e-05,0.002374,0.001955,5.634875e-07,0.0004117675,4.225235e-05,0.0001956929,1.202498e-05,...,8.798015e-05,0.000778,0.0007620368,0.0001743761,0.01730932,2.84657e-05,0.0001555897,0.0001210174,0.0001011226,1.878775e-05
7,0.000396,3.3e-05,9.892056e-06,0.000669,0.000415,1.403956e-05,4.225235e-05,0.9999996,1.438931e-05,1.63572e-06,...,1.142022e-05,8e-06,5.954839e-05,7.904692e-06,0.0006423654,6.182712e-07,8.854761e-06,2.196673e-06,6.042023e-08,2.463148e-06
8,0.001202,5.3e-05,2.035418e-05,0.007143,0.003296,4.491249e-05,0.0001956929,1.438931e-05,2.551866e-05,3.234135e-05,...,3.823124e-05,5e-06,5.477883e-05,1.391263e-05,0.004089683,4.171388e-07,2.605789e-05,2.315585e-05,1.417292e-05,3.379333e-06
9,0.001285,4e-06,8.184562e-06,0.001743,0.002584,6.805686e-05,1.202498e-05,1.63572e-06,3.234135e-05,1.013647e-06,...,3.477997e-06,1e-05,1.05421e-05,7.723674e-07,2.500299e-05,2.157732e-06,2.522313e-06,4.509156e-06,1.62717e-06,0.9999992


In [112]:
M = pd.DataFrame(index=M1.index)
for i,item in metrics.iteritems():
    for j in item.index:
        if item[j] == max(item) and max(item) > .9:
            M1_abs_max = float(M1[i].loc[M1[i].abs().nlargest(1).index])
            M2_abs_max = float(M2[j].loc[M2[j].abs().nlargest(1).index])
            A1_abs_max = float(A1[i].loc[A1[i].abs().nlargest(1).index])
            A2_abs_max = float(A2[j].loc[A2[j].abs().nlargest(1).index])
            if M1_abs_max < 0:
                M1_corrected = -M1[i]
            else:
                M1_corrected = M1[i]
            if M2_abs_max < 0:
                M2_corrected = -M2[i]
            else:
                M2_corrected = M2[i]
            if A1_abs_max < 0:
                A1_corrected = -A1[i]
            else:
                A1_corrected = A1[i]
            if A2_abs_max < 0:
                A2_corrected = -A2[i]
            else:
                A2_corrected = A2[i]
            M[str(i)+"_"+str(j)] = (M1_corrected+M2_corrected)/2 
            A[str(i)+"_"+str(j)] = (A1_corrected+M2_corrected)/2 

In [113]:
A

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17_21,20_23,21_17,22_10,23_28,24_4,26_2,27_16,28_23,29_9
0,0.102851,-0.135568,-0.362526,-0.206713,-5.175461,3.698771,-3.457578,0.436994,-0.784585,-0.075767,...,0.251001,0.758656,-0.010461,-2.960234,-0.429580,-0.172649,0.208774,0.284835,0.270197,0.311323
1,0.532319,-0.401332,-0.267678,-0.324219,0.275862,2.813886,-0.960526,-1.503000,0.345004,-0.494619,...,-0.168267,0.413339,0.415178,0.303958,0.089364,1.367448,0.130191,-0.097193,0.296258,167.365199
2,0.719260,-0.063816,0.136733,0.496026,6.264853,1.386231,0.630185,-3.114027,1.107545,-1.451012,...,0.430867,0.804793,0.693198,2.241473,0.058970,2.914002,0.650727,-0.015239,-0.281985,0.412887
3,1.353137,-1.499432,-0.192706,1.079097,4.496191,3.131673,-2.583370,-1.166425,0.645099,-0.042078,...,-0.461384,1.225166,2.448517,2.934933,0.682524,2.562134,1.877591,-0.210831,0.850591,1.282955
4,-0.064136,-0.728284,-0.588772,2.243621,0.898927,0.292039,-0.702866,-0.934779,-1.566747,-1.137498,...,-0.659593,0.478735,1.070296,2.783329,0.365318,1.411566,1.409696,0.638181,0.637690,3.926503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,2.003539,-0.591462,0.481936,0.431935,2.926714,1.470612,-1.237476,-2.022823,-2.278118,1.477271,...,0.496009,0.274336,2.635693,3.020770,1.276655,6.231628,1.318643,0.205489,0.969638,0.002818
296,2.255845,-0.091589,1.400618,1.523412,3.532671,1.538871,-1.654932,-1.655181,-1.125908,1.007382,...,0.461843,0.372753,2.159561,1.169725,0.806665,5.711722,1.523637,0.707261,0.129723,0.082810
297,1.991770,-0.442536,1.305170,2.104647,3.071718,1.991205,-0.343082,-0.654182,-3.519359,0.355210,...,0.697461,0.546030,1.787114,1.610106,1.232113,5.730579,0.377044,0.541108,-0.035262,-0.176813
298,1.332100,-0.953881,5.523860,0.415658,6.263912,2.393721,-1.890481,-0.886774,-3.924620,1.056737,...,0.833577,-0.166900,3.024156,0.590097,0.790032,7.530871,0.444520,0.125560,0.634574,0.051482
