In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
# 2 most common patterns for np.sign(fc) for each compound with 4 sign. tests
PATTERN1=np.array([-1,-1,1,1])
PATTERN2=-1*PATTERN1
COMMON4 = ['aSYN--comb.', 'aSYN--INFg', 'comb.--UT', 'INFg--UT']

# Uncommon pattern
PATTERN3 = np.array([-1,-1,-1,-1])
UNCOMMON4 = ['aSYN--INFg', 'aSYN--UT',  'comb.--INFg.',  'comb.--UT']

ALL=['aSYN--comb.', 'aSYN--INFg', 'aSYN--UT',  'comb.--INFg.', 'comb.--UT', 'INFg--UT']

In [3]:
def index_dict(d_obj, ind, dtype):
    if dtype==str:
        return list(d_obj)[ind]
    return np.fromiter(d_obj, dtype=dtype)[ind]

def show(D, ind):
    for each in np.fromiter(D.values(), dtype=list)[ind]:
        print(each)

def show_effects(counts, positives):
    # counts sould be dict of the form
    # TEST: count (test and how many time found overall)
    print("Number of tests with positive fc in parenthesis")
    for key in counts.keys():
        print("{}: {} ({})".format(key, counts[key], positives[key]))
    
def show_counts(counts):
    # counts should be dict of the form
    # MTIME: count (compound and how many significant tests it had)
    n = max(counts.values())
    counts_arr=np.fromiter(counts.values(), dtype=int)
    
    n = max(counts.values())
    n_keys = len(counts.keys())
    
    print("number of compounds (masstime): {}".format(n_keys))
    print("max number of tests per compound: {}".format(n))
    
    for i in range(1,n+1):
        print("{} test in {} cases".format(i, sum(counts_arr==i)))
    return n
    
def counts2indices(counts):
    # counts sould be dict of the form
    # MTIME: count (compound and how many significant tests it had)
    n = max(counts.values())
    
    counts_arr=np.fromiter(counts.values(), dtype=int)
    result = [None]
    for i in range(1,n+1):
        result.append(np.where(counts_arr==i))
    
    return result

def show_tests(test, fc, inds):
    # test should be a dict of the form
    # MTIME: test (test is list of all test patterns)
    #for i in np.fromiter(test, dtype=list)[inds]:
    for num,i in enumerate(test.keys()):
        if(num not in inds[0]):
            continue
        for j in range(len(test[i])):
            tmp = test[i][j]
            mjono = tmp.split('--')
            sign = '+' if fc[i][j]>=0 else '-'
            print("{}-({})-{}  ".format(mjono[0],sign, mjono[1]),end='')
        print("({})".format(i))
        
def list_tests(test, fc, inds):
    # test should be a dict of the form
    # MTIME: test (test is list of all test patterns)
    result=defaultdict()
    #for i in np.fromiter(test, dtype=list)[inds]:
    for num, i in enumerate(test.keys()):
        if(num not in inds[0]):
            continue
        result[i]=[]
        for j in range(len(test[i])):
            tmp = test[i][j]
            mjono = tmp.split('--')
            sign = '+' if fc[i][j]>=0 else '-'
            s = "{}-({})-{}".format(mjono[0],sign, mjono[1])
            result[i].append(s)
    return result
        
        
def count_quad_patterns(counts, fc, tests, PATTERN):
    # Returns the number of times a PATTERN is found
    # in fc for each compound with 4 significant tests.
    # Pattern is a sign function over fold change, and
    # the 2 important ones are 
    # pattern1 = [-1,-1,1,1]
    # and
    # pattern2 = [1,1,-1,-1]
    
    common = COMMON4#['aSYN--comb.', 'aSYN--INFg', 'comb.--UT', 'INFg--UT']
    s=0
    n_compounds=0
    n_common=0
    
    for compound in counts:
        if(counts[compound]==4):
            n_compounds += 1
            if(tests[compound]!=common):
                continue
            n_common += 1
            if (np.sign(fc[compound])==PATTERN).sum() == 4:
                s+=1
    return s, s/n_common, s/n_compounds


def count_quad_outlier(counts, fc, tests):
    # Returns the number of times an uncommon pattern is found
    # pattern3 = [-1,-1,-1,-1]
    common = UNCOMMON4
    s=0
    n_compounds=0
    n_common=0
    
    for compound in counts:
        if(counts[compound]==4):
            n_compounds += 1
            if(tests[compound]!=common):
                continue
            n_common += 1
            if (np.sign(fc[compound])==PATTERN3).sum() == 4:
                s+=1
    return s, s/n_common, s/n_compounds

def name_quad_outlier(counts, fc, tests, names):
    # Returns the number of times an uncommon pattern is found
    # pattern3 = [-1,-1,-1,-1]
    common = UNCOMMON4
    result = []
    result2 = []
    s=0
    for compound in counts:
        if(tests[compound]!=common):
            continue
        if (np.sign(fc[compound])==PATTERN3).sum() == 4:
            s+=1
            result.append(names[compound])
            result2.append(compound)
    #print("number of uncommon findings: {}".format(s))
    return result,result2


def tests2sparse(tests):
    full=defaultdict()
    
    for compound in tests:
        #seed = ['aSYN--comb.', 'aSYN--INFg', 'aSYN--UT',  'comb.--INFg.', 'comb.--UT', 'INFg--UT']
        TMP = ['aSYN-(-)-comb.', 'aSYN-(-)-INFg', 'aSYN-(-)-UT',  'comb.-(-)-INFg.', 'comb.-(-)-UT', 'INFg-(-)-UT',\
               'aSYN-(+)-comb.', 'aSYN-(+)-INFg', 'aSYN-(+)-UT',  'comb.-(+)-INFg.', 'comb.-(+)-UT', 'INFg-(+)-UT']
        seed = ['****None****','****None****','****None****','****None****','****None****','****None****']
        for i in range(6):
            #if(ALL[i] in tests[compound]):
            #    continue
            #else:        
            #    seed[i]='***None***'
            if(TMP[i] in tests[compound]):
                seed[i]=TMP[i]
            elif(TMP[i+6] in tests[compound]):
                seed[i]=TMP[i+6]
            
        full[compound]=seed
        
    return full

def tests2sparse_unsigned(tests):
    full=defaultdict()
    
    for compound in tests:
        seed = ['aSYN--comb.', 'aSYN--INFg', 'aSYN--UT',  'comb.--INFg.', 'comb.--UT', 'INFg--UT']
        for i in range(6):
            if(ALL[i] in tests[compound]):
                continue
            else:        
                seed[i]='***None***'
            
        full[compound]=seed
        
    return full



### Without GAP fill information

In [15]:
#path = '../PDproj/cellresults/ttest/dumps/'
path = '../../PDproj/cellresults/ttest/ALPHA0.05FC1.0MINVAR0.125/'
filenames = [line.rstrip() for line in open(path +'filenames.txt')]
filenames

['cellshilicneg_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'cellshilicpos_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'cellslipidomicsneg_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'cellslipidomicspos_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'cellsRPneg_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'cellsRPpos_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'mediumHILICneg_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'mediumHILICpos_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'mediumRPneg_log2.csvALPHA0.05FC1.0MINVAR0.125.csv',
 'mediumRPpos_log2.csvALPHA0.05FC1.0MINVAR0.125.csv']

### With GAP fill information

In [4]:
#path = '../../PDproj/cellresults/ttest/withGF/ALPHA0.05FC1.5MINVAR0.250/'
path = '../../PDproj/cellresults/ttest/withGF/ALPHA0.05FC1.0MINVAR0.125/'
filenames = [line.rstrip() for line in open(path +'filenames.txt')]
filenames

['cellshilicnegALPHA0.05FC1.0MINVAR0.125.csv',
 'cellshilicposALPHA0.05FC1.0MINVAR0.125.csv',
 'cellslipidnegALPHA0.05FC1.0MINVAR0.125.csv',
 'cellslipidposALPHA0.05FC1.0MINVAR0.125.csv',
 'cellsRPnegALPHA0.05FC1.0MINVAR0.125.csv',
 'cellsRPposALPHA0.05FC1.0MINVAR0.125.csv',
 'mediumhilicnegALPHA0.05FC1.0MINVAR0.125.csv',
 'mediumhilicposALPHA0.05FC1.0MINVAR0.125.csv',
 'mediumRPnegALPHA0.05FC1.0MINVAR0.125.csv',
 'mediumRPposALPHA0.05FC1.0MINVAR0.125.csv']

### Extract information to dicts with MTIME as key, and some basic information with TEST as key

First define the necessary column names

In [60]:
col_masstime='masstime'
col_test='TEST'
col_FC='LOG-FC'
col_compound='Name'
missing_values=False

In [5]:
col_masstime=' masstime'
col_test=' test'
col_FC=' FC'
col_compound=' compound'
missing_values=False
mv_tol=10

In [8]:
filenum = 3
data = pd.read_csv(path + filenames[filenum], sep=';')
#print(filenames[filenum])

MTIME_test = defaultdict()
MTIME_counts = defaultdict()
MTIME_fc = defaultdict()
MTIME_name = defaultdict()
n_compounds=0
for mt in np.unique(data[col_masstime]):
    MTIME_test[mt]=[]
    MTIME_fc[mt]=[]
    MTIME_counts[mt]=0
    n_compounds+=1
    for i in range(data.shape[0]):
        if(data[col_masstime][i]==mt):
            MTIME_test[mt].append(data[col_test][i])
            MTIME_fc[mt].append(data[col_FC][i])
            MTIME_counts[mt]+=1
            MTIME_name[mt]=data[col_compound][i]

# Collect indices for each different count
inds=counts2indices(MTIME_counts)

TEST_counts = defaultdict()
TEST_pos = defaultdict()
for test in np.unique(data[col_test]):
    TEST_counts[test]=0
    TEST_pos[test]=0
    for i in range(data.shape[0]):
        if(data[col_test][i]==test):
            TEST_counts[test]+=1
            if(data[col_FC][i]>=0):
                TEST_pos[test]+=1

########################################################################
# Add count info to dataframe
data['count']=np.zeros((data.shape[0],1),dtype=int)
for i in range(data.shape[0]):
    data['count'][i]=MTIME_counts[data[col_masstime][i]]
    
########################################################################
#MTIME_full_test = tests2sparse(MTIME_test)
if missing_values:
    mvalues = data[' mv1'] + data[' mv2']

    MTIME_mv = defaultdict()
    dubious_compounds=[]
    for mt in np.unique(data[col_masstime]):
        MTIME_mv[mt]=[]
        for i in range(data.shape[0]):
            if(data[col_masstime][i]==mt):
                if data[' mv1'][i] + data[' mv2'][i] >= mv_tol:
                    MTIME_mv[mt].append(True)
                    dubious_compounds.append(mt)
                else:
                    MTIME_mv[mt].append(False)
        

    mv_counts=[]
    for each in MTIME_mv.values():
        mv_counts.append(np.sum(each))
    mv_counts=np.asarray(mv_counts, dtype=int)
    mv_bool=mv_counts>=1
    dubious_compounds=np.unique(dubious_compounds)
    
#############################################################################
print("----------------------------\nresults for file: \n{}\n".format(filenames[filenum]))

print("Number of tests: ",data.shape[0])
max_tests = show_counts(MTIME_counts)
print()
show_effects(TEST_counts,TEST_pos)
print()
for j in range(1,max_tests+1):
    FC = data.iloc[np.where(data['count']==j)][col_FC].to_numpy()
    #print("mean positive FC: {}".format(FC[FC>=0].mean()))
    #print("mean negative FC: {}".format(FC[FC<0].mean()))
    print("{} test(s) present, mean absolute FC: {}".format(j,np.abs(FC).mean()))
    
print()
q1 = count_quad_patterns(MTIME_counts, MTIME_fc, MTIME_test, PATTERN1)
q2 = count_quad_patterns(MTIME_counts, MTIME_fc, MTIME_test, PATTERN2)
r1, r2 = q1[2], q2[2]
print('common pattern {}'.format(COMMON4))
print("Number of {}: {} ({}%)".format(PATTERN1, q1[0],round(r1,3)))
print("Number of {}: {} ({}%)".format(PATTERN2, q2[0],round(r2,3)))
print("Number of outliers: {} ({}%)".format(inds[4][0].shape[0]-q1[0]-q2[0], round(1-r1-r2,3)))

if missing_values:
    print("\nProportion (number) of tests with missing with values: ",np.round(np.mean(mvalues>=mv_tol),3),\
                                                                       " (",np.sum(mvalues>=mv_tol),')')

    print("\n----------------------------\nresults for file: \n{}".format(filenames[filenum]))
    print("\nDubious compounds:")
    for cmp in dubious_compounds:
        print(cmp, MTIME_name[cmp])

----------------------------
results for file: 
cellslipidposALPHA0.05FC1.0MINVAR0.125.csv

Number of tests:  1320
number of compounds (masstime): 414
max number of tests per compound: 4
1 test in 46 cases
2 test in 78 cases
3 test in 42 cases
4 test in 248 cases

Number of tests with positive fc in parenthesis
INFg--UT: 294 (232)
aSYN--INFg: 306 (69)
aSYN--comb.: 361 (80)
comb.--UT: 359 (279)

1 test(s) present, mean absolute FC: 1.550342739130435
2 test(s) present, mean absolute FC: 1.6825803589743589
3 test(s) present, mean absolute FC: 1.6416019206349204
4 test(s) present, mean absolute FC: 2.1004659183467744

common pattern ['aSYN--comb.', 'aSYN--INFg', 'comb.--UT', 'INFg--UT']
Number of [-1 -1  1  1]: 34 (0.137%)
Number of [ 1  1 -1 -1]: 12 (0.048%)
Number of outliers: 202 (0.815%)


In [27]:
q3=count_quad_outlier(MTIME_counts,MTIME_fc,MTIME_test)
r3=q3[2]
print('*uncommon* pattern {}'.format(UNCOMMON4))
print("Number of {}: {} ({}%)]".format(PATTERN3, q3[0], round(r3,3)))
print(name_quad_outlier(MTIME_counts,MTIME_fc,MTIME_test,MTIME_name))

*uncommon* pattern ['aSYN--INFg', 'aSYN--UT', 'comb.--INFg.', 'comb.--UT']
(['[Similar to: Sultamicillin: ΔMass: -435.1099 Da]', 'Indole-3-acetic acid', 'Penicillin G', nan, nan, nan], ['160.04277@6.651', '176.07067@6.651', '335.10592@6.651', '380.1636@6.654', '435.0306@6.654', '515.16911@5.676'])


### Inspect the results

Common tests:
* INFg--UT: 
* aSYN--INFg: 
* aSYN--comb.: 
* comb.--UT: 

In [56]:
print("----------------------------\nresults for file: \n{}\n".format(filenames[filenum]))

print("Number of tests: ",data.shape[0])
max_tests = show_counts(MTIME_counts)
print()
show_effects(TEST_counts,TEST_pos)
print()
for j in range(1,max_tests+1):
    FC = data.iloc[np.where(data['count']==j)][col_FC].to_numpy()
    #print("mean positive FC: {}".format(FC[FC>=0].mean()))
    #print("mean negative FC: {}".format(FC[FC<0].mean()))
    print("{} test(s) present, mean absolute FC: {}".format(j,np.abs(FC).mean()))
    
print()
q1 = count_quad_patterns(MTIME_counts, MTIME_fc, MTIME_test, PATTERN1)
q2 = count_quad_patterns(MTIME_counts, MTIME_fc, MTIME_test, PATTERN2)
r1, r2 = q1[2], q2[2]
print('common pattern {}'.format(COMMON4))
print("Number of {}: {} ({}%)".format(PATTERN1, q1[0],round(r1,3)))
print("Number of {}: {} ({}%)".format(PATTERN2, q2[0],round(r2,3)))
print("Number of outliers: {} ({}%)".format(inds[4][0].shape[0]-q1[0]-q2[0], round(1-r1-r2,3)))

print("\nProportion (number) of tests with missing with values: ",np.round(np.mean(mvalues>=mv_tol),3),\
                                                                       " (",np.sum(mvalues>=mv_tol),')')

print("\n----------------------------\nresults for file: \n{}".format(filenames[filenum]))
print("\nDubious compounds:")
for cmp in dubious_compounds:
    print(cmp, MTIME_name[cmp])

----------------------------
results for file: 
cellsRPpos_log2.csvALPHA0.05FC1.0MINVAR0.125.csv

Number of tests:  453
number of compounds (masstime): 180
max number of tests per compound: 4
1 test in 48 cases
2 test in 54 cases
3 test in 15 cases
4 test in 63 cases

Number of tests with positive fc in parenthesis
INFg--UT: 99 (77)
aSYN--INFg: 75 (12)
aSYN--UT: 1 (0)
aSYN--comb.: 115 (14)
comb.--INFg.: 1 (0)
comb.--UT: 162 (135)

1 test(s) present, mean absolute FC: 1.0534166666666667
2 test(s) present, mean absolute FC: 1.267787037037037
3 test(s) present, mean absolute FC: 1.5702888888888884
4 test(s) present, mean absolute FC: 2.2801468253968253

common pattern ['aSYN--comb.', 'aSYN--INFg', 'comb.--UT', 'INFg--UT']
Number of [-1 -1  1  1]: 52 (0.825%)
Number of [ 1  1 -1 -1]: 10 (0.159%)
Number of outliers: 1 (0.016%)

Proportion (number) of tests with missing with values:  0.0  ( 0 )

----------------------------
results for file: 
cellsRPpos_log2.csvALPHA0.05FC1.0MINVAR0.125.csv


In [41]:
q3=count_quad_outlier(MTIME_counts,MTIME_fc,MTIME_test)
r3=q3[2]
print('*uncommon* pattern {}'.format(UNCOMMON4))
print("Number of {}: {} ({}%)]".format(PATTERN3, q3[0], round(r3,3)))
print(name_quad_outlier(MTIME_counts,MTIME_fc,MTIME_test,MTIME_name))

ZeroDivisionError: division by zero

In [244]:
print("----------------------------\nresults for file: \n{}".format(filenames[filenum]))
print("\nDubious compounds:")
for cmp in dubious_compounds:
    print(cmp, MTIME_name[cmp])

----------------------------
results for file: 
cellsRPnegALPHA0.05FC1.0MINVAR0.125.csv

Dubious compounds:
323.07971@0.562 nan
447.38528@11.182 stigmastane-3beta,5alpha,6beta-triol


In [136]:
print("----------------------------\nresults for file: \n{}".format(filenames[filenum]))
print("\nDubious compounds:")
for cmp in dubious_compounds:
    print(cmp)

----------------------------
results for file: 
cellslipidnegALPHA0.05FC1.0MINVAR0.125.csv

Dubious compounds:
1289.84621@17.639
1329.8026@16.62
1331.8182@17.725
1399.80566@17.726
1450.1332@14.64
1454.01302@22.421
1519.12313@14.645
1564.15253@16.206
584.52625@16.891
584.52628@16.894
662.47713@14.948
676.5291@17.223
686.62299@21.06
692.62049@19.31
694.63616@20.133
702.50845@15.827
704.56047@18.389
706.48578@16.986
706.67261@21.407
709.62933@21.058
718.53996@17.599
721.55095@13.629
731.56039@16.493
736.51428@13.855
741.52771@15.549
744.55584@17.692
780.59193@18.2
786.5661@15.838
798.60286@18.725
808.51165@15.801
808.51177@15.516
811.53064@16.518
816.4493@16.482
819.51905@11.976
820.54816@17.392
822.56376@17.881
824.46729@15.112
840.61276@16.474
863.55938@15.294
870.54829@14.939
880.53027@17.69
914.53587@16.113


### Uncommon pattern

In [64]:
name_quad_outlier(MTIME_counts,MTIME_fc,MTIME_test,MTIME_name)

([], [])

In [333]:
n_tests=4
tmp=list_tests(MTIME_test, MTIME_fc, inds[n_tests])
for lst in tests2sparse(tmp).values():
    for val in lst:
        print("{}   ".format(val), end='')
    print()

aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
****None****   aSYN-(-)-INFg   aSYN-(-)-UT   comb.-(-)-INFg.   comb.-(-)-UT   ****None****   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
****None****   aSYN-(-)-INFg   aSYN-(-)-UT   comb.-(-)-INFg.   comb.-(-)-UT   ****None****   
****None****   aSYN-(-)-INFg   aSYN-(-)-UT   comb.-(-)-INFg.   comb.-(-)-UT   ****None****   
****None****   aSYN-(-)-INFg   aSYN-(-)-UT   comb.-(-)-INFg.   comb.-(-)-UT   ****None****   


# Manual checks

In [218]:
n_tests=4
tmp=list_tests(MTIME_test, MTIME_fc, inds[n_tests])
for lst in tests2sparse(tmp).values():
    for val in lst:
        print("{}   ".format(val), end='')
    print()

aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(+)-comb.   aSYN-(+)-INFg   ****None****   ****None****   comb.-(-)-UT   INFg-(-)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(+)-UT   INFg-(+)-UT   
aSYN-(-)-comb.   aSYN-(-)-INFg   ****None****   ****None****   comb.-(

In [300]:
for lst in tests2sparse_unsigned(MTIME_test).values():
    for val in lst:
        print("{}   ".format(val), end='')
    print()

aSYN--comb.   aSYN--INFg   ***None***   ***None***   comb.--UT   INFg--UT   
aSYN--comb.   aSYN--INFg   ***None***   ***None***   comb.--UT   INFg--UT   
***None***   aSYN--INFg   aSYN--UT   comb.--INFg.   comb.--UT   ***None***   
aSYN--comb.   aSYN--INFg   ***None***   ***None***   comb.--UT   INFg--UT   
***None***   aSYN--INFg   ***None***   ***None***   ***None***   ***None***   
***None***   aSYN--INFg   aSYN--UT   comb.--INFg.   comb.--UT   ***None***   
***None***   aSYN--INFg   aSYN--UT   comb.--INFg.   comb.--UT   ***None***   
***None***   aSYN--INFg   aSYN--UT   comb.--INFg.   comb.--UT   ***None***   


In [171]:
for i in range(max_tests,0,-1):
    show_tests(MTIME_test, MTIME_fc, inds[i])
    print("-------------------------------------")

aSYN-(-)-comb.  aSYN-(-)-INFg  comb.-(+)-UT  INFg-(+)-UT  (1151.70665@12.726)
INFg-(+)-UT  comb.-(+)-UT  aSYN-(-)-comb.  aSYN-(-)-INFg  (1153.72215@13.283)
aSYN-(-)-comb.  aSYN-(-)-INFg  comb.-(+)-UT  INFg-(+)-UT  (1179.73769@14.233)
aSYN-(-)-comb.  aSYN-(-)-INFg  comb.-(+)-UT  INFg-(+)-UT  (1207.76851@15.542)
comb.-(+)-UT  INFg-(+)-UT  aSYN-(-)-comb.  aSYN-(-)-INFg  (1219.69346@12.732)
comb.-(+)-UT  INFg-(+)-UT  aSYN-(-)-comb.  aSYN-(-)-INFg  (1233.78384@15.494)
aSYN-(-)-comb.  aSYN-(-)-INFg  comb.-(+)-UT  INFg-(+)-UT  (1235.79971@16.695)
INFg-(+)-UT  aSYN-(-)-INFg  aSYN-(-)-comb.  comb.-(+)-UT  (1237.81503@17.059)
aSYN-(-)-comb.  aSYN-(-)-INFg  comb.-(+)-UT  INFg-(+)-UT  (1259.79955@15.672)
aSYN-(-)-INFg  aSYN-(-)-comb.  INFg-(+)-UT  comb.-(+)-UT  (1261.81512@16.618)
comb.-(+)-UT  INFg-(+)-UT  aSYN-(-)-comb.  aSYN-(-)-INFg  (1261.81555@16.085)
aSYN-(-)-comb.  aSYN-(-)-INFg  comb.-(+)-UT  INFg-(+)-UT  (1263.83096@17.724)
INFg-(+)-UT  aSYN-(-)-INFg  aSYN-(-)-comb.  comb.-(+)-UT  (1263.

In [501]:
MTIME_name['160.04277@6.651']

'[Similar to: Sultamicillin: ΔMass: -435.1099 Da]'

In [502]:
MTIME_name['176.07067@6.651']

'Indole-3-acetic acid'

In [503]:
MTIME_name['189.06927@6.649']

nan

In [504]:
MTIME_name['335.10592@6.651']

'Penicillin G'

In [505]:
MTIME_name['380.1636@6.654']

nan

In [500]:
print(MTIME_name['419.05785@6.651'])
print(MTIME_name['434.03366@6.651'])
print(MTIME_name['435.0306@6.654'])
print(MTIME_name['515.16911@5.676'])

nan
nan
nan
nan


In [None]:
MTIME_name['']

In [506]:
i=3
show_tests(MTIME_test,MTIME_fc,inds[i])
print()
#show_tests(MTIME_fc,inds[i])

aSYN-(+)-comb.  aSYN-(+)-INFg  comb.-(-)-UT  (298.27399@10.969)
aSYN-(-)-comb.  comb.-(+)-UT  INFg-(+)-UT  (774.655@6.207)
aSYN-(-)-comb.  comb.-(+)-UT  INFg-(+)-UT  (926.85226@6.246)



In [473]:
show(MTIME_test,inds[i])
print()
show(MTIME_fc,inds[i])

['aSYN--comb.', 'aSYN--INFg', 'comb.--UT', 'INFg--UT']
['aSYN--comb.', 'aSYN--INFg', 'comb.--UT', 'INFg--UT']
['aSYN--INFg', 'aSYN--UT', 'comb.--INFg.', 'comb.--UT']
['aSYN--comb.', 'aSYN--INFg', 'comb.--UT', 'INFg--UT']
['aSYN--INFg', 'aSYN--UT', 'comb.--INFg.', 'comb.--UT']
['aSYN--INFg', 'aSYN--UT', 'comb.--INFg.', 'comb.--UT']
['aSYN--INFg', 'aSYN--UT', 'comb.--INFg.', 'comb.--UT']

[-1.833, -1.886, 1.814, 1.866]
[-4.029, -4.049, 3.997, 4.017]
[-4.574, -4.037, -4.544, -4.007]
[-5.3, -5.247, 5.315, 5.261]
[-4.759, -4.172, -4.73, -4.143]
[-4.696, -4.359, -4.437, -4.1]
[-4.612, -4.32, -4.337, -4.045]


In [43]:
MTIME_name

defaultdict(None,
            {'116.07097@4.86': 'Proline',
             '118.06147@5.74': 'Guanidineacetic acid',
             '123.05559@0.665': 'Nicotinamide',
             '123.05561@0.592': 'Nicotinamide',
             '127.05048@5.447': 'Imidazoleacetic acid',
             '144.10208@5.123': '1-Aminocyclohexanecarboxylic acid',
             '144.10209@4.64': 'Stachydrine',
             '146.09257@3.718': '4-Guanidinobutyric acid',
             '179.04861@6.716': 'L-Cysteinylglycine',
             '204.12317@3.219': 'Diethyl glutamate',
             '204.1232@2.791': 'Acetyl-L-carnitine',
             '205.09724@4.043': 'DL-Tryptophan',
             '209.09228@4.045': 'Kynurenine',
             '219.11294@0.719': 'N-Acetylserotonin',
             '248.14927@4.052': 'N1-(1,3,5-Trimethyl-1H-pyrazol-4-yl)-2-cyano-3-(dimethylamino)acrylamide',
             '300.12062@5.099': '3-Acetoxy-2-hydroxypropyl 2-(trimethylammonio)ethyl phosphate',
             '308.091@6.716': 'L-Glutathione (

### Check if same compounds present in multiple modes

In [8]:
allNames4=defaultdict()
allNames=defaultdict()
n_global_compounds=0

for filenum in range(10):

    data = pd.read_csv(path + filenames[filenum], sep=';')
    print(filenames[filenum])

    
    MTIME_counts = defaultdict()
    n_compounds=0
    for mt in np.unique(data[' masstime']):
        MTIME_counts[mt]=0
        n_compounds+=1
        for i in range(data.shape[0]):
            if(data[' masstime'][i]==mt):
                MTIME_counts[mt]+=1
                
    n_global_compounds += n_compounds


    names = []
    for compound in MTIME_counts:
        names.append(compound)

    for compound in names:
        if (compound in allNames.keys()):
            allNames[compound]+=1
        else:
            allNames[compound]=1

    names4 = []
    for compound in MTIME_counts:
        if(MTIME_counts[compound]==4):
            names4.append(compound)

    for compound in names4:
        if (compound in allNames4.keys()):
            allNames4[compound]+=1
        else:
            allNames4[compound]=1
            
            


cellshilicnegALPHA0.05FC1.5MINVAR0.250.csv
cellshilicposALPHA0.05FC1.5MINVAR0.250.csv
cellsRPnegALPHA0.05FC1.5MINVAR0.250.csv
cellsRPposALPHA0.05FC1.5MINVAR0.250.csv
cellslipidnegALPHA0.05FC1.5MINVAR0.250.csv
cellslipidposALPHA0.05FC1.5MINVAR0.250.csv
mediumhilicnegALPHA0.05FC1.5MINVAR0.250.csv
mediumhilicposALPHA0.05FC1.5MINVAR0.250.csv
mediumRPnegALPHA0.05FC1.5MINVAR0.250.csv
mediumRPposALPHA0.05FC1.5MINVAR0.250.csv


In [9]:
len(allNames)

831

In [10]:
n_global_compounds

832

In [11]:
sum(np.fromiter(allNames.values(), dtype=int)!=1)

1

In [12]:
len(allNames4)

468

In [13]:
sum(np.fromiter(allNames4.values(), dtype=int)!=1)

1

In [14]:
np.where(np.fromiter(allNames4.values(), dtype=int)!=1)

(array([2]),)

In [17]:
i=0
for elem in allNames4:
    print(elem)
    i+=1
    if i==3:
        break

123.05559@0.665
123.05561@0.592
205.09724@4.043
