In [1]:
import pandas as pd
import sympy as sy

In [2]:
# Load the predicted ethanol measurements. A later cell reads the measurement labels from
# the column pandas names 'Unnamed: 0' and the values from 'Adj. Rel. Abundance'.
inputData = pd.read_csv('Ethanol Prediction.csv')

In [3]:
# NOTE(review): this relies on IPython automagic (`run` is shorthand for `%run`). Later cells
# use names that this notebook never imports or defines (np, op, strSiteElements, condenseStr,
# uEl, expandAndFragment, computeSubs, ...), so they presumably come from this script - confirm.
run calcIsotopologues.py

In [4]:
def computeMNUValues(MNSolution, key):
    '''
    Add U values, site-specific deltas and clumped deltas to an MN solution dataframe.

    The dataframe is modified in place and also returned. It must already hold the columns
    key + ' Percent Abundance', 'U' + key and 'Stochastic U'; rows whose index label has no
    '|' must additionally hold 'Number' and 'Composition'.

    Key should be "MN", i.e. "M1".

    Columns written:
        'U Values': percent abundance multiplied by the 'U' + key column.
        'Clumped Deltas': for rows whose label contains '|', 1000 * (U / stochastic U - 1),
            with magnitudes below 1e-10 reported as 0; 'N/A' for all other rows.
        'Deltas': for rows whose label does not contain '|', the result of
            op.ratioToDelta on the per-atom ratio (U / Number); 'N/A' for all other rows.
    '''
    abundanceColumn = key + ' Percent Abundance'
    MNSolution['U Values'] = MNSolution[abundanceColumn] * MNSolution["U" + key]

    #clumped deltas: the raw value is computed for every row, then kept only where it applies
    measuredU = MNSolution['U Values'].values
    stochasticU = MNSolution['Stochastic U'].values
    rawClumped = [1000 * (measured / stochastic - 1)
                  for measured, stochastic in zip(measuredU, stochasticU)]

    clumpedColumn = []
    for label, rawDelta in zip(MNSolution.index, rawClumped):
        if '|' not in label:
            clumpedColumn.append('N/A')
        elif np.abs(rawDelta) < 10**(-10):
            #treat round-off sized deviations from stochastic as exactly zero
            clumpedColumn.append(0)
        else:
            clumpedColumn.append(rawDelta)

    #site specific deltas: only defined for rows that name a single substitution
    siteColumn = []
    for label, row in MNSolution.iterrows():
        if '|' in label:
            siteColumn.append('N/A')
            continue
        siteRatio = row['U Values'] / row['Number']
        siteColumn.append(op.ratioToDelta(row['Composition'], siteRatio))

    MNSolution['Deltas'] = siteColumn
    MNSolution['Clumped Deltas'] = clumpedColumn

    return MNSolution

def GJElim(Matrix, augMatrix = False):
    '''
    Reduce a matrix to reduced row echelon form by Gauss-Jordan elimination.

    Inputs:
        Matrix: A 2-D numpy array. It is not modified; all work is done on a copy. Integer
            and boolean arrays are promoted to float first, because the row operations
            produce fractional values that such arrays cannot store.
        augMatrix: If true, the last column is treated as the right-hand side of an
            augmented system: it takes part in the row operations but is never searched
            for a pivot.

    Outputs:
        M: The reduced matrix.
        rank: The number of pivots found.
        storage: A list of copies of the working matrix, one taken before each column is
            processed and one of the final result.

    Note: pivots are detected with an exact comparison against zero and the first nonzero
    entry in the column is used, as in the original implementation.
    '''
    M = Matrix.copy()
    if M.dtype.kind in 'biu':
        #integer/boolean storage would truncate the normalised rows
        M = M.astype(float)
    rows, cols = M.shape

    colLimit = cols - 1 if augMatrix else cols

    r = 0
    c = 0
    rank = 0
    storage = []
    while r < rows and c < colLimit:
        storage.append(M.copy())

        #If there is a nonzero entry in the column (at or below row r), pivot and eliminate.
        candidates = M[r:, c] != 0
        if candidates.any():
            #argmax of a boolean array is the position of its first True
            pivotRow = candidates.argmax() + r
            rank += 1

            #bring the pivot row into position and scale the pivot to 1
            M[[r, pivotRow]] = M[[pivotRow, r]]
            M[r] = M[r] / M[r, c]

            #clear the pivot column in every other row; the pivot is now 1, so the
            #multiplier is simply that row's entry in the pivot column
            for i in range(rows):
                if i != r:
                    M[i] -= M[i, c] * M[r]

            r += 1

        c += 1

    storage.append(M.copy())

    return M, rank, storage

Both to simulate a measurement and to reconstruct the molecule based on measurements, we begin by defining basic information about the experiment. This is included in the top block of the .csv file. It includes our sites, the number of atoms present ("Stoich"), and how fragments sample the sites. It also includes site-specific delta values that will be used to simulate the measurements. 

In [5]:
############################################################################################################
###  Describe the molecule and the fragments we will observe.                                            ###
###                                                                                                      ###
###  "IDList" names the sites, "elIDs" gives the chemical element of each site, "numberAtSite" gives     ###
###  the number of atoms at each site, and "deltas" gives delta 13C vs PDB, 17O vs VSMOW, 33S vs CDT,    ###
###  15N vs AIR, and D vs VSMOW. 18O and 34/36S are assigned delta values based on mass scaling laws.    ###
###  The details can be found in the basicDeltaOperations file.                                          ###
###                                                                                                      ###
###  Each fragment has one entry per site: 1 if the site is retained, 'x' if it is lost. Everything is   ###
###  collected in a single dataFrame (one row per site) so it is easy to check.                          ###
############################################################################################################

#The "set of element isotopes" for each chemical element
subsDict = {'H':(0,1),'N':(0,1),'C':(0,1),'O':(0,1,2),'S':(0,1,2,4)}

##### SITES #####
IDList = ['C-methyl','C-alpha','O-alcohol','H-methyl','H-alpha','H-alcohol']
elIDs = ['C','C','O','H','H','H']
numberAtSite = [1,1,1,3,2,1]

deltas = [0,0,0,0,0,0]

##### FRAGMENTS #####
frag_OH = [1,1,'x',1,1,'x']
frag_methyl = ['x',1,1,'x',1,1]

#One row per site, one column per property. dtype=object keeps the mixed 1/'x' entries and
#the integer counts as plain Python objects in every column.
infoDf = pd.DataFrame(
    {
        'IDS': elIDs,
        'Number': numberAtSite,
        'deltas': deltas,
        'frag_OH': frag_OH,
        'frag_methyl': frag_methyl,
    },
    index = IDList,
    dtype = object,
)

In [6]:
# Refer to the site table by the shorter name df (an alias of infoDf, not a copy) and display it.
df = infoDf
df

Unnamed: 0,IDS,Number,deltas,frag_OH,frag_methyl
C-methyl,C,1,0,1,x
C-alpha,C,1,0,1,1
O-alcohol,O,1,0,x,1
H-methyl,H,3,0,1,x
H-alpha,H,2,0,1,1
H-alcohol,H,1,0,x,1


In [7]:
df.index[1]

'C-alpha'

In [8]:
# Enumerate the isotopologues of the molecule described by df and their concentrations.
# The helpers are not defined in this notebook (presumably they come from calcIsotopologues.py).
siteElements = strSiteElements(df)
siteIsotopes, multinomialCoeff = calculateSetsOfSiteIsotopes(df)
bigA, SN = calcAllIsotopologues(siteIsotopes, multinomialCoeff)
concentrationArray = siteSpecificConcentrations(df)
d = calculateIsotopologueConcentrations(bigA, SN, concentrationArray)

# Re-key every isotopologue by its condensed string (a string of digits, e.g. '000000000'
# for the full string '000(0, 0, 0)(0, 0)0') and record the properties used downstream.
byCondensed = {}
for i, v in d.items():
    condensed = condenseStr(i)
    byCondensed[condensed] = {
        'Number': v['num'],
        'full': i,
        'Conc': v['Conc'],
        # Mass is the sum of the digits of the condensed string (a plain Python int)
        'Mass': sum(int(digit) for digit in condensed),
        'Subs': ''.join(uEl(element, int(number)) for element, number in zip(siteElements, condensed)),
    }

# Group the isotopologues by mass. Only masses 0 through 4 are kept; anything heavier is ignored.
byMass = {mass: {} for mass in range(5)}
for condensed, info in byCondensed.items():
    if info['Mass'] in byMass:
        byMass[info['Mass']][condensed] = info

M0, M1, M2, M3, M4 = (byMass[mass] for mass in range(5))
    

In [9]:
inputData

Unnamed: 0.1,Unnamed: 0,Abs. Abundance,Rel. Abundance,Adj. Rel. Abundance
0,13C U Value,2.247440e-02,2.247440e-02,2.247440e-02
1,17O U Value,3.799000e-04,3.799000e-04,3.799000e-04
2,18O U Value,2.005200e-03,2.005200e-03,2.005200e-03
3,13C13C U Value,1.262747e-04,1.262747e-04,1.262747e-04
4,13C18O U Value,4.506567e-05,4.506567e-05,4.506567e-05
...,...,...,...,...
77,M4 Methyl 13C/17O/D,2.275675e-11,7.884027e-05,7.884027e-05
78,M4 Methyl 13C/17O,2.215107e-11,7.674190e-05,7.674190e-05
79,M4 Methyl 13C/18O/D,1.026234e-08,3.555366e-02,3.555366e-02
80,M4 Methyl 13C/18O,2.570523e-07,8.905520e-01,8.905520e-01


In [10]:
# Collect the M2 fragment measurements as {fragment: {substitution: adjusted relative abundance}}.
# A usable label has at least three space-separated tokens: token 0 starts with the mass
# selection, token 1 is the fragment and token 2 is the substitution. An empty substitution
# token is recorded as 'Unsub'.
M2Measurements = {}

for _, row in inputData.iterrows():
    label = row['Unnamed: 0']
    tokens = label.split(' ')
    if len(tokens) <= 2:
        continue

    frag, sub = tokens[1], tokens[2]
    if sub == '':
        sub = 'Unsub'

    if label[:2] == 'M2':
        M2Measurements.setdefault(frag, {})[sub] = row['Adj. Rel. Abundance']

# One column per fragment, one row per substitution; substitutions a fragment lacks become 0.
M2Df = pd.DataFrame.from_dict(M2Measurements).fillna(0)

In [11]:
# Use the full option name: the abbreviation "precision" is only resolved by pattern matching
# and fails in recent pandas releases, where it matches more than one option.
pd.set_option("display.precision", 15)
M2Df

Unnamed: 0,OH,Methyl
D,0.00019298015072,0.002530036569278
D/D,0.000112230102215,3.3669030664e-05
Unsub,0.927615492076358,0.002462698507949
13C,0.00556896905411,0.060842593169599
13C/D,0.008096764924283,0.002429029477285
13C/13C,0.058413563692315,0.0
17O/D,0.0,8.2119059768e-05
17O,0.0,0.002056927094395
18O,0.0,0.927588119056435
13C/17O,0.0,0.001974808034627


In [12]:
# First pass: use the Methyl fragment only. fragments and fragKeys are parallel lists
# (the fragment at position j is labelled fragKeys[j] in the loop that follows).
fragments = [frag_methyl]
fragKeys = ['Methyl']

In [13]:
M0

{'000000000': {'Number': 1,
  'full': '000(0, 0, 0)(0, 0)0',
  'Conc': 0.9746607562824977,
  'Mass': 0,
  'Subs': ''}}

In [14]:
# Concentration of the unsubstituted isotopologue, used to normalise the M2 concentrations
UnsubConc = M0['000000000']['Conc']

# The stochastic U value does not depend on the fragment, so compute it once per isotopologue
for value in M2.values():
    value['Stochastic U'] = value['Conc'] / UnsubConc

# fragments and fragKeys are parallel lists; fail loudly rather than silently mislabel
assert len(fragments) == len(fragKeys), "fragments and fragKeys must have the same length"
atomsPerSite = df['Number'].values

#For each fragment we will observe
for fragKey, fragment in zip(fragKeys, fragments):
    #compute what each M2 isotopologue looks like after fragmentation and record it on the entry
    for isotopologue, value in M2.items():
        newIsotopologue = expandAndFragment(fragment, atomsPerSite, isotopologue)
        value[fragKey + ' Identity'] = newIsotopologue

        sub = computeSubs(newIsotopologue, siteElements)

        #an empty substitution string means the fragment carries no substitution
        if sub == '':
            sub = 'Unsub'

        value[fragKey + ' Subs'] = sub

In [15]:
M2

{'000000011': {'Number': 2,
  'full': '000(0, 0, 0)(0, 1)1',
  'Conc': 4.729283541584001e-08,
  'Mass': 2,
  'Subs': 'DD',
  'Stochastic U': 4.852235520000002e-08,
  'Methyl Identity': 'x00xxx011',
  'Methyl Subs': 'D/D'},
 '000000110': {'Number': 1,
  'full': '000(0, 0, 0)(1, 1)0',
  'Conc': 2.364641770792e-08,
  'Mass': 2,
  'Subs': 'DD',
  'Stochastic U': 2.4261177600000008e-08,
  'Methyl Identity': 'x00xxx110',
  'Methyl Subs': 'D/D'},
 '000001001': {'Number': 3,
  'full': '000(0, 0, 1)(0, 0)1',
  'Conc': 7.093925312376001e-08,
  'Mass': 2,
  'Subs': 'DD',
  'Stochastic U': 7.278353280000003e-08,
  'Methyl Identity': 'x00xxx001',
  'Methyl Subs': 'D'},
 '000001010': {'Number': 6,
  'full': '000(0, 0, 1)(0, 1)0',
  'Conc': 1.4187850624752e-07,
  'Mass': 2,
  'Subs': 'DD',
  'Stochastic U': 1.4556706560000004e-07,
  'Methyl Identity': 'x00xxx010',
  'Methyl Subs': 'D'},
 '000011000': {'Number': 3,
  'full': '000(0, 1, 1)(0, 0)0',
  'Conc': 7.093925312376e-08,
  'Mass': 2,
  'Subs': '

In [16]:
# Element and atom count of every site, taken from the site table
elIDs = df['IDS'].values
numberAtSite = df['Number'].values

# (site name, atom count) pairs, then the site name repeated once per atom so that
# siteNameList has one entry per atom of the molecule
siteListN = list(zip(df.index, numberAtSite))
siteNameList = [name for name, count in siteListN for _ in range(count)]

In [17]:
def filterEmptyStr(string):
    '''Return False for the empty string and True for anything else.'''
    return not (string == '')
    
# One row per M2 isotopologue (indexed by its condensed string), one column per recorded property
Isotopologues = (
    pd.DataFrame.from_dict(M2).T
    .rename(columns={'Conc':'Stochastic',"Subs": "Composition"})
)

# Build a readable label for every isotopologue: each substituted atom is written as
# "<substitution> <site name>", and the atoms are joined with '   |   '. The label contains
# '|' exactly when more than one atom is substituted.
preciseStrings = []
for condensed in Isotopologues.index:
    # uEl presumably returns '' for an unsubstituted atom; those atoms are left out of the label
    Subs = [uEl(element, int(number)) for element, number in zip(siteElements, condensed)]
    Precise = [x + " " + y for x, y in zip(Subs, siteNameList) if x != '']
    preciseStrings.append('   |   '.join(Precise))

Isotopologues['Precise Identity'] = preciseStrings
Isotopologues = Isotopologues.sort_values('Composition')

In [18]:
Isotopologues

Unnamed: 0,Number,full,Stochastic,Mass,Composition,Stochastic U,Methyl Identity,Methyl Subs,Precise Identity
110000000,1,"110(0, 0, 0)(0, 0)0",0.0001230749593576,2,13C13C,0.00012627466384,x10xxx000,13C,13C C-methyl | 13C C-alpha
11000000,1,"011(0, 0, 0)(0, 0)0",4.16083873740407e-06,2,13C17O,4.26901228e-06,x11xxx000,13C/17O,13C C-alpha | 17O O-alcohol
101000000,1,"101(0, 0, 0)(0, 0)0",4.16083873740407e-06,2,13C17O,4.26901228e-06,x01xxx000,17O,13C C-methyl | 17O O-alcohol
100001000,3,"100(0, 0, 1)(0, 0)0",5.11786450438056e-06,2,13CD,5.250918816e-06,x00xxx000,Unsub,13C C-methyl | D H-methyl
100000010,2,"100(0, 0, 0)(0, 1)0",3.41190966958704e-06,2,13CD,3.500612544e-06,x00xxx010,D,13C C-methyl | D H-alpha
100000001,1,"100(0, 0, 0)(0, 0)1",1.70595483479352e-06,2,13CD,1.750306272e-06,x00xxx001,D,13C C-methyl | D H-alcohol
10000001,1,"010(0, 0, 0)(0, 0)1",1.70595483479352e-06,2,13CD,1.750306272e-06,x10xxx001,13C/D,13C C-alpha | D H-alcohol
10000010,2,"010(0, 0, 0)(0, 1)0",3.41190966958704e-06,2,13CD,3.500612544e-06,x10xxx010,13C/D,13C C-alpha | D H-alpha
10001000,3,"010(0, 0, 1)(0, 0)0",5.11786450438056e-06,2,13CD,5.250918816e-06,x10xxx000,13C,13C C-alpha | D H-methyl
1000001,1,"001(0, 0, 0)(0, 0)1",5.76738192555137e-08,2,17OD,5.9173224e-08,x01xxx001,17O/D,17O O-alcohol | D H-alcohol


In [19]:
# Use the full option name: the abbreviation "precision" is only resolved by pattern matching
# and fails in recent pandas releases, where it matches more than one option.
pd.set_option("display.precision", 15)
pd.set_option('display.max_rows', 500)
Isotopologues

Unnamed: 0,Number,full,Stochastic,Mass,Composition,Stochastic U,Methyl Identity,Methyl Subs,Precise Identity
110000000,1,"110(0, 0, 0)(0, 0)0",0.0001230749593576,2,13C13C,0.00012627466384,x10xxx000,13C,13C C-methyl | 13C C-alpha
11000000,1,"011(0, 0, 0)(0, 0)0",4.16083873740407e-06,2,13C17O,4.26901228e-06,x11xxx000,13C/17O,13C C-alpha | 17O O-alcohol
101000000,1,"101(0, 0, 0)(0, 0)0",4.16083873740407e-06,2,13C17O,4.26901228e-06,x01xxx000,17O,13C C-methyl | 17O O-alcohol
100001000,3,"100(0, 0, 1)(0, 0)0",5.11786450438056e-06,2,13CD,5.250918816e-06,x00xxx000,Unsub,13C C-methyl | D H-methyl
100000010,2,"100(0, 0, 0)(0, 1)0",3.41190966958704e-06,2,13CD,3.500612544e-06,x00xxx010,D,13C C-methyl | D H-alpha
100000001,1,"100(0, 0, 0)(0, 0)1",1.70595483479352e-06,2,13CD,1.750306272e-06,x00xxx001,D,13C C-methyl | D H-alcohol
10000001,1,"010(0, 0, 0)(0, 0)1",1.70595483479352e-06,2,13CD,1.750306272e-06,x10xxx001,13C/D,13C C-alpha | D H-alcohol
10000010,2,"010(0, 0, 0)(0, 1)0",3.41190966958704e-06,2,13CD,3.500612544e-06,x10xxx010,13C/D,13C C-alpha | D H-alpha
10001000,3,"010(0, 0, 1)(0, 0)0",5.11786450438056e-06,2,13CD,5.250918816e-06,x10xxx000,13C,13C C-alpha | D H-methyl
1000001,1,"001(0, 0, 0)(0, 0)1",5.76738192555137e-08,2,17OD,5.9173224e-08,x01xxx001,17O/D,17O O-alcohol | D H-alcohol


In [20]:
M2Df

Unnamed: 0,OH,Methyl
D,0.00019298015072,0.002530036569278
D/D,0.000112230102215,3.3669030664e-05
Unsub,0.927615492076358,0.002462698507949
13C,0.00556896905411,0.060842593169599
13C/D,0.008096764924283,0.002429029477285
13C/13C,0.058413563692315,0.0
17O/D,0.0,8.2119059768e-05
17O,0.0,0.002056927094395
18O,0.0,0.927588119056435
13C/17O,0.0,0.001974808034627


In [21]:
# NOTE(review): bare literal that is neither assigned nor referenced by the cells visible
# here - looks like leftover scratch work; consider removing the cell.
0.000201721163959

0.000201721163959

In [22]:
# Build the composition matrix (one row per measurement, one column per isotopologue)
# and the matching measurement vector.
CMatrix = []
MeasurementVector = []

# Closure: the relative abundances of all isotopologues sum to 1
closure = np.ones(len(Isotopologues['Number']),dtype = int)
CMatrix.append(closure)
MeasurementVector.append(1)

for fragment in fragKeys:
    print(fragment)
    IsotopologueFragments = Isotopologues[fragment + ' Subs']
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
    for sub, v in M2Df[fragment].items():
        # 1 where the isotopologue shows up as this substitution in the fragment, 0 elsewhere
        c = list(IsotopologueFragments.isin([sub]) * 1)
        CMatrix.append(c)
        MeasurementVector.append(v)

Methyl


In [23]:
Isotopologues

Unnamed: 0,Number,full,Stochastic,Mass,Composition,Stochastic U,Methyl Identity,Methyl Subs,Precise Identity
110000000,1,"110(0, 0, 0)(0, 0)0",0.0001230749593576,2,13C13C,0.00012627466384,x10xxx000,13C,13C C-methyl | 13C C-alpha
11000000,1,"011(0, 0, 0)(0, 0)0",4.16083873740407e-06,2,13C17O,4.26901228e-06,x11xxx000,13C/17O,13C C-alpha | 17O O-alcohol
101000000,1,"101(0, 0, 0)(0, 0)0",4.16083873740407e-06,2,13C17O,4.26901228e-06,x01xxx000,17O,13C C-methyl | 17O O-alcohol
100001000,3,"100(0, 0, 1)(0, 0)0",5.11786450438056e-06,2,13CD,5.250918816e-06,x00xxx000,Unsub,13C C-methyl | D H-methyl
100000010,2,"100(0, 0, 0)(0, 1)0",3.41190966958704e-06,2,13CD,3.500612544e-06,x00xxx010,D,13C C-methyl | D H-alpha
100000001,1,"100(0, 0, 0)(0, 0)1",1.70595483479352e-06,2,13CD,1.750306272e-06,x00xxx001,D,13C C-methyl | D H-alcohol
10000001,1,"010(0, 0, 0)(0, 0)1",1.70595483479352e-06,2,13CD,1.750306272e-06,x10xxx001,13C/D,13C C-alpha | D H-alcohol
10000010,2,"010(0, 0, 0)(0, 1)0",3.41190966958704e-06,2,13CD,3.500612544e-06,x10xxx010,13C/D,13C C-alpha | D H-alpha
10001000,3,"010(0, 0, 1)(0, 0)0",5.11786450438056e-06,2,13CD,5.250918816e-06,x10xxx000,13C,13C C-alpha | D H-methyl
1000001,1,"001(0, 0, 0)(0, 0)1",5.76738192555137e-08,2,17OD,5.9173224e-08,x01xxx001,17O/D,17O O-alcohol | D H-alcohol


In [24]:
IsotopologueFragments

110000000        13C
011000000    13C/17O
101000000        17O
100001000      Unsub
100000010          D
100000001          D
010000001      13C/D
010000010      13C/D
010001000        13C
001000001      17O/D
001000010      17O/D
001001000        17O
002000000        18O
000001010          D
000001001          D
000000110        D/D
000011000      Unsub
000000011        D/D
Name: Methyl Subs, dtype: object

In [25]:
sy.Matrix(CMatrix[1]).T

Matrix([[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0]])

In [26]:
MeasurementVector

[1,
 0.0025300365692779705,
 3.366903066438977e-05,
 0.002462698507949191,
 0.06084259316959938,
 0.002429029477284801,
 0.0,
 8.211905976760191e-05,
 0.002056927094394584,
 0.9275881190564348,
 0.0019748080346269816]

We also set up the linear problem we must solve. We define a "Composition Matrix", where each row corresponds to an individual measurement and each column corresponds to an isotopologue. We also define a "Measurement Vector", where each entry gives the result of an individual measurement. The composition matrix maps a vector of the relative concentrations of each isotopologue in M2 space to the observed measurements; solving this linear system therefore gives the relative concentrations of the isotopologues (or of sums of isotopologues) in M2 space.

Additionally, we define "Full Matrix Order" and "Single Fragment Order" vectors. These track what measurement each row of the composition matrix corresponds to. For example, the first row gives closure, the second gives the "18O" substitution of the highest mass fragment, the third gives the "13C/13C" substitution of the highest mass fragment, and so forth. Each fragment repeats the same possible substitutions in order. 

We put the information into an augmented matrix to prepare to solve

In [27]:
# Append the measurement vector to the composition matrix as its last column
comp = np.asarray(CMatrix, dtype=float)
meas = np.asarray(MeasurementVector, dtype=float)
AugMatrix = np.hstack((comp, meas.reshape(-1, 1)))

In [28]:
sy.Matrix(AugMatrix)

Matrix([
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,                 1.0],
[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.00253003656927797],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 3.36690306643898e-5],
[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.00246269850794919],
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,  0.0608425931695994],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,  0.0024290294772848],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,                 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.21190597676019e-5],
[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0

And solve

In [29]:
# solve is the tuple returned by GJElim: (reduced matrix, rank, list of intermediate matrices).
# augMatrix = True keeps the last (measurement) column from being used as a pivot column.
solve = GJElim(AugMatrix, augMatrix = True)

In [30]:
len(solve[2])

19

In [31]:
sy.Matrix(solve[2][6])

Matrix([
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0,  0.0,  0.0, 1.0,  0.0,  0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0,  0.0,  0.0608425931695994],
[0.0, 1.0, 0.0, 0.0, 0.0, 0.0,  1.0,  1.0, 0.0,  1.0,  1.0, 0.0,  1.0, 0.0, 0.0,  1.0, 0.0,  1.0,   0.932107744658779],
[0.0, 0.0, 1.0, 0.0, 0.0, 0.0,  0.0,  0.0, 0.0,  0.0,  0.0, 1.0,  0.0, 0.0, 0.0,  0.0, 0.0,  0.0, 0.00205692709439458],
[0.0, 0.0, 0.0, 1.0, 0.0, 0.0,  0.0,  0.0, 0.0,  0.0,  0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 1.0,  0.0, 0.00246269850794919],
[0.0, 0.0, 0.0, 0.0, 1.0, 1.0,  0.0,  0.0, 0.0,  0.0,  0.0, 0.0,  0.0, 1.0, 1.0,  0.0, 0.0,  0.0, 0.00253003656927797],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0,  1.0,  1.0, 0.0,  0.0,  0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0,  0.0,  0.0024290294772848],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0,  0.0,  0.0, 0.0,  0.0,  0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0,  0.0,                 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0,  0.0,  0.0, 0.0,  1.0,  1.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0,  0.0, 8.21190597676019e-5],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 

In [32]:
sy.Matrix(solve[0])

Matrix([
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,    0.0608425931695994],
[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,   0.00197480803462722],
[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,   0.00205692709439458],
[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,   0.00246269850794919],
[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0,   0.00253003656927797],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,    0.0024290294772848],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,   8.21190597676019e-5],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0,     0.927588119056435],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 

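Then we solve, tracking which sums of isotopologues are constrained. In this notebook the solver is the `GJElim` function defined above (this text originally referred to "sol" and to the "rref" function in M2Module.py). The first entry in `solve` gives the row-reduced augmented matrix, i.e. the solution. The second gives its rank, and the third gives a list of the intermediate matrices saved during elimination.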

In [33]:
#Construct augmented matrix
comp = np.array(CMatrix,dtype=float)
meas = np.array(MeasurementVector,dtype = float)
AugMatrix = np.column_stack((comp, meas))

#solve by Gauss Jordan
solve = GJElim(AugMatrix, augMatrix = True)

#Take everything but the final column, which is just the answer
solution = solve[0][:,:-1]

#Check which isotopologues correspond to which measurements in the answer, and explicitly track them
uniqueAnswers = []
stochasticValues = []
composition = []
number = []

rank = solve[1]
for i in range(len(solution)):
    stoch = 0
    c = None
    
    if i >= rank:
        break
        
    rowIsotopologues = []
    n = 0
    for j in range(len(solution[i])):
        if solution[i][j] == 1:
            n += 1
            
            rowIsotopologues.append(Isotopologues['Precise Identity'][j])

            stoch += Isotopologues['Stochastic U'][j]
            
            if c == None:
                c = Isotopologues['Composition'][j]
            elif c != Isotopologues['Composition'][j]:
                c = c + " & " + Isotopologues['Composition'][j]
            
    uniqueAnswers.append(rowIsotopologues)
    stochasticValues.append(stoch)
    composition.append(c)
    number.append(n)
    
#take the measured values
values = solve[0][:rank,-1]

condensed = [' & '.join(x) for x in uniqueAnswers]

#output as dataFrame
output = {}
output['M2 Percent Abundance'] = values
output['Stochastic U'] = stochasticValues
output['Composition'] = composition
output['Number'] = number

dfOutput = pd.DataFrame.from_dict(output)
dfOutput.index = condensed

In [34]:
dfOutput

Unnamed: 0,M2 Percent Abundance,Stochastic U,Composition,Number
13C C-methyl | 13C C-alpha & 13C C-alpha | D H-methyl,0.060842593169599,0.000131525582656,13C13C & 13CD,2
13C C-alpha | 17O O-alcohol,0.001974808034627,4.26901228e-06,13C17O,1
13C C-methyl | 17O O-alcohol & 17O O-alcohol | D H-methyl,0.002056927094395,4.446531952e-06,13C17O & 17OD,2
13C C-methyl | D H-methyl & D H-methyl | D H-methyl,0.002462698507949,5.323702349e-06,13CD & DD,2
13C C-methyl | D H-alpha & 13C C-methyl | D H-alcohol & D H-methyl | D H-alpha & D H-methyl | D H-alcohol,0.002530036569278,5.469269414e-06,13CD & DD & DD,4
13C C-alpha | D H-alcohol & 13C C-alpha | D H-alpha,0.002429029477285,5.250918816e-06,13CD,2
17O O-alcohol | D H-alcohol & 17O O-alcohol | D H-alpha,8.2119059768e-05,1.77519672e-07,17OD,2
18O O-alcohol,0.927588119056435,0.0020052,18O,1
D H-alpha | D H-alpha & D H-alpha | D H-alcohol,3.3669030664e-05,7.2783533e-08,DD,2


In [35]:
#Manual calculation of UM+2
#M218O = dfOutput[dfOutput['Composition'] == '18O']['M2 Percent Abundance'].sum()
#UM2 = 0.002005200000000 / M218O
UM2 = 0.002161735320672001

In [36]:
# Attach the same UM2 value to every row (computeMNUValues reads it from the "U" + key column),
# then display the table.
dfOutput['UM2'] = UM2
dfOutput

Unnamed: 0,M2 Percent Abundance,Stochastic U,Composition,Number,UM2
13C C-methyl | 13C C-alpha & 13C C-alpha | D H-methyl,0.060842593169599,0.000131525582656,13C13C & 13CD,2,0.002161735320672
13C C-alpha | 17O O-alcohol,0.001974808034627,4.26901228e-06,13C17O,1,0.002161735320672
13C C-methyl | 17O O-alcohol & 17O O-alcohol | D H-methyl,0.002056927094395,4.446531952e-06,13C17O & 17OD,2,0.002161735320672
13C C-methyl | D H-methyl & D H-methyl | D H-methyl,0.002462698507949,5.323702349e-06,13CD & DD,2,0.002161735320672
13C C-methyl | D H-alpha & 13C C-methyl | D H-alcohol & D H-methyl | D H-alpha & D H-methyl | D H-alcohol,0.002530036569278,5.469269414e-06,13CD & DD & DD,4,0.002161735320672
13C C-alpha | D H-alcohol & 13C C-alpha | D H-alpha,0.002429029477285,5.250918816e-06,13CD,2,0.002161735320672
17O O-alcohol | D H-alcohol & 17O O-alcohol | D H-alpha,8.2119059768e-05,1.77519672e-07,17OD,2,0.002161735320672
18O O-alcohol,0.927588119056435,0.0020052,18O,1,0.002161735320672
D H-alpha | D H-alpha & D H-alpha | D H-alcohol,3.3669030664e-05,7.2783533e-08,DD,2,0.002161735320672


In [37]:
# Use the full option name: the abbreviation "precision" is only resolved by pattern matching
# and fails in recent pandas releases, where it matches more than one option.
pd.set_option("display.precision", 15)
computeMNUValues(dfOutput, "M2")

Unnamed: 0,M2 Percent Abundance,Stochastic U,Composition,Number,UM2,U Values,Deltas,Clumped Deltas
13C C-methyl | 13C C-alpha & 13C C-alpha | D H-methyl,0.060842593169599,0.000131525582656,13C13C & 13CD,2,0.002161735320672,0.000131525582656,,0.0
13C C-alpha | 17O O-alcohol,0.001974808034627,4.26901228e-06,13C17O,1,0.002161735320672,4.26901228e-06,,1.23012711128467e-10
13C C-methyl | 17O O-alcohol & 17O O-alcohol | D H-methyl,0.002056927094395,4.446531952e-06,13C17O & 17OD,2,0.002161735320672,4.446531952e-06,,0.0
13C C-methyl | D H-methyl & D H-methyl | D H-methyl,0.002462698507949,5.323702349e-06,13CD & DD,2,0.002161735320672,5.323702349e-06,,0.0
13C C-methyl | D H-alpha & 13C C-methyl | D H-alcohol & D H-methyl | D H-alpha & D H-methyl | D H-alcohol,0.002530036569278,5.469269414e-06,13CD & DD & DD,4,0.002161735320672,5.469269414e-06,,0.0
13C C-alpha | D H-alcohol & 13C C-alpha | D H-alpha,0.002429029477285,5.250918816e-06,13CD,2,0.002161735320672,5.250918816e-06,,0.0
17O O-alcohol | D H-alcohol & 17O O-alcohol | D H-alpha,8.2119059768e-05,1.77519672e-07,17OD,2,0.002161735320672,1.77519672e-07,,0.0
18O O-alcohol,0.927588119056435,0.0020052,18O,1,0.002161735320672,0.0020052,0.0,
D H-alpha | D H-alpha & D H-alpha | D H-alcohol,3.3669030664e-05,7.2783533e-08,DD,2,0.002161735320672,7.2783533e-08,,0.0


In [38]:
dfOutput[['Deltas','Clumped Deltas']]

Unnamed: 0,Deltas,Clumped Deltas
13C C-methyl | 13C C-alpha & 13C C-alpha | D H-methyl,,0.0
13C C-alpha | 17O O-alcohol,,1.23012711128467e-10
13C C-methyl | 17O O-alcohol & 17O O-alcohol | D H-methyl,,0.0
13C C-methyl | D H-methyl & D H-methyl | D H-methyl,,0.0
13C C-methyl | D H-alpha & 13C C-methyl | D H-alcohol & D H-methyl | D H-alpha & D H-methyl | D H-alcohol,,0.0
13C C-alpha | D H-alcohol & 13C C-alpha | D H-alpha,,0.0
17O O-alcohol | D H-alcohol & 17O O-alcohol | D H-alpha,,0.0
18O O-alcohol,0.0,
D H-alpha | D H-alpha & D H-alpha | D H-alcohol,,0.0


In [39]:
# Second pass: use both fragments. fragments and fragKeys are parallel lists.
fragments = [frag_OH,frag_methyl]
fragKeys = ['OH','Methyl']

# Concentration of the unsubstituted isotopologue, used to normalise the M2 concentrations
UnsubConc = M0['000000000']['Conc']

# The stochastic U value does not depend on the fragment, so compute it once per isotopologue
for value in M2.values():
    value['Stochastic U'] = value['Conc'] / UnsubConc

# fail loudly rather than silently mislabel a fragment
assert len(fragments) == len(fragKeys), "fragments and fragKeys must have the same length"
atomsPerSite = df['Number'].values

#For each fragment we will observe
for fragKey, fragment in zip(fragKeys, fragments):
    #compute what each M2 isotopologue looks like after fragmentation and record it on the entry
    for isotopologue, value in M2.items():
        newIsotopologue = expandAndFragment(fragment, atomsPerSite, isotopologue)
        value[fragKey + ' Identity'] = newIsotopologue

        sub = computeSubs(newIsotopologue, siteElements)

        #an empty substitution string means the fragment carries no substitution
        if sub == '':
            sub = 'Unsub'

        value[fragKey + ' Subs'] = sub
        
def filterEmptyStr(string):
    '''Return False for the empty string and True for anything else.'''
    return not (string == '')
    
# One row per M2 isotopologue (indexed by its condensed string), one column per recorded property
Isotopologues = (
    pd.DataFrame.from_dict(M2).T
    .rename(columns={'Conc':'Stochastic',"Subs": "Composition"})
)

# Build a readable label for every isotopologue: each substituted atom is written as
# "<substitution> <site name>", and the atoms are joined with '   |   '. The label contains
# '|' exactly when more than one atom is substituted.
preciseStrings = []
for condensed in Isotopologues.index:
    # uEl presumably returns '' for an unsubstituted atom; those atoms are left out of the label
    Subs = [uEl(element, int(number)) for element, number in zip(siteElements, condensed)]
    Precise = [x + " " + y for x, y in zip(Subs, siteNameList) if x != '']
    preciseStrings.append('   |   '.join(Precise))

Isotopologues['Precise Identity'] = preciseStrings
Isotopologues = Isotopologues.sort_values('Composition')

# Build the composition matrix (one row per measurement, one column per isotopologue)
# and the matching measurement vector.
CMatrix = []
MeasurementVector = []

# Closure: the relative abundances of all isotopologues sum to 1
closure = np.ones(len(Isotopologues['Number']),dtype = int)
CMatrix.append(closure)
MeasurementVector.append(1)

for fragment in fragKeys:
    print(fragment)
    IsotopologueFragments = Isotopologues[fragment + ' Subs']
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
    for sub, v in M2Df[fragment].items():
        # 1 where the isotopologue shows up as this substitution in the fragment, 0 elsewhere
        c = list(IsotopologueFragments.isin([sub]) * 1)
        CMatrix.append(c)
        MeasurementVector.append(v)
        
#Construct augmented matrix
comp = np.array(CMatrix,dtype=float)
meas = np.array(MeasurementVector,dtype = float)
AugMatrix = np.column_stack((comp, meas))

#solve by Gauss Jordan
solve = GJElim(AugMatrix, augMatrix = True)

#Take everything but the final column, which is just the answer
solution = solve[0][:,:-1]
rank = solve[1]

#Column j of the solution corresponds to row j of Isotopologues. Pull the columns out as
#plain lists so the lookups below are explicitly positional; indexing the string-labelled
#Series with an integer relies on a positional fallback that pandas has deprecated.
identities = Isotopologues['Precise Identity'].tolist()
stochasticU = Isotopologues['Stochastic U'].tolist()
compositions = Isotopologues['Composition'].tolist()

#Check which isotopologues correspond to which measurements in the answer, and explicitly track them
uniqueAnswers = []
stochasticValues = []
composition = []
number = []

#Only the first `rank` rows of the reduced matrix contain a pivot
for i in range(rank):
    stoch = 0
    c = None
    rowIsotopologues = []
    n = 0
    for j in range(len(solution[i])):
        coefficient = solution[i][j]

        #The bookkeeping below only understands coefficients of exactly -1, 0 or 1
        if coefficient not in [-1,0,1]:
            print("WARNING: Something unanticipated is going on with row reduction. You need to check the matrix.")

        #An isotopologue enters the constrained combination with a plus sign (coefficient 1)
        #or is subtracted from it (coefficient -1); every other coefficient is skipped
        if coefficient == 1:
            sign, prefix = 1, ""
        elif coefficient == -1:
            sign, prefix = -1, "MINUS "
        else:
            continue

        n += 1
        rowIsotopologues.append(prefix + identities[j])
        stoch += sign * stochasticU[j]

        #NOTE(review): the comparison is against the whole accumulated string, so a
        #composition can be listed more than once (e.g. "13CD & DD & DD")
        if c is None:
            c = compositions[j]
        elif c != compositions[j]:
            c = c + " & " + compositions[j]

    uniqueAnswers.append(rowIsotopologues)
    stochasticValues.append(stoch)
    composition.append(c)
    number.append(n)

#take the measured values
values = solve[0][:rank,-1]

condensed = [' & '.join(x) for x in uniqueAnswers]

#output as dataFrame
output = {}
output['M2 Percent Abundance'] = values
output['Stochastic U'] = stochasticValues
output['Composition'] = composition
output['Number'] = number

dfOutput = pd.DataFrame.from_dict(output)
dfOutput.index = condensed

dfOutput['UM2'] = UM2

#Use the full option name: the abbreviation "precision" is only resolved by pattern matching
#and fails in recent pandas releases, where it matches more than one option.
pd.set_option("display.precision", 15)

#computeMNUValues adds its columns to dfOutput in place, so the return value is not needed
computeMNUValues(dfOutput, "M2")

both =dfOutput

OH
Methyl


In [40]:
both

Unnamed: 0,M2 Percent Abundance,Stochastic U,Composition,Number,UM2,U Values,Deltas,Clumped Deltas
13C C-methyl | 13C C-alpha,0.058413563692315,0.00012627466384,13C13C,1,0.002161735320672,0.00012627466384,,0.0
13C C-alpha | 17O O-alcohol,0.001974808034627,4.26901228e-06,13C17O,1,0.002161735320672,4.26901228e-06,,1.23012711128467e-10
13C C-methyl | 17O O-alcohol & MINUS D H-methyl | D H-alcohol & MINUS D H-alpha | D H-alcohol,0.001918692983519,4.147706392e-06,13C17O & DD & DD,3,0.002161735320672,4.147706392e-06,,-3.5271785492341203e-10
13C C-methyl | D H-methyl & D H-methyl | D H-methyl,0.002462698507949,5.323702349e-06,13CD & DD,2,0.002161735320672,5.323702349e-06,,0.0
13C C-methyl | D H-alpha & 13C C-alpha | D H-alpha & MINUS D H-methyl | D H-methyl,0.003205036939049,6.928441555e-06,13CD & DD,3,0.002161735320672,6.928441555e-06,,0.0
13C C-methyl | D H-alcohol & MINUS 13C C-alpha | D H-alpha & D H-methyl | D H-alcohol & D H-alpha | D H-alcohol,-0.000753561441321,-1.629000384e-06,13CD & DD & DD,4,0.002161735320672,-1.629000384e-06,,-5.33351141029925e-10
13C C-alpha | D H-alcohol & 13C C-alpha | D H-alpha,0.002429029477285,5.250918816e-06,13CD,2,0.002161735320672,5.250918816e-06,,0.0
13C C-alpha | D H-methyl,0.002429029477285,5.250918816e-06,13CD,1,0.002161735320672,5.250918816e-06,,0.0
17O O-alcohol | D H-alcohol,2.7373019923e-05,5.9173224e-08,17OD,1,0.002161735320672,5.9173224e-08,,2.32882602091422e-08
17O O-alcohol | D H-alpha,5.4746039844e-05,1.18346448e-07,17OD,1,0.002161735320672,1.18346448e-07,,-1.11869402630305e-08
