# Creation of Interaction Classification Matrices - Nicky Solares

### Each matrix will be a truth table where a 1 indicates a true positive and a 0 indicates a true negative.

## Split protein naming convention 

Each protein name is expanded into two entries, *protein*_half1 and *protein*_half2, which are collected in a new list.

In [1]:
# The 18 proteins in the data set; each one is split into two halves.
names = [
    'cydA', 'cydB', 'fliG', 'fliM', 'fliN', 'folA', 'thyA', 'glgA', 'glgC',
    'ilvB', 'ilvC', 'nrdA', 'nrdB', 'purC', 'purE', 'purK', 'trpA', 'trpB',
]

# Expand every protein into its two half labels, keeping the halves of one
# protein adjacent: [..., 'cydA_half1', 'cydA_half2', 'cydB_half1', ...].
splitNames = [
    protein + suffix
    for protein in names
    for suffix in ('_half1', '_half2')
]
    


## Creating initial blank zeros matrix

Convert to dataframe to add column and row names

In [None]:
import numpy as np
import pandas as pd
import copy

# Rows and columns carry the same labels: every split-protein half is
# compared against every other half.
row_names = splitNames
column_names = splitNames

# Size the blank matrix from the label lists instead of a hard-coded 36 so the
# shape cannot drift out of sync with the labels if proteins are added or
# removed.
matrix = np.zeros((len(row_names), len(column_names)))

df = pd.DataFrame(matrix, columns=column_names, index=row_names)


## Testing indexing

In [None]:
#df

# Scratch checks on how the labels can be read back from the dataframe.
column_labels = df.columns.values
row_labels = df.index.values

column_labels[1]

row_labels[0]

type(row_labels[0])

# Fixed-width slices of one label: the first four characters, then
# characters 5 through 9.
row = row_labels[1]
row[:4]
row[5:10]

## Creating class 1 dataframe

A class 1 interaction is between the two halves of the same protein. The halves were matched up using string comparisons inside doubly nested for loops. 

ex: (*protein_half1* vs *protein_half2* = 1, *protein_half1* vs *protein_half1* = 0)

In [3]:
import numpy as np
import pandas as pd
import copy

# Class 1 truth table: 1 where the row and column are the two different
# halves of the same protein, 0 everywhere else.
row_names = splitNames
column_names = splitNames

# Derive the shape from the labels rather than hard-coding 36.
matrix = np.zeros((len(row_names), len(column_names)))

df1 = pd.DataFrame(matrix, columns = column_names, index = row_names)

class1 = df1

for row in class1.index.values:
    # Labels look like '<protein>_<half>'; splitting on the underscore avoids
    # assuming that every protein name is exactly four characters long.
    proteinx, _, halfx = row.partition('_')

    for col in class1.columns.values:
        proteiny, _, halfy = col.partition('_')

        # Same protein, opposite halves (in either order). A half paired with
        # itself stays 0.
        if proteinx == proteiny and {halfx, halfy} == {'half1', 'half2'}:
            class1.loc[row, col] = 1


class1
        

Unnamed: 0,cydA_half1,cydA_half2,cydB_half1,cydB_half2,fliG_half1,fliG_half2,fliM_half1,fliM_half2,fliN_half1,fliN_half2,...,purC_half1,purC_half2,purE_half1,purE_half2,purK_half1,purK_half2,trpA_half1,trpA_half2,trpB_half1,trpB_half2
cydA_half1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydA_half2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydB_half1,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydB_half2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliG_half1,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliG_half2,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliM_half1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
fliM_half2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliN_half1,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
fliN_half2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


## Creating class 2 dataframe

A class 2 interaction is between proteins that interact physically. The interactions were semi-hardcoded: proteins that share the same name prefix are matched up, with some exclusionary checks. 
    
ex: (*cydA_half1* vs *cydB_half1* = 1, *cydA_half1* vs *cydB_half2* = 1, *cydA_half1* vs *cydA_half2* = 0)
    
List of physically interacting proteins:

*cydA* - *cydB*

*nrdA* - *nrdB*

*fliG* - *fliM* - *fliN*

*trpA* - *trpB*

In [4]:
import numpy as np
import pandas as pd
import copy

# Class 2 truth table: 1 where the row and column belong to two different
# proteins that share a three-letter name prefix (e.g. cydA / cydB), except
# for the prefixes excluded below; 0 everywhere else.
row_names = splitNames
column_names = splitNames

# Derive the shape from the labels rather than hard-coding 36.
matrix = np.zeros((len(row_names), len(column_names)))

df2 = pd.DataFrame(matrix, columns = column_names, index = row_names)

class2 = df2

# Prefixes whose proteins share a name stem but are excluded from class 2.
excluded_prefixes = {'pur', 'ilv', 'glg'}

for row in class2.index.values:
    # Labels look like '<protein>_<half>'. Every combination of halves counts,
    # so only the protein name is needed.
    checkx = row.partition('_')[0]
    proteinx = checkx[:3]

    if proteinx in excluded_prefixes:
        continue  # the whole row stays 0

    for col in class2.columns.values:
        checky = col.partition('_')[0]
        proteiny = checky[:3]

        # Same prefix but a different protein; a protein against itself
        # (either half) is not a class 2 interaction.
        if proteinx == proteiny and checkx != checky:
            class2.loc[row, col] = 1

class2

Unnamed: 0,cydA_half1,cydA_half2,cydB_half1,cydB_half2,fliG_half1,fliG_half2,fliM_half1,fliM_half2,fliN_half1,fliN_half2,...,purC_half1,purC_half2,purE_half1,purE_half2,purK_half1,purK_half2,trpA_half1,trpA_half2,trpB_half1,trpB_half2
cydA_half1,0,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydA_half2,0,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydB_half1,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydB_half2,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliG_half1,0,0,0,0,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
fliG_half2,0,0,0,0,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
fliM_half1,0,0,0,0,1,1,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
fliM_half2,0,0,0,0,1,1,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
fliN_half1,0,0,0,0,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
fliN_half2,0,0,0,0,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0


## Creating class 3 interaction dataframe

A class 3 interaction is between proteins that interact enzymatically. The interactions were hardcoded here using logic, followed by exclusionary checks. This was the fastest way I could make this work for now. 
    
ex: (*folA_half1* vs *thyA_half1* = 1, *folA_half1* vs *thyA_half2* = 1, *folA_half1* vs *folA_half2* = 0)
    
List of enzymatically interacting proteins:

*folA* - *thyA*

*ilvC* - *ilvB*

*glgA* - *glgC*

*purE* - *purK* - *purC*

In [5]:
import numpy as np
import pandas as pd
import copy

# Class 3 truth table: 1 where the row and column belong to two different
# proteins from the same hard-coded interaction group below (any combination
# of halves), 0 everywhere else.
row_names = splitNames
column_names = splitNames

# Derive the shape from the labels rather than hard-coding 36.
matrix = np.zeros((len(row_names), len(column_names)))

df3 = pd.DataFrame(matrix, columns = column_names, index = row_names)

class3 = df3

# Hard-coded interaction groups; every pair of distinct proteins within a
# group is marked as interacting.
enzymatic_groups = [
    ('folA', 'thyA'),
    ('ilvC', 'ilvB'),
    ('glgA', 'glgC'),
    ('purE', 'purK', 'purC'),
]

# Map each protein to the index of its group for constant-time lookup.
group_of = {
    protein: group_index
    for group_index, members in enumerate(enzymatic_groups)
    for protein in members
}

for row in class3.index.values:
    # Labels look like '<protein>_<half>'; only the protein name matters here.
    proteinx = row.partition('_')[0]
    groupx = group_of.get(proteinx)

    if groupx is None:
        continue  # protein is in no group; its row stays all zeros

    for col in class3.columns.values:
        proteiny = col.partition('_')[0]

        # Exclusionary check: a protein never interacts with itself, so its
        # own two halves stay 0 even though they are in the same group.
        if proteinx != proteiny and group_of.get(proteiny) == groupx:
            class3.loc[row, col] = 1


class3
#df.to_csv(r'test.csv')

Unnamed: 0,cydA_half1,cydA_half2,cydB_half1,cydB_half2,fliG_half1,fliG_half2,fliM_half1,fliM_half2,fliN_half1,fliN_half2,...,purC_half1,purC_half2,purE_half1,purE_half2,purK_half1,purK_half2,trpA_half1,trpA_half2,trpB_half1,trpB_half2
cydA_half1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydA_half2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydB_half1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
cydB_half2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliG_half1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliG_half2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliM_half1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliM_half2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliN_half1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fliN_half2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Saving the three dataframes together in a NumPy `.npy` file for use in statistical analysis 

In [12]:
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() returns the same
# 2-D ndarray of values. np.save converts the list of three equally shaped
# tables into one array, ordered [class1, class2, class3].
np.save('team3_truthtables.npy', [class1.to_numpy(), class2.to_numpy(), class3.to_numpy()])