In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib as pyplot

In [None]:
# Gets a list of all files in the data directory.
# os.listdir returns entries in arbitrary order, so the list is sorted to keep
# the indices printed below (and the positions stored from fileList in later
# cells) stable from one run or machine to the next.
baseDirectory = 'ddMatrices'
fileList = sorted(os.listdir(baseDirectory))
# Note: this prints the contents of the current working directory, not baseDirectory
print(os.listdir())

# Lists all .npy files and their index in fileList
# This can be used to find .npy files but I hardcoded the distance difference filename
# for convenience
for index, file in enumerate(fileList):
    if file.endswith('.npy'):
        print('Index: ' + str(index))
        print('Matrix Filename: ' + str(file))

In [None]:
# Collect the position in fileList of every .csv file. These files hold the
# "index" of amino acids common to both pdb files (any of the "Formatted"
# csv files).
formattedIndices = []
for index, file in enumerate(fileList):
    if not file.endswith('.csv'):
        continue
    print(f'Index: {index}')
    print(f'Common Index Filename: {file}')
    formattedIndices.append(index)

# Lift the cap on how many rows a DataFrame shows when displayed
pd.set_option('display.max_rows', None)

In [None]:
def dataFrameFromFile(index):
    """Read the csv file at position ``index`` of fileList into a DataFrame.

    Every file in fileList has a position (printed by the listing loops
    earlier in this notebook). ``index`` must be the position of a csv file;
    the file is read from inside baseDirectory.

    Returns:
        pandas.DataFrame with the contents of the csv file.
    """
    csvPath = os.path.join(baseDirectory, fileList[index])
    return pd.read_csv(csvPath)

# Build the lookup from Residue Numbers to Residue Indices.
# indexToNumMap is the 'Residue Number' column of the first csv file as a list,
# so a residue's index is simply its position in that list.
commonIndexFrame = dataFrameFromFile(formattedIndices[0])
indexToNumMap = commonIndexFrame['Residue Number'].to_list()
numToIndexMap = {num: index for index, num in enumerate(indexToNumMap)}
print(numToIndexMap)

In [None]:
# Example of displaying data from a .csv file in a pandas.DataFrame format.
# The call is the last expression of the cell, so the returned DataFrame is
# rendered as the cell's output.
dataFrameFromFile(formattedIndices[0])

In [None]:
# The covMapDict maps from covariance index to residue-residue index pairs.
# NOTE(review): allow_pickle=True unpickles arbitrary objects, which can run
# code on load; only use this with a covMapDict.npy from a trusted source.
covMapDict = np.load('covMapDict.npy', allow_pickle=True).item()

# We reverse this dictionary so that it maps from residue-residue index pairs to covariance index.
# Keys are the string form of the pair, e.g. '(3, 17)', which is the format
# resNumbersToCovIndex builds for its lookup. Every element is converted to a
# plain int first: under NumPy >= 2, str() of a tuple holding NumPy integers
# yields '(np.int64(3), np.int64(17))', which would never match that format.
# Pairs that already hold plain Python ints produce exactly the same keys as before.
covMapDictReverse = {
    str(tuple(int(resIndex) for resIndex in resPair)): covIndex
    for covIndex, resPair in covMapDict.items()
}
print(covMapDictReverse)

In [None]:
# Loads the covariance matrix from 'CovarianceMatrix.npy' in the working
# directory and prints its shape.
# NOTE(review): allow_pickle=True permits unpickling of object data, which can
# run code on load; only use this with a file from a trusted source.
covMatrix = np.load('CovarianceMatrix.npy', allow_pickle=True)
print(covMatrix.shape)

In [None]:
def resNumbersToCovIndex(res1, res2):
    """Map a pair of residue numbers to its covariance index.

    Each residue number is translated to a residue index through
    numToIndexMap. The two indices are then put in ascending order and the
    resulting '(low, high)' string is looked up in covMapDictReverse.
    Mainly a helper for resNumberToCovValue.

    Args:
        res1: integer residue number of the first residue.
        res2: integer residue number of the second residue.

    Returns:
        The covariance index stored in covMapDictReverse for the pair.

    Raises:
        KeyError: if a residue number is missing from numToIndexMap or the
            index pair is missing from covMapDictReverse.
    """
    firstIndex, secondIndex = numToIndexMap[res1], numToIndexMap[res2]
    # The lookup key always lists the smaller residue index first
    lowIndex = min(firstIndex, secondIndex)
    highIndex = max(firstIndex, secondIndex)
    return covMapDictReverse[f'({lowIndex}, {highIndex})']
    
def resNumberToCovValue(resPair1, resPair2):
    """Look up the covMatrix entry for two residue pairs.

    Each pair is converted to a covariance index with resNumbersToCovIndex;
    the first pair's index selects the row of covMatrix and the second pair's
    index selects the entry within that row.

    Args:
        resPair1: tuple (int, int) of residue numbers.
        resPair2: tuple (int, int) of residue numbers.

    Returns:
        The value stored at covMatrix[index of resPair1][index of resPair2].
    """
    rowIndex, colIndex = [
        resNumbersToCovIndex(pair[0], pair[1]) for pair in (resPair1, resPair2)
    ]
    return covMatrix[rowIndex][colIndex]

In [None]:
# Example queries against the loaded CovarianceMatrix: each entry is the label
# to print followed by the two residue-number pairs to look up.
exampleQueries = [
    ('C118-F146; T72-H125: ', (118, 146), (72, 125)),
    ('D185-M196; C118-F146: ', (185, 196), (118, 146)),
    ('D185-M196; T72-H125: ', (185, 196), (72, 125)),
]
for label, firstPair, secondPair in exampleQueries:
    print(label + str(resNumberToCovValue(firstPair, secondPair)))

In [None]:
# Average over all entries of covMatrix (no weights or axis are given);
# as the last expression of the cell, the result is shown as the cell output.
np.average(covMatrix)