# Computing Graph Weights
---

In [2]:
import pandas as pd

In [3]:
# Recipe table, read with the default comma separator.
RecList = pd.read_csv('../CSV/recipeList.csv')

# Tab-separated ingredient tables: read_table uses "\t" as its separator.
ingr_info = pd.read_table('../CSV/Compound CSVs/ingr_info.tsv')
ingr_comp = pd.read_table('../CSV/Compound CSVs/ingr_comp.tsv')

## Getting the occurrences of each ingredient

This is done by looping over each row of the `RecList` DataFrame and creating a new dictionary key for each new ingredient. If an ingredient is already in the dictionary, 1 is added to its value.

In [4]:
ioDict = {}  # ingredient -> number of RecList rows that mention it

# The ingredient is the second column of RecList; it is selected by position
# with .iloc rather than by integer-indexing the label-indexed rows that
# iterrows() yields.
for ingredient in RecList.iloc[:, 1]:
    # The first sighting counts as one occurrence, so every value is the
    # actual number of rows in which the ingredient appears.
    ioDict[ingredient] = ioDict.get(ingredient, 0) + 1


Next we find the occurrence count of the ingredient that appears the most.

In [5]:
# Largest occurrence count stored in ioDict. The leading 0 is the floor the
# search starts from, so maxVal is 0 when ioDict is empty.
maxVal = max([0, *ioDict.values()])

The values are then normalized by dividing each one by this maximum, which makes them easier to work with down the line.

In [6]:
# Divide every count by maxVal, so the most frequent ingredient maps to 1.
ioDict.update({ingredient: count / maxVal for ingredient, count in ioDict.items()})

Next we build every possible pair of distinct ingredients. This is done with a simple nested for loop.

In [7]:
keyList = list(ioDict.keys())

# Every unordered pair of distinct ingredients, each stored as a two-item
# list [first, second] in the order the ingredients appear in ioDict.
# No scratch variable is needed, so this also works when ioDict holds fewer
# than two ingredients (the result is then an empty list).
ingPairs = [
    [keyList[firstIdx], keyList[secondIdx]]
    for firstIdx in range(len(keyList))
    for secondIdx in range(firstIdx + 1, len(keyList))
]

del keyList

## Calculating the weighted value for common recipes

In [8]:
# ingredient pair weights array: one zero-initialised counter per entry of ingPairs
ipWeights = [0 for _ in ingPairs]

In [9]:
# Start from zero so that re-running this cell does not add to earlier counts.
ipWeights = [0] * len(ingPairs)

recipeCol = RecList.iloc[:, 0]      # recipe identifier of each row
ingredientCol = RecList.iloc[:, 1]  # ingredient of each row

# The rows of one recipe are taken to be consecutive: a new run starts
# whenever the recipe value differs from the one in the row above.
recipeRun = (recipeCol != recipeCol.shift()).cumsum()

# Grouping by run hands over each recipe's complete ingredient list,
# including its first ingredient and including the last recipe in the table.
for runId, recipeIngredients in ingredientCol.groupby(recipeRun, sort=False):
    ingSet = set(recipeIngredients)
    for pairidx, (ingA, ingB) in enumerate(ingPairs):
        # A pair scores one point for every recipe that uses both members.
        if ingA in ingSet and ingB in ingSet:
            ipWeights[pairidx] += 1

In [10]:
# Scale each pair's recipe count by the largest count among all pairs.
maxWeight = max(ipWeights)
normIPweights = [weight / maxWeight for weight in ipWeights]

## Calculating the weighted value for common compounds

In [11]:
import enum

class findingCompounds(enum.Enum):
    """Progress states for a scan over rows in search of one ingredient."""

    looking = 1  # no matching row has been seen yet
    found = 2    # currently inside a run of matching rows
    done = 3     # the run of matching rows has ended

In [12]:
def findIndex(ingName):
    """Return the ID that ingr_info stores for the ingredient named ingName.

    The name is compared with the second column of ingr_info; the result is
    the first-column value of the first row that matches. Columns are picked
    by position with .iloc, which does not depend on the column labels.

    Parameters
    ----------
    ingName
        Ingredient name to look up.

    Returns
    -------
    The first-column value of the first matching row, or -1 when no row
    matches.
    """
    nameMatches = ingr_info.iloc[:, 1] == ingName
    if not nameMatches.any():
        return -1
    return ingr_info.iloc[:, 0][nameMatches].iloc[0]

In [13]:
def findCompounds(ingID):
    """Return the compound IDs that ingr_comp lists for one ingredient.

    Parameters
    ----------
    ingID
        Value to match against the '# ingredient id' column of ingr_comp.

    Returns
    -------
    list
        The 'compound id' value of every matching row, in table order.
        The list is empty when no row matches.
    """
    # Collect one compound id per matching row (individual values, not the
    # column itself). The boolean mask finds every row of the ingredient,
    # whether or not those rows sit next to each other in the table.
    rowMatches = ingr_comp['# ingredient id'] == ingID
    return ingr_comp.loc[rowMatches, 'compound id'].tolist()

In [14]:
def intersection(lst1, lst2):
    """Return the items of lst1 that also occur in lst2.

    The result keeps the order of lst1, duplicates included. Membership is
    tested with the `in` operator on lst2.
    """
    shared = []
    for item in lst1:
        if item in lst2:
            shared.append(item)
    return shared

In [15]:
# element pair weights array: one zero-initialised slot per entry of ingPairs
epWeights = [0 for _ in ingPairs]

In [16]:
# Resolve each distinct ingredient once instead of once per pair: the ID and
# compound list of an ingredient do not depend on which pair it belongs to.
ingIds = {name: findIndex(name) for pair in ingPairs for name in pair}
ingComps = {
    name: findCompounds(ingId)
    for name, ingId in ingIds.items()
    if ingId != -1
}

for rowIndex, (ing1, ing2) in enumerate(ingPairs):
    if ingIds[ing1] == -1 or ingIds[ing2] == -1:
        # -1 marks a pair with an ingredient that findIndex could not find.
        epWeights[rowIndex] = -1
    else:
        # Weight = number of compounds the two ingredients have in common.
        epWeights[rowIndex] = len(intersection(ingComps[ing2], ingComps[ing1]))

In [17]:
# Sanity check: the weight list should have one entry per ingredient pair.
print(f"{len(epWeights)} {len(ingPairs)}")

38226 38226


In [18]:
# Entries equal to -1 mark pairs for which an ingredient ID could not be
# found. They are left out of the min/max and keep the -1 marker afterwards.
validWeights = [weight for weight in epWeights if weight != -1]
minWeight = min(validWeights, default=0)
maxWeight = max(validWeights, default=0)
weightSpan = maxWeight - minWeight

normEPweights = []
for weight in epWeights:
    if weight == -1:
        # Carry the marker over as a single value for this pair.
        normEPweights.append(-1)
    elif weightSpan == 0:
        # All valid weights are equal, so min-max scaling has no range; use 0.
        normEPweights.append(0.0)
    else:
        normEPweights.append((weight - minWeight) / weightSpan)

## Creating the weights CSV

In [19]:
# Split the pairs into two parallel lists, one per member of the pair.
firstIng = [pair[0] for pair in ingPairs]
secondIng = [pair[1] for pair in ingPairs]

In [20]:
# One row per ingredient pair: both names followed by the two normalised weights.
weightColumns = [
    'First Ingredient',
    'Second Ingredient',
    'Normalized Common Recipe Weight',
    'Normalized Common Compound weight',
]
pairRows = list(zip(firstIng, secondIng, normIPweights, normEPweights))
ingPairWeights = pd.DataFrame(pairRows, columns=weightColumns)

In [21]:
# Write the pair weights to disk without the DataFrame's row index.
ingPairWeights.to_csv(path_or_buf="../CSV/ingPairWeights2.csv", index=False)