# Computing Graph Weights
---

In [70]:
import pandas as pd

In [71]:
RecList = pd.read_csv('../CSV/recipeList.csv')
ingr_info = pd.read_csv('../CSV/Compound CSVs/ingr_info.tsv', sep="\t")
ingr_comp = pd.read_csv('../CSV/Compound CSVs/ingr_comp.tsv', sep="\t")

## Getting the occurrences of each ingredient

This is done by looping over each row in the recipeList dataStructure and creating a new dictionary key for each new ingredient. If an ingredient is already in the dictionary then 1 is added to its value. 

In [72]:
ioDict = {}

for row in RecList.iterrows(): 
      
    if row[1][1] in ioDict:
        ioDict[row[1][1]] += 1
    else:
        ioDict[row[1][1]] = 1 


Next we find the value of the ingredient that apers the most 

In [73]:
maxVal = 0
for key, val in ioDict.items():
    if val > maxVal:
        maxVal = val
    else:
        continue

The values are then normalized to make them easier to work with down the line

In [74]:
for key, val in ioDict.items():
    ioDict[key] = val/maxVal

Next we calculate all the possible combinations of ingredients. This is done using a simple nested for loop setup

In [75]:
ingPairs = []
keyList = list(ioDict.keys())

for key1idx in range(0, len(keyList)):
    for key2idx in range((key1idx + 1), len(keyList)):
        temp = []
        temp.append(keyList[key1idx])
        temp.append(keyList[key2idx])
        ingPairs.append(temp)

del temp
del keyList

## Calculating the weighted value for common recipes

In [76]:
ipWeights = [0] * len(ingPairs) # ingredient pair weights array

In [77]:
curRecipe = ''
ingList = []
for row in RecList.iterrows():
    if curRecipe == row[1][0]:
        ingList.append(row[1][1])
    else:
        curRecipe = row[1][0]
        for pairidx in range(0, len(ingPairs)):
            if ingPairs[pairidx][0] in ingList and ingPairs[pairidx][1] in ingList:
                ipWeights[pairidx] += 1
        ingList = []

del ingList

In [78]:
normIPweights = [0] * len(ipWeights)
maxWeight = max(ipWeights)

for idx in range(0, len(ipWeights)):
    normIPweights[idx] = ipWeights[idx]/maxWeight

## Calculating the weighted value for common compounds

In [79]:
import enum

class findingCompounds(enum.Enum):
   looking = 1
   found = 2
   done = 3

In [80]:
def findIndex(ingName):
    for row in ingr_info.iterrows():
        if row[1][1] == ingName:
            return row[1][0]
    return -1

In [81]:
def findCompounds(ingID):
    tracker = findingCompounds.looking
    rowCounter = 0
    compList = []

    while (rowCounter < len(ingr_comp)) and (tracker != findingCompounds.done):
        if(ingr_comp['# ingredient id'][rowCounter] == ingID):
            if tracker != findingCompounds.found:
                tracker = findingCompounds.found
            
            compList.append(ingr_comp['compound id'][rowCounter])

        elif tracker == findingCompounds.found:
            tracker = findingCompounds.done
        rowCounter  += 1
    
    return compList

In [82]:
def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3

In [83]:
epWeights = [0] * len(ingPairs) # element pair weights array

In [84]:
iiDict = {}

for row in RecList.iterrows(): 
      
    if row[1][1] not in iiDict:

        if isinstance(row[1][2], str):
            iiDict[row[1][1]] = findCompounds(row[1][3])
        else:
            iiDict[row[1][1]] = -1
        

In [87]:
for rowIndex in range(0, len(ingPairs)):
    compsIng1 = iiDict[ingPairs[rowIndex][0]]
    compsIng2 = iiDict[ingPairs[rowIndex][1]]

    if(compsIng2 == -1 or compsIng1 == -1):
        epWeights[rowIndex] = -1
    else:
        print(len(intersection(compsIng1, compsIng2)))
        epWeights[rowIndex] = len(intersection(compsIng1, compsIng2))


2
0
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
2
2
0
0
2
1
0
2
1
1
2
1
0
0
1
0
2
0
0
0
2
2
0
1
0
0
2
0
1
0
0
2
0
3
1
0
0
2
1
0
2
1
1
0
1
0
1
2
0
0
0
1
0
0
1
0
2
0
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
2
0
0
0
0
0
0
1
1
0
0
0
2
2
1
2
16
0
6
20
0
0
0
0
15
0
5
5
1
13
0
4
7
13
0
0
20
16
12
15
3
18
30
18
0
0
22
7
16
5
0
4
10
19
1
15
16
0
13
1
6
16
11
6
0
5
22
3
0
20
9
5
17
3
9
0
10
0
8
29
3
3
0
10
6
2
15
0
17
1
6
13
5
3
11
12
4
13
3
17
1
0
9
0
1
3
23
1
0
3
0
6
16
12
8
0
13
0
6
13
10
17
1
2
4
0
0
0
1
4
0
1
2
2
4
0
1
1
2
0
0
7
5
4
5
1
5
9
5
1
0
7
0
2
1
0
3
1
5
0
4
5
0
4
1
0
4
2
1
0
2
6
0
0
6
2
3
5
1
2
0
3
0
2
6
2
1
0
3
3
1
7
0
5
1
4
4
1
2
2
3
1
4
2
5
1
1
2
0
1
1
10
0
0
1
0
2
4
3
1
0
4
0
1
3
2
3
7
10
6
5
0
36
5
0
0
10
9
0
5
13
14
17
4
6
3
8
6
8
10
1
4
1
17
0
3
8
9
2
0
1
6
11
9
9
5
0
2
1
10
2
1
13
0
7
8
0
0
5
1
0
4
3
1
0
1
0
16
7
1
1
0
1
8
1
0
7
4
1
1
6
4
7
1
3
3
6
0
17
3
8
1
0
0
0
0
0
0
0
0
36
2
3
1
0
6
0
10
12
1
7
22
7
3
0
7
16
0
3
11
9
8
5
13
15
22
6
8
11
14
16
10
8
12
16
12
11
0
13
4
19
5
0
2
7
18
8
11
12
0
6

In [88]:
print(len(epWeights), len(ingPairs))

38226 38226


In [101]:
epWeights

[-1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,
 -1,


In [104]:
normEPweights = [0] * len(epWeights)
maxWeight = max(epWeights)

maxWeight

155

In [105]:
tempWeights = []
for i in epWeights:
    if i != -1:
        tempWeights.append(i)

minWeight = min(tempWeights)

minWeight

0

In [106]:
for idx in range(0, len(epWeights)):
    if epWeights[idx] != -1:
        normEPweights[idx] = (epWeights[idx] - minWeight) / (maxWeight - minWeight)
    else:
        normEPweights[idx] = epWeights[idx]

## Creating the weights CSV

In [107]:
firstIng = []
secondIng = []

for idx in range(0, len(ingPairs)):
    firstIng.append(ingPairs[idx][0])
    secondIng.append(ingPairs[idx][1])

In [108]:
ingPairWeights = pd.DataFrame(list(zip(firstIng, secondIng, normIPweights, normEPweights)), columns=['First Ingredient', 'Second Ingredient', 'Normalized Common Recipe Weight', 'Normalized Common Compound weight'])

In [109]:
ingPairWeights.to_csv("../CSV/ingPairWeights.csv", index = False)