In [150]:
import torch

from cuisine.cookbook import get_ingredient_list, get_cookbook_train, get_cookbook_valid_question, get_cookbook_valid_answer
from cuisine.embedding import import_embedding, create_random_embedding, create_one_hot_embedding
from cuisine.chef import make_recipe_embedding_data, make_ingredient_embedding_data
from cuisine.utils import move_to_top_directory

In [151]:
move_to_top_directory()
%pwd

'/home/felix/cuisine'

In [152]:
cookbook_train = get_cookbook_train()
cookbook_valid_question = get_cookbook_valid_question()
cookbook_valid_answer = get_cookbook_valid_answer()
ingredient_list = get_ingredient_list()

cookbook_train[15]

{'recipe_id': 15,
 'ingredients': [2866, 4243, 4362, 5377, 5408, 6187, 6352, 6568],
 'kitchen_name': 'italian',
 'kitchen_id': 5}

In [153]:
embedding_random_walk_2 = import_embedding("Embp1q2", ingredient_list)
embedding_random_walk_10 = import_embedding("Embp1q10", ingredient_list)
embedding_uniform_rand = create_random_embedding(ingredient_list, 128)
embedding_svd_32 = import_embedding("SVD32", ingredient_list)
embedding_svd_64 = import_embedding("SVD64", ingredient_list)
embedding_svd_128 = import_embedding("SVD128", ingredient_list)
embedding_svd_128 = import_embedding("SVD128", ingredient_list)
embedding_one_hot = create_one_hot_embedding(ingredient_list)

embedding_svd_32[2813]

array([34.63750259, 20.35864365, -1.20763975, -0.97736046,  0.23479278,
        5.40115298,  8.51684704, -0.53078058, -0.53245975, -0.8002612 ,
       -3.08002422,  0.61509874,  1.03727886, -1.68163813,  4.22448238,
        4.41397363, -2.43951389,  1.38136843, -0.5487753 , -0.09986042,
       -1.99060814, -0.10028059,  0.17231477, -0.5892525 , -0.08022406,
       -2.57672924,  4.58388978, -2.31123317, -0.80378753,  1.565003  ,
        0.76383124, -3.8348879 ])

In [154]:
embedding = embedding_one_hot

X_train, y_train = make_recipe_embedding_data(cookbook_train, embedding)
X_valid, _       = make_recipe_embedding_data(cookbook_valid_question, embedding)
_      , y_valid = make_recipe_embedding_data(cookbook_valid_answer, embedding)

X_train.shape

(23547, 6714)

In [155]:
embedding_matrix = make_ingredient_embedding_data(embedding)

embedding_matrix.shape

(6714, 6714)

In [156]:
import numpy as np
X = np.concatenate((X_train, X_valid), axis=0)
y = np.concatenate((y_train, y_valid), axis=0)

train_mask = np.asarray( range(X_train.shape[0]) )
valid_mask = np.asarray( [i + X_train.shape[0] for i in range(X_valid.shape[0]) ] )

In [157]:
import itertools

def make_adjacency_matrix(cookbook, ingredient_list):

    num_ingredients = len(ingredient_list)
    adjacency_matrix = np.zeros(shape=(num_ingredients, num_ingredients), dtype=np.int32)

    for recipe in cookbook:
        for ingredient1, ingredient2 in itertools.permutations(recipe['ingredients'], r=2):
            adjacency_matrix[ingredient1, ingredient2] += 1

    return adjacency_matrix

adjacency_matrix = make_adjacency_matrix(cookbook_train + cookbook_valid_question, ingredient_list)
    

In [158]:
import networkx as nx

G = nx.from_numpy_matrix(adjacency_matrix)
edge_list = np.asarray(G.edges).T
weights = np.asarray(list(nx.get_edge_attributes(G, "weight").values()))


In [159]:
def make_recipe_edge_list(cookbook):

    edge_list = []

    for recipe_idx, recipe in enumerate(cookbook):
        for ingredient in recipe['ingredients']:
            edge_list.append((ingredient, recipe_idx))

    return np.asarray(edge_list).T

recipe_edge_list = make_recipe_edge_list(cookbook_train + cookbook_valid_question)

In [160]:
from torch_geometric.data import HeteroData

graph = HeteroData()

graph['ingredient'].x = torch.tensor(embedding_matrix, dtype=torch.float)
graph['recipe'].x = torch.zeros(size=(len(y), 16))
graph['recipe'].y = torch.tensor(y, dtype=torch.long )

graph['recipe'].train_mask = train_mask
graph['recipe'].valid_mask = valid_mask

graph['ingredient', 'shares_recipe_with', 'ingredient'].edge_index = torch.tensor(edge_list, dtype=torch.long)
graph['ingredient', 'in', 'recipe'].edge_index = torch.tensor(recipe_edge_list, dtype=torch.long)

graph['ingredient', 'shares_recipe_with', 'ingredient'].edge_weight = torch.tensor(weights, dtype=torch.float)

graph


HeteroData(
  [1mingredient[0m={ x=[6714, 6714] },
  [1mrecipe[0m={
    x=[31395, 16],
    y=[31395],
    train_mask=[23547],
    valid_mask=[7848]
  },
  [1m(ingredient, shares_recipe_with, ingredient)[0m={
    edge_index=[2, 388320],
    edge_weight=[388320]
  },
  [1m(ingredient, in, recipe)[0m={ edge_index=[2, 314697] }
)

In [161]:
from torch_geometric.data import InMemoryDataset

class CuisineDataset(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(CuisineDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])
        
    @property
    def raw_file_names(self):
        return []

    @property
    def processed_file_names(self):
        return ['data']

    def download(self):
        pass

    def process(self):

        data, slices = self.collate([graph])
        torch.save((data, slices), self.processed_paths[0])

In [162]:
%rm -rf data/processed
dataset = CuisineDataset(root="data/")
data = dataset[0]

Processing...
Done!


In [166]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import HeteroConv, GCNConv, SAGEConv, GraphConv, Linear


class HeteroGNN(torch.nn.Module):
    def __init__(self):
        super().__init__()

        embedding_dim = data['ingredient'].num_node_features

        self.conv1 = HeteroConv({
            ('ingredient', 'shares_recipe_with', 'ingredient'): SAGEConv(-1, 64, aggr="add")
        })

        self.conv2 = HeteroConv({
            ('ingredient', 'in', 'recipe'): SAGEConv((-1, -1), 64, aggr="add")
        })

        self.linear0 = Linear(-1, 64)
        self.linear1 = Linear(-1, 64)
        self.linear2 = Linear(-1, 64)

            

    def forward(self, data):

        x_dict = data.x_dict 
        edge_index_dict = data.edge_index_dict
        edge_weight_dict = data.edge_weight_dict
        
        # x_dict['ingredient'] = self.linear0(x_dict['ingredient'])
        # x_dict['ingredient'] = self.conv1(x_dict, edge_index_dict)['ingredient']
        # x_dict['ingredient'] = F.relu(x_dict['ingredient'])
        x_dict['recipe'] = self.conv2(x_dict, edge_index_dict)['recipe']

        # x_dict['recipe'] = self.avg_neighbours_of_recipe(x_dict, edge_index_dict)
        # x_dict['recipe'] = self.linear1(x_dict['recipe'].relu())  
        
        x_dict['recipe'] = F.relu(x_dict['recipe'])  
        x_dict['recipe'] = self.linear2(x_dict['recipe'])  

        # x = F.relu(x_dict['recipe'])  
        # x = self.linear2(x)
        # x_dict['recipe'] = x_dict['recipe'] + x

        return F.softmax(x_dict['recipe'], dim=1)


    def avg_neighbours_of_recipe(self, x_dict, edge_index_dict):
        # Legacy code to check if a linear layer plus averaging the outputs of the GCNConv 
        # is the same as using GraphConv

        edges = edge_index_dict['ingredient', 'in', 'recipe']
        num_recipes = x_dict['recipe'].shape[0]
        m = torch.zeros((num_recipes, len(ingredient_list)), device=device)

        m[ edges[1, :], edges[0, :]  ] = 1
        #m = F.normalize(m, p=1, dim=1)
        return torch.mm(m, x_dict['ingredient'])   


In [167]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HeteroGNN().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()
for epoch in range(300):
    out = model(data)
    loss = F.cross_entropy(out[train_mask], data['recipe'].y[train_mask])
    
    writer.add_scalar("Loss/train", loss, epoch)

    pred = model(data).argmax(dim=1)
    acc_train = (pred[train_mask] == data['recipe'].y[train_mask]).sum() / len(train_mask)
    acc_valid = (pred[valid_mask] == data['recipe'].y[valid_mask]).sum() / len(valid_mask)
    writer.add_scalar("Acc_train", acc_train, epoch)
    writer.add_scalar("Acc_valid", acc_valid, epoch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

writer.flush()

In [168]:
model.eval()
pred = model(data).argmax(dim=1)
correct_train = (pred[train_mask] == data['recipe'].y[train_mask]).sum()
correct_valid = (pred[valid_mask] == data['recipe'].y[valid_mask]).sum()
acc_train = int(correct_train) / len(train_mask)
acc_valid = int(correct_valid) / len(valid_mask)
print(f'Train Accuracy: {acc_train:.2%}: {correct_train} out of {len(train_mask)}')
print(f'Valid Accuracy: {acc_valid:.2%}: {correct_valid} out of {len(valid_mask)}')

Train Accuracy: 85.14%: 20048 out of 23547
Valid Accuracy: 69.83%: 5480 out of 7848
