In [1]:
import torch

from cuisine.cookbook import get_ingredient_list, get_cookbook_train, get_cookbook_valid_question, get_cookbook_valid_answer
from cuisine.embedding import import_embedding, create_random_embedding
from cuisine.chef import make_embedding_data
from cuisine.utils import move_to_top_directory

In [2]:
# Later cells use paths relative to the working directory
# ('data/edgeListWeight.csv', root="data/"), so the directory is fixed up first.
# NOTE(review): `move_to_top_directory` is a cuisine.utils helper; judging by its
# name and the %pwd output it changes into the project root — confirm.
move_to_top_directory()
# Show the resulting working directory as the cell output.
%pwd

'/home/felix/cuisine'

In [3]:
# Load the training recipes, the two halves of the validation split
# ("question" and "answer" records) and the ingredient list via the
# cuisine.cookbook helpers.
# NOTE(review): how the question/answer records differ is not visible here —
# see cuisine.cookbook to confirm.
cookbook_train = get_cookbook_train()
cookbook_valid_question = get_cookbook_valid_question()
cookbook_valid_answer = get_cookbook_valid_answer()
ingredient_list = get_ingredient_list()

# Peek at one training record: the displayed dict carries 'recipe_id',
# 'ingredients' (a list of integer ids), 'kitchen_name' and 'kitchen_id'.
cookbook_train[15]

{'recipe_id': 15,
 'ingredients': [2866, 4243, 4362, 5377, 5408, 6187, 6352, 6568],
 'kitchen_name': 'italian',
 'kitchen_id': 5}

In [4]:
# Load several candidate ingredient embeddings by name. Only
# `embedding_svd_64` is consumed by the cells visible below; the others are
# alternatives that can be swapped in.
# NOTE(review): "Embp1q2" / "Embp1q10" presumably denote random-walk
# embeddings with different walk parameters — confirm against
# cuisine.embedding.
embedding_random_walk_2 = import_embedding("Embp1q2", ingredient_list)
embedding_random_walk_10 = import_embedding("Embp1q10", ingredient_list)
# NOTE(review): 128 is presumably the embedding dimension, and this call may be
# unseeded — confirm in create_random_embedding.
embedding_uniform_rand = create_random_embedding(ingredient_list, 128)
embedding_svd_32 = import_embedding("SVD32", ingredient_list)
embedding_svd_64 = import_embedding("SVD64", ingredient_list)
embedding_svd_128 = import_embedding("SVD128", ingredient_list)

# Sanity check: look up the vector stored under key 2813 (the displayed array
# has 32 components).
embedding_svd_32[2813]

array([34.63750259, 20.35864365, -1.20763975, -0.97736046,  0.23479278,
        5.40115298,  8.51684704, -0.53078058, -0.53245975, -0.8002612 ,
       -3.08002422,  0.61509874,  1.03727886, -1.68163813,  4.22448238,
        4.41397363, -2.43951389,  1.38136843, -0.5487753 , -0.09986042,
       -1.99060814, -0.10028059,  0.17231477, -0.5892525 , -0.08022406,
       -2.57672924,  4.58388978, -2.31123317, -0.80378753,  1.565003  ,
        0.76383124, -3.8348879 ])

In [5]:
# Embedding used to turn recipes into feature vectors; any of the embeddings
# loaded earlier can be substituted here.
embedding = embedding_svd_64

# make_embedding_data returns a pair that is unpacked as (features, labels).
X_train, y_train = make_embedding_data(cookbook_train, embedding)

# Validation features are taken from the "question" records and validation
# labels from the "answer" records; the other half of each pair is discarded.
# Explicit throwaway names are used instead of `_` because binding `_` in a
# notebook shadows IPython's "last output" variable for the rest of the session.
X_valid, _unused_question_labels = make_embedding_data(cookbook_valid_question, embedding)
_unused_answer_features, y_valid = make_embedding_data(cookbook_valid_answer, embedding)

In [55]:
import numpy as np
# Stack training rows first and validation rows second, so that row i of X/y
# is a training sample for i < len(X_train) and a validation sample otherwise.
X = np.concatenate((X_train, X_valid), axis=0)
y = np.concatenate((y_train, y_valid), axis=0)

# NOTE: despite the names these are integer *index arrays*, not boolean masks.
# np.arange builds them without a Python-level loop and always yields an
# integer dtype; an empty list converted with np.asarray would instead become
# a float64 array, which cannot be used for indexing.
n_train = X_train.shape[0]
n_valid = X_valid.shape[0]
train_mask = np.arange(n_train)
valid_mask = np.arange(n_train, n_train + n_valid)

In [56]:
from torch_geometric.data import Data
import pandas as pd

In [57]:
# Headerless CSV of graph edges; the first two columns are used below as the
# edge endpoints and the third column as the edge attribute.
edge_list = pd.read_csv("data/edgeListWeight.csv", sep=",", header=None)

In [58]:
# Preview the edge endpoints in the 2 x num_edges layout used for edge_index.
edge_list.to_numpy()[:, :2].T

array([[2813, 2813, 2813, ..., 6202, 6202, 6202],
       [3146, 3229, 3885, ..., 3228, 5466, 5648]])

In [59]:
# Convert the edge table once and slice it for endpoints / attributes.
_edge_values = edge_list.values
_edge_endpoints = _edge_values[:, :2].T   # shape (2, num_edges)
_edge_attributes = _edge_values[:, 2:3]   # shape (num_edges, 1): third CSV column

# NOTE(review): the edge endpoints previewed above (2813, 3146, ...) are in the
# same range as the ingredient ids seen in the cookbook records, while the rows
# of X come from make_embedding_data applied to the recipe collections. Confirm
# that edge_index really indexes the same node set as the rows of x.
rawdata = Data(
    x=torch.tensor(X, dtype=torch.float),
    y=torch.tensor(y, dtype=torch.long),
    edge_index=torch.tensor(_edge_endpoints, dtype=torch.long),
    edge_attr=torch.tensor(_edge_attributes, dtype=torch.float),
)

rawdata

Data(x=[31395, 64], edge_index=[2, 706792], edge_attr=[706792, 1], y=[31395])

In [70]:
from torch_geometric.data import InMemoryDataset

class CuisineDataset(InMemoryDataset):
    """Single-graph in-memory dataset wrapping the notebook's `rawdata` object.

    The graph is collated and written to the first processed path by
    `process`, then read back from that path in `__init__`.

    NOTE(review): `process` reads the notebook-level global `rawdata`, so that
    variable must exist before the dataset is instantiated. PyG presumably
    skips `process` when the processed file already exists, in which case a
    stale file under `root` would be reused even if `rawdata` changed —
    confirm and delete the processed file when switching embeddings.
    """

    def __init__(self, root, transform=None, pre_transform=None):
        """Initialise the base dataset, then load the collated graph from disk."""
        super().__init__(root, transform, pre_transform)
        stored = torch.load(self.processed_paths[0])
        self.data, self.slices = stored

    @property
    def raw_file_names(self):
        """No raw files are declared: the data comes from the notebook itself."""
        return list()

    @property
    def processed_file_names(self):
        """Name of the single processed file holding the collated graph."""
        return ['data']

    def download(self):
        """Nothing to download."""
        return None

    def process(self):
        """Collate the global `rawdata` graph and save it to the processed path."""
        collated, slice_index = self.collate([rawdata])
        torch.save((collated, slice_index), self.processed_paths[0])

In [71]:
# Build the dataset rooted at data/ and display its only graph.
dataset = CuisineDataset("data/")
dataset[0]

Processing...
Done!


Data(x=[31395, 64], edge_index=[2, 706792], edge_attr=[706792, 1], y=[31395])

In [62]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        in_channels: Size of each input node feature vector. When None, it is
            read from the notebook-level `dataset.num_node_features`.
        hidden_channels: Width of the hidden layer (16 by default, the value
            that was previously hard-coded).
        out_channels: Number of output classes. When None, it is read from the
            notebook-level `dataset.num_classes`.
        dropout: Dropout probability applied after the first layer while
            training (0.5 is the default of `F.dropout`).

    Calling `GCN()` with no arguments builds exactly the same architecture as
    before; the parameters only allow building the model without relying on
    the global `dataset`.
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None, dropout=0.5):
        super().__init__()
        # Fall back to the notebook-level dataset only when sizes are not given.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes

        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node log-probabilities over the classes.

        Args:
            data: Graph object providing `x` (node features) and `edge_index`.
                `edge_attr` is not used by this model.

        Returns:
            Tensor of log-softmax scores with one row per node; pair it with
            `F.nll_loss` rather than `F.cross_entropy`.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active in training mode (model.train()).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)


In [63]:
# Seed before the model is built so that weight initialisation and dropout use
# the same seed as the later training cell (GPU kernels may still introduce
# small run-to-run differences).
torch.manual_seed(42)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# `train_mask` is an integer index array; convert it once to a tensor on the
# right device instead of re-converting the NumPy array on every epoch.
train_index = torch.as_tensor(train_mask, dtype=torch.long, device=device)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    # The model already returns log-softmax scores, so the matching loss is
    # nll_loss; cross_entropy would apply log-softmax a second time.
    # data.y is already a long tensor on `device`, so no cast/move is needed.
    loss = F.nll_loss(out[train_index], data.y[train_index])
    loss.backward()
    optimizer.step()

In [64]:
# Display the dataset's single graph again to confirm its tensor shapes.
dataset[0]

Data(x=[31395, 64], edge_index=[2, 706792], edge_attr=[706792, 1], y=[31395])

In [68]:
# Evaluate on the validation rows. eval() disables dropout, and no_grad()
# avoids building an autograd graph for pure inference.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)

# `valid_mask` is an integer index array, so its length is the sample count.
correct = int((pred[valid_mask] == data.y[valid_mask]).sum())
acc = correct / len(valid_mask)
print(f'Accuracy: {acc:.2%}: {correct} out of {len(valid_mask)}')

Accuracy: 59.80%: 4693 out of 7848


In [67]:
# Debug peek: list-indexing with a repeated index returns the prediction for
# node 0 three times.
pred[[0] * 3]

tensor([5, 5, 5], device='cuda:0')

In [None]:
# Seed before the model is created so its initial weights are repeatable.
torch.manual_seed(42)

# Fresh, untrained model; `device` comes from the earlier training cell.
model = GCN().to(device) 

# The optimizer class is looked up by name on torch.optim, so changing
# `optimizer_name` switches optimizers without touching the line below.
optimizer_name = "Adam"
# Note: this run uses lr=1e-1 and no weight decay, unlike the earlier training
# cell (lr=0.01, weight_decay=5e-4).
lr = 1e-1
optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr)
# Number of training epochs for the loop further down.
epochs = 200


def train():
  """Run one full-batch optimisation step on the training rows.

  Uses the notebook-level `model`, `optimizer`, `data` and `train_mask`.
  `train_mask` is the integer index array of training rows; the graph object
  itself carries no `train_mask` attribute, so the array is used directly.
  """
  model.train()
  optimizer.zero_grad()
  # GCN.forward requires the graph as its argument.
  log_probs = model(data)
  # The model outputs log-probabilities, hence nll_loss.
  loss = F.nll_loss(log_probs[train_mask], data.y[train_mask])
  loss.backward()
  optimizer.step()

@torch.no_grad()
def test():
  """Compute accuracy on the training and validation rows.

  Uses the notebook-level `model`, `data`, `train_mask` and `valid_mask`.
  Both "masks" are integer index arrays, so the number of samples is their
  length (summing them would add up row indices, not count samples).

  Returns:
    Tuple `(train_accuracy, validation_accuracy)` of floats in [0, 1].
  """
  model.eval()
  # GCN.forward requires the graph as its argument.
  pred = model(data).argmax(dim=1)

  def _accuracy(index):
    """Fraction of rows in `index` whose predicted class equals the label."""
    hits = pred[index].eq(data.y[index]).sum().item()
    return hits / len(index)

  return _accuracy(train_mask), _accuracy(valid_mask)

# Train for exactly `epochs` steps; range(1, epochs) would run one step fewer
# than configured.
for epoch in range(epochs):
  train()

train_acc, test_acc = test()

# Report the final accuracies between two separator lines.
print('#' * 70)
print('Train Accuracy: %s' %train_acc )
print('Test Accuracy: %s' % test_acc)
print('#' * 70)