* Pobieramy dane, wytrenowane modele, wektory oraz słowniki
* Importujemy biblioteki (torch, nltk)

In [17]:
!wget https://pjn-project.s3.eu-central-1.amazonaws.com/model-77000.pth
!wget https://pjn-project.s3.eu-central-1.amazonaws.com/rnn_vocabulary.pickle
!wget https://pjn-project.s3.eu-central-1.amazonaws.com/word2vec.gensim
!wget https://pjn-project.s3.eu-central-1.amazonaws.com/classifier-250.pth

--2019-06-02 19:15:52--  https://pjn-project.s3.eu-central-1.amazonaws.com/model-77000.pth
Resolving pjn-project.s3.eu-central-1.amazonaws.com (pjn-project.s3.eu-central-1.amazonaws.com)... 52.219.74.112
Connecting to pjn-project.s3.eu-central-1.amazonaws.com (pjn-project.s3.eu-central-1.amazonaws.com)|52.219.74.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 55465971 (53M) [application/x-www-form-urlencoded]
Saving to: ‘model-77000.pth.1’


2019-06-02 19:15:53 (68.3 MB/s) - ‘model-77000.pth.1’ saved [55465971/55465971]

--2019-06-02 19:15:55--  https://pjn-project.s3.eu-central-1.amazonaws.com/rnn_vocabulary.pickle
Resolving pjn-project.s3.eu-central-1.amazonaws.com (pjn-project.s3.eu-central-1.amazonaws.com)... 52.219.74.112
Connecting to pjn-project.s3.eu-central-1.amazonaws.com (pjn-project.s3.eu-central-1.amazonaws.com)|52.219.74.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2276673 (2.2M) [binary/octet-stream]
Saving t

In [18]:
# Download the NLTK data packages this notebook relies on:
#  - 'punkt': tokenizer data (presumably what word_tokenize needs - confirm)
#  - 'stopwords': the corpus behind stopwords.words('english')
# The value of the last call is what the cell displays.
import nltk
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, LancasterStemmer
import gensim 
from gensim.models import Word2Vec

import numpy as np
import os



# Choose the device first so the checkpoints can be mapped onto it while loading.
# Switch to torch.device('cuda') to run on a GPU.
device = torch.device('cpu')

# Vocabulary of the language model: maps integer index -> token.
# NOTE: pickle.load can execute arbitrary code; only use a file from a trusted source.
with open('rnn_vocabulary.pickle', 'rb') as f:
  int_to_vocab = pickle.load(f)
n_vocab = len(int_to_vocab)

# map_location remaps tensors that were saved from another device (e.g. CUDA)
# onto `device`, so the checkpoints also load on a machine without a GPU.
checkpoint_lm = torch.load('model-77000.pth', map_location=device)
checkpoint_clas = torch.load('classifier-250.pth', map_location=device)

ModuleNotFoundError: No module named 'torch'

* Importujemy model języka - generator przepisów

In [0]:
class RNNModule(nn.Module):
  """Language model: embedding -> single LSTM -> linear projection onto the vocabulary."""

  def __init__(self, n_vocab, seq_size, embedding_size, lstm_size):
    """Build the layers.

    n_vocab        -- vocabulary size (embedding rows and output logits)
    seq_size       -- stored on the instance; not used by the layers themselves
    embedding_size -- width of the token embeddings
    lstm_size      -- hidden size of the LSTM
    """
    super().__init__()
    self.seq_size = seq_size
    self.lstm_size = lstm_size
    # Attribute names double as state_dict keys, so they must stay as they are.
    self.embedding = nn.Embedding(n_vocab, embedding_size)
    self.lstm = nn.LSTM(embedding_size, lstm_size, batch_first=True)
    self.dense = nn.Linear(lstm_size, n_vocab)

  def forward(self, x, prev_state):
    """Run one pass and return (logits, new_state).

    x is presumably a (batch, seq) tensor of token indices (batch_first LSTM) - confirm.
    prev_state is the (h, c) pair handed to the LSTM.
    """
    embedded = self.embedding(x)
    lstm_out, new_state = self.lstm(embedded, prev_state)
    return self.dense(lstm_out), new_state

  def zero_state(self, batch_size):
    """Return an all-zero (h, c) pair, each of shape (1, batch_size, lstm_size)."""
    state_shape = (1, batch_size, self.lstm_size)
    return (torch.zeros(state_shape), torch.zeros(state_shape))

# Build the language model with the sizes passed below, place it on the
# selected device, then restore the weights from the loaded checkpoint.
net = RNNModule(n_vocab, seq_size=32, embedding_size=64, lstm_size=64).to(device)
net.load_state_dict(checkpoint_lm)


def predict(device, net, int_to_vocab, top_k=5, max_words=100):
  """Sample a token sequence from the language model and print it.

  Generation starts from the '<START>' token. At every step the next token is
  drawn uniformly (np.random.choice) from the `top_k` highest-scoring tokens.
  It stops as soon as '<END>' is produced, or after one initial sample plus
  `max_words` further samples.

  Parameters:
    device       -- torch device the input and state tensors are moved to
    net          -- model exposing eval(), zero_state(batch_size) and
                    net(ix, state) -> (logits, state)
    int_to_vocab -- dict mapping integer index -> token; must contain '<START>'
    top_k        -- number of candidates to sample from (clamped to the number
                    of logits the model returns)
    max_words    -- upper bound on samples drawn after the first one

  Returns None; the space-joined tokens are printed.
  Raises KeyError if '<START>' is missing from the vocabulary.
  """
  net.eval()

  vocab_to_int = {w: k for k, w in int_to_vocab.items()}

  result = ['<START>']
  state = tuple(s.to(device) for s in net.zero_state(1))
  ix = torch.tensor([[vocab_to_int['<START>']]]).to(device)

  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    for _ in range(max_words + 1):
      output, state = net(ix, state)

      k = min(top_k, output[0].size(-1))
      _, top_ix = torch.topk(output[0], k=k)
      choice = int(np.random.choice(top_ix.tolist()[0]))

      word = int_to_vocab[choice]
      result.append(word)
      # Also honoured for the very first sample, so nothing is generated past '<END>'.
      if word == '<END>':
        break
      ix = torch.tensor([[choice]]).to(device)

  # Print text rather than its bytes repr (b'...'), which is what
  # print(s.encode('utf-8')) shows on Python 3.
  print(' '.join(result))

* Importujemy model word2vec oraz funkcje pomocnicze dla tytułów

In [0]:
import re
import gensim

# Fragments removed from recipe titles by parse_title. They are applied with
# plain substring replacement, so the space-padded entries only match whole
# space-delimited tokens while the unpadded ones match anywhere in the text.
custom_stopwords = [
  'quick', 'healthy', 'good', 'worlds', 'best', 'recipe',
  'cool', 'easy', 'tasty', 'fast', 'wonderful',
  ' and ', ' with ', ' in ', ' for ', ' over ',
  'w/', ' n ', ' s ',
]

# Stemmer shared by the title helpers.
porter = PorterStemmer()

# Pretrained word2vec model used to embed titles.
model_word2vec = gensim.models.Word2Vec.load('word2vec.gensim')

def parse_title(title):
  """Normalise a recipe title and return it as a list of tokens.

  Steps: lowercase, replace every run of characters other than a-z/space with
  a space, drop NLTK English stop words, remove the `custom_stopwords`
  fragments by substring replacement, collapse and trim spaces, run the
  Porter stemmer, and tokenise the result.

  Parameters:
    title -- the raw title string

  Returns a list of token strings (empty when nothing survives the filtering).
  """
  res = title.lower()
  res = re.sub('[^a-z ]+', ' ', res)

  # Read the stop-word list once per call instead of once per token.
  english_stopwords = set(stopwords.words('english'))
  kept = [w for w in nltk.word_tokenize(res) if w not in english_stopwords]

  # Every kept word is prefixed with a space so the space-padded custom
  # stop words can match, including at the very start of the text.
  res = ''.join(' ' + w for w in kept)
  for fragment in custom_stopwords:
    res = res.replace(fragment, ' ')

  # Collapse runs of spaces and trim both ends.
  res = re.sub('[ ]+', ' ', res).strip(' ')

  # NOTE(review): the stemmer receives the whole string rather than single
  # words. Left unchanged because the word2vec vocabulary was presumably built
  # with this same preprocessing - confirm before changing it.
  res = porter.stem(res)
  return word_tokenize(res)

def calc_title_vector(words, model):
  """Embed a title as the sum of the word vectors of its parsed tokens.

  Parameters:
    words -- raw title string; it is passed through parse_title first
    model -- a gensim Word2Vec model, or its keyed vectors

  Returns a 1-D float numpy array of the model's vector size. Tokens missing
  from the model are skipped, so a title with no known token gives all zeros.
  """
  # Word2Vec keeps its vectors on `.wv`; membership tests and indexing on the
  # model object itself are not available in gensim 4. Objects without `.wv`
  # (already keyed vectors) are used directly.
  vectors = getattr(model, 'wv', model)

  # Size the accumulator from the model rather than hard-coding 100.
  res = np.zeros((vectors.vector_size, ))
  for word in parse_title(words):
    if word in vectors:
      res += vectors[word]
  return res

* Importujemy model oraz funkcje pomocnicze dla generatora listy składników

In [1]:
# Ingredient labels, one per classifier output: the classifier's last layer has
# len(ingredients_full) units and scores are paired with these names by index,
# so the order and the length of this list must not change.
# NOTE(review): 'peper' and 'tomate' look like misspellings that sit next to
# 'pepper' and 'tomato'; presumably they mirror the labels the checkpoint was
# trained with - verify before editing.
ingredients_full = ['salt', 'peper', 'sugar', 'oil', 'onion', 'butter', 'egg', 'garlic', 'flour', 'olive', 
               'water', 'milk', 'tomate', 'lemon', 'vanilla', 'bean', 'parsley', 'wine', 'potato', 'beef', 
               'rice', 'orange', 'soda', 'mustard', 'parmesan', 'bread', 'mushroom', 'lime', 'chicken', 'basil',
               'cheese', 'juice', 'chocolate', 'candies', 'cream', 'honey', 'apple', 'pepper', 'strawberry', 
               'yogurt', 'vodka', 'pork', 'soy', 'tomato', 'cinnamon', 'raspberry', 'banana', 'chili']

class IngredientClassifier(nn.Module):
  """Maps a 100-dimensional title vector to one sigmoid score per ingredient.

  Architecture: Linear(100, 64) -> ReLU -> Linear(64, len(ingredients_full)) -> Sigmoid.
  """

  def __init__(self):
    super(IngredientClassifier, self).__init__()
    # The attribute name `layer` is part of the state_dict keys; keep it.
    self.layer = torch.nn.Sequential(
      nn.Linear(100, 64),
      nn.ReLU(),
      nn.Linear(64, len(ingredients_full)),
      nn.Sigmoid()
    )

  def forward(self, x):
    """Score the input vector(s).

    Parameters:
      x -- array-like or tensor whose last dimension is 100

    Returns a float32 tensor of scores in (0, 1), one per ingredient.
    """
    # Follow the module's own device instead of a notebook-level global, so the
    # model keeps working wherever it has been moved.
    module_device = next(self.parameters()).device
    # as_tensor accepts numpy arrays and tensors alike; unlike torch.tensor it
    # does not warn about, or force a copy of, an input that is already a tensor.
    x = torch.as_tensor(x, dtype=torch.float32, device=module_device)
    return self.layer(x)
      
# Create the ingredient classifier, place it on the selected device, then
# restore the trained weights (the result of the last call is the cell output).
net_clas = IngredientClassifier()
net_clas = net_clas.to(device)
net_clas.load_state_dict(checkpoint_clas)

NameError: name 'nn' is not defined

In [35]:
from random import randint

# Recipe title to generate an ingredient list for (Colab form field).
Nazwa = "roman salad" #@param {type:"string"}

# Score every ingredient for the title and bring the scores back as a numpy array.
title_vector = calc_title_vector(Nazwa, model_word2vec)
result = net_clas(title_vector).cpu().detach().numpy()

# Pair each ingredient name with its score, rank from highest to lowest and
# keep the names of the five best.
r2 = list(zip(ingredients_full, result))
ranked = sorted(r2, key=lambda pair: pair[1], reverse=True)
ingredients = [name for name, _ in ranked[:5]]
ingredients
    



tensor([0.4809, 0.4440, 0.4937, 0.4745, 0.4605, 0.4981, 0.4810, 0.4528, 0.4815,
        0.4826, 0.4942, 0.4729, 0.4638, 0.4904, 0.4772, 0.4541, 0.4400, 0.4938,
        0.4672, 0.4680, 0.4818, 0.4475, 0.4387, 0.4821, 0.4817, 0.4684, 0.4465,
        0.4656, 0.4733, 0.4722, 0.4691, 0.4545, 0.4479, 0.4617, 0.4480, 0.4684,
        0.4701, 0.5186, 0.4448, 0.4443, 0.4773, 0.4375, 0.4458, 0.4757, 0.4682,
        0.4846, 0.4727, 0.4713], device='cuda:0', grad_fn=<SigmoidBackward>)