In [2]:
import os
import re
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from transformers import GPT2Tokenizer, GPT2Config, TFGPT2Model

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
current_dir = os.path.dirname(os.path.realpath("__file__"))
repo_dir = os.path.dirname(current_dir)
DATA_FILE = os.path.join(repo_dir,"data","cleaned_data.csv")
data = pd.read_csv(DATA_FILE, encoding="utf-8",sep=',')

In [4]:
data.dropna(inplace=True)

In [6]:
config = GPT2Config.from_pretrained('gpt2',n_layer=6)
config.output_hidden_states = False
model = TFGPT2Model.from_pretrained('gpt2',config=config)

Some layers from the model checkpoint at gpt2 were not used when initializing TFGPT2Model: ['transformer/h_._8/ln_1/gamma:0', 'transformer/h_._6/ln_1/gamma:0', 'transformer/h_._8/mlp/c_proj/weight:0', 'transformer/h_._9/mlp/c_fc/bias:0', 'transformer/h_._8/mlp/c_proj/bias:0', 'transformer/h_._7/mlp/c_proj/bias:0', 'transformer/h_._6/attn/c_attn/bias:0', 'transformer/h_._7/ln_1/gamma:0', 'transformer/h_._8/ln_2/gamma:0', 'transformer/h_._10/ln_1/gamma:0', 'transformer/h_._9/attn/c_attn/bias:0', 'transformer/h_._9/ln_1/beta:0', 'transformer/h_._7/attn/c_attn/bias:0', 'transformer/h_._6/attn/c_proj/bias:0', 'transformer/h_._8/ln_1/beta:0', 'transformer/h_._11/mlp/c_proj/weight:0', 'transformer/h_._10/ln_2/gamma:0', 'transformer/h_._7/attn/c_proj/bias:0', 'transformer/h_._8/attn/c_attn/bias:0', 'transformer/h_._10/attn/c_proj/bias:0', 'transformer/h_._10/attn/c_proj/weight:0', 'transformer/h_._10/attn/c_attn/weight:0', 'transformer/h_._11/ln_2/beta:0', 'transformer/h_._11/attn/c_attn/weigh

In [7]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2', lang='fr')
# tokenizer.pad_token = tokenizer.eos_token
# model.resize_token_embeddings(len(tokenizer))
# model.config.pad_token_id = model.config.eos_token_id

In [77]:
input_sequences = []
target_sequences = []
max_length = 768
for i in range(len(data)):
    recipe = data.iloc[[i]].to_dict("records")[0]
    inputs = tokenizer.encode(recipe["Ingredients"], truncation = True,max_length = max_length,return_tensors="tf")
    targets = tokenizer.encode(recipe["recettes"], truncation = True,max_length = max_length,return_tensors="tf")
    input_sequences.append(inputs)
    target_sequences.append(targets)

In [22]:
max_len_input = max([input_sequences[i].shape[1] for i in range(len(input_sequences))])
max_len_target = max([target_sequences[i].shape[1] for i in range(len(target_sequences))])

In [36]:
target_sequences[0]

<tf.Tensor: shape=(1, 128), dtype=int32, numpy=
array([[   82,   280,   431, 11555,   448,  1453,  1787, 49399,  1313,
          279,  2002,   274,  1059,   260,   198, 27682, 29350,    67,
         2334,  1058,   220,   198,    17,   279,  2002,   274,  1059,
          260,   220,   198,   362,   267,   570,   684,   289,  3694,
          220,   198,   220,   513,   308,   516,  8448, 31907,   289,
         4891,   274,   220,   198,   220,   838,   537,  1126,  1326,
         5306, 14234,   220,   198,   220,   352,   269,    13,    64,
           13,    66, 34611,   220,   198,   220,   352,   279,   924,
           68,  1928, 46395,   220,   198,   352,   730,    84,  8270,
          300,  2899,   959,   220,   198,   352,  1787, 49399,  1313,
          220,   198,   352, 23441, 35833, 23027,   279,   280,   293,
          198,    52,   301,   641,  2915,  1058,   198,    75,   280,
         2395,  5022, 23365,   458, 14220,   415, 40047, 46901,   487,
          415,   269, 13192, 

In [78]:
# max_length = max(max_len_input,max_len_target)

# Pad the input and target sequences to the maximum length
padded_input_sequences = []
padded_target_sequences = []
for inputs, targets in zip(input_sequences, target_sequences):
    padded_input = tf.keras.preprocessing.sequence.pad_sequences(inputs, maxlen=max_length, padding="post")
    padded_target = tf.keras.preprocessing.sequence.pad_sequences(targets, maxlen=max_length, padding="post")
    padded_input_sequences.append(padded_input)
    padded_target_sequences.append(padded_target)

In [79]:
input_sequences = padded_input_sequences
target_sequences = padded_target_sequences

In [80]:
dataset = tf.data.Dataset.from_tensor_slices((input_sequences, target_sequences))

In [81]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)

In [91]:
@tf.function
def train_step(inputs, targets):
    with tf.GradientTape() as tape:
        logits = model(inputs)[0]
        print("model predictions")
        logits = tf.slice(logits, [0, 0, 0], [-1, 1, -1])
        print("logits slicing")
        logits = tf.squeeze(logits, axis=1)
        print("model squeeze")
        targets = tf.cast(targets, dtype=tf.float32)
        print("target casting")
        # print(targets.shape)
        loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets)
        print("loss")
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss


In [92]:
from tqdm import tqdm
num_epochs = 10

for epoch in tqdm(range(num_epochs)):
    for inputs, targets in dataset:
        loss = train_step(inputs, targets)

  0%|          | 0/10 [00:00<?, ?it/s]

model predictions
logits slicing
model squeeze
target casting
loss


  0%|          | 0/10 [09:47<?, ?it/s]


KeyboardInterrupt: 

In [95]:

X_train, X_test, y_train, y_test = train_test_split(input_sequences ,target_sequences, test_size=0.2, random_state=42)

In [101]:
# Train the model
history = model.fit(
  X_train,
  y_train,
  batch_size=5,
  epochs=10,
  validation_data=(X_test, y_test),
  verbose=1
)

In [None]:
ingredients = "1 cup flour, 1 tsp baking powder, 1/2 tsp salt, 1/2 cup sugar, 1 egg, 1/2 cup milk, 2 tbsp butter"
generated_recipe = model.generate(input_ids=tokenizer.encode(ingredients), max_length=1024)

# 7. Save the fine-tuned model to disk
tf.saved_model.save(model, "recipe_generator")

In [None]:
config = GPT2Config.from_pretrained('gpt2')
config.output_hidden_states = False
model = TFGPT2Model.from_pretrained('gpt2',config=config)
# model.config = config
model.resize_token_embeddings(len(tokenizer))

All model checkpoint layers were used when initializing TFGPT2Model.

All the layers of TFGPT2Model were initialized from the model checkpoint at gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2Model for predictions without further training.


<transformers.modeling_tf_utils.TFSharedEmbeddings at 0x2449ef70e20>

In [None]:
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
y_train = np.expand_dims(y_train, axis=-1)
y_test = np.expand_dims(y_test, axis=-1)

In [99]:
# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.CategoricalAccuracy('accuracy')

model.compile(
  loss=loss,
  optimizer=optimizer,
  metrics=metric
)
print("model compiled")



model compiled


In [None]:
dataset = dataset.shuffle(768).batch(32)
history = model.fit(
  dataset,
  batch_size=5,
  epochs=10,
  validation_data=(X_test, y_test),
  verbose=1
)

In [8]:
max_length = 512
input_data = 
sequences = tokenizer.batch_encode_plus(data['recettes'].to_list(), padding=True, truncation=True, return_tensors='tf', max_length=max_length)

# num_classes = len(np.unique(y))
# y = tf.keras.utils.to_categorical(y, num_classes)

KeyboardInterrupt: ignored