In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "distilgpt2"
device = "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model= AutoModelForCausalLM.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd
import ast


df = pd.read_csv('sampled_recipes.csv')
df.dropna(subset=['name', 'steps', 'ingredients'], inplace=True)


df['steps'] = df['steps'].apply(ast.literal_eval)
df['ingredients'] = df['ingredients'].apply(ast.literal_eval)


def format_example(row):
    ingredients = ', '.join(row['ingredients'])
    steps = '\n'.join([f"{i+1}. {step}" for i, step in enumerate(row['steps'])])
    return f"Ingredients: {ingredients}\nRecipe Name: {row['name']}\nSteps:\n{steps}"

df['text'] = df.apply(format_example, axis=1)





In [3]:
from datasets import Dataset


def map_dataset(examples):
     return tokenizer(
        examples["text"],

        truncation=True,

        # max_length=512
    )


dataset = Dataset.from_pandas(df[['text']])
dataset = dataset.map(map_dataset, batched=True, batch_size=8)

dataset = dataset.train_test_split(test_size=0.1)


Map: 100%|██████████| 2000/2000 [00:00<00:00, 3021.84 examples/s]


In [4]:
# dataset

In [5]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False
)


In [6]:
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer

model = AutoModelForCausalLM.from_pretrained(model_name)

training_args = TrainingArguments(
    output_dir="./sub",
    learning_rate=2e-5,
    logging_steps=100,
    num_train_epochs=10,
    weight_decay=0.01,
    no_cuda=True, 

 
)

trainer = Trainer(
    model=model,
    train_dataset=dataset["train"],
    args=training_args,
    data_collator=data_collator,
    eval_dataset=dataset["test"]


)




In [7]:
trainer.train()


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
100,2.8038
200,2.6118
300,2.5139
400,2.4828
500,2.4412
600,2.4065
700,2.4196
800,2.3628
900,2.3642
1000,2.3184


TrainOutput(global_step=2250, training_loss=2.3618876546223957, metrics={'train_runtime': 17680.347, 'train_samples_per_second': 1.018, 'train_steps_per_second': 0.127, 'total_flos': 1673799618723840.0, 'train_loss': 2.3618876546223957, 'epoch': 10.0})

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

model_path = './checkpoint-2250'
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_path)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

ingredients_list = ["tomato", "broccoli",  "grape"]
prompt = f"Ingredients: {', '.join(ingredients_list)}\nRecipe Name:"

inputs = tokenizer(prompt, return_tensors="pt").to(device)

output = model.generate(
    inputs.input_ids,
    max_length=512,
    num_return_sequences=1,
    no_repeat_ngram_size=2,
    

  eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id 
)

generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
print(generated_text)


Ingredients: tomato, broccoli, grape
Recipe Name: broccoli salad
Steps:
1. preheat oven to 350 degrees
2. in a large bowl , combine broccoli , grape , broccoli and grape juice
3. mix well
4. add broccoli mixture to salad mixture
5. stir well , stirring constantly
6. serve immediately
7. enjoy !
8. you can also use any of the ingredients you like ! you may also like to add a little extra salt or pepper if you prefer ! if using a salad or a vegetable salad , you might also enjoy using it ! :)
9. if your salad is too hot , add the broccoli mix and toss to combine
10. garnish with a sprinkle of green chilies and a dash of salt and pepper
11. for the salad: combine the vegetables , greens , and tomatoes
12. sprinkle with the remaining ingredients
13. top with salad and serve ! !
