In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Base checkpoint used for both tokenization and fine-tuning.
model_name = "distilgpt2"
device = "cpu"

# The tokenizer is given a pad token by reusing its end-of-sequence token,
# so that batches can be padded later on.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): the training cell loads the model from `model_name` again,
# so this instance is replaced before training starts.
model = AutoModelForCausalLM.from_pretrained(model_name)

In [10]:
import pandas as pd
import ast


# Read the sampled recipes and discard rows that lack any field used below.
required_columns = ['name', 'steps', 'ingredients']
df = pd.read_csv('sampled_recipes.csv').dropna(subset=required_columns)

# 'steps' and 'ingredients' are stored as Python-literal strings in the CSV;
# parse them back into Python objects.
for list_column in ('steps', 'ingredients'):
    df[list_column] = df[list_column].apply(ast.literal_eval)


def format_example(row):
    """Render one recipe row as a single training string.

    Args:
        row: Mapping-like object with 'ingredients' (iterable of str),
            'steps' (iterable of str) and 'name'.

    Returns:
        A string with a comma-separated ingredient line, a recipe-name line
        and a "Steps:" header followed by one numbered step per line
        (numbering starts at 1).
    """
    ingredient_line = ', '.join(row['ingredients'])
    numbered_steps = [
        f"{number}. {step}"
        for number, step in enumerate(row['steps'], start=1)
    ]
    step_lines = '\n'.join(numbered_steps)
    return f"Ingredients: {ingredient_line}\nRecipe Name: {row['name']}\nSteps:\n{step_lines}"

# Build the training text for every recipe by applying format_example row by row.
df['text'] = df.apply(format_example, axis=1)





In [None]:
from datasets import Dataset


def map_dataset(examples):
    """Tokenize the "text" field of a batch of examples.

    Truncation is switched on, but no explicit ``max_length`` is passed
    (an earlier ``max_length=512`` cap is left disabled), so the effective
    limit is presumably the tokenizer's own model maximum.

    Args:
        examples: Batch mapping that contains a "text" entry.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that text.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True)


# Wrap the formatted recipe text in a Hugging Face Dataset and tokenize it
# in batches of 8 examples.
dataset = Dataset.from_pandas(df[['text']])
dataset = dataset.map(map_dataset, batched=True, batch_size=8)

# Hold out 10% of the examples for evaluation. The seed is fixed so that the
# train/test split is identical on every Restart & Run All.
dataset = dataset.train_test_split(test_size=0.1, seed=42)


In [None]:
# dataset

In [None]:
from transformers import DataCollatorForLanguageModeling

# Causal language modelling: mlm=False turns off masked-LM style batching.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


In [None]:
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer

# Load fresh pretrained weights for fine-tuning.
model = AutoModelForCausalLM.from_pretrained(model_name)

# Checkpoints are written under ./submission; training is forced onto the CPU.
# NOTE(review): newer transformers releases reportedly deprecate `no_cuda`
# in favour of `use_cpu` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./submission",
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=100,
    no_cuda=True,
)

# NOTE(review): an eval split is supplied, but no evaluation schedule is set
# in `training_args`, so it is presumably only used on an explicit
# `trainer.evaluate()` call — confirm.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)


In [None]:
# Run fine-tuning with the Trainer configured above (output_dir is "./submission").
trainer.train()


In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

# Load the fine-tuned weights and the tokenizer from a saved Trainer checkpoint.
# NOTE(review): the step number in this path is hard-coded; it presumably
# changes with dataset size, batch size and epoch count — confirm the
# directory exists after training.
model_path = './submission/checkpoint-675'
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_path)
model.eval()

# Use the GPU for inference when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# The prompt mirrors the training format up to "Recipe Name:", so the model
# continues with a recipe name and the numbered steps.
ingredients_list = ["chicken", "salt", "pepper"]
prompt = f"Ingredients: {', '.join(ingredients_list)}\nRecipe Name:"

inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Pass the whole encoding (input_ids AND attention_mask). Because the pad
# token is the EOS token, generate() cannot reliably infer the mask from
# input_ids alone and warns when it is missing.
output = model.generate(
    **inputs,
    max_length=500,  # total length in tokens, prompt included
    num_return_sequences=1,
    no_repeat_ngram_size=2,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
print(generated_text)


Ingredients: chicken, salt, pepper
Recipe Name: turkey casserole
Steps:
1. preheat oven to 350f
2. in a large bowl , combine chicken , salt , pepper , and pepper and combine well
3. add chicken and salt and cook for about 5 minutes
4. remove chicken from heat and stir in salt
5. stir well and serve with a side of chicken
6. enjoy !
7. you can also use any of the ingredients you like ! you may also like: bacon , bacon and bacon
8. if you prefer , you might also enjoy: browned chicken with salt or pepper or brown chicken on top of bacon or bacon on bottom of a plate
9. make sure to add the bacon to the plate and bake for 10 minutes or until the chicken is golden brown
10. serve immediately !
