# Blenderbot - 400M Fine Tuning
This notebook contains fine-tuning examples for the Hugging Face model facebook/blenderbot-400M-distill.

In [None]:
# Third-party dependencies used by the cells below.
import pandas as pd
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM

# Question/answer pairs used for fine-tuning; the train() cells read the
# 'question' and 'answer' columns of this frame.
df = pd.read_json('/content/friends-1-227-Rachel-pair.json')

In [None]:
def train(epochs=3, batch_size=16):
    """Fine-tune facebook/blenderbot-400M-distill on the pairs in ``df``.

    Reads the module-level ``df``: the 'question' column is the encoder
    input and the 'answer' column is the target reply. Trains with Adam
    (learning rate 5e-5) and saves the model to ``trained_blenderbot_model``
    (relative to the current working directory).

    Args:
        epochs: Number of passes over the data. The default of 3 is a short
            test run to see whether the model generates valid outputs.
        batch_size: Number of pairs per gradient step.

    Raises:
        ValueError: If the model config defines no decoder start token.
    """
    print("Starting training...")
    model_name = 'facebook/blenderbot-400M-distill'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

    inputs = [str(i) for i in df['question'].values]
    outputs = [str(i) for i in df['answer'].values]

    # Tokenize the inputs and outputs
    tokenized_inputs = tokenizer(inputs, padding=True, truncation=True, return_tensors="tf", max_length=64)
    tokenized_outputs = tokenizer(outputs, padding=True, truncation=True, return_tensors="tf", max_length=64)

    target_ids = tokenized_outputs["input_ids"]
    target_mask = tokenized_outputs["attention_mask"]

    # Teacher forcing: the decoder input is the target shifted right by one
    # position with the decoder start token in front. Generation starts from
    # that same token, so the model must learn "start token -> first reply
    # token". Feeding target[:, :-1] against target[:, 1:] skips that step
    # and the generated replies lose their first word.
    start_id = model.config.decoder_start_token_id
    if start_id is None:
        raise ValueError("model.config.decoder_start_token_id is not set")
    start_column = tf.ones_like(target_ids[:, :1]) * start_id
    decoder_input_ids = tf.concat([start_column, target_ids[:, :-1]], axis=1)
    # Shift the mask the same way; the start token is always attended to.
    decoder_attention_mask = tf.concat(
        [tf.ones_like(target_mask[:, :1]), target_mask[:, :-1]], axis=1
    )

    # Labels are the unshifted targets. Padding positions are set to -100,
    # the value hf_compute_loss ignores, so padding does not contribute to
    # the loss.
    ignore_index = tf.ones_like(target_ids) * -100
    labels = tf.where(target_mask == 1, target_ids, ignore_index)

    # Check shapes
    print("Input IDs shape:", tokenized_inputs["input_ids"].shape)
    print("Decoder Input IDs shape:", decoder_input_ids.shape)

    # Define training parameters
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer, loss=model.hf_compute_loss)

    # Train the model. The list order follows the model's positional call
    # arguments: input_ids, attention_mask, decoder_input_ids,
    # decoder_attention_mask.
    model.fit(
        [tokenized_inputs.input_ids, tokenized_inputs.attention_mask, decoder_input_ids, decoder_attention_mask],
        labels,
        batch_size=batch_size,
        epochs=epochs
    )

    model.save_pretrained('trained_blenderbot_model')

train()


In [None]:
def test(model_dir='/content/trained_blenderbot_model', prompts=None):
    """Print generated replies from the fine-tuned model for sample prompts.

    The tokenizer is loaded from the base checkpoint
    (facebook/blenderbot-400M-distill); the model weights are loaded from
    ``model_dir``. Each prompt is printed, followed by the generated reply.

    Args:
        model_dir: Directory holding the fine-tuned model.
        prompts: Optional list of input sentences. When omitted, a fixed set
            of sample lines is used.
    """
    model_name = 'facebook/blenderbot-400M-distill'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = TFAutoModelForSeq2SeqLM.from_pretrained(model_dir)

    if prompts is None:
        prompts = [
            "Do you remember the day we had dessert?",
            "Come on Rach, you don't have what, ten minutes?",
            "Rach, are you okay?",
            "Hey uh, Rach, funny story. I ah, bumped into Joanna on the street yesterday.",
            "Rachel always cries!",
            "We were on a break!",
            "Oh, Rachel,  actually, y'know what, forget it.",
        ]

    # `prompt` rather than `input`, so the builtin input() is not shadowed.
    for prompt in prompts:
        encoded = tokenizer(prompt, return_tensors="tf", padding=True, truncation=True, max_length=64)
        # Pass the attention mask explicitly instead of letting generate()
        # infer it from the input ids.
        generated = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_length=64,
        )[0]
        print(prompt)
        # Drop special tokens (start/end/padding) so only the reply text is shown.
        print(tokenizer.decode(generated, skip_special_tokens=True))


*   Model: facebook/blenderbot-400M-distill
*   Model online demo link: https://huggingface.co/facebook/blenderbot-400M-distill
*   time per epoch: 1m20sec
*   epochs: 3
*   loss: start: 1.4436 / end: 0.6467
*   lr: 5e-5
*   batch size: 16
*   step time: 285ms/step

1.   Come on Rach, you don't have what, ten minutes?
*   Çıktı: I do, I have ten minutes, but I have to go to the dentist.


2.   Rach, are you okay?
*   Çıktı: Yeah, I'm fine. I was just a little shaken up, but I was fine.

3.   Hey uh, Rach, funny story. I ah, bumped into Joanna on the street yesterday.
*   Çıktı: Oh my gosh! Did you see her? Did you talk to her? What did she say?

4.   Rachel always cries!
*   Çıktı: I know, I know. I was just trying to be nice and let you know that I'm sorry.

5.   We were on a break!
*   Çıktı: Breaks are the best! What did y'all do on the break? Did you guys do anything fun?

6.   Oh, Rachel,  actually, y'know what, forget it.
*   Çıktı: I can’t, I have to go to work. I’m gonna miss you.




In [None]:
def train(epochs=10, batch_size=32):
    """Fine-tune facebook/blenderbot-400M-distill on the pairs in ``df``.

    Reads the module-level ``df``: the 'question' column is the encoder
    input and the 'answer' column is the target reply. Trains with Adam
    (learning rate 5e-5) and saves the model to ``trained_blenderbot_model``
    (relative to the current working directory).

    Args:
        epochs: Number of passes over the data.
        batch_size: Number of pairs per gradient step.

    Raises:
        ValueError: If the model config defines no decoder start token.
    """
    print("Starting training...")
    model_name = 'facebook/blenderbot-400M-distill'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

    inputs = [str(i) for i in df['question'].values]
    outputs = [str(i) for i in df['answer'].values]

    # Tokenize the inputs and outputs
    tokenized_inputs = tokenizer(inputs, padding=True, truncation=True, return_tensors="tf", max_length=64)
    tokenized_outputs = tokenizer(outputs, padding=True, truncation=True, return_tensors="tf", max_length=64)

    target_ids = tokenized_outputs["input_ids"]
    target_mask = tokenized_outputs["attention_mask"]

    # Teacher forcing: the decoder input is the target shifted right by one
    # position with the decoder start token in front. Generation starts from
    # that same token, so the model must learn "start token -> first reply
    # token". Feeding target[:, :-1] against target[:, 1:] skips that step
    # and the generated replies lose their first word.
    start_id = model.config.decoder_start_token_id
    if start_id is None:
        raise ValueError("model.config.decoder_start_token_id is not set")
    start_column = tf.ones_like(target_ids[:, :1]) * start_id
    decoder_input_ids = tf.concat([start_column, target_ids[:, :-1]], axis=1)
    # Shift the mask the same way; the start token is always attended to.
    decoder_attention_mask = tf.concat(
        [tf.ones_like(target_mask[:, :1]), target_mask[:, :-1]], axis=1
    )

    # Labels are the unshifted targets. Padding positions are set to -100,
    # the value hf_compute_loss ignores, so padding does not contribute to
    # the loss.
    ignore_index = tf.ones_like(target_ids) * -100
    labels = tf.where(target_mask == 1, target_ids, ignore_index)

    # Define training parameters
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer, loss=model.hf_compute_loss)

    # Train the model. The list order follows the model's positional call
    # arguments: input_ids, attention_mask, decoder_input_ids,
    # decoder_attention_mask.
    model.fit(
        [tokenized_inputs.input_ids, tokenized_inputs.attention_mask, decoder_input_ids, decoder_attention_mask],
        labels,
        batch_size=batch_size,
        epochs=epochs
    )

    model.save_pretrained('trained_blenderbot_model')

train()


In [None]:
# Print generated replies for the sample prompts using the saved fine-tuned model.
test()

*   Model: facebook/blenderbot-400M-distill
*   Model online demo link: https://huggingface.co/facebook/blenderbot-400M-distill
*   time per epoch: 1m13sec
*   epochs: 10
*   loss: start: 1.8983 / end: 0.1114
*   lr: 5e-5
*   batch size: 32
*   step time: 490ms/step

1.   Come on Rach, you don't have what, ten minutes?
*   Çıktı: I don't have ten minutes! I can't go ten minutes, I have to wait for you!


2.   Rach, are you okay?
*   Çıktı: Yeah, I'm fine. I just don't know what the guy was thinking. I thought he was going to hit me.

3.   Hey uh, Rach, funny story. I ah, bumped into Joanna on the street yesterday.
*   Çıktı: Joanna on the street? That's so cool! How did you know she was there?

4.   Rachel always cries!
*   Çıktı: I do not want you to be the only one that has ever cried in my entire life!

5.   We were on a break!
*   Çıktı: Oh, so was I! And I was on a break with Ralph Lauren!

6.   Oh, Rachel,  actually, y'know what, forget it.
*   Çıktı: I’ll ask him for you, if you want me to give him a heads-off?

7.   Do you remember the day we had dessert?
*   Çıktı: I do! I remember the first time we had ice cream. Do you remember the last time you had a piece?



In [None]:
def train(epochs=50, batch_size=16):
    """Fine-tune facebook/blenderbot-400M-distill on the pairs in ``df``.

    Reads the module-level ``df``: the 'question' column is the encoder
    input and the 'answer' column is the target reply. Trains with Adam
    (learning rate 5e-5) and saves the model to ``trained_blenderbot_model``
    (relative to the current working directory).

    Args:
        epochs: Number of passes over the data.
        batch_size: Number of pairs per gradient step.

    Raises:
        ValueError: If the model config defines no decoder start token.
    """
    print("Starting training...")
    model_name = 'facebook/blenderbot-400M-distill'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

    inputs = [str(i) for i in df['question'].values]
    outputs = [str(i) for i in df['answer'].values]

    # Tokenize the inputs and outputs
    tokenized_inputs = tokenizer(inputs, padding=True, truncation=True, return_tensors="tf", max_length=64)
    tokenized_outputs = tokenizer(outputs, padding=True, truncation=True, return_tensors="tf", max_length=64)

    target_ids = tokenized_outputs["input_ids"]
    target_mask = tokenized_outputs["attention_mask"]

    # Teacher forcing: the decoder input is the target shifted right by one
    # position with the decoder start token in front. Generation starts from
    # that same token, so the model must learn "start token -> first reply
    # token". Feeding target[:, :-1] against target[:, 1:] skips that step
    # and the generated replies lose their first word.
    start_id = model.config.decoder_start_token_id
    if start_id is None:
        raise ValueError("model.config.decoder_start_token_id is not set")
    start_column = tf.ones_like(target_ids[:, :1]) * start_id
    decoder_input_ids = tf.concat([start_column, target_ids[:, :-1]], axis=1)
    # Shift the mask the same way; the start token is always attended to.
    decoder_attention_mask = tf.concat(
        [tf.ones_like(target_mask[:, :1]), target_mask[:, :-1]], axis=1
    )

    # Labels are the unshifted targets. Padding positions are set to -100,
    # the value hf_compute_loss ignores, so padding does not contribute to
    # the loss.
    ignore_index = tf.ones_like(target_ids) * -100
    labels = tf.where(target_mask == 1, target_ids, ignore_index)

    # Define training parameters
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer, loss=model.hf_compute_loss)

    # Train the model. The list order follows the model's positional call
    # arguments: input_ids, attention_mask, decoder_input_ids,
    # decoder_attention_mask.
    model.fit(
        [tokenized_inputs.input_ids, tokenized_inputs.attention_mask, decoder_input_ids, decoder_attention_mask],
        labels,
        batch_size=batch_size,
        epochs=epochs
    )

    model.save_pretrained('trained_blenderbot_model')

train()


*   Model: facebook/blenderbot-400M-distill
*   Model online demo link: https://huggingface.co/facebook/blenderbot-400M-distill
*   time per epoch: 1m13sec
*   epochs: 50
*   loss: start: (not recorded) / end: (not recorded)
*   lr: 5e-5
*   batch size: 16
*   step time: 285ms/step

1.   Come on Rach, you don't have what, ten minutes?
*   Çıktı: do I have ten minutes?!!  Fasten your seat,  pass me the seat, give me ten minutes!!


2.   Rach, are you okay?
*   Çıktı: yeah, it’s just that I’m a little bummed that my sweater’s ruined.

3.   Hey uh, Rach, funny story. I ah, bumped into Joanna on the street yesterday.
*   Çıktı: That was so awkward, I can’t believe you didn’t say, Joanna?

4.   Rachel always cries!
*   Çıktı: Don’t cry!  I’m sorry, but Phoebe, that’s not true!

5.   We were on a break!
*   Çıktı: And y’know Ross why don’t you put that on your answering machine?!

6.   Oh, Rachel,  actually, y'know what, forget it.
*   Çıktı: I’ll ask him for you, if you give me your phone, I will call you right now.

7.   Do you remember the day we had dessert?
*   Çıktı: you mean the dessert that we had?  I remember how we had ice cream on the first date.



In [None]:
# Print generated replies for the sample prompts using the saved fine-tuned model.
test()

In [None]:
def train(epochs=100, batch_size=16):
    """Fine-tune facebook/blenderbot-400M-distill on the pairs in ``df``.

    Reads the module-level ``df``: the 'question' column is the encoder
    input and the 'answer' column is the target reply. Trains with Adam
    (learning rate 5e-5) and saves the model to ``trained_blenderbot_model``
    (relative to the current working directory).

    Args:
        epochs: Number of passes over the data.
        batch_size: Number of pairs per gradient step.

    Raises:
        ValueError: If the model config defines no decoder start token.
    """
    print("Starting training...")
    model_name = 'facebook/blenderbot-400M-distill'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

    inputs = [str(i) for i in df['question'].values]
    outputs = [str(i) for i in df['answer'].values]

    # Tokenize the inputs and outputs
    tokenized_inputs = tokenizer(inputs, padding=True, truncation=True, return_tensors="tf", max_length=64)
    tokenized_outputs = tokenizer(outputs, padding=True, truncation=True, return_tensors="tf", max_length=64)

    target_ids = tokenized_outputs["input_ids"]
    target_mask = tokenized_outputs["attention_mask"]

    # Teacher forcing: the decoder input is the target shifted right by one
    # position with the decoder start token in front. Generation starts from
    # that same token, so the model must learn "start token -> first reply
    # token". Feeding target[:, :-1] against target[:, 1:] skips that step
    # and the generated replies lose their first word.
    start_id = model.config.decoder_start_token_id
    if start_id is None:
        raise ValueError("model.config.decoder_start_token_id is not set")
    start_column = tf.ones_like(target_ids[:, :1]) * start_id
    decoder_input_ids = tf.concat([start_column, target_ids[:, :-1]], axis=1)
    # Shift the mask the same way; the start token is always attended to.
    decoder_attention_mask = tf.concat(
        [tf.ones_like(target_mask[:, :1]), target_mask[:, :-1]], axis=1
    )

    # Labels are the unshifted targets. Padding positions are set to -100,
    # the value hf_compute_loss ignores, so padding does not contribute to
    # the loss.
    ignore_index = tf.ones_like(target_ids) * -100
    labels = tf.where(target_mask == 1, target_ids, ignore_index)

    # Define training parameters
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer, loss=model.hf_compute_loss)

    # Train the model. The list order follows the model's positional call
    # arguments: input_ids, attention_mask, decoder_input_ids,
    # decoder_attention_mask.
    model.fit(
        [tokenized_inputs.input_ids, tokenized_inputs.attention_mask, decoder_input_ids, decoder_attention_mask],
        labels,
        batch_size=batch_size,
        epochs=epochs
    )

    model.save_pretrained('trained_blenderbot_model')

train()


In [None]:
# Print generated replies for the sample prompts using the saved fine-tuned model.
test()

*   Model: facebook/blenderbot-400M-distill
*   Model online demo link: https://huggingface.co/facebook/blenderbot-400M-distill
*   time per epoch: 1m20sec
*   epochs: 100
*   loss: start: 1.4188 / end: 0.0033
*   lr: 5e-5
*   batch size: 16
*   step time: 280ms/step

1.   Come on Rach, you don't have what, ten minutes?
*   Çıktı: yeah, I told you, it's about the fact that I don't have ten minutes left.


2.   Rach, are you okay?
*   Çıktı: I’m fine, but could you please make sure that she’s wearing her own?

3.   Hey uh, Rach, funny story. I ah, bumped into Joanna on the street yesterday.
*   Çıktı: y'know I had a dream last night, and I can't get it out of my head!

4.   Rachel always cries!
*   Çıktı: Thanks, but y’know what? I don’t want to be known as a pushover.

5.   We were on a break!
*   Çıktı: ’s not funny, Monica! This is about me! I washing the dishes!

6.   Oh, Rachel,  actually, y'know what, forget it.
*   Çıktı: wait, Chandler, this is in the hallway. I mean, that’s about seven seconds.

7.   Do you remember the day we had dessert?
*   Çıktı: because you remember the day you had a dessert with... a bird just grabbed it, and then tried to fly away with it and, and then just dropped it on the street?

