<a href="https://colab.research.google.com/github/Bebe-Ai/Ai-Playground/blob/main/week3_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade transformers datasets

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer
from datasets import Dataset
import pandas as pd
import random


In [None]:
# Canned customer-support questions. Each question is answered by the entry
# at the same position in `answers`.
questions = [
    "What is the delivery time?",
    "Do you ship internationally?",
    "How can I return a product?",
    "What payment methods do you accept?",
    "Is there a warranty?",
]

answers = [
    "Delivery usually takes 3-5 business days.",
    "Yes, we ship worldwide.",
    "You can return any product within 30 days.",
    "We accept credit cards, PayPal, and Apple Pay.",
    "All products have a 1-year warranty.",
]

# Pair every question with its canned answer once, up front.
answer_by_question = dict(zip(questions, answers))

# Draw 500 questions uniformly at random and attach the matching answer.
data = [
    {"question": picked, "answer": answer_by_question[picked]}
    for picked in (random.choice(questions) for _ in range(500))
]

df = pd.DataFrame(data)

# Wrap the frame in a Hugging Face Dataset and hold out 20% of the rows.
dataset = Dataset.from_pandas(df).train_test_split(test_size=0.2)
train_dataset, test_dataset = dataset["train"], dataset["test"]

# Show one training row as a sanity check.
print(train_dataset[0])


In [None]:
# Load the pretrained tokenizer for the "t5-small" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("t5-small")  # T5 is great for Q&A

def tokenize(batch):
    """Tokenize a batch of Q&A rows into encoder inputs and decoder labels.

    Args:
        batch: Mapping with parallel "question" and "answer" lists of
            strings, as passed by ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the prefixed questions, plus a "labels"
        entry holding the answer token ids. Padding positions in the labels
        are replaced with -100 so the loss ignores them.
    """
    # Prefix "question: " is common for T5-style tasks
    inputs = ["question: " + q for q in batch["question"]]
    targets = batch["answer"]
    model_inputs = tokenizer(inputs, padding=True, truncation=True, max_length=64)
    labels = tokenizer(targets, padding=True, truncation=True, max_length=64)

    # Without this mask the model is also trained to predict the padding
    # tokens that follow every answer shorter than the longest one.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [-100 if token_id == pad_id else token_id for token_id in label_ids]
        for label_ids in labels["input_ids"]
    ]
    return model_inputs

# Tokenize both splits in batches, train split first.
train_dataset, test_dataset = (
    split.map(tokenize, batched=True) for split in (train_dataset, test_dataset)
)


In [None]:
# Load the pretrained "t5-small" sequence-to-sequence model that is fine-tuned below.
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")


In [None]:
# Fine-tuning configuration, grouped by concern.
training_args = TrainingArguments(
    # Output locations and reporting.
    output_dir="./results",
    logging_dir="./logs",
    report_to=[],  # This disables W&B (and any other reporting)
    # Evaluate and checkpoint on the same cadence.
    eval_strategy="epoch",
    save_strategy="epoch",
    # Run length and batch sizes.
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Regularisation.
    weight_decay=0.01,
)


In [None]:
# Bundle the model, the training configuration, and the tokenized splits into
# a Trainer; the test split is used as the evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset
)


In [None]:
# Run fine-tuning with the trainer built in the previous cell.
trainer.train()


In [None]:
def answer_question(question):
    """Generate the model's answer to a single question.

    Args:
        question: The question text, without the "question: " prefix.

    Returns:
        The decoded answer string with special tokens removed.
    """
    input_text = "question: " + question
    # Training may leave the model on an accelerator while the tokenizer
    # returns CPU tensors; generate() needs both on the same device.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke-test the fine-tuned model on two questions from the training set.
for sample_question in (
    "Do you ship internationally?",
    "How can I return a product?",
):
    print(answer_question(sample_question))


In [None]:
# Persist the model first, then the tokenizer, into the same directory so
# both can be reloaded together.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./customer_support_model")


In [None]:
def answer_question(question):
    """Generate the model's answer to a single question.

    Args:
        question: The question text, without the "question: " prefix.

    Returns:
        The decoded answer string with special tokens removed.
    """
    input_text = "question: " + question
    # Training may leave the model on an accelerator while the tokenizer
    # returns CPU tensors; generate() needs both on the same device.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Probe the model with phrasings that do not appear in the training data.
for sample_question in (
    "How long does the return time take?",
    "What type o products do you have?",
):
    print(answer_question(sample_question))


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Reload the fine-tuned tokenizer and model from the directory saved earlier.
MODEL_DIR = "./customer_support_model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_DIR)

def ask_bot(question):
    """Return the bot's reply to one question.

    Args:
        question: The question text, without the "question: " prefix.

    Returns:
        The decoded reply string with special tokens removed.
    """
    encoded = tokenizer("question: " + question, return_tensors="pt")
    generated = model.generate(**encoded, max_length=80)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Interactive loop: read questions and print the bot's reply until the user
# types "quit".
print("💬 Customer Support Bot is ready! Type 'quit' to stop.\n")

while True:
    question = input("You: ")
    # Normalise only for command detection, so inputs such as " Quit " still
    # end the session; the model receives the text exactly as typed.
    command = question.strip().lower()
    if command == "quit":
        print("Bot: Goodbye 👋")
        break
    if not command:
        # Nothing was typed: prompt again instead of querying the model.
        continue
    answer = ask_bot(question)
    print("Bot:", answer)
