In [29]:
from transformers import T5Tokenizer , T5ForConditionalGeneration
import pandas as pd
from datasets import Dataset
from torch.optim import AdamW
import torch

model_name = "/content/drive/MyDrive/LinkedIn Chatbot/flan-t5-small"

tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

df=pd.read_csv("/content/drive/MyDrive/LinkedIn Chatbot/linkedIn.csv")
df = df.rename(columns={"Prompt": "prompt", "Response": "response"})
df = df[["prompt", "response"]]
df.columns = df.columns.str.strip().str.lower()
dataset=Dataset.from_pandas(df)
print(dataset.column_names)

input_tensors = []
df.dropna(subset=["prompt", "response"], inplace=True)
df["prompt"] = df["prompt"].astype(str)
df["response"] = df["response"].astype(str)


for i, row in df.iterrows():
    input_encoding = tokenizer(
        text=row["prompt"],
        max_length=128,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )

    target_encoding = tokenizer(
        text=row["response"],
        max_length=128,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )

    labels = target_encoding["input_ids"]
    labels[labels == tokenizer.pad_token_id] = -100
    input_tensors.append({
        "input_ids": input_encoding["input_ids"].squeeze(0),
        "attention_mask": input_encoding["attention_mask"].squeeze(0),
        "labels": labels.squeeze(0)
    })

optimizer = AdamW(model.parameters(), lr=5e-5)
model.train()

for epoch in range(5):
    for i, tokens in enumerate(input_tensors):
        input_ids = tokens["input_ids"].unsqueeze(0).to(model.device)
        attention_mask = tokens["attention_mask"].unsqueeze(0).to(model.device)
        labels = tokens["labels"].unsqueeze(0).to(model.device)
        output = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = output.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        print(f"Epoch {epoch+1}, Sample {i+1}, Loss: {loss.item():.4f}")

model.save_pretrained("/content/drive/MyDrive/LinkedIn Chatbot/chatbot")
tokenizer.save_pretrained("/content/drive/MyDrive/LinkedIn Chatbot/chatbot")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 3, Sample 506, Loss: 2.3294
Epoch 3, Sample 507, Loss: 2.3969
Epoch 3, Sample 508, Loss: 2.8360
Epoch 3, Sample 509, Loss: 1.5164
Epoch 3, Sample 510, Loss: 2.9322
Epoch 3, Sample 511, Loss: 2.0379
Epoch 3, Sample 512, Loss: 2.6418
Epoch 3, Sample 513, Loss: 1.6700
Epoch 3, Sample 514, Loss: 2.3931
Epoch 3, Sample 515, Loss: 2.0046
Epoch 3, Sample 516, Loss: 2.2953
Epoch 3, Sample 517, Loss: 2.5759
Epoch 3, Sample 518, Loss: 1.7665
Epoch 3, Sample 519, Loss: 2.1731
Epoch 3, Sample 520, Loss: 1.9864
Epoch 3, Sample 521, Loss: 1.8578
Epoch 3, Sample 522, Loss: 2.6043
Epoch 3, Sample 523, Loss: 2.0585
Epoch 3, Sample 524, Loss: 2.9048
Epoch 3, Sample 525, Loss: 2.7408
Epoch 3, Sample 526, Loss: 1.9959
Epoch 3, Sample 527, Loss: 1.7332
Epoch 3, Sample 528, Loss: 2.1729
Epoch 3, Sample 529, Loss: 2.7981
Epoch 3, Sample 530, Loss: 3.4425
Epoch 3, Sample 531, Loss: 2.7242
Epoch 3, Sample 532, Loss: 2.5243
Epoch 3, Sample 5

('/content/drive/MyDrive/LinkedIn Chatbot/chatbot/tokenizer_config.json',
 '/content/drive/MyDrive/LinkedIn Chatbot/chatbot/special_tokens_map.json',
 '/content/drive/MyDrive/LinkedIn Chatbot/chatbot/spiece.model',
 '/content/drive/MyDrive/LinkedIn Chatbot/chatbot/added_tokens.json')

In [31]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

model_path = "/content/drive/MyDrive/LinkedIn Chatbot/chatbot"
tokenizer = T5Tokenizer.from_pretrained('/content/drive/MyDrive/LinkedIn Chatbot/chatbot')
model = T5ForConditionalGeneration.from_pretrained('/content/drive/MyDrive/LinkedIn Chatbot/chatbot')

def generate_response(question, max_length=50):
    input_text = f"prompt: {prompt}"
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs, temperature=1.0, top_p=0.9, max_length=100,num_beams=4,do_sample=True)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

while True:
   prompt=input("Enter your prompt: ")
   if prompt=="end" or prompt=="End":
      break
   else:
      print(generate_response(prompt))


Enter your promptwrite a linkedIn post for achieving a certificate
Finished a certificate in [Topic] and earned a certificate in [Topic]! #CertifiedCertified #CertifiedCertified
Enter your promptend


In [None]:
!pip uninstall -y transformers
!pip install transformers --upgrade --force-reinstall

Found existing installation: transformers 4.53.2
Uninstalling transformers-4.53.2:
  Successfully uninstalled transformers-4.53.2
Collecting transformers
  Using cached transformers-4.53.2-py3-none-any.whl.metadata (40 kB)
Collecting filelock (from transformers)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Downloading huggingface_hub-0.33.4-py3-none-any.whl.metadata (14 kB)
Collecting numpy>=1.17 (from transformers)
  Downloading numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting packaging>=20.0 (from transformers)
  Downloading packaging-25.0-py3-none-any.whl.metadata (3.3 kB)
Collecting pyyaml>=5.1 (from transformers)
  Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (fro