In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import os
import torch


from google.colab import drive
# Mount Google Drive so the model can also be written under MyDrive.
drive.mount('/content/drive')

# Hub identifier plus the two directories the model is saved to.
model_name = "google/flan-t5-small"
local_dir = "/content/flan_model"
drive_dir = "/content/drive/MyDrive/flan_model"

print("Downloading model and tokenizer...")
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Save tokenizer and weights to each target in turn: local disk first,
# then Drive. Each entry pairs a directory with its confirmation message.
save_targets = (
    (local_dir, f"✅ Model saved to {local_dir}"),
    (drive_dir, f"✅ Model also saved to {drive_dir}"),
)
for target_dir, done_message in save_targets:
    os.makedirs(target_dir, exist_ok=True)
    tokenizer.save_pretrained(target_dir)
    model.save_pretrained(target_dir)
    print(done_message)

# Reload from the local copy that was just written.
print("Loading model from local directory...")
tokenizer = T5Tokenizer.from_pretrained(local_dir)
model = T5ForConditionalGeneration.from_pretrained(local_dir)

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
model.to(device)
print(f"✅ Using device: {device}")


def generate_response(prompt_text, max_new_tokens=50, temperature=0.7, top_p=0.9):
    """Sample one completion for ``prompt_text`` from the loaded model.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt_text: Text sent to the model as-is.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The first generated sequence decoded to text, special tokens removed.
    """
    encoded = tokenizer(prompt_text, return_tensors='pt').to(device)
    generated = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)


# One fixed demo prompt to confirm the reloaded model generates text.
prompt = "summarize: Machine learning is a subfield of artificial intelligence concerned with algorithms that learn from data."

print("Generating response...")
response = generate_response(prompt)
print("\nGenerated Output:")
print(response)


# Interactive loop: read a prompt, print the model's answer, repeat until 'exit'.
while True:
    # Read input outside the broad handler below: if stdin is unavailable
    # (EOF, headless execution) input() fails on every call, and retrying
    # would spin forever printing the same error.
    try:
        user_prompt = input("\nEnter a prompt (or type 'exit' to stop): ")
    except KeyboardInterrupt:
        print("\nInterrupted. Exiting.")
        break
    except Exception as e:
        print(f"\nCannot read input ({e!r}). Exiting.")
        break

    cleaned_prompt = user_prompt.strip()

    # Surrounding whitespace should not prevent "exit" from being recognised.
    if cleaned_prompt.lower() == "exit":
        print("Exiting.")
        break

    if not cleaned_prompt:
        print("Empty prompt. Try again.")
        continue

    try:
        response = generate_response(user_prompt)
    except KeyboardInterrupt:
        print("\nInterrupted. Exiting.")
        break
    except Exception as e:
        # Best effort: report a failed generation and keep the session alive.
        print(f"Error: {e}")
        continue

    print("Response:")
    print(response)


## Fine-tuning **FLAN-T5**

In [None]:
!pip install datasets

In [None]:
import os

import torch
import transformers
from datasets import load_dataset
from transformers import T5Tokenizer,T5ForConditionalGeneration
# Turn off the Weights & Biases integration before any training code runs.
os.environ["WANDB_DISABLED"] = "true"

# Load the dataset. The JSON file is read into a single "train" split,
# which is then divided into train/test below.
dataset = load_dataset('json', data_files='/content/drive/MyDrive/data.json')

# Fail early with a clear message if the fields read by tokenize() are absent.
missing_columns = {"input", "output"} - set(dataset["train"].column_names)
if missing_columns:
    raise ValueError(
        f"data.json is missing required column(s): {sorted(missing_columns)}"
    )

# Hold out 10% for evaluation. The fixed seed keeps the split identical
# across kernel restarts, so evaluation numbers are comparable between runs.
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)

# Tokenizer from the copy of the base model saved on Drive.
tokenizer = T5Tokenizer.from_pretrained('/content/drive/MyDrive/flan_model')

def tokenize(batch):
    """Tokenize a batch of text pairs for sequence-to-sequence training.

    Reads ``batch["input"]`` and ``batch["output"]`` and encodes them with
    the notebook-level ``tokenizer``. Inputs are padded/truncated to 128
    tokens and targets to 64 tokens.

    Returns:
        The input encoding with an added ``"labels"`` entry: the target ids
        with every pad token id replaced by -100 (presumably so padding is
        excluded from the training loss).
    """
    model_inputs = tokenizer(
        batch["input"], padding="max_length", truncation=True, max_length=128
    )
    targets = tokenizer(
        batch["output"], padding="max_length", truncation=True, max_length=64
    )

    pad_id = tokenizer.pad_token_id
    masked_labels = []
    for target_ids in targets["input_ids"]:
        masked_labels.append(
            [-100 if token_id == pad_id else token_id for token_id in target_ids]
        )

    model_inputs["labels"] = masked_labels
    return model_inputs
# Tokenize both splits. The raw text columns are dropped here because the
# Trainer below is configured with remove_unused_columns=False and would
# otherwise carry them through to the data collator.
tokenized_dataset = dataset.map(
    tokenize,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

#fine tuning

# Load the base weights from the Drive snapshot that the tokenizer is also
# loaded from, so model and tokenizer come from one saved copy.
model = T5ForConditionalGeneration.from_pretrained('/content/drive/MyDrive/flan_model')

# fp16 mixed precision needs a CUDA device; requesting it on a CPU runtime
# makes TrainingArguments raise, so enable it only when a GPU is present.
# NOTE(review): T5-family models are reported to overflow under fp16 in some
# setups - if the loss becomes NaN, try bf16 or full precision.
use_fp16 = torch.cuda.is_available()

args = transformers.TrainingArguments(
    output_dir="/content/drive/MyDrive/flan_model_finetuned",
    run_name="flan_finetune_run",
    per_device_train_batch_size=10,
    per_device_eval_batch_size=6,
    num_train_epochs=20,
    eval_strategy="epoch",
    save_total_limit=2,
    fp16=use_fp16,
    logging_dir="./logs",
    remove_unused_columns=False,
    # Explicitly disable external loggers rather than relying only on the
    # WANDB_DISABLED environment variable.
    report_to="none",
)

print('machine is learning')

trainer = transformers.Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset['test'],
)

trainer.train()

# Save the fine-tuned weights and the tokenizer side by side so the
# directory can be loaded on its own later.
final_model_dir = "/content/drive/MyDrive/flan_model_finetuned/final_model"
trainer.save_model(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

## Testing the fine-tuned model

In [None]:

from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration

# Directory holding the fine-tuned checkpoint and its tokenizer.
model_path = "/content/drive/MyDrive/flan_model_finetuned/final_model"

# local_files_only=True: load strictly from disk, never from the Hub.
tokenizer = T5Tokenizer.from_pretrained(model_path, local_files_only=True)
model = T5ForConditionalGeneration.from_pretrained(model_path, local_files_only=True)

# Create the pipeline with the loaded model and tokenizer
pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer)


# Interactive loop: answer prompts until the user types 'exit'.
while True:
    try:
        prompt = input('>>>>')
    except (EOFError, KeyboardInterrupt):
        # End the session cleanly instead of surfacing a traceback.
        print("\nExiting.")
        break

    cleaned_prompt = prompt.strip()

    # Surrounding whitespace should not prevent "exit" from being recognised.
    if cleaned_prompt.lower() == 'exit':
        break

    # Nothing to answer; ask again instead of sending an empty prompt.
    if not cleaned_prompt:
        print("Empty prompt. Try again.")
        continue

    out = pipe(prompt, max_new_tokens=50)
    print("\n Response:", out[0]['generated_text'])