In [1]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import os
import torch


from google.colab import drive
drive.mount('/content/drive')


model_name = "google/flan-t5-small"
local_dir = "/content/flan_model"
drive_dir = "/content/drive/MyDrive/flan_model"

print("Downloading model and tokenizer...")
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)


os.makedirs(local_dir, exist_ok=True)
tokenizer.save_pretrained(local_dir)
model.save_pretrained(local_dir)
print(f"✅ Model saved to {local_dir}")


os.makedirs(drive_dir, exist_ok=True)
tokenizer.save_pretrained(drive_dir)
model.save_pretrained(drive_dir)
print(f"✅ Model also saved to {drive_dir}")

print("Loading model from local directory...")
tokenizer = T5Tokenizer.from_pretrained(local_dir)
model = T5ForConditionalGeneration.from_pretrained(local_dir)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"✅ Using device: {device}")


prompt = "summarize: Machine learning is a subfield of artificial intelligence concerned with algorithms that learn from data."
inputs = tokenizer(prompt, return_tensors='pt').to(device)

print("Generating response...")
outputs = model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=True,
    temperature=0.7,
    top_p=0.9
)

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\nGenerated Output:")
print(response)


while True:
    try:
        user_prompt = input("\nEnter a prompt (or type 'exit' to stop): ")
        if user_prompt.lower() == "exit":
            print("Exiting.")
            break

        if not user_prompt.strip():
            print("Empty prompt. Try again.")
            continue

        inputs = tokenizer(user_prompt, return_tensors='pt').to(device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print("Response:")
        print(response)

    except KeyboardInterrupt:
        print("\nInterrupted. Exiting.")
        break
    except Exception as e:
        print(f"Error: {e}")


Mounted at /content/drive
Downloading model and tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

✅ Model saved to /content/flan_model
✅ Model also saved to /content/drive/MyDrive/flan_model
Loading model from local directory...
✅ Using device: cuda
Generating response...

Generated Output:
Artificial intelligence is an important tool in artificial intelligence applications.

Enter a prompt (or type 'exit' to stop): who is prime minister of india?
Response:
Ramaram Singh

Enter a prompt (or type 'exit' to stop): president of india?
Response:
President of India

Enter a prompt (or type 'exit' to stop): exit
Exiting.


for fine tuning **flant5**

In [2]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.

In [10]:
from datasets import load_dataset
from transformers import T5Tokenizer,T5ForConditionalGeneration
import transformers
import os
os.environ["WANDB_DISABLED"] = "true"



#load dataset
dataset= load_dataset('json',data_files='/content/drive/MyDrive/data.json')

dataset=dataset["train"].train_test_split(test_size=0.1)

#tokenise dataset

tokenizer=T5Tokenizer.from_pretrained('/content/drive/MyDrive/flan_model')

def tokenize(batch):
    input_enc=tokenizer(batch["input"],padding="max_length",truncation=True,max_length=128)
    output_enc=tokenizer(batch["output"],padding="max_length",truncation=True,max_length=64)


    labels = output_enc["input_ids"]
    labels = [[(token if token != tokenizer.pad_token_id else -100) for token in label_seq] for label_seq in labels]

    input_enc["labels"] = labels
    return input_enc
tokenized_dataset = dataset.map(tokenize, batched=True)

#fine tuning

model=T5ForConditionalGeneration.from_pretrained('google/flan-t5-small')
args=transformers.TrainingArguments(
    output_dir="/content/drive/MyDrive/flan_model_finetuned",
    run_name="flan_finetune_run",
    per_device_train_batch_size=10,
    per_device_eval_batch_size=6,
    num_train_epochs=20,
    eval_strategy="epoch",
    save_total_limit=2,
    fp16=True,
    logging_dir="./logs",
    remove_unused_columns=False,

)

print('machine is learning')

trainer=transformers.Trainer(model=model,
                args=args,
                train_dataset=tokenized_dataset["train"],
                eval_dataset=tokenized_dataset['test'])

trainer.train()

trainer.save_model("/content/drive/MyDrive/flan_model_finetuned/final_model")
tokenizer.save_pretrained("/content/drive/MyDrive/flan_model_finetuned/final_model")

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


machine is learning


Epoch,Training Loss,Validation Loss
1,No log,
2,No log,
3,No log,
4,No log,
5,No log,
6,No log,
7,No log,
8,No log,
9,No log,
10,No log,


('/content/drive/MyDrive/flan_model_finetuned/final_model/tokenizer_config.json',
 '/content/drive/MyDrive/flan_model_finetuned/final_model/special_tokens_map.json',
 '/content/drive/MyDrive/flan_model_finetuned/final_model/spiece.model',
 '/content/drive/MyDrive/flan_model_finetuned/final_model/added_tokens.json')

testing the pretrained model

In [11]:

from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration

model_path = "/content/drive/MyDrive/flan_model_finetuned/final_model"

tokenizer = T5Tokenizer.from_pretrained(model_path, local_files_only=True)
model = T5ForConditionalGeneration.from_pretrained(model_path, local_files_only=True)

# Create the pipeline with the loaded model and tokenizer
pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer)



while True:
    prompt=input('>>>>')
    if prompt.lower() =='exit':
        break
    out=pipe(prompt,max_new_tokens=50)
    print("\n Response:", out[0]['generated_text'])

Device set to use cuda:0


>>>>Who is the Prime Minister of India?

 Response: samuel saraj


KeyboardInterrupt: Interrupted by user