# Training a Model with TensorFlow

In [1]:
import tensorflow as tf
import numpy as np
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
from datasets import load_dataset
import evaluate
import nltk

2025-05-31 01:33:02.903015: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-31 01:33:09.544362: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-05-31 01:33:09.557719: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-05-31 01:33:10.674308: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-31 01:33:13.108520: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# Testing GPU Support
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


2025-05-31 01:34:08.617839: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-31 01:34:08.791186: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-31 01:34:08.791235: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


In [3]:
# Download NLTK punkt for sentence tokenization
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
# loading the dataset
dataset = load_dataset("cnn_dailymail", "3.0.0", cache_dir="./cache")
train_data = dataset['train']
validation_data = dataset['validation']
test_data = dataset['test']

  table = cls._concat_blocks(blocks, axis=0)


In [5]:
# Loading the model and tokenizer
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="./cache")
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir="./cache")

Downloading tokenizer_config.json: 100%|██████████| 2.32k/2.32k [00:00<00:00, 4.94MB/s]
Downloading spiece.model: 100%|██████████| 792k/792k [00:00<00:00, 1.22MB/s]
Downloading tokenizer.json: 100%|██████████| 1.39M/1.39M [00:00<00:00, 1.63MB/s]
Downloading config.json: 100%|██████████| 1.21k/1.21k [00:00<00:00, 2.63MB/s]
Downloading model.safetensors: 100%|██████████| 242M/242M [00:43<00:00, 5.61MB/s] 
2025-05-31 01:35:17.932641: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-31 01:35:17.932762: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-31 01:35:17.932789: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUM

In [29]:
# Saving the tokenizer
tokenizer.save_pretrained("./tokenizer")

('./tokenizer/tokenizer_config.json',
 './tokenizer/special_tokens_map.json',
 './tokenizer/tokenizer.json')

In [6]:
# Preprocessing function
def preprocess_function(examples):
    # Add prefix for T5 summarization task
    inputs = ["summarize: " + article for article in examples["article"]]
    model_inputs = tokenizer(
        inputs,
        max_length=128,
        truncation=True,
        padding="max_length",
        return_tensors="tf"
    )
    
    # Tokenize summaries (targets)
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            examples["highlights"],
            max_length=128,
            truncation=True,
            padding="max_length",
            return_tensors="tf"
        )
    
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [7]:
# Apply preprocessing to datasets
train_dataset = train_data.map(preprocess_function, batched=True)
val_dataset = validation_data.map(preprocess_function, batched=True)
test_dataset = test_data.map(preprocess_function, batched=True)

Map: 100%|██████████| 287113/287113 [02:24<00:00, 1983.61 examples/s]
Map: 100%|██████████| 13368/13368 [00:05<00:00, 2264.87 examples/s]
Map: 100%|██████████| 11490/11490 [00:05<00:00, 2179.90 examples/s]


In [8]:
# Convert to TensorFlow datasets
def convert_to_tf_dataset(hf_dataset, batch_size=8):
    tf_dataset = tf.data.Dataset.from_tensor_slices((
        {
            "input_ids": hf_dataset["input_ids"],
            "attention_mask": hf_dataset["attention_mask"],
            "labels": hf_dataset["labels"]
        }
    ))
    return tf_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

train_tf_dataset = convert_to_tf_dataset(train_dataset)
val_tf_dataset = convert_to_tf_dataset(val_dataset)
test_tf_dataset = convert_to_tf_dataset(test_dataset)

In [9]:
# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
model.compile(optimizer=optimizer)

# Training the model
history = model.fit(
    train_tf_dataset,
    validation_data=val_tf_dataset,
    epochs=1,
    batch_size=8
)

2025-05-31 01:40:23.196324: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f95503ddee0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-05-31 01:40:23.197182: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-05-31 01:40:23.441975: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-05-31 01:40:23.737888: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
I0000 00:00:1748635823.969758    1531 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




In [10]:
# saving the model
model.save_pretrained("./t5_small_summarization_model")

In [14]:
# Function to generate summaries
def generate_summary(text, model, tokenizer, max_length=128):
    input_text = "summarize: " + text
    inputs = tokenizer(
        input_text,
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors="tf"
    )
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        num_beams=4,
        early_stopping=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [31]:
# Evaluation

rouge = evaluate.load("rouge")

# Evaluation function
def evaluate_model(dataset, model, tokenizer, num_samples=100):
    predictions = []
    references = []
    for i, example in enumerate(dataset):
        if i >= num_samples:
            break
        pred = generate_summary(example["article"], model, tokenizer)
        predictions.append(pred)
        references.append(example["highlights"])

    results = rouge.compute(predictions=predictions, references=references)
    return results


In [32]:
eval_results = evaluate_model(test_data.select(range(1)), model, tokenizer)
print("Evaluation Results:", eval_results)

Evaluation Results: {'rouge1': 0.4, 'rouge2': 0.23655913978494622, 'rougeL': 0.35789473684210527, 'rougeLsum': 0.35789473684210527}


In [28]:
# Testing with custom text

# Example custom text (replace with your own text)
custom_text = """
The rapid advancement of artificial intelligence (AI) is transforming industries worldwide. From healthcare to finance, AI technologies are being used to automate tasks, improve decision-making, and enhance user experiences. In healthcare, AI is helping doctors diagnose diseases with greater accuracy through image recognition and predictive analytics. In finance, algorithms are optimizing trading strategies and detecting fraud. However, concerns about job displacement, ethical implications, and data privacy remain significant challenges. Governments and organizations are working to establish regulations to ensure AI is used responsibly.
"""

# Generate summary
generated_summary = generate_summary(custom_text, model, tokenizer)
print("Custom Text (truncated to 500 chars):")
print(custom_text[:500] + "..." if len(custom_text) > 500 else custom_text)
print("\nGenerated Summary:")
print(generated_summary)

Custom Text (truncated to 500 chars):

The rapid advancement of artificial intelligence (AI) is transforming industries worldwide. From healthcare to finance, AI technologies are being used to automate tasks, improve decision-making, and enhance user experiences. In healthcare, AI is helping doctors diagnose diseases with greater accuracy through image recognition and predictive analytics. In finance, algorithms are optimizing trading strategies and detecting fraud. However, concerns about job displacement, ethical implications, and...

Generated Summary:
AI technologies are being used to automate tasks, improve decision-making, and enhance user experiences. In healthcare, AI is helping doctors diagnose diseases with greater accuracy through image recognition and predictive analytics. In finance, algorithms are optimizing trading strategies and detecting fraud. But concerns about job displacement, ethical implications, and data privacy remain significant challenges.
