In [None]:
!pip install datasets

In [None]:
!pip install --upgrade datasets

In [None]:
from datasets import load_dataset

# Load the "knkarthick/dialogsum" dataset with datasets.load_dataset.
# `ds` is the object every later cell indexes by split name (e.g. ds['train']).
ds = load_dataset("knkarthick/dialogsum")

In [None]:
# Bare last expression: the notebook renders the repr of the loaded dataset object.
ds

In [None]:
# Peek at the 'dialogue' field of the training example at index 1.
ds['train'][1]['dialogue']

# Without Fine-Tuning on the Dataset

In [None]:
!pip install transformers

In [None]:
from transformers import pipeline

In [None]:
# Here the summarization pipeline is used to summarize the text.

In [None]:
# Build a 'summarization' pipeline backed by the 'facebook/bart-large-cnn' checkpoint.
# It is used as-is (no fine-tuning) to produce the baseline summary in the following cells.
pipe = pipeline('summarization' , model='facebook/bart-large-cnn')

In [None]:
# Dialogue of the training example at index 1; used as the input for the baseline pipeline.
article_1 = ds['train'][1]['dialogue']


In [None]:
# Baseline summary from the pretrained pipeline.
# max_length / min_length bound the length of the generated summary;
# do_sample=False turns sampling off for the generation call.
pipe(article_1, max_length=40,min_length=10,do_sample=False)

In [None]:
# Dataset-provided 'summary' for the same example (index 1), for comparison
# with the pipeline output.
ds['train'][1]['summary']

# With Fine-Tuning on the Dataset

In [None]:
# Fine-tuning approach: load BART through the Transformers Auto classes
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer

# Load tokenizer and TensorFlow-compatible model.
# Both come from the same 'facebook/bart-large-cnn' checkpoint so the tokenizer's
# vocabulary matches the model. `tokenizer` and `model` are globals used by the
# preprocessing, training, saving and inference cells below.
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-cnn')
model = TFAutoModelForSeq2SeqLM.from_pretrained('facebook/bart-large-cnn')


In [None]:
def preprocess(batch, max_input_length=128, max_target_length=128):
    """Tokenize a batch of dialogue/summary pairs for seq2seq fine-tuning.

    Args:
        batch: Mapping with a ``'dialogue'`` list (source texts) and a
            ``'summary'`` list (target texts), as supplied by a batched ``map``.
        max_input_length: Length each tokenized dialogue is padded/truncated to.
            Defaults to 128 (the previously hard-coded value).
        max_target_length: Length each tokenized summary is padded/truncated to.
            Defaults to 128 (the previously hard-coded value).

    Returns:
        The tokenizer output for the dialogues, with an added ``'labels'``
        entry holding the summary token ids where every padding position has
        been replaced by -100.
    """
    # Source and target lengths are independent knobs: summaries are usually
    # much shorter than dialogues, so callers may want different limits.
    model_inputs = tokenizer(
        batch['dialogue'],
        max_length=max_input_length,
        padding="max_length",
        truncation=True,
    )
    labels = tokenizer(
        batch['summary'],
        max_length=max_target_length,
        padding="max_length",
        truncation=True,
    )

    # Replace pad tokens with -100 in labels (-100 is the conventional
    # "ignore this position" value for the training loss).
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [-100 if token == pad_id else token for token in label]
        for label in labels["input_ids"]
    ]
    return model_inputs

In [None]:
# Apply `preprocess` in batches (batched=True, so it receives lists of examples).
# NOTE: this rebinds `ds`; from here on the dataset also carries the fields
# returned by `preprocess` (the tokenizer output plus 'labels').
ds = ds.map(preprocess, batched=True)

In [None]:
# Training pipeline over the tokenized 'train' split: shuffled, batches of 8.
# The encoder inputs are the features; the 'labels' column is the target.
train_dataset = ds["train"].to_tf_dataset(
    batch_size=8,
    shuffle=True,
    columns=["input_ids", "attention_mask"],
    label_cols=["labels"],
)

In [None]:
# Validation pipeline. It reads the dataset's dedicated 'validation' split so
# that the 'test' split stays untouched for a final, unbiased evaluation;
# monitoring validation loss on the test split would leak it into training-time
# decisions. Order is fixed (shuffle=False) so per-epoch metrics are comparable.
val_dataset = ds['validation'].to_tf_dataset(
    columns=["input_ids", "attention_mask"],
    label_cols=["labels"],
    shuffle=False,
    batch_size=8,
)

In [None]:
# Compile the model
import tensorflow as tf
# Adam optimizer with learning rate 3e-5 for the fine-tuning run.
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
# No `loss` argument is passed here.
# NOTE(review): presumably this relies on the loss the Hugging Face TF model
# computes internally from the labels — confirm for the installed version.
model.compile(optimizer=optimizer)

In [None]:
# Fine-tune for 2 epochs on train_dataset, using val_dataset as validation data.
model.fit(train_dataset, validation_data=val_dataset, epochs=2)

# Saving The Model

In [None]:
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer

# Save model and tokenizer to a directory.
# Both are written to the same folder so a single path can be handed to
# from_pretrained() when reloading.
# NOTE(review): '/content/...' is an absolute path (looks Colab-specific) —
# adjust it when running in another environment.
model.save_pretrained('/content/model_directory')
tokenizer.save_pretrained('/content/model_directory')

In [None]:
# Reload model and tokenizer from the saved directory. This rebinds the
# `model` / `tokenizer` globals, so the inference cells below run against the
# copy loaded from disk.
model = TFAutoModelForSeq2SeqLM.from_pretrained('/content/model_directory')
tokenizer = AutoTokenizer.from_pretrained('/content/model_directory')

In [None]:
def summarize(text):
    """Summarize ``text`` with the loaded seq2seq model.

    Args:
        text: Raw text to summarize. ``None``, empty, or whitespace-only input
            is accepted (a UI textbox can submit it) and yields ``""``.

    Returns:
        The decoded summary string with special tokens stripped, or ``""``
        when there is nothing to summarize.
    """
    # Guard: with no real content, generate() below would still be forced to
    # emit at least `min_length` tokens, i.e. text that summarizes nothing.
    if text is None or not text.strip():
        return ""

    # Tokenize input using TensorFlow tensors; input is truncated to 1024 tokens.
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors='tf')

    # Generate summary with beam search (4 beams), bounded to 40-150 tokens.
    summary_ids = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=150,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True
    )

    # Decode generated tokens to text (first and only sequence in the batch).
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [None]:
# Sample passage used to smoke-test `summarize` on text outside the training data.
text = """ React (also known as React.js or ReactJS) is a free and open-source front-end JavaScript library[5][6] that aims to make building user interfaces based on components more "seamless".[5] It is maintained by Meta (formerly Facebook) and a community of individual developers and companies.[7][8][9]

React can be used to develop single-page, mobile, or server-rendered applications with frameworks like Next.js and Remix[a]. Because React is only concerned with the user interface and rendering components to the DOM, React applications often rely on libraries for routing and other client-side functionality.[11][12] A key advantage of React is that it only re-renders those parts of the page that have changed, avoiding unnecessary re-rendering of unchanged DOM elements."""

# Run the model on the passage and show the result.
summary = summarize(text)
print("Summary:", summary)

In [None]:
!pip install gradio

In [None]:
import gradio as gr
# Expose `summarize` through a small Gradio UI: one multi-line input box
# and one output box.
iface = gr.Interface(
    title="Text Summarizer",
    description="This app uses a TensorFlow Transformer model to summarize long text into concise form.",
    fn=summarize,
    inputs=gr.Textbox(label="Enter Text to Summarize", lines=10),
    outputs=gr.Textbox(label="Summary"),
)

# Start the web app.
iface.launch()