## Install the Transformers library


In [1]:
# Use the %pip magic so the package is installed into the environment of the
# kernel that is running this notebook.
%pip install -q transformers



In [3]:
# Import the necessary library
from transformers import pipeline

# Build a summarization pipeline backed by the T5-small checkpoint
MODEL_NAME = "t5-small"
summarizer = pipeline("summarization", model=MODEL_NAME)

# Sample passage that will be condensed
text = """
It’s essential to understand precisely what data is. Most likely, numbers and figures
come to mind when you hear “data.” But it’s so much more than that. Text snippets,
images, and videos are also classified as data. This is important because it means you
have options regarding the type of data you can collect. For example, you can run a text
or sentiment analysis on social media comments to gain insight into your customers’
thoughts and feelings regarding your product or service. In other words, you can count it
as data if it’s trackable.
"""

# Generation settings: bound the summary length and decode without sampling
generation_options = {"max_length": 50, "min_length": 20, "do_sample": False}
summary = summarizer(text, **generation_options)

# The result is indexed as a list of dicts; show the first entry's summary text
first_result = summary[0]
print("Summary:", first_result['summary_text'])

Summary: text snippets, images, and videos are also classified as data . this is important because it means you have options regarding the type of data you collect .


## Handling Large Texts

In [4]:
def chunk_text(text, max_length=500):
    """Split ``text`` into consecutive chunks of at most ``max_length`` words.

    The text is tokenized with ``str.split()``, so any run of whitespace
    (including newlines) is collapsed to a single space in the output.

    Args:
        text: The string to split.
        max_length: Maximum number of whitespace-separated *words* per chunk.
            This is a word count, not a model token count, so it is only a
            rough proxy for a model's input limit. Must be at least 1.

    Yields:
        str: Each chunk, with its words re-joined by single spaces. Empty or
        whitespace-only input yields nothing.

    Raises:
        ValueError: If ``max_length`` is less than 1. Because this is a
            generator, the error is raised when iteration starts.
    """
    # A negative step would make range() empty and silently drop all of the
    # text; a zero step fails with an unrelated-looking range() error.
    if max_length < 1:
        raise ValueError(f"max_length must be at least 1, got {max_length!r}")

    words = text.split()
    for start in range(0, len(words), max_length):
        yield " ".join(words[start:start + max_length])

# Run the summarizer over every chunk and stitch the partial summaries together
summary_parts = []
for chunk in chunk_text(text):
    summary = summarizer(chunk, max_length=50, min_length=20, do_sample=False)
    # Each partial summary is followed by a single space, including the last one
    summary_parts.append(summary[0]['summary_text'] + " ")
full_summary = "".join(summary_parts)

print("Complete Summary:", full_summary)

Complete Summary: text snippets, images, and videos are also classified as data . this is important because it means you have options regarding the type of data you collect . 


## Using Other Summarization Models

In [5]:
# Switch the summarization pipeline over to a BART checkpoint.
# NOTE: this rebinds `summarizer`, so any cell executed after this one that
# calls `summarizer` will use BART rather than the model loaded earlier.
BART_CHECKPOINT = "facebook/bart-large-cnn"
summarizer = pipeline("summarization", model=BART_CHECKPOINT)

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



## Deploying the Summarizer with Streamlit

In [6]:
# Use the %pip magic so the package is installed into the kernel's environment;
# the version is pinned to the one this notebook was run with.
%pip install -q streamlit==1.39.0

Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.39.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [3

## Create a streamlit_app.py

In [7]:
import streamlit as st
from transformers import pipeline

# NOTE: this code is meant to be saved as streamlit_app.py and launched with
# `streamlit run streamlit_app.py`. Executing it directly inside the notebook
# kernel only produces warnings asking for `streamlit run`.


@st.cache_resource
def load_summarizer():
    """Build the T5 summarization pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the pipeline would be re-created on each button click.

    Returns:
        The ``transformers`` summarization pipeline for the ``t5-small`` model.
    """
    return pipeline("summarization", model="t5-small")


# Initialize summarization model (served from Streamlit's resource cache)
summarizer = load_summarizer()

st.title("Text Summarization App")

# User input text
text = st.text_area("Enter text to summarize:")

if st.button("Summarize"):
    # Treat whitespace-only input the same as empty input instead of
    # sending it to the model.
    if text.strip():
        summary = summarizer(text, max_length=50, min_length=20, do_sample=False)
        st.write("Summary:", summary[0]['summary_text'])
    else:
        st.write("Please enter some text.")

2024-10-12 19:08:45.094 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-10-12 19:08:45.134 Session state does not function when running a script without `streamlit run`


## Evaluation Metrics for Summarization Models

In [8]:
# Use the %pip magic so the package is installed into the kernel's environment;
# the version is pinned to the one this notebook was run with.
%pip install -q rouge-score==0.1.2

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=d9b4e2610747e7c4083106a156107d6185f88b56a9cc5e124a8770e8c72007dd
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [9]:
from rouge_score import rouge_scorer

# Compare a candidate sentence against a reference using ROUGE-1 and ROUGE-L
reference_text = "ChatGPT is a language model by OpenAI."
candidate_text = "OpenAI developed ChatGPT, a language model."

rouge_metrics = ['rouge1', 'rougeL']
scorer = rouge_scorer.RougeScorer(rouge_metrics, use_stemmer=True)

# The reference is passed first, followed by the text being evaluated
scores = scorer.score(reference_text, candidate_text)
print(scores)

{'rouge1': Score(precision=0.8333333333333334, recall=0.7142857142857143, fmeasure=0.7692307692307692), 'rougeL': Score(precision=0.6666666666666666, recall=0.5714285714285714, fmeasure=0.6153846153846153)}


## This project demonstrates how to use the transformers library to summarize text effectively and deploy models in real-world applications. It also highlights how to fine-tune pre-trained models and use pipelines efficiently.