In [1]:
!python --version

Python 3.11.12


## Importing Libraries and API Setup


In [2]:
import json
import requests
from bs4 import BeautifulSoup
import re
from transformers import pipeline
from tqdm import tqdm
from google.colab import userdata

api_key = userdata.get('NewsAPI')
base_url = "https://newsapi.org/v2/everything"

# Possible models we could use
# facebook/bart-large-cnn
# facebook/bart-large-xsum
# google/pegasus-xsum
# google/pegasus-multi_news
# google/bigbird-pegasus-large-arxiv
# google/pegasus-cnn_dailymail

## Fetching Article Text and Getting Apple Finance Articles

In essence, this code retrieves news articles about Apple's finances from the News API, extracts relevant information, fetches the full content of each article, and stores all this data in a structured way.

In [3]:
def fetch_full_article_text(url):
    """
    The function fetches the full text content of an article from a given URL by extracting paragraphs
    or main article tags using BeautifulSoup in Python.

    :param url: The `fetch_full_article_text` function is designed to fetch the full text content of an
    article from a given URL. It uses the `requests` library to make an HTTP request to the URL and
    `BeautifulSoup` to parse the HTML content of the page
    :return: The function `fetch_full_article_text(url)` returns the full text content of the article
    found at the specified URL. If there is an error during the process of fetching and parsing the
    webpage, it will return `None`.
    """
    try:
        page = requests.get(url, timeout=5)
        soup = BeautifulSoup(page.content, "html.parser")

        # Extract paragraphs or main article tag
        paragraphs = soup.find_all("p")
        full_text = " ".join([p.get_text() for p in paragraphs])

        return full_text.strip()
    except Exception as e:
        print(f"Error fetching full text from {url}: {e}")
        return None

def get_apple_finance_articles(api_key, base_url, num_articles=5):
    """
    The function `get_apple_finance_articles` retrieves a specified number of finance articles related
    to Apple using an API key and base URL.

    :param api_key: The `api_key` parameter in the `get_apple_finance_articles` function is used to
    authenticate your access to the API. It is a unique identifier that allows you to make requests to
    the API and retrieve data related to Apple finance articles. You need to obtain this API key from
    the API
    :param base_url: The `base_url` parameter in the `get_apple_finance_articles` function is typically
    the URL endpoint of the API you are making a request to in order to fetch Apple finance articles. It
    should be a string representing the base URL of the API service you are using to retrieve the
    articles
    :param num_articles: The `num_articles` parameter in the `get_apple_finance_articles` function
    specifies the number of articles you want to retrieve related to Apple finance. By default, it is
    set to 5, but you can provide a different number when calling the function to fetch a specific
    number of articles, defaults to 5 (optional)
    :return: The function `get_apple_finance_articles` returns a list of dictionaries, where each
    dictionary represents information about an article related to "Apple finance". The dictionary
    contains keys such as "source", "author", "publishedAt", "title", "description", "url", "content",
    and "content_len" for each article. If there is an error during the process of fetching articles,
    it will return an empty list.
    """
    # Keywords and parameters for the API request
    query = "Apple finance"
    query_params = {
        "q": query,
        "apiKey": api_key,
        "language": "en",
        "sortBy": "relevancy",  # Sort results by relevancy
        "pageSize": num_articles  # Limit the number of results
    }

    try:
        response = requests.get(base_url, params=query_params)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        data = response.json()

        articles = []
        if data["status"] == "ok":
            for article in data["articles"]:
                source = article.get("source")
                author = article.get("author")
                publishedAt = article.get("publishedAt")
                title = article.get("title")
                description = article.get("description")
                url = article.get("url")
                content = fetch_full_article_text(url)
                content_len = len(content) if content else 0

                if title and description and url: # Ensure article data exists
                    article_info = {
                        "source": source,
                        "author": author,
                        "publishedAt": publishedAt,
                        "title": title,
                        "description": description,
                        "url": url,
                        "content": content,
                        "content_len": content_len
                    }
                    articles.append(article_info)

        return articles
    except requests.exceptions.RequestException as e:
        print(f"Error fetching articles: {e}")
        return []

# Get the `num_articles` most relevant articles
articles = get_apple_finance_articles(api_key, base_url, num_articles=100)

In [4]:
len(articles)

91

In [5]:
# Export articles to a JSON file
try:
    with open("articles.json", "w") as json_file:
        json.dump(articles, json_file, indent=4)  # Use indent for pretty printing
    print("Articles exported to articles.json")
except Exception as e:
    print(f"Error exporting articles to JSON: {e}")


Articles exported to articles.json


## Data Cleaning and Preprocessing

This part of the code focuses on cleaning and preparing the text of the articles for summarization. It involves two main functions: `clean_text` and `preprocess_articles`. It also defines two variables that dictate the length of tokens or text processed.

In [6]:
MAX_TOKENS = 1024  # model-specific, for BART/PEGASUS
MAX_CHARS = 3000   # rough estimate if tokenization is too heavy

def clean_text(text):
    """
    The `clean_text` function removes HTML tags, normalizes whitespace, removes references, comments,
    and URLs from the input text.

    :param text: The `clean_text` function takes a text input and performs several cleaning operations
    on it. It removes HTML tags, normalizes whitespace, removes references enclosed in square brackets,
    removes comments enclosed in parentheses, and removes URLs. Finally, it strips any leading or
    trailing whitespace before returning the cleaned text
    :return: The `clean_text` function returns the cleaned version of the input text after removing HTML
    tags, normalizing whitespace, removing references, comments, and URLs. The cleaned text is then
    stripped of leading and trailing whitespaces before being returned.
    """
    if not text:
        return ""
    text = re.sub(r"<.*?>", "", text)  # remove HTML tags
    text = re.sub(r"\s+", " ", text) # normalize whitespace
    text = re.sub(r"\[[^]]*\]", "", text)  # remove [references]
    text = re.sub(r"\([^)]*\)", "", text)  # remove (comments)
    text = re.sub(r"http\S+", "", text)    # remove URLs
    return text.strip()

def preprocess_articles(news_items):
    """
    The `preprocess_articles` function processes news articles by cleaning the content and filtering out
    unusable articles based on their length.

    :param news_items: The `preprocess_articles` function takes a list of news items as input. Each news
    item is a dictionary containing information about an article, such as its title, content, URL,
    source, author, publication date, description, and content length
    :return: The `preprocess_articles` function processes a list of news items and returns a new list of
    dictionaries containing information about each article. Each dictionary in the returned list
    includes the following keys:
    """
    processed = []
    for article in news_items:
        clean_content = clean_text(article.get("content", ""))
        if not clean_content or len(clean_content) < 200:
            clean_content = clean_text(article.get("description", ""))

        if not clean_content or len(clean_content) < 200:
            print("Article: \"" + article.get("title", "") + "\" is unsuable!")
            print("Content: \"" + article.get("content", "") + "\"")
            print("-" * 20)
            continue  # skip unusable articles

        # if len(clean_content) > MAX_CHARS:
        #     clean_content = clean_content[:MAX_CHARS]  # truncate

        processed.append({
            "title": article.get("title", ""),
            "url": article.get("url", ""),
            "source": article.get("source", {}).get("name", ""),
            "author": article.get("author", ""),
            "publishedAt": article.get("publishedAt", ""),
            "description": article.get("description", ""),
            "content": article.get("content", ""),
            "content_len": article.get("content_len", ""),
            "clean_content": clean_content,
            "clean_content_len": len(clean_content),
        })
    return processed

In [7]:
preprocessed_articles = preprocess_articles(articles)

Article: "Tesla must do these 3 on its earnings call, or else" is unsuable!
Content: ""
--------------------
Article: "Getting a tax refund from the CRA? Here's how you can get the most out of it" is unsuable!
Content: ""
--------------------
Article: "Bitcoin inches closer to flipping NVIDIA in stunning market surge" is unsuable!
Content: "Please enable JS and disable any ad blocker"
--------------------
Article: "Working from home is turning into 'return to the office or lose your bonus'" is unsuable!
Content: ""
--------------------
Article: "London rents rise to record high for 14th consecutive month" is unsuable!
Content: ""
--------------------
Article: "Stock market today: S&P 500, Nasdaq plunge, Dow drops 1,400 points as Trump's tariffs shock markets" is unsuable!
Content: ""
--------------------
Article: "Stock market today: Dow surges 600 points, S&P 500 has best week since 2023 to cap wild week of tariff-fueled chaos" is unsuable!
Content: ""
--------------------
Article: "I

In [8]:
len(preprocessed_articles)

83

In [9]:
preprocessed_articles[0]

{'title': 'Apple May Face Criminal Charges for Allegedly Lying to a Federal Judge',
 'url': 'https://www.wired.com/story/antitrust-judge-asks-doj-prosecute-apple/',
 'source': 'Wired',
 'author': 'Paresh Dave',
 'publishedAt': '2025-05-01T00:20:26Z',
 'description': 'A US judge says Apple deliberately chose not to comply with an order requiring it to loosen App Store rules—and then tried to cover up its disobedience.',
 'content': 'Apple “willfully chose not to comply” with a court order to loosen its app store restrictions—and one of its executives lied under oath about the company’s plans, a federal judge wrote on Wednesday. Judge Yvonne Gonzalez Rogers has referred the situation to the US Attorney’s Office in San Francisco “to investigate whether criminal contempt proceedings are appropriate.” In 2021, Gonzalez Rogers presided over a lawsuit brought by Fortnite developer Epic Games over the iPhone maker’s allegedly anticompetitive practices that hampered the ability of developers to

##Model Selection and Initialization

This code snippet is responsible for selecting the models that will be used for text summarization and initializing them for use.

In [None]:
# List of models we want to use
model_names = [
    "facebook/bart-large-cnn",
    # "facebook/bart-large-xsum",
    "google/pegasus-xsum",
    "google/pegasus-multi_news",
    # "google/bigbird-pegasus-large-arxiv",
    "google/pegasus-cnn_dailymail"
]

# Create summarizers for each model
summarizers = {model_name: pipeline("summarization", model=model_name, device=-1) for model_name in model_names}

In [11]:
def summarize_text(summarizer, text, max_input_length=1024, min_summary_length_ratio=0.1, max_summary_length_ratio=0.3):
    """
    This function takes a text input, truncates it to a specified length, dynamically calculates summary
    lengths based on input length, and then uses a summarizer model to generate a summary within the
    calculated length limits.

    :param summarizer: The `summarizer` parameter in the `summarize_text` function is expected to be a
    function or model that can generate a summary of a given input text. This function/model should take
    the input text, along with optional parameters like `max_length` and `min_length`, and return
    :param text: The `summarize_text` function takes a `summarizer` function, `text` to be summarized,
    and optional parameters for input length, minimum summary length ratio, and maximum summary length
    ratio
    :param max_input_length: The `max_input_length` parameter in the `summarize_text` function specifies
    the maximum number of characters allowed in the input text that will be passed to the summarizer for
    summarization. If the input text exceeds this length, it will be truncated to fit within this limit
    before being summarized, defaults to 1024 (optional)
    :param min_summary_length_ratio: The `min_summary_length_ratio` parameter in the `summarize_text`
    function represents the minimum ratio of the input text length that the summary should aim to be. It
    dynamically calculates the minimum length of the summary based on this ratio. The function ensures
    that the summary length is at least 20
    :param max_summary_length_ratio: The `max_summary_length_ratio` parameter in the `summarize_text`
    function determines the maximum length of the summary relative to the length of the input text. It
    is used to dynamically calculate the maximum length of the summary based on a ratio of the input
    text length. The function ensures that the
    :return: The function `summarize_text` returns the summarized text based on the input text using the
    provided summarizer model. If there is no content to summarize (empty text), it returns '[No content
    to summarize]'. If there is an error during the summarization process, it returns '[Error during
    summarization]'. Otherwise, it returns the generated summary of the input text within the specified
    length constraints based on the provided summarizer model.
    """
    if not text or text.strip() == '':
        return '[No content to summarize]'

    # Truncate to model's max input length
    input_text = text.strip()[:max_input_length]
    input_len = len(input_text.split())  # count words

    # Dynamically calculate summary lengths
    min_length = max(20, int(input_len * min_summary_length_ratio))  # at least 20 words
    max_length = max(min_length + 10, int(input_len * max_summary_length_ratio))  # ensure max > min

    # Clip to model max length limits
    max_length = min(max_length, 200)  # you can adjust cap
    min_length = min(min_length, max_length - 10) if max_length > 10 else min_length

    try:
        summary = summarizer(input_text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
    except Exception as e:
        print(f"⚠️ Error summarizing: {e}")
        summary = '[Error during summarization]'

    return summary

summarized_articles = []

for idx, article in enumerate(tqdm(preprocessed_articles, desc="Summarizing articles")):
    content = article.get('content', '') or ''
    title = article.get('title', '') or ''
    description = article.get('description', '') or ''

    input_text = content.strip()
    if not input_text:
        input_text = f"{title}. {description}".strip()

    if len(input_text) == 0:
        print(f"⚠️ Article {idx} has no content/title/description.")

    summarized_article = article.copy()  # copy original article

    # Summarize with each model
    for model_name, summarizer in summarizers.items():
        print(f"🔍 Summarizing article {idx} with {model_name}")
        summary = summarize_text(summarizer, input_text)
        summarized_article[f"summary_{model_name}"] = summary

    summarized_articles.append(summarized_article)

Summarizing articles:   0%|          | 0/83 [00:00<?, ?it/s]

🔍 Summarizing article 0 with facebook/bart-large-cnn
🔍 Summarizing article 0 with google/pegasus-xsum
🔍 Summarizing article 0 with google/pegasus-multi_news
🔍 Summarizing article 0 with google/pegasus-cnn_dailymail


Summarizing articles:   1%|          | 1/83 [01:36<2:11:20, 96.10s/it]

🔍 Summarizing article 1 with facebook/bart-large-cnn
🔍 Summarizing article 1 with google/pegasus-xsum
🔍 Summarizing article 1 with google/pegasus-multi_news
🔍 Summarizing article 1 with google/pegasus-cnn_dailymail


Summarizing articles:   2%|▏         | 2/83 [03:11<2:08:48, 95.41s/it]

🔍 Summarizing article 2 with facebook/bart-large-cnn
🔍 Summarizing article 2 with google/pegasus-xsum
🔍 Summarizing article 2 with google/pegasus-multi_news
🔍 Summarizing article 2 with google/pegasus-cnn_dailymail


Summarizing articles:   4%|▎         | 3/83 [04:21<1:52:15, 84.19s/it]

🔍 Summarizing article 3 with facebook/bart-large-cnn
🔍 Summarizing article 3 with google/pegasus-xsum
🔍 Summarizing article 3 with google/pegasus-multi_news
🔍 Summarizing article 3 with google/pegasus-cnn_dailymail


Summarizing articles:   5%|▍         | 4/83 [05:32<1:43:45, 78.80s/it]

🔍 Summarizing article 4 with facebook/bart-large-cnn
🔍 Summarizing article 4 with google/pegasus-xsum
🔍 Summarizing article 4 with google/pegasus-multi_news
🔍 Summarizing article 4 with google/pegasus-cnn_dailymail


Summarizing articles:   6%|▌         | 5/83 [06:54<1:44:01, 80.02s/it]

🔍 Summarizing article 5 with facebook/bart-large-cnn
🔍 Summarizing article 5 with google/pegasus-xsum
🔍 Summarizing article 5 with google/pegasus-multi_news
🔍 Summarizing article 5 with google/pegasus-cnn_dailymail


Summarizing articles:   7%|▋         | 6/83 [08:15<1:42:54, 80.19s/it]

🔍 Summarizing article 6 with facebook/bart-large-cnn
🔍 Summarizing article 6 with google/pegasus-xsum
🔍 Summarizing article 6 with google/pegasus-multi_news
🔍 Summarizing article 6 with google/pegasus-cnn_dailymail


Summarizing articles:   8%|▊         | 7/83 [09:29<1:39:12, 78.32s/it]

🔍 Summarizing article 7 with facebook/bart-large-cnn
🔍 Summarizing article 7 with google/pegasus-xsum
🔍 Summarizing article 7 with google/pegasus-multi_news
🔍 Summarizing article 7 with google/pegasus-cnn_dailymail


Summarizing articles:  10%|▉         | 8/83 [10:51<1:39:30, 79.61s/it]

🔍 Summarizing article 8 with facebook/bart-large-cnn
🔍 Summarizing article 8 with google/pegasus-xsum
🔍 Summarizing article 8 with google/pegasus-multi_news
🔍 Summarizing article 8 with google/pegasus-cnn_dailymail


Summarizing articles:  11%|█         | 9/83 [12:04<1:35:32, 77.47s/it]

🔍 Summarizing article 9 with facebook/bart-large-cnn
🔍 Summarizing article 9 with google/pegasus-xsum
🔍 Summarizing article 9 with google/pegasus-multi_news
🔍 Summarizing article 9 with google/pegasus-cnn_dailymail


Summarizing articles:  12%|█▏        | 10/83 [13:19<1:33:06, 76.53s/it]

🔍 Summarizing article 10 with facebook/bart-large-cnn
🔍 Summarizing article 10 with google/pegasus-xsum
🔍 Summarizing article 10 with google/pegasus-multi_news
🔍 Summarizing article 10 with google/pegasus-cnn_dailymail


Summarizing articles:  13%|█▎        | 11/83 [14:31<1:30:21, 75.29s/it]

🔍 Summarizing article 11 with facebook/bart-large-cnn
🔍 Summarizing article 11 with google/pegasus-xsum
🔍 Summarizing article 11 with google/pegasus-multi_news
🔍 Summarizing article 11 with google/pegasus-cnn_dailymail


Summarizing articles:  14%|█▍        | 12/83 [15:52<1:31:12, 77.07s/it]

🔍 Summarizing article 12 with facebook/bart-large-cnn
🔍 Summarizing article 12 with google/pegasus-xsum
🔍 Summarizing article 12 with google/pegasus-multi_news
🔍 Summarizing article 12 with google/pegasus-cnn_dailymail


Summarizing articles:  16%|█▌        | 13/83 [17:14<1:31:33, 78.48s/it]

🔍 Summarizing article 13 with facebook/bart-large-cnn
🔍 Summarizing article 13 with google/pegasus-xsum
🔍 Summarizing article 13 with google/pegasus-multi_news
🔍 Summarizing article 13 with google/pegasus-cnn_dailymail


Summarizing articles:  17%|█▋        | 14/83 [18:49<1:35:58, 83.45s/it]

🔍 Summarizing article 14 with facebook/bart-large-cnn
🔍 Summarizing article 14 with google/pegasus-xsum
🔍 Summarizing article 14 with google/pegasus-multi_news
🔍 Summarizing article 14 with google/pegasus-cnn_dailymail


Summarizing articles:  18%|█▊        | 15/83 [20:07<1:32:43, 81.82s/it]

🔍 Summarizing article 15 with facebook/bart-large-cnn
🔍 Summarizing article 15 with google/pegasus-xsum
🔍 Summarizing article 15 with google/pegasus-multi_news
🔍 Summarizing article 15 with google/pegasus-cnn_dailymail


Summarizing articles:  19%|█▉        | 16/83 [21:23<1:29:25, 80.08s/it]

🔍 Summarizing article 16 with facebook/bart-large-cnn
🔍 Summarizing article 16 with google/pegasus-xsum
🔍 Summarizing article 16 with google/pegasus-multi_news
🔍 Summarizing article 16 with google/pegasus-cnn_dailymail


Summarizing articles:  20%|██        | 17/83 [22:42<1:27:51, 79.88s/it]

🔍 Summarizing article 17 with facebook/bart-large-cnn
🔍 Summarizing article 17 with google/pegasus-xsum
🔍 Summarizing article 17 with google/pegasus-multi_news
🔍 Summarizing article 17 with google/pegasus-cnn_dailymail


Summarizing articles:  22%|██▏       | 18/83 [24:01<1:26:07, 79.51s/it]

🔍 Summarizing article 18 with facebook/bart-large-cnn
🔍 Summarizing article 18 with google/pegasus-xsum
🔍 Summarizing article 18 with google/pegasus-multi_news
🔍 Summarizing article 18 with google/pegasus-cnn_dailymail


Summarizing articles:  23%|██▎       | 19/83 [25:25<1:26:08, 80.76s/it]

🔍 Summarizing article 19 with facebook/bart-large-cnn
🔍 Summarizing article 19 with google/pegasus-xsum
🔍 Summarizing article 19 with google/pegasus-multi_news
🔍 Summarizing article 19 with google/pegasus-cnn_dailymail


Summarizing articles:  24%|██▍       | 20/83 [26:55<1:27:44, 83.56s/it]

🔍 Summarizing article 20 with facebook/bart-large-cnn
🔍 Summarizing article 20 with google/pegasus-xsum
🔍 Summarizing article 20 with google/pegasus-multi_news
🔍 Summarizing article 20 with google/pegasus-cnn_dailymail


Summarizing articles:  25%|██▌       | 21/83 [28:12<1:24:18, 81.58s/it]

🔍 Summarizing article 21 with facebook/bart-large-cnn
🔍 Summarizing article 21 with google/pegasus-xsum
🔍 Summarizing article 21 with google/pegasus-multi_news
🔍 Summarizing article 21 with google/pegasus-cnn_dailymail


Summarizing articles:  27%|██▋       | 22/83 [29:32<1:22:29, 81.14s/it]

🔍 Summarizing article 22 with facebook/bart-large-cnn
🔍 Summarizing article 22 with google/pegasus-xsum
🔍 Summarizing article 22 with google/pegasus-multi_news
🔍 Summarizing article 22 with google/pegasus-cnn_dailymail


Summarizing articles:  28%|██▊       | 23/83 [31:01<1:23:35, 83.59s/it]

🔍 Summarizing article 23 with facebook/bart-large-cnn
🔍 Summarizing article 23 with google/pegasus-xsum
🔍 Summarizing article 23 with google/pegasus-multi_news
🔍 Summarizing article 23 with google/pegasus-cnn_dailymail


Summarizing articles:  29%|██▉       | 24/83 [32:24<1:21:50, 83.22s/it]

🔍 Summarizing article 24 with facebook/bart-large-cnn
🔍 Summarizing article 24 with google/pegasus-xsum
🔍 Summarizing article 24 with google/pegasus-multi_news
🔍 Summarizing article 24 with google/pegasus-cnn_dailymail


Summarizing articles:  30%|███       | 25/83 [33:51<1:21:32, 84.36s/it]

🔍 Summarizing article 25 with facebook/bart-large-cnn
🔍 Summarizing article 25 with google/pegasus-xsum
🔍 Summarizing article 25 with google/pegasus-multi_news
🔍 Summarizing article 25 with google/pegasus-cnn_dailymail


Summarizing articles:  31%|███▏      | 26/83 [35:10<1:18:42, 82.85s/it]

🔍 Summarizing article 26 with facebook/bart-large-cnn
🔍 Summarizing article 26 with google/pegasus-xsum
🔍 Summarizing article 26 with google/pegasus-multi_news
🔍 Summarizing article 26 with google/pegasus-cnn_dailymail


Summarizing articles:  33%|███▎      | 27/83 [36:44<1:20:29, 86.25s/it]

🔍 Summarizing article 27 with facebook/bart-large-cnn
🔍 Summarizing article 27 with google/pegasus-xsum
🔍 Summarizing article 27 with google/pegasus-multi_news
🔍 Summarizing article 27 with google/pegasus-cnn_dailymail


Summarizing articles:  34%|███▎      | 28/83 [37:59<1:15:55, 82.83s/it]

🔍 Summarizing article 28 with facebook/bart-large-cnn
🔍 Summarizing article 28 with google/pegasus-xsum
🔍 Summarizing article 28 with google/pegasus-multi_news
🔍 Summarizing article 28 with google/pegasus-cnn_dailymail


Summarizing articles:  35%|███▍      | 29/83 [39:12<1:11:57, 79.96s/it]

🔍 Summarizing article 29 with facebook/bart-large-cnn
🔍 Summarizing article 29 with google/pegasus-xsum
🔍 Summarizing article 29 with google/pegasus-multi_news
🔍 Summarizing article 29 with google/pegasus-cnn_dailymail


Summarizing articles:  36%|███▌      | 30/83 [40:42<1:13:07, 82.77s/it]

🔍 Summarizing article 30 with facebook/bart-large-cnn
🔍 Summarizing article 30 with google/pegasus-xsum
🔍 Summarizing article 30 with google/pegasus-multi_news
🔍 Summarizing article 30 with google/pegasus-cnn_dailymail


Summarizing articles:  37%|███▋      | 31/83 [42:03<1:11:30, 82.51s/it]

🔍 Summarizing article 31 with facebook/bart-large-cnn
🔍 Summarizing article 31 with google/pegasus-xsum
🔍 Summarizing article 31 with google/pegasus-multi_news
🔍 Summarizing article 31 with google/pegasus-cnn_dailymail


Summarizing articles:  39%|███▊      | 32/83 [42:50<1:01:02, 71.81s/it]

🔍 Summarizing article 32 with facebook/bart-large-cnn
🔍 Summarizing article 32 with google/pegasus-xsum
🔍 Summarizing article 32 with google/pegasus-multi_news
🔍 Summarizing article 32 with google/pegasus-cnn_dailymail


Summarizing articles:  40%|███▉      | 33/83 [44:10<1:01:45, 74.12s/it]

🔍 Summarizing article 33 with facebook/bart-large-cnn
🔍 Summarizing article 33 with google/pegasus-xsum
🔍 Summarizing article 33 with google/pegasus-multi_news
🔍 Summarizing article 33 with google/pegasus-cnn_dailymail


Summarizing articles:  41%|████      | 34/83 [45:22<59:58, 73.44s/it]  

🔍 Summarizing article 34 with facebook/bart-large-cnn
🔍 Summarizing article 34 with google/pegasus-xsum
🔍 Summarizing article 34 with google/pegasus-multi_news
🔍 Summarizing article 34 with google/pegasus-cnn_dailymail


Summarizing articles:  42%|████▏     | 35/83 [46:44<1:00:53, 76.12s/it]

🔍 Summarizing article 35 with facebook/bart-large-cnn
🔍 Summarizing article 35 with google/pegasus-xsum
🔍 Summarizing article 35 with google/pegasus-multi_news
🔍 Summarizing article 35 with google/pegasus-cnn_dailymail


Summarizing articles:  43%|████▎     | 36/83 [47:59<59:20, 75.76s/it]  

🔍 Summarizing article 36 with facebook/bart-large-cnn
🔍 Summarizing article 36 with google/pegasus-xsum
🔍 Summarizing article 36 with google/pegasus-multi_news
🔍 Summarizing article 36 with google/pegasus-cnn_dailymail


Summarizing articles:  45%|████▍     | 37/83 [48:46<51:22, 67.02s/it]

🔍 Summarizing article 37 with facebook/bart-large-cnn
🔍 Summarizing article 37 with google/pegasus-xsum
🔍 Summarizing article 37 with google/pegasus-multi_news
🔍 Summarizing article 37 with google/pegasus-cnn_dailymail


Summarizing articles:  46%|████▌     | 38/83 [50:06<53:11, 70.91s/it]

🔍 Summarizing article 38 with facebook/bart-large-cnn
🔍 Summarizing article 38 with google/pegasus-xsum
🔍 Summarizing article 38 with google/pegasus-multi_news
🔍 Summarizing article 38 with google/pegasus-cnn_dailymail


Summarizing articles:  47%|████▋     | 39/83 [51:16<51:48, 70.65s/it]

🔍 Summarizing article 39 with facebook/bart-large-cnn
🔍 Summarizing article 39 with google/pegasus-xsum
🔍 Summarizing article 39 with google/pegasus-multi_news
🔍 Summarizing article 39 with google/pegasus-cnn_dailymail


Summarizing articles:  48%|████▊     | 40/83 [52:33<52:04, 72.66s/it]

🔍 Summarizing article 40 with facebook/bart-large-cnn
🔍 Summarizing article 40 with google/pegasus-xsum
🔍 Summarizing article 40 with google/pegasus-multi_news
🔍 Summarizing article 40 with google/pegasus-cnn_dailymail


Summarizing articles:  49%|████▉     | 41/83 [53:54<52:32, 75.07s/it]

🔍 Summarizing article 41 with facebook/bart-large-cnn
🔍 Summarizing article 41 with google/pegasus-xsum
🔍 Summarizing article 41 with google/pegasus-multi_news
🔍 Summarizing article 41 with google/pegasus-cnn_dailymail


Summarizing articles:  51%|█████     | 42/83 [55:09<51:25, 75.26s/it]

🔍 Summarizing article 42 with facebook/bart-large-cnn
🔍 Summarizing article 42 with google/pegasus-xsum
🔍 Summarizing article 42 with google/pegasus-multi_news
🔍 Summarizing article 42 with google/pegasus-cnn_dailymail


Summarizing articles:  52%|█████▏    | 43/83 [56:30<51:15, 76.90s/it]

🔍 Summarizing article 43 with facebook/bart-large-cnn
🔍 Summarizing article 43 with google/pegasus-xsum
🔍 Summarizing article 43 with google/pegasus-multi_news
🔍 Summarizing article 43 with google/pegasus-cnn_dailymail


Summarizing articles:  53%|█████▎    | 44/83 [57:16<44:01, 67.73s/it]

🔍 Summarizing article 44 with facebook/bart-large-cnn
🔍 Summarizing article 44 with google/pegasus-xsum
🔍 Summarizing article 44 with google/pegasus-multi_news
🔍 Summarizing article 44 with google/pegasus-cnn_dailymail


Summarizing articles:  54%|█████▍    | 45/83 [58:33<44:34, 70.39s/it]

🔍 Summarizing article 45 with facebook/bart-large-cnn
🔍 Summarizing article 45 with google/pegasus-xsum
🔍 Summarizing article 45 with google/pegasus-multi_news
🔍 Summarizing article 45 with google/pegasus-cnn_dailymail


Summarizing articles:  55%|█████▌    | 46/83 [59:53<45:13, 73.33s/it]

🔍 Summarizing article 46 with facebook/bart-large-cnn
🔍 Summarizing article 46 with google/pegasus-xsum
🔍 Summarizing article 46 with google/pegasus-multi_news
🔍 Summarizing article 46 with google/pegasus-cnn_dailymail


Summarizing articles:  57%|█████▋    | 47/83 [1:01:13<45:06, 75.18s/it]

🔍 Summarizing article 47 with facebook/bart-large-cnn


Your max_length is set to 30, but your input_length is only 27. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)


🔍 Summarizing article 47 with google/pegasus-xsum


Your max_length is set to 30, but your input_length is only 27. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)


🔍 Summarizing article 47 with google/pegasus-multi_news


Your max_length is set to 30, but your input_length is only 27. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)


🔍 Summarizing article 47 with google/pegasus-cnn_dailymail


Summarizing articles:  58%|█████▊    | 48/83 [1:01:54<37:52, 64.92s/it]

🔍 Summarizing article 48 with facebook/bart-large-cnn
🔍 Summarizing article 48 with google/pegasus-xsum
🔍 Summarizing article 48 with google/pegasus-multi_news
🔍 Summarizing article 48 with google/pegasus-cnn_dailymail


Summarizing articles:  59%|█████▉    | 49/83 [1:03:21<40:39, 71.74s/it]

🔍 Summarizing article 49 with facebook/bart-large-cnn
🔍 Summarizing article 49 with google/pegasus-xsum
🔍 Summarizing article 49 with google/pegasus-multi_news
🔍 Summarizing article 49 with google/pegasus-cnn_dailymail


Summarizing articles:  60%|██████    | 50/83 [1:04:45<41:30, 75.46s/it]

🔍 Summarizing article 50 with facebook/bart-large-cnn
🔍 Summarizing article 50 with google/pegasus-xsum
🔍 Summarizing article 50 with google/pegasus-multi_news
🔍 Summarizing article 50 with google/pegasus-cnn_dailymail


Summarizing articles:  61%|██████▏   | 51/83 [1:06:07<41:08, 77.14s/it]

🔍 Summarizing article 51 with facebook/bart-large-cnn
🔍 Summarizing article 51 with google/pegasus-xsum
🔍 Summarizing article 51 with google/pegasus-multi_news
🔍 Summarizing article 51 with google/pegasus-cnn_dailymail


Summarizing articles:  63%|██████▎   | 52/83 [1:07:38<42:06, 81.50s/it]

🔍 Summarizing article 52 with facebook/bart-large-cnn
🔍 Summarizing article 52 with google/pegasus-xsum
🔍 Summarizing article 52 with google/pegasus-multi_news
🔍 Summarizing article 52 with google/pegasus-cnn_dailymail


Summarizing articles:  64%|██████▍   | 53/83 [1:09:01<40:58, 81.95s/it]

🔍 Summarizing article 53 with facebook/bart-large-cnn
🔍 Summarizing article 53 with google/pegasus-xsum
🔍 Summarizing article 53 with google/pegasus-multi_news
🔍 Summarizing article 53 with google/pegasus-cnn_dailymail


Summarizing articles:  65%|██████▌   | 54/83 [1:09:49<34:37, 71.64s/it]

🔍 Summarizing article 54 with facebook/bart-large-cnn
🔍 Summarizing article 54 with google/pegasus-xsum
🔍 Summarizing article 54 with google/pegasus-multi_news
🔍 Summarizing article 54 with google/pegasus-cnn_dailymail


Summarizing articles:  66%|██████▋   | 55/83 [1:11:01<33:33, 71.90s/it]

🔍 Summarizing article 55 with facebook/bart-large-cnn
🔍 Summarizing article 55 with google/pegasus-xsum
🔍 Summarizing article 55 with google/pegasus-multi_news
🔍 Summarizing article 55 with google/pegasus-cnn_dailymail


Summarizing articles:  67%|██████▋   | 56/83 [1:12:16<32:46, 72.82s/it]

🔍 Summarizing article 56 with facebook/bart-large-cnn
🔍 Summarizing article 56 with google/pegasus-xsum
🔍 Summarizing article 56 with google/pegasus-multi_news
🔍 Summarizing article 56 with google/pegasus-cnn_dailymail


Summarizing articles:  69%|██████▊   | 57/83 [1:13:37<32:33, 75.14s/it]

🔍 Summarizing article 57 with facebook/bart-large-cnn
🔍 Summarizing article 57 with google/pegasus-xsum
🔍 Summarizing article 57 with google/pegasus-multi_news
🔍 Summarizing article 57 with google/pegasus-cnn_dailymail


Summarizing articles:  70%|██████▉   | 58/83 [1:15:01<32:27, 77.88s/it]

🔍 Summarizing article 58 with facebook/bart-large-cnn
🔍 Summarizing article 58 with google/pegasus-xsum
🔍 Summarizing article 58 with google/pegasus-multi_news
🔍 Summarizing article 58 with google/pegasus-cnn_dailymail


Summarizing articles:  71%|███████   | 59/83 [1:16:22<31:33, 78.90s/it]

🔍 Summarizing article 59 with facebook/bart-large-cnn
🔍 Summarizing article 59 with google/pegasus-xsum
🔍 Summarizing article 59 with google/pegasus-multi_news
🔍 Summarizing article 59 with google/pegasus-cnn_dailymail


Summarizing articles:  72%|███████▏  | 60/83 [1:17:08<26:26, 68.99s/it]

🔍 Summarizing article 60 with facebook/bart-large-cnn
🔍 Summarizing article 60 with google/pegasus-xsum
🔍 Summarizing article 60 with google/pegasus-multi_news
🔍 Summarizing article 60 with google/pegasus-cnn_dailymail


Summarizing articles:  73%|███████▎  | 61/83 [1:18:27<26:21, 71.87s/it]

🔍 Summarizing article 61 with facebook/bart-large-cnn
🔍 Summarizing article 61 with google/pegasus-xsum
🔍 Summarizing article 61 with google/pegasus-multi_news
🔍 Summarizing article 61 with google/pegasus-cnn_dailymail


Summarizing articles:  75%|███████▍  | 62/83 [1:19:55<26:54, 76.87s/it]

🔍 Summarizing article 62 with facebook/bart-large-cnn
🔍 Summarizing article 62 with google/pegasus-xsum
🔍 Summarizing article 62 with google/pegasus-multi_news
🔍 Summarizing article 62 with google/pegasus-cnn_dailymail


Summarizing articles:  76%|███████▌  | 63/83 [1:21:53<29:44, 89.22s/it]

🔍 Summarizing article 63 with facebook/bart-large-cnn
🔍 Summarizing article 63 with google/pegasus-xsum
🔍 Summarizing article 63 with google/pegasus-multi_news
🔍 Summarizing article 63 with google/pegasus-cnn_dailymail


Summarizing articles:  77%|███████▋  | 64/83 [1:23:08<26:51, 84.80s/it]

🔍 Summarizing article 64 with facebook/bart-large-cnn
🔍 Summarizing article 64 with google/pegasus-xsum
🔍 Summarizing article 64 with google/pegasus-multi_news
🔍 Summarizing article 64 with google/pegasus-cnn_dailymail


Summarizing articles:  78%|███████▊  | 65/83 [1:24:28<25:02, 83.46s/it]

🔍 Summarizing article 65 with facebook/bart-large-cnn
🔍 Summarizing article 65 with google/pegasus-xsum
🔍 Summarizing article 65 with google/pegasus-multi_news
🔍 Summarizing article 65 with google/pegasus-cnn_dailymail


Summarizing articles:  80%|███████▉  | 66/83 [1:25:13<20:22, 71.90s/it]

🔍 Summarizing article 66 with facebook/bart-large-cnn
🔍 Summarizing article 66 with google/pegasus-xsum
🔍 Summarizing article 66 with google/pegasus-multi_news
🔍 Summarizing article 66 with google/pegasus-cnn_dailymail


Summarizing articles:  81%|████████  | 67/83 [1:26:36<20:05, 75.32s/it]

🔍 Summarizing article 67 with facebook/bart-large-cnn
🔍 Summarizing article 67 with google/pegasus-xsum
🔍 Summarizing article 67 with google/pegasus-multi_news
🔍 Summarizing article 67 with google/pegasus-cnn_dailymail


Summarizing articles:  82%|████████▏ | 68/83 [1:27:50<18:41, 74.75s/it]

🔍 Summarizing article 68 with facebook/bart-large-cnn
🔍 Summarizing article 68 with google/pegasus-xsum
🔍 Summarizing article 68 with google/pegasus-multi_news
🔍 Summarizing article 68 with google/pegasus-cnn_dailymail


Summarizing articles:  83%|████████▎ | 69/83 [1:29:13<18:02, 77.29s/it]

🔍 Summarizing article 69 with facebook/bart-large-cnn
🔍 Summarizing article 69 with google/pegasus-xsum
🔍 Summarizing article 69 with google/pegasus-multi_news
🔍 Summarizing article 69 with google/pegasus-cnn_dailymail


Summarizing articles:  84%|████████▍ | 70/83 [1:30:28<16:34, 76.47s/it]

🔍 Summarizing article 70 with facebook/bart-large-cnn
🔍 Summarizing article 70 with google/pegasus-xsum
🔍 Summarizing article 70 with google/pegasus-multi_news
🔍 Summarizing article 70 with google/pegasus-cnn_dailymail


Summarizing articles:  86%|████████▌ | 71/83 [1:31:55<15:55, 79.60s/it]

🔍 Summarizing article 71 with facebook/bart-large-cnn
🔍 Summarizing article 71 with google/pegasus-xsum
🔍 Summarizing article 71 with google/pegasus-multi_news
🔍 Summarizing article 71 with google/pegasus-cnn_dailymail


Summarizing articles:  87%|████████▋ | 72/83 [1:33:09<14:18, 78.01s/it]

🔍 Summarizing article 72 with facebook/bart-large-cnn
🔍 Summarizing article 72 with google/pegasus-xsum
🔍 Summarizing article 72 with google/pegasus-multi_news
🔍 Summarizing article 72 with google/pegasus-cnn_dailymail


Summarizing articles:  88%|████████▊ | 73/83 [1:33:57<11:31, 69.19s/it]

🔍 Summarizing article 73 with facebook/bart-large-cnn
🔍 Summarizing article 73 with google/pegasus-xsum
🔍 Summarizing article 73 with google/pegasus-multi_news
🔍 Summarizing article 73 with google/pegasus-cnn_dailymail


Summarizing articles:  89%|████████▉ | 74/83 [1:35:13<10:39, 71.07s/it]

🔍 Summarizing article 74 with facebook/bart-large-cnn
🔍 Summarizing article 74 with google/pegasus-xsum
🔍 Summarizing article 74 with google/pegasus-multi_news
🔍 Summarizing article 74 with google/pegasus-cnn_dailymail


Summarizing articles:  90%|█████████ | 75/83 [1:36:28<09:38, 72.29s/it]

🔍 Summarizing article 75 with facebook/bart-large-cnn
🔍 Summarizing article 75 with google/pegasus-xsum
🔍 Summarizing article 75 with google/pegasus-multi_news
🔍 Summarizing article 75 with google/pegasus-cnn_dailymail


Summarizing articles:  92%|█████████▏| 76/83 [1:37:44<08:33, 73.36s/it]

🔍 Summarizing article 76 with facebook/bart-large-cnn
🔍 Summarizing article 76 with google/pegasus-xsum
🔍 Summarizing article 76 with google/pegasus-multi_news
🔍 Summarizing article 76 with google/pegasus-cnn_dailymail


Summarizing articles:  93%|█████████▎| 77/83 [1:39:08<07:39, 76.62s/it]

🔍 Summarizing article 77 with facebook/bart-large-cnn
🔍 Summarizing article 77 with google/pegasus-xsum
🔍 Summarizing article 77 with google/pegasus-multi_news
🔍 Summarizing article 77 with google/pegasus-cnn_dailymail


Summarizing articles:  94%|█████████▍| 78/83 [1:40:29<06:29, 77.94s/it]

🔍 Summarizing article 78 with facebook/bart-large-cnn
🔍 Summarizing article 78 with google/pegasus-xsum
🔍 Summarizing article 78 with google/pegasus-multi_news
🔍 Summarizing article 78 with google/pegasus-cnn_dailymail


Summarizing articles:  95%|█████████▌| 79/83 [1:41:51<05:16, 79.09s/it]

🔍 Summarizing article 79 with facebook/bart-large-cnn
🔍 Summarizing article 79 with google/pegasus-xsum
🔍 Summarizing article 79 with google/pegasus-multi_news
🔍 Summarizing article 79 with google/pegasus-cnn_dailymail


Summarizing articles:  96%|█████████▋| 80/83 [1:43:08<03:55, 78.37s/it]

🔍 Summarizing article 80 with facebook/bart-large-cnn
🔍 Summarizing article 80 with google/pegasus-xsum
🔍 Summarizing article 80 with google/pegasus-multi_news
🔍 Summarizing article 80 with google/pegasus-cnn_dailymail


Summarizing articles:  98%|█████████▊| 81/83 [1:44:14<02:29, 74.81s/it]

🔍 Summarizing article 81 with facebook/bart-large-cnn
🔍 Summarizing article 81 with google/pegasus-xsum
🔍 Summarizing article 81 with google/pegasus-multi_news
🔍 Summarizing article 81 with google/pegasus-cnn_dailymail


Summarizing articles:  99%|█████████▉| 82/83 [1:45:47<01:20, 80.14s/it]

🔍 Summarizing article 82 with facebook/bart-large-cnn
🔍 Summarizing article 82 with google/pegasus-xsum
🔍 Summarizing article 82 with google/pegasus-multi_news
🔍 Summarizing article 82 with google/pegasus-cnn_dailymail


Summarizing articles: 100%|██████████| 83/83 [1:47:04<00:00, 77.41s/it]


## Exporting Data

This function's main job is to take the summarized articles and save them into two separate files: one for training a machine learning model and one for testing it. These files are in JSONL format, which means each line is a JSON object representing a single article and its summary.

### JSONL vs JSON for Machine Learning Datasets
While both JSON and JSONL can store data, JSONL (JSON Lines) is often a better choice for large datasets used in machine learning.
So now the code is account for scalability.

Here's why:

1. Efficiency and Scalability:

  * JSONL: Each article and its summary are stored on a separate line. This makes it much faster and easier to process large datasets because you can read and parse each line independently.
  * JSON: The entire dataset is stored as a single, large JSON object. This can become very slow and memory-intensive to work with when dealing with many articles.

2. Ease of Streaming:

  * JSONL: Its line-by-line structure makes it ideal for streaming data. You can start processing articles immediately as they're read, without having to load the entire dataset into memory first. This is crucial for very large datasets that might not fit in memory.
  * JSON: Requires loading the entire file into memory before processing, which can be problematic for large datasets.

3. Flexibility:

  * JSONL: Each line can have a slightly different structure if needed, as long as it's valid JSON. This can be useful when dealing with data that has variations or optional fields.
  * JSON: Requires a strict, consistent structure for the entire dataset.

In [12]:
def export_articles_to_jsonl(summarized_articles, summary_key, train_file="train_dataset.jsonl", test_file="test_dataset.jsonl", train_size=50):
    """
    This function exports summarized articles to JSONL format, splitting them into training and testing
    datasets based on a specified size.

    :param summarized_articles: The `summarized_articles` parameter is a list of dictionaries where each
    dictionary represents an article with keys like "clean_content" and the specified `summary_key`
    containing the article content and its summary respectively. The function `export_articles_to_jsonl`
    takes this list of articles and exports them to
    :param summary_key: The `summary_key` parameter in the `export_articles_to_jsonl` function is used
    to specify the key in each article dictionary that contains the summary information. This key is
    used to extract the summary from the article dictionary and include it in the exported JSONL files
    :param train_file: The `train_file` parameter is the file path where the training dataset will be
    exported in JSONL format. By default, it is set to "train_dataset.jsonl", defaults to
    train_dataset.jsonl (optional)
    :param test_file: The `test_file` parameter in the `export_articles_to_jsonl` function is the file
    path where the test dataset will be exported in JSONL format. This file will contain articles from
    the `summarized_articles` list starting from the index `train_size` onwards, defaults to
    test_dataset.jsonl (optional)
    :param train_size: The `train_size` parameter in the `export_articles_to_jsonl` function specifies
    the number of articles that will be included in the training dataset. In this case, the function
    will export the first `train_size` articles to the `train_dataset.jsonl` file, and the remaining
    articles will, defaults to 50 (optional)
    """
    try:
        with open(train_file, "w", encoding="utf-8") as f:
            for article in summarized_articles[:train_size]:
                json.dump({"text": article["clean_content"], "summary": article[summary_key]}, f)
                f.write("\n")
    except Exception as e:
        print(f"Error exporting articles to {train_file}: {e}")

    try:
        with open(test_file, "w", encoding="utf-8") as f:
            for article in summarized_articles[train_size:]:
                json.dump({"text": article["clean_content"], "summary": article[summary_key]}, f)
                f.write("\n")
    except Exception as e:
        print(f"Error exporting articles to {test_file}: {e}")


In [13]:
export_articles_to_jsonl(summarized_articles, "summary_facebook/bart-large-cnn", train_file="train_bart-large-cnn.jsonl", test_file="test_bart-large-cnn.jsonl")
export_articles_to_jsonl(summarized_articles, "summary_google/pegasus-xsum", train_file="train_pegasus-xsum.jsonl", test_file="test_pegasus-xsum.jsonl")
export_articles_to_jsonl(summarized_articles, "summary_google/pegasus-multi_news", train_file="train_pegasus-multi_news.jsonl", test_file="test_pegasus-multi_news.jsonl")
export_articles_to_jsonl(summarized_articles, "summary_google/pegasus-cnn_dailymail", train_file="train_pegasus-cnn_dailymail.jsonl", test_file="test_pegasus-cnn_dailymail.jsonl")

In [14]:
# Export articles to JSON files for training (fine-tune) process
try:
    with open("train_dataset.jsonl", "w", encoding="utf-8") as f:
        for article in summarized_articles[0:50]:
            json.dump({"text": article["clean_content"], "summary": article["summary_facebook/bart-large-cnn"]}, f)
            f.write("\n")
except Exception as e:
    print(f"Error exporting articles to JSONL: {e}")

try:
    with open("test_dataset.jsonl", "w", encoding="utf-8") as f:
        for article in summarized_articles[50:]:
            json.dump({"text": article["clean_content"], "summary": article["summary_facebook/bart-large-cnn"]}, f)
            f.write("\n")
except Exception as e:
    print(f"Error exporting articles to JSONL: {e}")

In [15]:
# Export articles to JSON files for prediction process
try:
    with open("train_summarized_articles.jsonl", "w") as json_file:
        json.dump(summarized_articles[0:50], json_file, indent=4)
    print("Articles exported to train_summarized_articles.jsonl")
except Exception as e:
    print(f"Error exporting articles to JSON: {e}")

try:
    with open("test_summarized_articles.jsonl", "w") as json_file:
        json.dump(summarized_articles[50:], json_file, indent=4)
    print("Articles exported to test_summarized_articles.jsonl")
except Exception as e:
    print(f"Error exporting articles to JSON: {e}")

Articles exported to train_summarized_articles.jsonl
Articles exported to test_summarized_articles.jsonl
