## Importing libraries

In [7]:
import pandas as pd
import re
import nltk

# Fetch the NLTK resources used below: the stopword list, WordNet (for the
# lemmatizer) and the punkt tokenizer data (for sent_tokenize / word_tokenize).
# As the cell output shows, packages that are already present are reported as up-to-date.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')

from textblob import TextBlob
# from spellchecker import SpellChecker  # for spelling correction
from nltk.corpus import stopwords, wordnet
from nltk import pos_tag
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


## Loading the dataset into df (DataFrame)

In [8]:
# Read the BBC news articles CSV from the mounted Google Drive folder
DATA_PATH = '/content/drive/MyDrive/Infosys Springboard/bbc_news_articles.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,text,summary
0,News Corp eyes video games market\n\nNews Corp...,"According to the Financial Times, chief operat..."
1,Khodorkovsky quits Yukos shares\n\nJailed tyco...,Mr Khodorkovsky handed over his stake after th...
2,Huge rush for Jet Airways shares\n\nIndian air...,Indian airline Jet Airways' initial public off...
3,Mild winter drives US oil down 6%\n\nUS oil pr...,"US oil prices have fallen by 6%, driven down b..."
4,"Argentina, Venezuela in oil deal\n\nArgentina ...",Argentine President Nestor Kirchner and Venezu...


## Dropping rows with missing text or summary and converting text to lowercase (creating the new column "processed_text"; all later operations are performed on this new column)

In [None]:
# Drop rows with missing 'text' or 'summary' (the dataset has no 'title' column)
df.dropna(subset=['text', 'summary'], inplace=True)

# Convert text to lowercase; the result goes into a new 'processed_text' column
# so the original 'text' column is left untouched
df['processed_text'] = df['text'].str.lower()
df.head()

Unnamed: 0,text,summary,processed_text
0,News Corp eyes video games market\n\nNews Corp...,"According to the Financial Times, chief operat...",news corp eyes video games market\n\nnews corp...
1,Khodorkovsky quits Yukos shares\n\nJailed tyco...,Mr Khodorkovsky handed over his stake after th...,khodorkovsky quits yukos shares\n\njailed tyco...
2,Huge rush for Jet Airways shares\n\nIndian air...,Indian airline Jet Airways' initial public off...,huge rush for jet airways shares\n\nindian air...
3,Mild winter drives US oil down 6%\n\nUS oil pr...,"US oil prices have fallen by 6%, driven down b...",mild winter drives us oil down 6%\n\nus oil pr...
4,"Argentina, Venezuela in oil deal\n\nArgentina ...",Argentine President Nestor Kirchner and Venezu...,"argentina, venezuela in oil deal\n\nargentina ..."


## Substitution of contractions

In [None]:
# Lookup table mapping English contractions to their expanded forms.
# It contains both capitalised ("I'd") and lowercase ("i'd") variants, as well as
# compound forms ("i'd've") whose prefixes ("i'd") are also keys in the table.
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",
                           "didn't": "did not",  "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",
                           "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",
                           "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",
                           "i'd've": "i would have", "i'll": "i will",  "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",
                           "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam",
                           "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have", "must've": "must have",
                           "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",
                           "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have",
                           "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",
                           "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",
                           "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",
                           "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would", "they'd've": "they would have",
                           "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have",
                           "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are",
                           "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are",
                           "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is",
                           "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have",
                           "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have",
                           "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all",
                           "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",
                           "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",
                           "you're": "you are", "you've": "you have"}

In [None]:
# Expand contractions.
# Replacing the dictionary keys one after another expands a prefix before its
# compound form (e.g. "y'all" inside "y'all'd've" -> "you all'd've") and lets a key
# match inside a longer word. Instead, build ONE alternation with the longest keys
# first and require that the match is not glued to another word character.
# Keys and values are lowercased because 'processed_text' is already lowercase.
_contractions_lower = {key.lower(): value.lower() for key, value in contraction_mapping.items()}
_contraction_pattern = re.compile(
    r"(?<!\w)(?:"
    + "|".join(re.escape(key) for key in sorted(_contractions_lower, key=len, reverse=True))
    + r")(?!\w)"
)
df['processed_text'] = df['processed_text'].str.replace(
    _contraction_pattern,
    lambda match: _contractions_lower[match.group(0)],  # look up the matched contraction
    regex=True,
)
df.head()

Unnamed: 0,text,summary,processed_text
0,News Corp eyes video games market\n\nNews Corp...,"According to the Financial Times, chief operat...",news corp eyes video games market\n\nnews corp...
1,Khodorkovsky quits Yukos shares\n\nJailed tyco...,Mr Khodorkovsky handed over his stake after th...,khodorkovsky quits yukos shares\n\njailed tyco...
2,Huge rush for Jet Airways shares\n\nIndian air...,Indian airline Jet Airways' initial public off...,huge rush for jet airways shares\n\nindian air...
3,Mild winter drives US oil down 6%\n\nUS oil pr...,"US oil prices have fallen by 6%, driven down b...",mild winter drives us oil down 6%\n\nus oil pr...
4,"Argentina, Venezuela in oil deal\n\nArgentina ...",Argentine President Nestor Kirchner and Venezu...,"argentina, venezuela in oil deal\n\nargentina ..."


## Removing HTML tags, extra spaces, punctuation, special characters, URL, Emails

In [None]:
def clean_text(text):
    """Strip markup, links and punctuation from one article string.

    Steps, in order:
      1. remove HTML tags;
      2. remove URLs, @handles and e-mail addresses -- this has to happen while
         '@', ':', '/' and '.' are still present, otherwise the pattern cannot
         recognise them;
      3. remove every character that is not an ASCII letter, digit or whitespace;
      4. collapse runs of whitespace (including newlines) to single spaces and trim.

    Args:
        text: the string to clean.

    Returns:
        The cleaned string.
    """
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r'http\S+|www\S+|@\S+|\S+@\S+', '', text)
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return re.sub(r'\s+', ' ', text).strip()


# Remove HTML tags, URLs/e-mails, punctuation/special characters and extra whitespace
df['processed_text'] = df['processed_text'].apply(clean_text)
df.head()

Unnamed: 0,text,summary,processed_text
0,News Corp eyes video games market\n\nNews Corp...,"According to the Financial Times, chief operat...",news corp eyes video games market news corp th...
1,Khodorkovsky quits Yukos shares\n\nJailed tyco...,Mr Khodorkovsky handed over his stake after th...,khodorkovsky quits yukos shares jailed tycoon ...
2,Huge rush for Jet Airways shares\n\nIndian air...,Indian airline Jet Airways' initial public off...,huge rush for jet airways shares indian airlin...
3,Mild winter drives US oil down 6%\n\nUS oil pr...,"US oil prices have fallen by 6%, driven down b...",mild winter drives us oil down 6 us oil prices...
4,"Argentina, Venezuela in oil deal\n\nArgentina ...",Argentine President Nestor Kirchner and Venezu...,argentina venezuela in oil deal argentina and ...


## Initializing lemmatizer and stopwords and spellchecker

In [None]:
# Initialize lemmatizer, stopwords and (optionally) the spell checker
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# The top-of-notebook SpellChecker import is commented out and pyspellchecker is
# only installed in a later cell, so a bare SpellChecker() raises NameError here.
# Import it locally and fall back to None when the package is unavailable; the
# spelling-correction cells that would use `spell` are currently commented out.
try:
    from spellchecker import SpellChecker
    spell = SpellChecker()
except ImportError:
    spell = None

## Tokenization by two methods
### 1. Splitting text into sentences, then word tokenization within each sentence
Creates the new columns "sentences" and "word_tokens".

In [None]:
# Step 3a: Sentence Tokenization (splitting text into sentences)
# NOTE(review): 'processed_text' has already had its punctuation removed by the
# cleaning cell, so sent_tokenize has no sentence-ending marks to split on; the
# final df.head() output shows a single-element list per article. Tokenize
# df['text'] instead if real sentence boundaries are needed.
df['sentences'] = df['processed_text'].apply(sent_tokenize)

# Step 3b: Word Tokenization within each sentence
# Produces a list of token lists (one inner list per sentence) for every article.
df['word_tokens'] = df['sentences'].apply(lambda sentences: [word_tokenize(sentence) for sentence in sentences])
# df['word_tokens'].head()

### 2. Using Python's built-in `str.split()`
Performing tokenization on "processed_text" and comparing the result with "word_tokens".

In [None]:
# Whitespace tokenization: turn each cleaned article string into a list of words
df['processed_text'] = df['processed_text'].map(str.split)
df[['processed_text', 'word_tokens']].head()

Unnamed: 0,processed_text,word_tokens
0,"[news, corp, eyes, video, games, market, news,...","[[news, corp, eyes, video, games, market, news..."
1,"[khodorkovsky, quits, yukos, shares, jailed, t...","[[khodorkovsky, quits, yukos, shares, jailed, ..."
2,"[huge, rush, for, jet, airways, shares, indian...","[[huge, rush, for, jet, airways, shares, india..."
3,"[mild, winter, drives, us, oil, down, 6, us, o...","[[mild, winter, drives, us, oil, down, 6, us, ..."
4,"[argentina, venezuela, in, oil, deal, argentin...","[[argentina, venezuela, in, oil, deal, argenti..."


## Removing Stopwords

In [None]:
def _drop_stopwords(tokens):
    """Return the tokens of one article that are not in `stop_words`."""
    return [token for token in tokens if token not in stop_words]


# Filter the stopwords out of every token list
df['processed_text'] = df['processed_text'].map(_drop_stopwords)
df['processed_text'].head()

Unnamed: 0,processed_text
0,"[news, corp, eyes, video, games, market, news,..."
1,"[khodorkovsky, quits, yukos, shares, jailed, t..."
2,"[huge, rush, jet, airways, shares, indian, air..."
3,"[mild, winter, drives, us, oil, 6, us, oil, pr..."
4,"[argentina, venezuela, oil, deal, argentina, v..."


In [None]:
!pip install pyspellchecker

Collecting pyspellchecker
  Downloading pyspellchecker-0.8.2-py3-none-any.whl.metadata (9.4 kB)
Downloading pyspellchecker-0.8.2-py3-none-any.whl (7.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyspellchecker
Successfully installed pyspellchecker-0.8.2


## Spelling Correction

In [None]:
# Correct misspellings (identify and correct each word)
# df['processed_text'] = df['processed_text'].apply(lambda x: [spell.correction(word) if word in spell.unknown([word]) else word for word in x])
# df['processed_text'].head()

In [None]:
# Spelling Correction with TextBlob directly on word tokens
# df['corrected_tokens'] = df['word_tokens'].apply(lambda sentences: [[str(TextBlob(word).correct()) for word in sentence] for sentence in sentences])

In [None]:
# # Spelling Correction using TextBlob on tokenized text after stopwords are removed
# df['processed_text'] = df['processed_text'].apply(lambda x: [str(TextBlob(word).correct()) for word in x])
# df['processed_text'].head()

## Lemmatization by two methods
### 1. POS tagging and lemmatization
Creates a new column with the POS-aware lemmas: "lemmatized_tokens".

In [None]:
nltk.download('averaged_perceptron_tagger_eng')
def get_wordnet_pos(tag):
    """Map a POS tag string to the corresponding WordNet POS constant.

    Only the first character of the tag is inspected: 'J' -> adjective,
    'V' -> verb, 'N' -> noun, 'R' -> adverb.

    Args:
        tag: POS tag string, as returned by ``pos_tag`` for a token.

    Returns:
        The matching ``wordnet`` constant, or ``wordnet.NOUN`` when the tag is
        empty or starts with any other character.
    """
    tag_map = {
        'J': wordnet.ADJ,     # Adjective
        'V': wordnet.VERB,    # Verb
        'N': wordnet.NOUN,    # Noun
        'R': wordnet.ADV      # Adverb
    }
    # tag[:1] (rather than tag[0]) makes an empty tag fall through to the NOUN
    # default instead of raising IndexError.
    return tag_map.get(tag[:1], wordnet.NOUN)

# POS Tagging and Lemmatization
def _lemmatize_with_pos(tokens):
    """Tag one token list with pos_tag and lemmatize each word using the mapped WordNet POS."""
    lemmas = []
    for word, tag in pos_tag(tokens):
        lemmas.append(lemmatizer.lemmatize(word, get_wordnet_pos(tag)))
    return lemmas


df['lemmatized_tokens'] = df['processed_text'].apply(_lemmatize_with_pos)

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


### 2. Applying the lemmatizer directly
Applied to "processed_text" (without POS tags) to compare against "lemmatized_tokens".

In [None]:
# Lemmatize every token without supplying a POS tag (the lemmatizer's default is used)
df['processed_text'] = df['processed_text'].map(
    lambda tokens: list(map(lemmatizer.lemmatize, tokens))
)

In [None]:
df[['processed_text','lemmatized_tokens']].head(15)

Unnamed: 0,processed_text,lemmatized_tokens
0,"[news, corp, eye, video, game, market, news, c...","[news, corp, eye, video, game, market, news, c..."
1,"[khodorkovsky, quits, yukos, share, jailed, ty...","[khodorkovsky, quits, yukos, share, jail, tyco..."
2,"[huge, rush, jet, airway, share, indian, airli...","[huge, rush, jet, airways, share, indian, airl..."
3,"[mild, winter, drive, u, oil, 6, u, oil, price...","[mild, winter, drive, u, oil, 6, u, oil, price..."
4,"[argentina, venezuela, oil, deal, argentina, v...","[argentina, venezuela, oil, deal, argentina, v..."
5,"[u, consumer, confidence, consumer, confidence...","[u, consumer, confidence, consumer, confidence..."
6,"[tobacco, giant, hail, court, ruling, u, tobac...","[tobacco, giant, hail, court, rule, u, tobacco..."
7,"[venezuela, identifies, idle, farm, venezuelan...","[venezuela, identifies, idle, farm, venezuelan..."
8,"[korean, credit, card, firm, rescued, south, k...","[korean, credit, card, firm, rescue, south, ko..."
9,"[bush, outline, toughest, budget, president, b...","[bush, outline, tough, budget, president, bush..."


## Joining tokens back into a single string

In [None]:
# Collapse each token list in 'processed_text' into one space-separated string
df['processed_text'] = df['processed_text'].map(' '.join)

In [None]:
# Collapse the POS-aware lemma lists into space-separated strings, then show both columns
df['lemmatized_tokens'] = df['lemmatized_tokens'].map(' '.join)
df[['lemmatized_tokens', 'processed_text']].head()

Unnamed: 0,lemmatized_tokens,processed_text
0,news corp eye video game market news corp medi...,news corp eye video game market news corp medi...
1,khodorkovsky quits yukos share jail tycoon mik...,khodorkovsky quits yukos share jailed tycoon m...
2,huge rush jet airways share indian airline jet...,huge rush jet airway share indian airline jet ...
3,mild winter drive u oil 6 u oil price fall 6 d...,mild winter drive u oil 6 u oil price fallen 6...
4,argentina venezuela oil deal argentina venezue...,argentina venezuela oil deal argentina venezue...


## POS Tagging

In [None]:
import spacy
# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

# Function to perform POS tagging
def pos_tagging(text):
    """Run the spaCy pipeline `nlp` on `text` and return a list of (token text, token.pos_) pairs."""
    tagged = []
    for token in nlp(text):
        tagged.append((token.text, token.pos_))
    return tagged

# Apply POS tagging to the 'text' column.
# nlp.pipe streams the articles through the pipeline in batches, which avoids
# the overhead of invoking nlp() once per row; each row still receives a list
# of (token text, POS label) pairs. Wrapping the result in a Series with df's
# index keeps the row alignment explicit.
df['pos_tags'] = pd.Series(
    [[(token.text, token.pos_) for token in doc] for doc in nlp.pipe(df['text'])],
    index=df.index,
)

In [None]:
df.head()

Unnamed: 0,text,summary,processed_text,sentences,word_tokens,lemmatized_tokens,pos_tags
0,News Corp eyes video games market\n\nNews Corp...,"According to the Financial Times, chief operat...",news corp eye video game market news corp medi...,[news corp eyes video games market news corp t...,"[[news, corp, eyes, video, games, market, news...",news corp eye video game market news corp medi...,"[(News, PROPN), (Corp, PROPN), (eyes, NOUN), (..."
1,Khodorkovsky quits Yukos shares\n\nJailed tyco...,Mr Khodorkovsky handed over his stake after th...,khodorkovsky quits yukos share jailed tycoon m...,[khodorkovsky quits yukos shares jailed tycoon...,"[[khodorkovsky, quits, yukos, shares, jailed, ...",khodorkovsky quits yukos share jail tycoon mik...,"[(Khodorkovsky, PROPN), (quits, VERB), (Yukos,..."
2,Huge rush for Jet Airways shares\n\nIndian air...,Indian airline Jet Airways' initial public off...,huge rush jet airway share indian airline jet ...,[huge rush for jet airways shares indian airli...,"[[huge, rush, for, jet, airways, shares, india...",huge rush jet airways share indian airline jet...,"[(Huge, ADJ), (rush, NOUN), (for, ADP), (Jet, ..."
3,Mild winter drives US oil down 6%\n\nUS oil pr...,"US oil prices have fallen by 6%, driven down b...",mild winter drive u oil 6 u oil price fallen 6...,[mild winter drives us oil down 6 us oil price...,"[[mild, winter, drives, us, oil, down, 6, us, ...",mild winter drive u oil 6 u oil price fall 6 d...,"[(Mild, ADJ), (winter, NOUN), (drives, VERB), ..."
4,"Argentina, Venezuela in oil deal\n\nArgentina ...",Argentine President Nestor Kirchner and Venezu...,argentina venezuela oil deal argentina venezue...,[argentina venezuela in oil deal argentina and...,"[[argentina, venezuela, in, oil, deal, argenti...",argentina venezuela oil deal argentina venezue...,"[(Argentina, PROPN), (,, PUNCT), (Venezuela, P..."


In [3]:
!pip install transformers datasets torch

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [9]:
from datasets import Dataset
# Convert the DataFrame to a Hugging Face Dataset.
# Only 'text' and 'summary' are read by the tokenization step, so select them
# explicitly: when the notebook is run top to bottom, df also carries the derived
# columns (token lists, POS-tag tuples, ...) that would otherwise be serialized too.
# preserve_index=False keeps the pandas index from being stored as an extra column.
dataset = Dataset.from_pandas(df[['text', 'summary']], preserve_index=False)

In [12]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
# Load the tokenizer and model (pretrained "t5-small" checkpoint)
# NOTE: `model_name`, `tokenizer` and `model` are rebound to the BART checkpoint in
# the "Abstractive Summary" cell further down; after that cell has run, these names
# no longer refer to T5.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [13]:
# Preprocess the dataset for the model
def preprocess_function(examples):
    """Tokenize a batch of articles and their reference summaries for T5.

    Args:
        examples: batch mapping that provides the "text" and "summary" entries
            (each an iterable of strings), as passed by ``dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the prefixed articles (truncated to 512 tokens)
        with an added "labels" entry holding the token ids of the summaries
        (truncated to 150 tokens).
    """
    # T5 is prompted with a task prefix
    inputs = ["summarize: " + doc for doc in examples["text"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True)

    # `text_target` tokenizes the summaries as targets; it replaces the deprecated
    # `tokenizer.as_target_tokenizer()` context manager.
    labels = tokenizer(text_target=examples["summary"], max_length=150, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/2225 [00:00<?, ? examples/s]



In [14]:
# Define training arguments
# NOTE(review): newer transformers releases name `evaluation_strategy` `eval_strategy`;
# confirm which spelling the installed version expects.
training_args = TrainingArguments(
    output_dir="./results",          # where checkpoints are written
    evaluation_strategy="epoch",     # evaluate once per epoch
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=3,              # keep at most 3 checkpoints in output_dir
)



In [15]:
# Initialize the Trainer
from transformers import DataCollatorForSeq2Seq

# The examples were tokenized without padding, so every batch needs a collator
# that pads inputs and labels to a common length; without one, the Trainer's
# default collator cannot stack the variable-length sequences.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # NOTE(review): evaluation runs on the training data itself; hold out a
    # validation split if the evaluation loss is meant to measure generalization.
    eval_dataset=tokenized_dataset,
    data_collator=data_collator,
)
# trainer.train()

In [16]:
# Save the model and tokenizer.
# Use a dedicated output directory: a local folder literally named "t5-small"
# would be picked up by a later from_pretrained("t5-small") call in the same
# working directory instead of the Hub checkpoint.
SAVE_DIR = "./t5-small-bbc-summarizer"
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)

# trainer.train() is commented out in the Trainer cell, so only report what
# actually happened here: the current weights were written to disk.
print(f"Model and tokenizer saved to {SAVE_DIR}.")

Model fine-tuning completed and saved.


## Abstractive Summary

In [3]:
from transformers import BartTokenizer, BartForConditionalGeneration
# Load pre-trained BART model and tokenizer
# NOTE: this rebinds `model_name`, `tokenizer` and `model`, which the T5 cells
# earlier in the notebook also assign; summarize_text() below reads these
# module-level names at call time.
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

def summarize_text(input_text):
    """Generate an abstractive summary of ``input_text`` with the loaded BART model.

    Args:
        input_text: the text to summarize. ``None``, empty or whitespace-only
            input yields an empty string without running the model.

    Returns:
        The decoded summary string (special tokens removed).
    """
    # Nothing to summarize: skip generation entirely.
    if not input_text or not input_text.strip():
        return ""

    # BART is given the raw article. The "summarize: " task prefix is a T5
    # convention; here it would just be fed to the model as part of the input.
    # Input is truncated to 1024 tokens.
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=1024, truncation=True)
    # Beam search (4 beams) producing between 50 and 100 tokens
    summary_ids = model.generate(inputs, max_length=100, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)

    # Decode and return the summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

## Extractive Summary

In [18]:
!pip install bert-extractive-summarizer

Collecting bert-extractive-summarizer
  Downloading bert_extractive_summarizer-0.10.1-py3-none-any.whl.metadata (15 kB)
Downloading bert_extractive_summarizer-0.10.1-py3-none-any.whl (25 kB)
Installing collected packages: bert-extractive-summarizer
Successfully installed bert-extractive-summarizer-0.10.1


In [2]:
from summarizer import Summarizer
summarizer = Summarizer()
def generate_summary(input_text):
    """Return the extractive summary that the module-level `summarizer` produces for `input_text`."""
    return summarizer(input_text)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [20]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

## Gradio Interface

In [4]:
import gradio as gr
# Define a function to select the summarization method
def summarize(input_text, method):
    """Summarize ``input_text`` with the method selected in the UI.

    Args:
        input_text: text entered by the user. ``None``, empty or whitespace-only
            input returns an empty string without invoking either model.
        method: "Extractive" selects generate_summary(); any other value
            (including ``None``) selects summarize_text().

    Returns:
        The summary string.
    """
    # The textbox can be submitted empty; do not send blank input to the models.
    if not input_text or not input_text.strip():
        return ""

    if method == "Extractive":
        return generate_summary(input_text)
    return summarize_text(input_text)

# Build the Gradio UI: a text box and a method selector feed summarize(),
# whose return value is shown in the output text box.
text_input = gr.Textbox(lines=10, label="Enter text to summarize")
method_input = gr.Dropdown(choices=["Extractive", "Abstractive"], label="Summarization Method")
summary_output = gr.Textbox(label="Summary")

iface = gr.Interface(
    fn=summarize,
    inputs=[text_input, method_input],
    outputs=summary_output,
    title="Text Summarizer",
    description="Enter text to get the summary and Select summarization method.",
    allow_flagging="never",
)

# Start the app
iface.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ac02232892eea63009.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


