In [None]:
# Mount Google Drive at /content/drive so files stored on Drive can be read
# from this Colab runtime through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [14]:
!pip install gradio transformers

Collecting gradio
  Downloading gradio-4.37.2-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.2 (from gradio)
  Downloading gradio_client-1.0.2-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.2/318.2 kB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [15]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Directory on the mounted Drive from which the model and tokenizer are loaded.
model_path = '/content/drive/My Drive/fine_tuning'
# Module-level model and tokenizer; summarize_text() below reads both globals.
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = T5Tokenizer.from_pretrained(model_path)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [16]:
def summarize_text(text):
    """Return a summary of ``text`` produced by the module-level T5 ``model``.

    The text is prefixed with "summarize: ", encoded with the module-level
    ``tokenizer`` (truncated to at most 512 tokens), and passed to
    ``model.generate`` with 4 beams and a 150-token output limit. The first
    generated sequence is decoded with special tokens removed.

    Args:
        text: The string to summarize.

    Returns:
        The decoded summary string.
    """
    prompt = "summarize: " + text
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generated = model.generate(
        input_ids,
        max_length=150,
        num_beams=4,
        early_stopping=True,
    )
    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)

In [18]:
import gradio as gr

# Build the single-mode summarizer UI around summarize_text().
# The custom CSS below targets the classes "input_textbox" and
# "output_textbox"; those classes only exist in the page when they are attached
# to a component via `elem_classes`, so both textboxes declare them explicitly.
interface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Enter Text Here...",
        label="Input Text",
        elem_classes="input_textbox",
    ),
    outputs=gr.Textbox(label="Summary", elem_classes="output_textbox"),
    title='Text Summarizer with T5 Model',
    description='Enter your text in the box below and get a concise summary.',
    article="<p style='text-align: center;'>Powered by your fine-tuned T5 Model</p>",
    css='''
    .input_textbox {
        border: 2px solid #007bff;
        border-radius: 5px;
        padding: 10px;
    }
    .output_textbox {
        border: 2px solid #28a745;
        border-radius: 5px;
        padding: 10px;
    }
    '''
)

In [19]:
# Start serving `interface`. In the recorded Colab run Gradio enabled sharing
# automatically and printed a temporary public gradio.live URL.
interface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://998c6f5be9aa8be13b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [21]:
# @title Import librabries
# Mount Google Drive to access files
from google.colab import drive
drive.mount('/content/drive')

# Install necessary packages: gradio for UI, transformers for model handling, and networkx for extractive summarization
!pip install gradio transformers networkx

# Import T5 model and tokenizer from transformers library
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Import Gradio for creating a web-based interface
import gradio as gr

# Import NetworkX for implementing TextRank algorithm
import networkx as nx

# Import CountVectorizer and cosine_similarity from sklearn for text processing and similarity calculations
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
# Load the fine-tuned T5 model for abstractive summarization from Google Drive.
# `model_path` is the directory on the mounted Drive; the model and tokenizer are
# both read from it and kept as module-level globals for abstractive_summarize().
model_path = '/content/drive/My Drive/fine_tuning'
abstractive_model = T5ForConditionalGeneration.from_pretrained(model_path)
abstractive_tokenizer = T5Tokenizer.from_pretrained(model_path)

# Function for abstractive summarization using the T5 model
def abstractive_summarize(text):
    """Generate an abstractive summary of ``text`` with ``abstractive_model``.

    Args:
        text: The string to summarize.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    # The "summarize: " task prefix is prepended before tokenization; inputs
    # longer than 512 tokens are truncated.
    prompt = "summarize: " + text
    input_ids = abstractive_tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    # Beam search with 4 beams, output capped at 150 tokens.
    generated = abstractive_model.generate(
        input_ids,
        max_length=150,
        num_beams=4,
        early_stopping=True,
    )
    best_sequence = generated[0]
    return abstractive_tokenizer.decode(best_sequence, skip_special_tokens=True)

# Function for extractive summarization using the TextRank algorithm
def extractive_summarize(text, num_sentences=3):
    """Build an extractive summary by ranking sentences with PageRank.

    The text is split into sentences, each sentence is turned into a
    bag-of-words count vector, pairwise cosine similarities form a weighted
    graph, and PageRank scores on that graph rank the sentences. The
    highest-scoring sentences are joined (best first) into the summary.

    Args:
        text: The string to summarize.
        num_sentences: Maximum number of sentences in the summary (positive
            integer, default 3).

    Returns:
        The selected sentences joined by single spaces, or "" when ``text`` is
        empty or contains only whitespace.
    """
    import re  # local import: the notebook's import cell does not bring in `re`

    if not text or not text.strip():
        return ""

    # Split only where sentence punctuation is followed by whitespace, so
    # decimals such as "29.2%" stay intact and each sentence keeps its own
    # terminator. (Abbreviations like "Dr. Smith" are still split.)
    sentences = [
        piece.strip()
        for piece in re.split(r"(?<=[.!?])\s+", text.strip())
        if piece.strip()
    ]

    try:
        # Convert sentences to a dense matrix of token counts.
        vectors = CountVectorizer().fit_transform(sentences).toarray()
    except ValueError:
        # CountVectorizer raises ValueError when no usable tokens are found
        # (empty vocabulary); fall back to the leading sentences.
        return " ".join(sentences[:num_sentences])

    # Sentence-to-sentence cosine similarity becomes the edge weights of the graph.
    similarity_matrix = cosine_similarity(vectors)
    scores = nx.pagerank(nx.from_numpy_array(similarity_matrix))

    # Highest PageRank score first; ties fall back to comparing the sentence text.
    ranked_sentences = sorted(
        ((scores[i], s) for i, s in enumerate(sentences)), reverse=True
    )
    return " ".join(s for _, s in ranked_sentences[:num_sentences])


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [33]:
# Function to determine summarization type based on user input
def summarize_text(text, summarization_type):
    """Dispatch ``text`` to the summarizer chosen in the UI.

    Args:
        text: The string to summarize.
        summarization_type: "Abstractive" or "Extractive". The dropdown can
            pass None when nothing has been selected.

    Returns:
        The summary string; "" when ``text`` is empty or whitespace-only; or a
        short prompt asking the user to pick a type when
        ``summarization_type`` is neither supported value (instead of silently
        returning None).
    """
    # Nothing to summarize: skip the summarizers rather than feeding them blank input.
    if not text or not text.strip():
        return ""
    # If user selects "Abstractive", use the abstractive_summarize function
    if summarization_type == "Abstractive":
        return abstractive_summarize(text)
    # If user selects "Extractive", use the extractive_summarize function
    if summarization_type == "Extractive":
        return extractive_summarize(text)
    # No selection or an unknown value: tell the user what is expected.
    return "Please select a summarization type: Abstractive or Extractive."

# Define Gradio interface with enhanced styling and graphics
# Two-mode summarizer UI around summarize_text(text, summarization_type).
# - "compact" is not a built-in Gradio theme name, so the built-in "default"
#   theme is named explicitly instead.
# - The CSS classes "input_textbox" / "output_textbox" are attached to the
#   components through `elem_classes`; without that the rules match nothing.
# - The dropdown starts on "Abstractive" so the callback never receives None.
interface = gr.Interface(
    fn=summarize_text,  # Function to be called when the user submits input
    inputs=[
        # Input textbox for the user to enter text, styled via the "input_textbox" class
        gr.Textbox(
            lines=10,
            placeholder="Enter Text Here...",
            label="Input Text",
            elem_classes="input_textbox",
        ),
        # Dropdown for the user to select summarization type
        gr.Dropdown(
            ['Abstractive', 'Extractive'],
            value='Abstractive',
            label="Summarization Type",
        ),
    ],
    # Output textbox for displaying the summary, styled via the "output_textbox" class
    outputs=gr.Textbox(label="Summary", elem_classes="output_textbox"),
    title=' TEXT SUMMIFY ',  # Title of the interface
    description='Enter your text in the box below and select the type of summarization (Abstractive or Extractive) to get a summary.',  # Description for the interface
    article="<p style='text-align: center; font-size: 18px;'>Powered by your fine-tuned T5 Model and TextRank for Extractive Summarization</p>",  # Additional information displayed below the interface
    theme="default",  # Built-in Gradio theme
    css='''
    body {
        font-family: Arial, sans-serif;
        background-color: #f8f9fa;
        margin: 0;
        padding: 0;
    }
    .input_textbox {
        margin-bottom: 10px;
    }
    .output_textbox {
        margin-top: 10px;
    }
    ''',  # Custom CSS for styling the interface
    examples=[
        # Example inputs to demonstrate the functionality
        ['Artificial intelligence (AI) is revolutionizing industries across the globe. From healthcare to finance, AI-powered systems are enhancing efficiency and decision-making processes. Machine learning algorithms, a subset of AI, enable computers to learn from data and make predictions or decisions without explicit programming. Natural Language Processing (NLP), another branch of AI, focuses on enabling computers to understand, interpret, and generate human language. These advancements in AI are paving the way for smarter automation and personalized user experiences.', 'Abstractive'],
        ['The Earth is the third planet from the Sun and the only astronomical object known to harbor and support life. About 29.2% of Earth\'s surface is land consisting of continents and islands. The remaining 70.8% is covered with water, mostly by oceans, seas, gulfs, and other salt-water bodies, but also by lakes, rivers, and other freshwater, which together constitute the hydrosphere. Earth\'s atmosphere consists mostly of nitrogen and oxygen.', 'Extractive']
    ]
)



Sorry, we can't find the page you are looking for.


In [34]:
# @title Launch the interface

# Start serving `interface`. In the recorded Colab run Gradio enabled sharing
# automatically and printed a temporary public gradio.live URL.
interface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://44d8b296abb6dd12f0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


