In [1]:
text="""Karnataka, a state in southern India, is renowned for its rich cultural heritage, diverse landscapes, and economic prowess. Its capital, Bengaluru, is a global tech hub, often dubbed the "Silicon Valley of India." Karnataka boasts historical landmarks like Hampi and Mysore Palace, lush Western Ghats, and stunning beaches along the Arabian Sea. The state's vibrant festivals, classical music, and dance forms, including Carnatic music and Bharatanatyam, reflect its deep-rooted traditions. Additionally, Karnataka is a major producer of coffee and silk, contributing significantly to India's economy and cultural mosaic."""

In [2]:
len(text)

621

In [3]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [4]:
nlp=spacy.load('en_core_web_sm')

In [5]:
doc = nlp(text)

In [6]:
tokens=[token.text.lower() for token in doc
        if not token.is_stop and
        not token.is_punct and
        token.text!='\n']

In [7]:
tokens

['karnataka',
 'state',
 'southern',
 'india',
 'renowned',
 'rich',
 'cultural',
 'heritage',
 'diverse',
 'landscapes',
 'economic',
 'prowess',
 'capital',
 'bengaluru',
 'global',
 'tech',
 'hub',
 'dubbed',
 'silicon',
 'valley',
 'india',
 'karnataka',
 'boasts',
 'historical',
 'landmarks',
 'like',
 'hampi',
 'mysore',
 'palace',
 'lush',
 'western',
 'ghats',
 'stunning',
 'beaches',
 'arabian',
 'sea',
 'state',
 'vibrant',
 'festivals',
 'classical',
 'music',
 'dance',
 'forms',
 'including',
 'carnatic',
 'music',
 'bharatanatyam',
 'reflect',
 'deep',
 'rooted',
 'traditions',
 'additionally',
 'karnataka',
 'major',
 'producer',
 'coffee',
 'silk',
 'contributing',
 'significantly',
 'india',
 'economy',
 'cultural',
 'mosaic']

In [8]:
tokens1=[]
stopwords = list(STOP_WORDS)
allowed_pos=['ADJ','PROPN','VERB','NOUN']
for token in doc:
  if token.text in stopwords or token.text in punctuation:
     continue
  if token.pos_ in allowed_pos:
     tokens1.append(token.text)

In [9]:
tokens1

['Karnataka',
 'state',
 'southern',
 'India',
 'renowned',
 'rich',
 'cultural',
 'heritage',
 'diverse',
 'landscapes',
 'economic',
 'prowess',
 'capital',
 'Bengaluru',
 'global',
 'tech',
 'hub',
 'dubbed',
 'Silicon',
 'Valley',
 'India',
 'Karnataka',
 'boasts',
 'historical',
 'landmarks',
 'Hampi',
 'Mysore',
 'Palace',
 'lush',
 'Western',
 'Ghats',
 'stunning',
 'beaches',
 'Arabian',
 'Sea',
 'state',
 'vibrant',
 'festivals',
 'classical',
 'music',
 'dance',
 'forms',
 'including',
 'Carnatic',
 'music',
 'Bharatanatyam',
 'reflect',
 'deep',
 'rooted',
 'traditions',
 'Karnataka',
 'major',
 'producer',
 'coffee',
 'silk',
 'contributing',
 'India',
 'economy',
 'cultural',
 'mosaic']

In [10]:
from collections import Counter

In [11]:
words_freq = Counter(tokens)

In [12]:
words_freq


Counter({'karnataka': 3,
         'state': 2,
         'southern': 1,
         'india': 3,
         'renowned': 1,
         'rich': 1,
         'cultural': 2,
         'heritage': 1,
         'diverse': 1,
         'landscapes': 1,
         'economic': 1,
         'prowess': 1,
         'capital': 1,
         'bengaluru': 1,
         'global': 1,
         'tech': 1,
         'hub': 1,
         'dubbed': 1,
         'silicon': 1,
         'valley': 1,
         'boasts': 1,
         'historical': 1,
         'landmarks': 1,
         'like': 1,
         'hampi': 1,
         'mysore': 1,
         'palace': 1,
         'lush': 1,
         'western': 1,
         'ghats': 1,
         'stunning': 1,
         'beaches': 1,
         'arabian': 1,
         'sea': 1,
         'vibrant': 1,
         'festivals': 1,
         'classical': 1,
         'music': 2,
         'dance': 1,
         'forms': 1,
         'including': 1,
         'carnatic': 1,
         'bharatanatyam': 1,
         'reflect': 

In [13]:
max_freq = max(words_freq.values())

In [14]:
max_freq

3

In [15]:
for word in words_freq.keys():
  words_freq[word]=words_freq[word]/max_freq


In [16]:
words_freq

Counter({'karnataka': 1.0,
         'state': 0.6666666666666666,
         'southern': 0.3333333333333333,
         'india': 1.0,
         'renowned': 0.3333333333333333,
         'rich': 0.3333333333333333,
         'cultural': 0.6666666666666666,
         'heritage': 0.3333333333333333,
         'diverse': 0.3333333333333333,
         'landscapes': 0.3333333333333333,
         'economic': 0.3333333333333333,
         'prowess': 0.3333333333333333,
         'capital': 0.3333333333333333,
         'bengaluru': 0.3333333333333333,
         'global': 0.3333333333333333,
         'tech': 0.3333333333333333,
         'hub': 0.3333333333333333,
         'dubbed': 0.3333333333333333,
         'silicon': 0.3333333333333333,
         'valley': 0.3333333333333333,
         'boasts': 0.3333333333333333,
         'historical': 0.3333333333333333,
         'landmarks': 0.3333333333333333,
         'like': 0.3333333333333333,
         'hampi': 0.3333333333333333,
         'mysore': 0.333333333333333

In [17]:
sent_token = [sent.text for sent in doc.sents]

In [18]:
sent_token

['Karnataka, a state in southern India, is renowned for its rich cultural heritage, diverse landscapes, and economic prowess.',
 'Its capital, Bengaluru, is a global tech hub, often dubbed the "Silicon Valley of India."',
 'Karnataka boasts historical landmarks like Hampi and Mysore Palace, lush Western Ghats, and stunning beaches along the Arabian Sea.',
 "The state's vibrant festivals, classical music, and dance forms, including Carnatic music and Bharatanatyam, reflect its deep-rooted traditions.",
 "Additionally, Karnataka is a major producer of coffee and silk, contributing significantly to India's economy and cultural mosaic."]

In [19]:
sent_score = {}
for sent in sent_token:
  for word in sent.split():
    if word.lower() in words_freq.keys():
      if sent not in sent_score.keys():
        sent_score[sent] = words_freq[word]
      else:
        sent_score[sent] += words_freq[word]
      print(word)


state
southern
renowned
rich
cultural
diverse
economic
global
tech
dubbed
Valley
Karnataka
boasts
historical
landmarks
like
Hampi
Mysore
lush
Western
stunning
beaches
Arabian
vibrant
classical
dance
including
Carnatic
music
reflect
Karnataka
major
producer
coffee
contributing
significantly
economy
cultural


In [20]:
sent_score

{'Karnataka, a state in southern India, is renowned for its rich cultural heritage, diverse landscapes, and economic prowess.': 3.0,
 'Its capital, Bengaluru, is a global tech hub, often dubbed the "Silicon Valley of India."': 1.0,
 'Karnataka boasts historical landmarks like Hampi and Mysore Palace, lush Western Ghats, and stunning beaches along the Arabian Sea.': 2.333333333333333,
 "The state's vibrant festivals, classical music, and dance forms, including Carnatic music and Bharatanatyam, reflect its deep-rooted traditions.": 2.3333333333333335,
 "Additionally, Karnataka is a major producer of coffee and silk, contributing significantly to India's economy and cultural mosaic.": 2.6666666666666665}

In [21]:
import pandas as pd

In [22]:
pd.DataFrame(list(sent_score.items()),columns = ['Sentence','Score'])

Unnamed: 0,Sentence,Score
0,"Karnataka, a state in southern India, is renow...",3.0
1,"Its capital, Bengaluru, is a global tech hub, ...",1.0
2,Karnataka boasts historical landmarks like Ham...,2.333333
3,"The state's vibrant festivals, classical music...",2.333333
4,"Additionally, Karnataka is a major producer of...",2.666667


In [23]:
from heapq import nlargest

In [24]:
num_sentences = 3
n = nlargest(num_sentences, sent_score, key=sent_score.get)

In [25]:
"".join(n)

"Karnataka, a state in southern India, is renowned for its rich cultural heritage, diverse landscapes, and economic prowess.Additionally, Karnataka is a major producer of coffee and silk, contributing significantly to India's economy and cultural mosaic.The state's vibrant festivals, classical music, and dance forms, including Carnatic music and Bharatanatyam, reflect its deep-rooted traditions."

In [26]:
from transformers import pipeline

In [27]:
summarizer = pipeline("summarization",model='t5-base',tokenizer='t5-base',framework='pt')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

In [28]:
text = "Karnataka, situated in southwest India, is celebrated for its cultural richness, diverse geography, and technological prowess. Bengaluru, its capital, is a global IT hub, earning the moniker Silicon Valley of India. The state boasts a rich historical legacy with dynasties like the Chalukyas, Hoysalas, and Vijayanagara Empire leaving indelible marks in architecture, exemplified by Hampi's UNESCO World Heritage site. Karnataka's landscapes range from the verdant Western Ghats and wildlife sanctuaries to its picturesque coastline along the Arabian Sea. It is renowned for its vibrant traditions, delectable cuisine like Bisi Bele Bath, and contributions to classical arts like Carnatic music and Bharatanatyam."

In [29]:
summary=summarizer( text, max_length = 100, min_length=10,do_sample = False)

In [30]:
summary

[{'summary_text': 'Bengaluru is a global IT hub, earning the moniker Silicon Valley of India . the state boasts a rich historical legacy with dynasties like the Chalukyas and Hoysalas . it is renowned for its vibrant traditions, delectable cuisine and contributions to classical arts .'}]

In [31]:
print(summary[0]['summary_text'])

Bengaluru is a global IT hub, earning the moniker Silicon Valley of India . the state boasts a rich historical legacy with dynasties like the Chalukyas and Hoysalas . it is renowned for its vibrant traditions, delectable cuisine and contributions to classical arts .


In [32]:
!apt-get install -y xvfb x11-utils


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libfontenc1 libxfont2 libxkbfile1 libxtst6 libxxf86dga1 x11-xkb-utils xfonts-base
  xfonts-encodings xfonts-utils xserver-common
Suggested packages:
  mesa-utils
The following NEW packages will be installed:
  libfontenc1 libxfont2 libxkbfile1 libxtst6 libxxf86dga1 x11-utils x11-xkb-utils xfonts-base
  xfonts-encodings xfonts-utils xserver-common xvfb
0 upgraded, 12 newly installed, 0 to remove and 45 not upgraded.
Need to get 8,045 kB of archives.
After this operation, 12.8 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libfontenc1 amd64 1:1.1.4-1build3 [14.7 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxfont2 amd64 1:2.0.5-1build1 [94.5 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxkbfile1 amd64 1:1.1.0-1build3 [71.8 kB]
Get:4 http://archive.ubuntu.

In [None]:
# Install required packages
!pip install transformers pyvirtualdisplay

# Import necessary libraries
import tkinter as tk
from transformers import pipeline
from IPython.display import display
from tkinter import Scrollbar, Text
from pyvirtualdisplay import Display

# Install Xvfb and other necessary packages
!apt-get install -y xvfb x11-utils

# Define function to summarize text
def summarize_text():
    text = text_entry.get("1.0", "end-1c")
    summary = summarizer(text, max_length=100, min_length=10, do_sample=False)
    output_text.delete("1.0", "end")
    output_text.insert("1.0", summary[0]['summary_text'])

# Set up virtual display using Xvfb
display = Display(visible=0, size=(800, 600))
display.start()

# Create Tkinter window
window = tk.Tk()
window.title("Text Summarization")

# Create text entry widget
text_entry = Text(window, height=10, width=60)
text_entry.pack(pady=10)

# Create summarize button
summarize_button = tk.Button(window, text='Summarize', command=summarize_text)
summarize_button.pack()

# Create output text widget
output_text = Text(window, height=10, width=60)
output_text.pack(pady=10)

# Load summarization model
summarizer = pipeline("summarization", model='t5-base', tokenizer='t5-base', framework='pt')

# Run Tkinter main loop
window.mainloop()

# Stop virtual display
display.stop()


Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)
Installing collected packages: pyvirtualdisplay
Successfully installed pyvirtualdisplay-3.0
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
x11-utils is already the newest version (7.7+5build2).
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.11).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


In [None]:
!pip install transformers pyvirtualdisplay
!apt-get install -y xvfb x11-utils  # Install Xvfb and related utilities


In [None]:
from pyvirtualdisplay import Display

# Set up virtual display using Xvfb
display = Display(visible=0, size=(800, 600))
display.start()


In [None]:
# Install required packages
!pip install transformers pyvirtualdisplay

# Import necessary libraries
import tkinter as tk
from transformers import pipeline
from tkinter import Text
from pyvirtualdisplay import Display

# Install Xvfb and other necessary packages
!apt-get install -y xvfb x11-utils

# Set up virtual display using Xvfb
display = Display(visible=0, size=(800, 600))
display.start()

# Define function to summarize text
def summarize_text():
    text = text_entry.get("1.0", "end-1c")
    summary = summarizer(text, max_length=100, min_length=10, do_sample=False)
    output_text.delete("1.0", "end")
    output_text.insert("1.0", summary[0]['summary_text'])

# Create Tkinter window
window = tk.Tk()
window.title("Text Summarization")

# Create text entry widget
text_entry = Text(window, height=10, width=60)
text_entry.pack(pady=10)

# Create summarize button
summarize_button = tk.Button(window, text='Summarize', command=summarize_text)
summarize_button.pack()

# Create output text widget
output_text = Text(window, height=10, width=60)
output_text.pack(pady=10)

# Load summarization model
summarizer = pipeline("summarization", model='t5-base', tokenizer='t5-base', framework='pt')

# Run Tkinter main loop
window.mainloop()

# Stop virtual display
display.stop()


In [None]:
# Install required packages
!pip install transformers pyvirtualdisplay

# Import necessary libraries
import tkinter as tk
from transformers import pipeline
from tkinter import Text
from pyvirtualdisplay import Display

# Install Xvfb and other necessary packages
!apt-get install -y xvfb x11-utils

# Set up virtual display using Xvfb
display = Display(visible=0, size=(800, 600))
display.start()

# Define function to summarize text
def summarize_text():
    text = text_entry.get("1.0", "end-1c")
    summary = summarizer(text, max_length=100, min_length=10, do_sample=False)
    output_text.delete("1.0", "end")
    output_text.insert("1.0", summary[0]['summary_text'])

# Create Tkinter window
window = tk.Tk()
window.title("Text Summarization")

# Create text entry widget
text_entry = Text(window, height=10, width=60)
text_entry.pack(pady=10)

# Create summarize button
summarize_button = tk.Button(window, text='Summarize', command=summarize_text)
summarize_button.pack()

# Create output text widget
output_text = Text(window, height=10, width=60)
output_text.pack(pady=10)

# Load summarization model
summarizer = pipeline("summarization", model='t5-base', tokenizer='t5-base', framework='pt')


In [None]:
window.mainloop()


In [1]:
# Install necessary packages
!pip install transformers ipywidgets

# Import necessary libraries
from ipywidgets import widgets, Layout
from transformers import pipeline

# Define function to summarize text
def summarize_text(b):
    text = text_entry.value
    summary = summarizer(text, max_length=100, min_length=10, do_sample=False)
    output_text.value = summary[0]['summary_text']

# Create text entry widget
text_entry = widgets.Textarea(
    placeholder='Enter text to summarize...',
    layout=Layout(width='90%', height='200px')
)

# Create summarize button
summarize_button = widgets.Button(
    description='Summarize',
    layout=Layout(width='20%')
)
summarize_button.on_click(summarize_text)

# Create output text widget
output_text = widgets.Textarea(
    placeholder='Summarized text will appear here...',
    layout=Layout(width='90%', height='200px'),
    disabled=True
)

# Display widgets
display(widgets.VBox([text_entry, summarize_button, output_text]))

# Load summarization model
summarizer = pipeline("summarization", model='t5-base', tokenizer='t5-base', framework='pt')


Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi
Successfully installed jedi-0.19.1


VBox(children=(Textarea(value='', layout=Layout(height='200px', width='90%'), placeholder='Enter text to summa…

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
