In [2]:
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.probability import FreqDist
from heapq import nlargest

# Sample passage to summarise. Adjacent string literals are joined by Python
# into a single string; the value starts and ends with a newline.
text = (
    "\n"
    "The Internet of Things (IoT) refers to the billions of physical devices around the world that are now connected to the internet, all collecting and sharing data. "
    "Thanks to the arrival of super-cheap computer chips and the ubiquity of wireless networks, it's possible to turn anything, from something as small as a pill to something as big as an airplane, into a part of the IoT. "
    "Connecting all these different objects and adding sensors to them adds a level of digital intelligence to devices that would be otherwise dumb, enabling them to communicate real-time data without involving a human being. "
    "The Internet of Things is making the fabric of the world around us smarter and more responsive, merging the digital and physical universes."
    "\n"
)

# Preprocess the text
def preprocess_text(text: str) -> str:
    """Normalise raw text before tokenisation.

    Bracketed numeric reference markers such as ``[12]`` (and empty ``[]``)
    are removed, every run of whitespace is collapsed to a single space,
    leading and trailing whitespace is trimmed, and the result is
    lower-cased.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, lower-cased string.
    """
    # Remove reference markers first: each marker is replaced by a space, so
    # the whitespace collapse must run afterwards or gaps are left behind.
    text = re.sub(r'\[[0-9]*\]', ' ', text)
    # Collapse whitespace runs and drop the padding at both ends.
    text = re.sub(r'\s+', ' ', text).strip()
    return text.lower()

# Fraction of the input sentences to keep in the summary.
SUMMARY_RATIO = 0.3

processed_text = preprocess_text(text)

# Tokenize sentences
sentences = sent_tokenize(processed_text)

# Remove stopwords and non-alphabetic tokens (punctuation, numbers)
stop_words = set(stopwords.words('english'))
word_tokens = word_tokenize(processed_text)
filtered_words = [word for word in word_tokens if word not in stop_words and word.isalpha()]

# Calculate word frequencies
word_freq = FreqDist(filtered_words)

# Score each sentence as the sum of the frequencies of its retained words.
# Sentences containing none of those words get no entry, and a sentence that
# occurs more than once accumulates the score of every occurrence.
sentence_scores = {}
for sent in sentences:
    score = sum(word_freq[word] for word in word_tokenize(sent) if word in word_freq)
    if score:
        sentence_scores[sent] = sentence_scores.get(sent, 0) + score

# Keep roughly SUMMARY_RATIO of the sentences, but never fewer than one:
# plain truncation gives 0 for inputs shorter than four sentences, which
# would produce an empty summary.
num_sentences = max(1, int(len(sentences) * SUMMARY_RATIO))
top_sentences = nlargest(num_sentences, sentence_scores, key=sentence_scores.get)

# Emit the chosen sentences in the order they appear in the text rather than
# in descending score order, so the summary reads naturally.
summary_sentences = sorted(top_sentences, key=sentences.index)

# Join the summary sentences
summary = ' '.join(summary_sentences)
print("Summary:")
print(summary)


Summary:
 the internet of things (iot) refers to the billions of physical devices around the world that are now connected to the internet, all collecting and sharing data.
