In [30]:
!pip install nltk networkx scikit-learn



In [31]:
import nltk
import numpy as np
import networkx as nx
import re
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

# Fetch the NLTK data used below: the sentence-tokenizer models and the
# stop-word lists. Recent NLTK releases load sent_tokenize() models from the
# 'punkt_tab' resource rather than 'punkt', so both are requested to keep the
# notebook working regardless of which NLTK version pip installed.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

def summarize_text(text, summary_length=2):
    """Return an extractive summary of *text* using TextRank-style ranking.

    The text is split into sentences, each sentence is converted to a TF-IDF
    vector, and PageRank is run over the sentence-similarity graph. The
    highest-scoring sentences are returned in their original order.

    Args:
        text: The passage to summarize.
        summary_length: Maximum number of sentences in the summary. Values
            larger than the number of sentences are capped; zero or negative
            values yield an empty summary.

    Returns:
        The selected sentences joined by single spaces, or an empty string
        when there is nothing to summarize.
    """
    text = re.sub(r'\s+', ' ', text)

    # Guard clauses: blank input would otherwise reach TfidfVectorizer and
    # raise "empty vocabulary", and a negative length would slice from the
    # wrong end of the ranking and return nearly every sentence.
    if not text.strip() or summary_length <= 0:
        return ""

    sentences = sent_tokenize(text)
    if not sentences:
        return ""

    # A set gives O(1) membership tests in the per-word filter below.
    stop_words = set(stopwords.words('english'))

    def preprocess(sentence):
        """Keep letters only, lower-case, and drop stop words."""
        letters_only = re.sub(r'[^a-zA-Z]', ' ', sentence)
        return " ".join(
            word for word in letters_only.lower().split()
            if word not in stop_words
        )

    clean_sentences = [preprocess(sentence) for sentence in sentences]
    summary_length = min(summary_length, len(sentences))

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(clean_sentences)
    except ValueError:
        # scikit-learn raises ValueError when no sentence contributes a
        # usable token (e.g. the text is only stop words or digits). There
        # is nothing to rank on, so fall back to the leading sentences.
        return " ".join(sentences[:summary_length])

    # Pairwise sentence similarity as a dense matrix. `@` is a matrix product
    # for both scipy sparse matrices and sparse arrays, whereas `*` is
    # element-wise on the latter.
    similarity_matrix = (tfidf_matrix @ tfidf_matrix.T).toarray()

    scores = nx.pagerank(nx.from_numpy_array(similarity_matrix))

    # Highest score first; ties are broken by the later sentence index.
    top_indices = sorted(
        range(len(sentences)),
        key=lambda i: (scores[i], i),
        reverse=True,
    )[:summary_length]

    # Emit the chosen sentences in document order so the summary reads
    # naturally.
    return " ".join(sentences[i] for i in sorted(top_indices))


# ---- USER INPUT ----
text = input("Enter your paragraph:\n")

# A non-numeric answer makes int() raise ValueError and would abort the cell
# with a traceback; fall back to the summarizer's default of two sentences.
try:
    length = int(input("How many sentences should the summary contain? "))
except ValueError:
    length = 2
    print("Invalid number; using 2 sentences.")

print("\nSummary:\n")
print(summarize_text(text, length))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Enter your paragraph:
Artificial Intelligence is rapidly transforming industries across the world.  It is being used in healthcare, finance, education, and transportation.  Machine learning, a subset of AI, allows computers to learn from data without being explicitly programmed.  Deep learning has further enhanced AI capabilities by enabling neural networks to process complex patterns.  However, AI systems require large amounts of data to function effectively.  There are also concerns about data privacy and algorithmic bias.  Researchers are working to make AI systems more transparent and ethical.
How many sentences should the summary contain? 2

Summary:

Machine learning, a subset of AI, allows computers to learn from data without being explicitly programmed. However, AI systems require large amounts of data to function effectively.


**Sample Input:**

Artificial Intelligence is rapidly transforming industries across the world.
It is being used in healthcare, finance, education, and transportation.
Machine learning, a subset of AI, allows computers to learn from data without being explicitly programmed.
Deep learning has further enhanced AI capabilities by enabling neural networks to process complex patterns.
However, AI systems require large amounts of data to function effectively.
There are also concerns about data privacy and algorithmic bias.
Researchers are working to make AI systems more transparent and ethical.