# In [20]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import networkx as nx
import scipy.sparse as sp

def preprocess_text(text):
    """Turn raw text into one list of stemmed, stopword-free tokens per sentence.

    Args:
        text: The raw input string.

    Returns:
        A list with one entry per sentence found by ``sent_tokenize``; each
        entry is the list of Porter-stemmed tokens of that sentence whose
        lowercase form is not an English stopword.
    """
    # Sentence split first, then word-level tokens for every sentence
    tokenized = list(map(word_tokenize, sent_tokenize(text)))

    ignored = set(stopwords.words("english"))
    stemmer = PorterStemmer()

    processed = []
    for tokens in tokenized:
        kept = []
        for token in tokens:
            # The stopword check is done on the lowercased token
            if token.lower() in ignored:
                continue
            kept.append(stemmer.stem(token))
        processed.append(kept)

    return processed

def get_sentence_similarity_matrix(sentences):
    """Compute pairwise cosine similarity between sentences using TF-IDF.

    Args:
        sentences: A list of sentences, each given as a list of token strings
            (the shape produced by ``preprocess_text``).

    Returns:
        A square matrix with one row and one column per sentence, where entry
        ``[i][j]`` is the cosine similarity between the TF-IDF vectors of
        sentence ``i`` and sentence ``j``.

    Raises:
        ValueError: Propagated from ``TfidfVectorizer`` when no sentence
            yields any term it can index (e.g. an empty ``sentences`` list).
    """
    # One TF-IDF document per sentence. Flattening all sentences into a list
    # of single words would make every *word* its own document and yield a
    # word-by-word matrix whose indices do not correspond to sentences.
    documents = [' '.join(sentence) for sentence in sentences]

    tfidf_matrix = TfidfVectorizer().fit_transform(documents)

    # Rows of tfidf_matrix are sentences, so this is sentence-vs-sentence
    return cosine_similarity(tfidf_matrix, tfidf_matrix)

def summarize_text(text, num_sentences=3):
    """Build an extractive summary of *text* by ranking sentences with PageRank.

    A graph is built from the sentence similarity matrix, PageRank scores the
    nodes, and the highest-scoring sentences are joined into the summary.

    Args:
        text: The raw input string to summarize.
        num_sentences: Maximum number of sentences to include (default 3).

    Returns:
        The selected original sentences, highest score first, joined by single
        spaces. An empty string is returned when *text* contains no sentences.
    """
    # Keep the untouched sentences: the summary is assembled from these rather
    # than from the stemmed, stopword-stripped tokens, which are unreadable.
    original_sentences = sent_tokenize(text)
    if not original_sentences:
        return ''

    # preprocess_text yields one token list per sent_tokenize sentence, so
    # index i below refers to the same sentence in both lists.
    preprocessed_text = preprocess_text(text)

    similarity_matrix = get_sentence_similarity_matrix(preprocessed_text)

    # Nodes of the graph are the row/column indices of the similarity matrix
    graph = nx.from_numpy_array(similarity_matrix)

    # NOTE: an "AttributeError: module 'scipy.sparse' has no attribute
    # 'coo_array'" around here most likely means the installed SciPy is too
    # old for the installed NetworkX (coo_array arrived in SciPy 1.8);
    # upgrading SciPy is the expected remedy.
    scores = nx.pagerank(graph)

    # Rank sentence indices by score; sorting indices (instead of
    # (score, tokens) tuples) avoids comparing token lists on tied scores.
    ranked_indices = sorted(
        range(len(original_sentences)),
        key=lambda index: scores[index],
        reverse=True,
    )

    return ' '.join(original_sentences[index] for index in ranked_indices[:num_sentences])

# Example usage: summarize a local text file and print the result
file_path = r'C:\Users\tando\Downloads\filee.txt'
encoding = 'utf-8'  # Switch to another codec if decoding the file fails

# Text read mode is open()'s default, so only the encoding is passed
with open(file_path, encoding=encoding) as file:
    text = file.read()

summary = summarize_text(text)

# Heading and summary are written on separate lines
print("Summary:", summary, sep="\n")


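# Error raised when running the cell above:
# AttributeError: module 'scipy.sparse' has no attribute 'coo_array'
# Likely cause (to confirm against installed versions): the installed SciPy
# predates `scipy.sparse.coo_array` (added in SciPy 1.8), which the installed
# NetworkX relies on; upgrading SciPy should resolve it.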