In [2]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def text_summarizer(text, num_sentences=5):
    # Load the English language model
    nlp = spacy.load("en_core_web_sm")

    # Process the input text
    doc = nlp(text)

    # Tokenize the sentences
    sentences = [sent.text for sent in doc.sents]

    # Remove stop words and create a document matrix
    cv = CountVectorizer(stop_words=list(STOP_WORDS))
    cv_matrix = cv.fit_transform(sentences)

    # Calculate cosine similarity between sentences
    similarity_matrix = cosine_similarity(cv_matrix, cv_matrix)

    # Rank sentences based on similarity
    sentence_ranks = sorted(range(len(sentences)), key=lambda x: sum(similarity_matrix[x]), reverse=True)

    # Select top sentences for the summary
    summary_sentences = [sentences[idx] for idx in sentence_ranks[:num_sentences]]

    # Combine the selected sentences to form the summary
    summary = ' '.join(summary_sentences)

    return summary

# Example usage
input_text = """
Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction
between computers and humans through natural language. It involves the development of algorithms and
models that enable computers to understand, interpret, and generate human-like text. NLP has applications
in various domains, including machine translation, sentiment analysis, and text summarization.

Text summarization is the process of condensing a piece of text while retaining its essential information.
There are two main approaches to text summarization: extractive and abstractive. Extractive summarization
involves selecting important sentences from the original text, while abstractive summarization involves
generating new sentences that capture the key information.

In this example, we will implement a basic extractive text summarization using spaCy, a popular NLP library in Python.
"""

summary = text_summarizer(input_text)
print("Original Text:\n", input_text)
print("\nSummary:\n", summary)


Original Text:
 
hehe Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction
between computers and humans through natural language. It involves the development of algorithms and
models that enable computers to understand, interpret, and generate human-like text. NLP has applications
in various domains, including machine translation, sentiment analysis, and text summarization.

Text summarization is the process of condensing a piece of text while retaining its essential information.
There are two main approaches to text summarization: extractive and abstractive. Extractive summarization
involves selecting important sentences from the original text, while abstractive summarization involves
generating new sentences that capture the key information.

In this example, we will implement a basic extractive text summarization using spaCy, a popular NLP library in Python.


Summary:
 There are two main approaches to text summarization: extractive

In [8]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def text_summarizer(text, num_sentences=5):
    # Load the English language model
    nlp = spacy.load("en_core_web_sm")

    # Process the input text
    doc = nlp(text)

    # Tokenize the sentences
    sentences = [sent.text for sent in doc.sents]

    # Remove stop words and create a document matrix
    cv = CountVectorizer(stop_words=list(STOP_WORDS))
    cv_matrix = cv.fit_transform(sentences)

    # Calculate cosine similarity between sentences
    similarity_matrix = cosine_similarity(cv_matrix, cv_matrix)

    # Rank sentences based on similarity
    sentence_ranks = sorted(range(len(sentences)), key=lambda x: sum(similarity_matrix[x]), reverse=True)

    # Select top sentences for the summary
    summary_sentences = [sentences[idx] for idx in sentence_ranks[:num_sentences]]

    # Combine the selected sentences to form the summary
    summary = ' '.join(summary_sentences)

    return summary

def summarize_from_file(input_file_path, output_file_path, num_sentences=5):
    # Read the content of the input text file
    with open(input_file_path, 'r', encoding='utf-8') as file:
        file_content = file.read()

    # Call the text_summarizer function with the file content
    summary = text_summarizer(file_content, num_sentences)

    # Write the summary to the output text file
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.write(summary)

# Example usage
input_file_path = "/Users/mubaraq/Desktop/textsumpython.txt"  # Replace with the path to your input text file
output_file_path = "/Users/mubaraq/Desktop/outputtextpython.txt"  # Replace with the desired path for the output summary file

summarize_from_file(input_file_path, output_file_path)
