In [1]:
# Importing necessary libraries
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

In [2]:
# Load the spaCy pipeline named "en_core_web_sm"; `nlp` is the callable
# used in the cells below to turn raw strings into processed documents.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Run a short sample sentence through the pipeline; the next few cells
# inspect the tokens of the resulting `doc`.
doc = nlp("data science and ai has great career ahead")

In [4]:
# Print the surface text of every token in the document, one per line
for tok in doc:
    print(tok.text)

data
science
and
ai
has
great
career
ahead


In [5]:
# Per-token analysis, printed space-separated in this order:
# text, lemma, part-of-speech, tag, dependency label, shape, is_alpha, is_stop
for tok in doc:
    attributes = (
        tok.text, tok.lemma_, tok.pos_, tok.tag_, tok.dep_,
        tok.shape_, tok.is_alpha, tok.is_stop,
    )
    print(*attributes)

data datum NOUN NNS compound xxxx True False
science science NOUN NN ROOT xxxx True False
and and CCONJ CC cc xxx True True
ai ai AUX VBP aux xx True False
has have VERB VBZ conj xxx True True
great great ADJ JJ amod xxxx True False
career career NOUN NN dobj xxxx True False
ahead ahead ADV RB advmod xxxx True False


In [6]:
# Print just the part-of-speech label of each token, one per line
for tok in doc:
    print(tok.pos_)

NOUN
NOUN
CCONJ
AUX
VERB
ADJ
NOUN
ADV


In [7]:
# Sample passage that the rest of the notebook summarizes.
# The line breaks (and any trailing spaces) inside the triple-quoted literal
# are part of the string, so they reach the pipeline as-is.
text = """Text summarization involves condensing a given piece of text while retaining its key information 
and meaning. It is a critical component in natural language processing and aids in extracting the most 
relevant content from lengthy documents. Techniques for text summarization encompass various approaches, 
such as extractive and abstractive summarization. Extractive summarization involves selecting and 
combining essential sentences directly from the original text, while abstractive summarization generates
concise and coherent summaries by rephrasing and interpreting the content. This process is particularly
valuable for handling vast amounts of data, enhancing document understanding, and facilitating quicker 
comprehension. Employing advanced technologies, including machine learning and natural language 
processing models like OpenAI's GPT, text summarization contributes to efficient information retrieval 
and aids in decision-making processes across diverse domains."""

In [8]:
# Process the sample passage with the loaded pipeline.
# NOTE: this rebinds `doc`, replacing the short demo document created earlier;
# every cell below works on the passage, not on the demo sentence.
doc = nlp(text)

In [9]:
# Build the two filters used when counting words:
#   - `stopwords`: spaCy's English stop words, as a list
#   - `punctuation`: the ASCII punctuation characters plus the newline character,
#     so that line-break tokens from the multi-line text are filtered out too.
stopwords = list(STOP_WORDS)
# The original self-assignment was a no-op. The guard keeps this cell idempotent
# when it is re-run (the newline is appended at most once).
if "\n" not in punctuation:
    punctuation = punctuation + "\n"

In [10]:
# Count how often each content word occurs in the document.
# Keys are lower-cased because the sentence-scoring cell looks words up with
# `word.text.lower()`; keying by the original casing would make every
# capitalised word (sentence starts, proper nouns) invisible to the scorer.
word_frequencies = {}
for word in doc:
    token_text = word.text.lower()
    # Whitespace-only tokens (e.g. the line breaks in the passage) are not words
    # and would otherwise be counted, potentially as the most frequent "word".
    if word.is_space or token_text in stopwords or token_text in punctuation:
        continue
    word_frequencies[token_text] = word_frequencies.get(token_text, 0) + 1

In [11]:
# Normalize the counts by dividing each by the largest count, so the most
# frequent word gets weight 1.0 and every other word a fraction of that.
# `default=1` avoids a ValueError when no word survived the filtering step.
max_frequency = max(word_frequencies.values(), default=1)
# Only existing keys are reassigned, so updating during iteration is safe.
for term, count in word_frequencies.items():
    word_frequencies[term] = count / max_frequency

In [12]:
# Materialize the document's sentence spans into a list
sentence_tokens = list(doc.sents)

In [13]:
# Score each sentence as the sum of the frequencies of the words it contains.
# Words are looked up by their lower-cased text; tokens without an entry in
# `word_frequencies` add nothing, and a sentence with no matching token
# gets no entry in `sentence_scores` at all.
sentence_scores = {}
for sentence in sentence_tokens:
    for token in sentence:
        weight = word_frequencies.get(token.text.lower())
        if weight is not None:
            sentence_scores[sentence] = sentence_scores.get(sentence, 0) + weight

In [14]:
# Select the top 40% of sentences by score.
# The count is clamped to at least one: with fewer than three sentences,
# int(n * 0.4) is 0 and the summary would otherwise always be empty.
SUMMARY_RATIO = 0.4
select_length = max(1, int(len(sentence_tokens) * SUMMARY_RATIO))
# nlargest returns the sentences ordered by descending score, not in document order.
summary = nlargest(select_length, sentence_scores, key=sentence_scores.get)

In [15]:
# Convert each selected sentence span to its plain text; the result is a
# list of sentence strings (one entry per selected sentence).
final_summary = [sentence.text for sentence in summary]

In [16]:
# Display the summary as a list of plain sentence strings
final_summary

['Extractive summarization involves selecting and \ncombining essential sentences directly from the original text, while abstractive summarization generates\nconcise and coherent summaries by rephrasing and interpreting the content.',
 "Employing advanced technologies, including machine learning and natural language \nprocessing models like OpenAI's GPT, text summarization contributes to efficient information retrieval \nand aids in decision-making processes across diverse domains."]

In [18]:
# Display the selected sentence spans themselves (before conversion to strings)
summary

[Extractive summarization involves selecting and 
 combining essential sentences directly from the original text, while abstractive summarization generates
 concise and coherent summaries by rephrasing and interpreting the content.,
 Employing advanced technologies, including machine learning and natural language 
 processing models like OpenAI's GPT, text summarization contributes to efficient information retrieval 
 and aids in decision-making processes across diverse domains.]