In [None]:
# Text summerization using spacy

In [1]:
text = """
Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction 
between computers and humans using natural language. The ultimate objective of NLP is to enable computers 
to understand, interpret, and generate human-like text. In recent years, NLP has seen tremendous advancements 
with the development of deep learning models and large-scale language models like GPT-3. These models have 
revolutionized various NLP tasks, including machine translation, sentiment analysis, and text summarization.
"""

In [2]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [3]:
stopwords = list(STOP_WORDS)
stopwords

['formerly',
 'just',
 'the',
 'show',
 'meanwhile',
 'several',
 '’d',
 '’m',
 "'re",
 'may',
 'will',
 'was',
 'yours',
 'mine',
 'much',
 'back',
 'him',
 'due',
 'perhaps',
 'per',
 'make',
 'one',
 "'d",
 'somewhere',
 'becoming',
 'throughout',
 'between',
 'take',
 'thereafter',
 'out',
 'down',
 'therefore',
 'amount',
 'someone',
 'besides',
 'even',
 'twenty',
 'most',
 'call',
 'about',
 'third',
 'this',
 'them',
 'go',
 'front',
 'because',
 'though',
 'via',
 'further',
 'wherever',
 'did',
 'quite',
 'afterwards',
 'became',
 'towards',
 'get',
 'an',
 'how',
 'ourselves',
 'ten',
 'thereupon',
 "'s",
 'along',
 'during',
 'something',
 "'m",
 'were',
 'made',
 'should',
 '’ll',
 '’s',
 'herein',
 'sometime',
 'rather',
 'whenever',
 'each',
 'others',
 'please',
 'hers',
 'but',
 'become',
 'seems',
 'anything',
 'than',
 'either',
 'am',
 'where',
 'part',
 'seem',
 'of',
 'top',
 'almost',
 'beyond',
 'four',
 'every',
 'indeed',
 'hereupon',
 'you',
 'ca',
 'hereafte

In [4]:
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [None]:
# Load the pretrained english language model

In [5]:
nlp = spacy.load('en_core_web_sm')

In [6]:
doc = nlp(text)

In [7]:
doc


Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction 
between computers and humans using natural language. The ultimate objective of NLP is to enable computers 
to understand, interpret, and generate human-like text. In recent years, NLP has seen tremendous advancements 
with the development of deep learning models and large-scale language models like GPT-3. These models have 
revolutionized various NLP tasks, including machine translation, sentiment analysis, and text summarization.

In [8]:
tokens = [token.text for token in doc]

In [9]:
tokens

['\n',
 'Natural',
 'language',
 'processing',
 '(',
 'NLP',
 ')',
 'is',
 'a',
 'field',
 'of',
 'artificial',
 'intelligence',
 'that',
 'focuses',
 'on',
 'the',
 'interaction',
 '\n',
 'between',
 'computers',
 'and',
 'humans',
 'using',
 'natural',
 'language',
 '.',
 'The',
 'ultimate',
 'objective',
 'of',
 'NLP',
 'is',
 'to',
 'enable',
 'computers',
 '\n',
 'to',
 'understand',
 ',',
 'interpret',
 ',',
 'and',
 'generate',
 'human',
 '-',
 'like',
 'text',
 '.',
 'In',
 'recent',
 'years',
 ',',
 'NLP',
 'has',
 'seen',
 'tremendous',
 'advancements',
 '\n',
 'with',
 'the',
 'development',
 'of',
 'deep',
 'learning',
 'models',
 'and',
 'large',
 '-',
 'scale',
 'language',
 'models',
 'like',
 'GPT-3',
 '.',
 'These',
 'models',
 'have',
 '\n',
 'revolutionized',
 'various',
 'NLP',
 'tasks',
 ',',
 'including',
 'machine',
 'translation',
 ',',
 'sentiment',
 'analysis',
 ',',
 'and',
 'text',
 'summarization',
 '.',
 '\n']

In [10]:
punctuation = punctuation + '\n'

In [11]:
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n'

In [None]:
# find out the word frequency

In [12]:
word_frequencies = {}
for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] += 1

In [13]:
word_frequencies

{'Natural': 1,
 'language': 3,
 'processing': 1,
 'NLP': 4,
 'field': 1,
 'artificial': 1,
 'intelligence': 1,
 'focuses': 1,
 'interaction': 1,
 'computers': 2,
 'humans': 1,
 'natural': 1,
 'ultimate': 1,
 'objective': 1,
 'enable': 1,
 'understand': 1,
 'interpret': 1,
 'generate': 1,
 'human': 1,
 'like': 2,
 'text': 2,
 'recent': 1,
 'years': 1,
 'seen': 1,
 'tremendous': 1,
 'advancements': 1,
 'development': 1,
 'deep': 1,
 'learning': 1,
 'models': 3,
 'large': 1,
 'scale': 1,
 'GPT-3': 1,
 'revolutionized': 1,
 'tasks': 1,
 'including': 1,
 'machine': 1,
 'translation': 1,
 'sentiment': 1,
 'analysis': 1,
 'summarization': 1}

In [14]:
max_frequency = max(word_frequencies.values())

In [15]:
max_frequency

4

In [None]:
# Normalize the frequency

In [16]:
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word]/max_frequency

In [17]:
word_frequencies

{'Natural': 0.25,
 'language': 0.75,
 'processing': 0.25,
 'NLP': 1.0,
 'field': 0.25,
 'artificial': 0.25,
 'intelligence': 0.25,
 'focuses': 0.25,
 'interaction': 0.25,
 'computers': 0.5,
 'humans': 0.25,
 'natural': 0.25,
 'ultimate': 0.25,
 'objective': 0.25,
 'enable': 0.25,
 'understand': 0.25,
 'interpret': 0.25,
 'generate': 0.25,
 'human': 0.25,
 'like': 0.5,
 'text': 0.5,
 'recent': 0.25,
 'years': 0.25,
 'seen': 0.25,
 'tremendous': 0.25,
 'advancements': 0.25,
 'development': 0.25,
 'deep': 0.25,
 'learning': 0.25,
 'models': 0.75,
 'large': 0.25,
 'scale': 0.25,
 'GPT-3': 0.25,
 'revolutionized': 0.25,
 'tasks': 0.25,
 'including': 0.25,
 'machine': 0.25,
 'translation': 0.25,
 'sentiment': 0.25,
 'analysis': 0.25,
 'summarization': 0.25}

In [None]:
# sentence token

In [18]:
sentence_tokens = [sent for sent in doc.sents]

In [19]:
sentence_tokens

[
 Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction 
 between computers and humans using natural language.,
 The ultimate objective of NLP is to enable computers 
 to understand, interpret, and generate human-like text.,
 In recent years, NLP has seen tremendous advancements 
 with the development of deep learning models and large-scale language models like GPT-3.,
 These models have 
 revolutionized various NLP tasks, including machine translation, sentiment analysis, and text summarization.]

In [20]:
sentence_score = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in sentence_score.keys():
                sentence_score[sent] = word_frequencies[word.text.lower()]
            else:
                sentence_score[sent] += word_frequencies[word.text.lower()]

In [21]:
sentence_score

{
 Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction 
 between computers and humans using natural language.: 4.25,
 The ultimate objective of NLP is to enable computers 
 to understand, interpret, and generate human-like text.: 3.25,
 In recent years, NLP has seen tremendous advancements 
 with the development of deep learning models and large-scale language models like GPT-3.: 5.25,
 These models have 
 revolutionized various NLP tasks, including machine translation, sentiment analysis, and text summarization.: 3.25}

In [22]:
from heapq import nlargest

In [49]:
select_length = int(len(sentence_score)*0.6)

In [50]:
select_length

2

In [51]:
summary = nlargest(select_length,sentence_score,key =  sentence_score.get)

In [52]:
summary

[In recent years, NLP has seen tremendous advancements 
 with the development of deep learning models and large-scale language models like GPT-3.,
 
 Natural language processing (NLP) is a field of artificial intelligence that focuses on the interaction 
 between computers and humans using natural language.]

In [54]:
final_summary = [word.text for word in summary]

In [55]:
final_summary

['In recent years, NLP has seen tremendous advancements \nwith the development of deep learning models and large-scale language models like GPT-3.',
 '\nNatural language processing (NLP) is a field of artificial intelligence that focuses on the interaction \nbetween computers and humans using natural language.']

In [56]:
summary = " ".join(final_summary)

In [57]:
summary

'In recent years, NLP has seen tremendous advancements \nwith the development of deep learning models and large-scale language models like GPT-3. \nNatural language processing (NLP) is a field of artificial intelligence that focuses on the interaction \nbetween computers and humans using natural language.'

In [58]:
print('word count original: ',len(text))
print('after Summarization: ',len(summary))

word count original:  541
after Summarization:  302
