## Deep Learning Speech Summarization using Transformers

#### Build a Deep Learning Model for Summarization

In [None]:
!pip3 install transformers==4.11.3
!pip3 install summarizer
!pip3 install bert-extractive-summarizer
!pip3 install keybert
!pip3 install keybert[flair]
!pip3 install keybert[gensim]
!pip3 install keybert[spacy]
!pip3 install keybert[use]
!pip3 install spacy
!pip3 install "gensim==3.8.3"

In [2]:
from summarizer import Summarizer,TransformerSummarizer

In [None]:
# Load the complete source text into `body`.
# read() is used rather than readline(): readline() stops at the first newline,
# so any text after the first line of the file would be silently dropped.
# The encoding is explicit so the result does not depend on the platform default.
with open('/Users/sharath/Desktop/ST_Engg_Project/Data/data.txt', encoding='utf-8') as f:
    body = f.read()
print(body)

#### Count the total number of characters in the text

In [None]:
# Total length of the loaded text in characters (whitespace and punctuation included).
print(f"{len(body)}")

#### Count the total number of words in the text

In [5]:
# A "word" here is any whitespace-delimited token of the text.
words = sum(1 for _ in body.split())
print(words)

4827


#### Count the total number of sentences in the text

In [6]:
# Sentence count is approximated by the number of '.' characters in the text:
# splitting on '.' yields one more piece than there are periods.
sentences = len(body.split('.')) - 1
print(sentences)

319


In [None]:
# BERT-based summary of `body`, sized with ratio=0.1.
# NOTE(review): ratio is presumably the fraction of sentences to keep - confirm
# against the bert-extractive-summarizer documentation.
bert_model = Summarizer()
BERT_summary = ''.join(
    bert_model(
        body,
        ratio=0.1,
    )
)
print(BERT_summary)

In [None]:
# BERT-based summary of `body`, sized with num_sentences=16 instead of a ratio.
# A fresh Summarizer is built so this cell does not rely on an earlier one.
bert_model = Summarizer()
BERT_summary = ''.join(
    bert_model(
        body,
        num_sentences=16,
    )
)
print(BERT_summary)

In [None]:
# Summary of `body` using the "gpt2-medium" checkpoint, sized with ratio=0.1.
GPT2_model = TransformerSummarizer(
    transformer_type="GPT2",
    transformer_model_key="gpt2-medium",
)
GPT2_summary = ''.join(
    GPT2_model(body, ratio=0.1)
)
print(GPT2_summary)

In [None]:
# Summary of `body` using the "gpt2-medium" checkpoint, sized with num_sentences=16.
GPT2_model = TransformerSummarizer(
    transformer_type="GPT2",
    transformer_model_key="gpt2-medium",
)
GPT2_summary = ''.join(
    GPT2_model(body, num_sentences=16)
)
print(GPT2_summary)

In [None]:
# Summary of `body` using the "xlnet-base-cased" checkpoint, sized with ratio=0.1.
model = TransformerSummarizer(
    transformer_type="XLNet",
    transformer_model_key="xlnet-base-cased",
)
XLNET_summary = ''.join(
    model(body, ratio=0.1)
)
print(XLNET_summary)

In [None]:
# Summary of `body` using the "xlnet-base-cased" checkpoint, sized with num_sentences=16.
model = TransformerSummarizer(
    transformer_type="XLNet",
    transformer_model_key="xlnet-base-cased",
)
XLNET_summary = ''.join(
    model(body, num_sentences=16)
)
print(XLNET_summary)

### Machine Learning Model for Summarization

In [13]:
from gensim.summarization.summarizer import summarize
from gensim.summarization import keywords

In [14]:
def get_summary(text, pct):
    """Get the summary of the text.

    Args:
        text: The document to summarize.
        pct: Value forwarded to gensim's ``summarize`` as its ``ratio`` argument.

    Returns:
        Whatever ``summarize`` returns when called with ``split=True``
        (a list of the selected sentences, per the gensim documentation).
    """
    return summarize(text, ratio=pct, split=True)

def get_keywords(text):
    """Get the keywords of the text.

    Args:
        text: The document to extract keywords from.

    Returns:
        A list of keyword strings. The list is requested directly from gensim
        (``split=True``) instead of splitting a newline-joined string, so a
        text that yields no keywords gives ``[]`` rather than ``['']``.
    """
    return keywords(
        text,
        ratio=0.1,
        words=None,
        split=True,
        scores=False,
        pos_filter=None,
        lemmatize=False,
        deacc=False,
    )

In [None]:
# Each heading is printed before its result is computed, so the heading shows
# up even while the (slower) extraction call is still running.
print('Printing Summary', '--------------------------', sep='\n')
print(get_summary(body, 0.2))
print('-------------------------', 'Printing Keywords', '--------------------------', sep='\n')
print(get_keywords(body))

In [16]:
from keybert import KeyBERT

kw_model = KeyBERT()
# Stored under its own name: binding the result to `keywords` would replace the
# gensim `keywords` function that get_keywords() looks up at call time, making
# any later call to get_keywords() fail with "'list' object is not callable".
keybert_keywords = kw_model.extract_keywords(body)

In [17]:
# Single-word keyphrases only; no stop-word list is applied (stop_words=None).
kw_model.extract_keywords(
    body,
    keyphrase_ngram_range=(1, 1),
    stop_words=None,
)

[('earnings', 0.3934),
 ('ibm', 0.3764),
 ('securities', 0.3513),
 ('sec', 0.3432),
 ('shareholders', 0.2969)]

In [18]:
# Keyphrases of one or two words; no stop-word list is applied (stop_words=None).
kw_model.extract_keywords(
    body,
    keyphrase_ngram_range=(1, 2),
    stop_words=None,
)

[('earnings presentation', 0.4919),
 ('2019 earnings', 0.4866),
 ('sec filings', 0.4683),
 ('ibm investor', 0.4623),
 ('operating earnings', 0.4476)]

In [19]:
# Keyphrases of one to three words; no stop-word list is applied (stop_words=None).
kw_model.extract_keywords(
    body,
    keyphrase_ngram_range=(1, 3),
    stop_words=None,
)

[('2019 earnings presentation', 0.5533),
 ('murphy with ibm', 0.4967),
 ('the ibm investor', 0.4943),
 ('earnings presentation here', 0.4925),
 ('earnings presentation', 0.4919)]

In [20]:
# Run extraction with highlight=True. The result is kept under its own name so
# it does not overwrite the gensim `keywords` function that get_keywords()
# depends on.
highlighted_keywords = kw_model.extract_keywords(body, highlight=True)

In [21]:
# Three-word keyphrases with the 'english' stop-word list, use_maxsum enabled:
# top_n=10 results are requested from nr_candidates=20 candidates.
kw_model.extract_keywords(
    body,
    keyphrase_ngram_range=(3, 3),
    stop_words='english',
    use_maxsum=True,
    nr_candidates=20,
    top_n=10,
)


[('1995 statements involve', 0.415),
 ('issue earnings share', 0.4153),
 ('ibm want welcome', 0.4169),
 ('reporting 2019 updated', 0.4265),
 ('securities litigation reform', 0.46),
 ('company sec filings', 0.4629),
 ('murphy ibm ma', 0.4652),
 ('quarter 2019 earnings', 0.4762),
 ('patricia murphy ibm', 0.4981),
 ('earnings presentation jim', 0.5561)]

In [22]:
# Three-word keyphrases with the 'english' stop-word list, use_mmr enabled with
# diversity=0.8; top_n=10 results are requested.
kw_model.extract_keywords(
    body,
    keyphrase_ngram_range=(3, 3),
    stop_words='english',
    use_mmr=True,
    diversity=0.8,
    top_n=10,
)


[('earnings presentation jim', 0.5561),
 ('europe hybrid cloud', -0.0045),
 ('regulatory process continue', 0.1805),
 ('non gap measures', 0.1978),
 ('arrays performance reflects', 0.0837),
 ('reshaping business address', 0.0736),
 ('pay debt red', 0.0417),
 ('cavanaugh ibm senior', 0.3487),
 ('miss patricia murphy', 0.2805),
 ('remind recently changes', 0.1393)]

### [Refer to this link to learn more about KeyBERT](https://pypi.org/project/keybert/)