In [69]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import string
import spacy

from matplotlib.pyplot import imread
from wordcloud import WordCloud, STOPWORDS
%matplotlib inline

### Loading the dataset

In [70]:
df=pd.read_csv("Wings-Of-Fire-converted.csv")

In [71]:
df

Unnamed: 0,"ACHIEVERS I started my work at NASA at the Langley Research Centre (LRC) in Hampton, Virginia."
0,This is primarily an R&D centre for advanced a...
1,of LRC is of a piece of sculpture depicting a ...
2,research and the other technological developme...
3,interconnection between research and developme...
4,"Centre (GSFC) at Greenbelt, Maryland. This Cen..."
...,...
1386,"them to his own satisfaction. Judgement aside,..."
1387,"without pain. As for my afflicted friends, the..."
1388,may charge me with murder – Or want of sense (...
1389,approach to a false pretence Was never among m...


In [72]:
#Changing the column name
df.columns=['Observations']

In [73]:
df

Unnamed: 0,Observations
0,This is primarily an R&D centre for advanced a...
1,of LRC is of a piece of sculpture depicting a ...
2,research and the other technological developme...
3,interconnection between research and developme...
4,"Centre (GSFC) at Greenbelt, Maryland. This Cen..."
...,...
1386,"them to his own satisfaction. Judgement aside,..."
1387,"without pain. As for my afflicted friends, the..."
1388,may charge me with murder – Or want of sense (...
1389,approach to a false pretence Was never among m...


In [74]:
df.dropna(inplace=True)         #checking for null values
df.isnull().sum()

Observations    0
dtype: int64

In [75]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1391 entries, 0 to 1390
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Observations  1391 non-null   object
dtypes: object(1)
memory usage: 21.7+ KB


# Text Preprocessing

## Converting list into string

In [76]:
df=[Observations.strip() for Observations in df.Observations] # remove both the leading and the trailing characters
df=[Observations for Observations in df if Observations] # removes empty strings, because they are considered in Python as False
df[0:100]

['This is primarily an R&D centre for advanced aerospace technology. One of my most vivid memories',
 'of LRC is of a piece of sculpture depicting a charioteer driving two horses, one representing scientific',
 'research and the other technological development, metaphorically encapsulating the',
 'interconnection between research and development. From LRC I went to the Goddard Space Flight',
 'Centre (GSFC) at Greenbelt, Maryland. This Centre develops and manages most of NASA’s earth-',
 'orbiting science and applications satellites. It operates NASA’s tracking networks for all space',
 'missions. Towards the end of my visit, Iwent to the Wallops Flight Facility at Wallops Island in East',
 'Coast, Virginia. This place was the base for NASA’s sounding rocket programme. Here, I saw a',
 'painting prominently displayed in the reception lobby. It depicted a battle scene with a few rockets',
 'flying in the background. A painting with this theme should be the most commonplace thing at a',


In [77]:
# Joining the list into one string/text
df_Observations=' '.join(df)
df_Observations



## summarization using spacy(auto trained model)

## Tokenization & stopwords removation

In [78]:
import spacy
import en_core_web_sm
 
nlp = en_core_web_sm.load()

 
# Convert text into spacy formatted document
doc = nlp(df_Observations)
 
clean_token = []
for token in doc:
    if not (token.is_stop):
        clean_token.append(token.text)
         
print('Before:-------')
print(doc,'\n')
 
# Join sentence without stop words and print
print('After:-------')
' '.join(clean_token)

Before:-------

After:-------




In [79]:
# Join sentence without stop words and print
print('After:-------')
' '.join(clean_token)

After:-------




In [80]:
df2=' '.join(clean_token)
df2



In [81]:
len(df2)

90903

# Lexrank model

In [82]:
## importing liabaries
import sumy

In [83]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

In [84]:
## Fit our data to the model to summarized
parser=PlaintextParser.from_string(df2,Tokenizer('english'))

In [85]:
parser

<sumy.parsers.plaintext.PlaintextParser at 0x26e879f5c70>

In [86]:
## import model
from sumy.summarizers.lex_rank import LexRankSummarizer

In [87]:
summarizer=LexRankSummarizer()

In [88]:
summary=summarizer(parser.document,70)

## Checking the summarized sentences

In [89]:
for sentence in summary:
    print(sentence)

” realised people world meet problems head .
mother narrated incident Holy Book — God created man , asked angels prostrate Adam .
People work virtually round clock enthusiasm Prof. Sarabhai new , country — new design new method fabrication - - - way administrative procedure .
rocket launch site later blossomed Thumba Equatorial Rocket Launch Station ( TERLS ) .
view , wide - ranging programme scientific technological development rocket fuels , propulsion systems , aeronautics , aerospace materials , advanced fabrication techniques , rocket motor instrumentation , control guidance systems , telemetry , tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad .
distinguishes sounding rocket Satellite Launch Vehicle ( SLV ) missile ?
fact , different kinds rockets .
development rockets resulted fully indigenous capability production sounding rockets propellants .
Prof. Sarabhai took challenge giving physi

### Join the scentences to make a continous summary

In [90]:
final_summary = " ".join(map(str, summary[0:]))

In [91]:
final_summary

'” realised people world meet problems head . mother narrated incident Holy Book — God created man , asked angels prostrate Adam . People work virtually round clock enthusiasm Prof. Sarabhai new , country — new design new method fabrication - - - way administrative procedure . rocket launch site later blossomed Thumba Equatorial Rocket Launch Station ( TERLS ) . view , wide - ranging programme scientific technological development rocket fuels , propulsion systems , aeronautics , aerospace materials , advanced fabrication techniques , rocket motor instrumentation , control guidance systems , telemetry , tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad . distinguishes sounding rocket Satellite Launch Vehicle ( SLV ) missile ? fact , different kinds rockets . development rockets resulted fully indigenous capability production sounding rockets propellants . Prof. Sarabhai took challenge giving phys

### Removing punctuations marks from the summary

In [92]:
import string
translate_table = dict((ord(char), None) for char in string.punctuation)   
final_summary.translate(translate_table)

'” realised people world meet problems head  mother narrated incident Holy Book — God created man  asked angels prostrate Adam  People work virtually round clock enthusiasm Prof Sarabhai new  country — new design new method fabrication    way administrative procedure  rocket launch site later blossomed Thumba Equatorial Rocket Launch Station  TERLS   view  wide  ranging programme scientific technological development rocket fuels  propulsion systems  aeronautics  aerospace materials  advanced fabrication techniques  rocket motor instrumentation  control guidance systems  telemetry  tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad  distinguishes sounding rocket Satellite Launch Vehicle  SLV  missile  fact  different kinds rockets  development rockets resulted fully indigenous capability production sounding rockets propellants  Prof Sarabhai took challenge giving physical dimensions dream  ” fact 

In [93]:
len(final_summary),len(df2)

(4687, 90903)

In [94]:
!pip install rouge



## MODEL EVALUATION

In [95]:
from rouge import Rouge

In [96]:
rouge = Rouge()

In [97]:
rouge.get_scores(final_summary, df2)

[{'rouge-1': {'r': 0.09266498045527707, 'p': 1.0, 'f': 0.1696127930605092},
  'rouge-2': {'r': 0.0518033946251768,
   'p': 0.9057187017001546,
   'f': 0.09800150411907965},
  'rouge-l': {'r': 0.09266498045527707, 'p': 1.0, 'f': 0.1696127930605092}}]

## Luhn Summarizer

In [98]:
from sumy.summarizers.luhn import LuhnSummarizer
luhn_summarizer=LuhnSummarizer()

In [99]:
## Building the model
summary1=luhn_summarizer(parser.document,20)

In [100]:
## checking the sentences
for sentence in summary1:
    print(sentence)

Right day INCOSPAR formed , aware need organize integrated national space programme , equipment manufacture rockets launch facilities developed produced indigenously .
view , wide - ranging programme scientific technological development rocket fuels , propulsion systems , aeronautics , aerospace materials , advanced fabrication techniques , rocket motor instrumentation , control guidance systems , telemetry , tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad .
development payloads sounding rockets , instead getting certain payload engineering fit rocket , discussed matter threadbare payload scientists working different organ - izations different locations .
X - ray payloads look stars ; payloads fitted radio frequency mass spectrometers analyse gas composition upper atmosphere ; sodium payloads find wind conditions , direction velocity .
evening , news India taking indigenous development device 

In [101]:
## Joining the sentences
final_summary2 = " ".join(map(str, summary1[0:]))

In [102]:
final_summary2

'Right day INCOSPAR formed , aware need organize integrated national space programme , equipment manufacture rockets launch facilities developed produced indigenously . view , wide - ranging programme scientific technological development rocket fuels , propulsion systems , aeronautics , aerospace materials , advanced fabrication techniques , rocket motor instrumentation , control guidance systems , telemetry , tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad . development payloads sounding rockets , instead getting certain payload engineering fit rocket , discussed matter threadbare payload scientists working different organ - izations different locations . X - ray payloads look stars ; payloads fitted radio frequency mass spectrometers analyse gas composition upper atmosphere ; sodium payloads find wind conditions , direction velocity . evening , news India taking indigenous development device

In [103]:
## Remove punctuations mark from the summary
import string
translate_table = dict((ord(char), None) for char in string.punctuation)   
final_summary2.translate(translate_table)

'Right day INCOSPAR formed  aware need organize integrated national space programme  equipment manufacture rockets launch facilities developed produced indigenously  view  wide  ranging programme scientific technological development rocket fuels  propulsion systems  aeronautics  aerospace materials  advanced fabrication techniques  rocket motor instrumentation  control guidance systems  telemetry  tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad  development payloads sounding rockets  instead getting certain payload engineering fit rocket  discussed matter threadbare payload scientists working different organ  izations different locations  X  ray payloads look stars  payloads fitted radio frequency mass spectrometers analyse gas composition upper atmosphere  sodium payloads find wind conditions  direction velocity  evening  news India taking indigenous development device help short run  offs hi

In [104]:
## Word count of original data& dummary
len(df2),len(final_summary2)

(90903, 4228)

## Model evaluation

In [105]:
rouge.get_scores(final_summary2, df2)

[{'rouge-1': {'r': 0.08852609795355254, 'p': 1.0, 'f': 0.16265314594977076},
  'rouge-2': {'r': 0.04862093352192362,
   'p': 0.9700176366843033,
   'f': 0.09260038632892273},
  'rouge-l': {'r': 0.08852609795355254, 'p': 1.0, 'f': 0.16265314594977076}}]

## LSA Summarizer

In [106]:
from sumy.summarizers.lsa import LsaSummarizer
lsa_summarizer=LsaSummarizer()

In [107]:
lsa_summary=lsa_summarizer(parser.document,25)

In [108]:
for sentence in lsa_summary:
    print(sentence)

bunch young , inexperienced , energetic enthusiastic persons given task fleshing spirit selfreliance field science technology general , space research particular .
Right day INCOSPAR formed , aware need organize integrated national space programme , equipment manufacture rockets launch facilities developed produced indigenously .
exceptions like Prof. Oda Sudhakar , bring work personal touch magic based individual character , personality , inner motives , dreams crystallized hearts .
found going head attempting task capability skill , Prof. Sarabhai reassign activity way lower pressure permit better quality work performed .
Iwas filled emotions — happiness , gratitude , sense fulfilment lines littleknown poet nineteenth- century crossed mind : days prepare meet alike anvil , bear – hammer , strike .
plan mainly centred early ideas born INCOSPAR ; included utilization satellites television developmental education , meteorological observations remote sensing management natural resources 

In [109]:
final_summary3 = " ".join(map(str, lsa_summary[0:]))

In [110]:
final_summary3

'bunch young , inexperienced , energetic enthusiastic persons given task fleshing spirit selfreliance field science technology general , space research particular . Right day INCOSPAR formed , aware need organize integrated national space programme , equipment manufacture rockets launch facilities developed produced indigenously . exceptions like Prof. Oda Sudhakar , bring work personal touch magic based individual character , personality , inner motives , dreams crystallized hearts . found going head attempting task capability skill , Prof. Sarabhai reassign activity way lower pressure permit better quality work performed . Iwas filled emotions — happiness , gratitude , sense fulfilment lines littleknown poet nineteenth- century crossed mind : days prepare meet alike anvil , bear – hammer , strike . plan mainly centred early ideas born INCOSPAR ; included utilization satellites television developmental education , meteorological observations remote sensing management natural resources

In [111]:
import string
translate_table = dict((ord(char), None) for char in string.punctuation)   
final_summary3.translate(translate_table)

'bunch young  inexperienced  energetic enthusiastic persons given task fleshing spirit selfreliance field science technology general  space research particular  Right day INCOSPAR formed  aware need organize integrated national space programme  equipment manufacture rockets launch facilities developed produced indigenously  exceptions like Prof Oda Sudhakar  bring work personal touch magic based individual character  personality  inner motives  dreams crystallized hearts  found going head attempting task capability skill  Prof Sarabhai reassign activity way lower pressure permit better quality work performed  Iwas filled emotions — happiness  gratitude  sense fulfilment lines littleknown poet nineteenth century crossed mind  days prepare meet alike anvil  bear – hammer  strike  plan mainly centred early ideas born INCOSPAR  included utilization satellites television developmental education  meteorological observations remote sensing management natural resources  Recognising immense soc

In [112]:
## Final lenght of the words
len(df2),len(final_summary3)

(90903, 4404)

## Model Evaluation

In [113]:
rouge.get_scores(final_summary3, df2)

[{'rouge-1': {'r': 0.1004828696252012, 'p': 1.0, 'f': 0.1826159615666594},
  'rouge-2': {'r': 0.05012376237623763,
   'p': 0.9593908629441624,
   'f': 0.09527009903107445},
  'rouge-l': {'r': 0.1004828696252012, 'p': 1.0, 'f': 0.1826159615666594}}]

## LSA With stopwords

In [114]:
from sumy.utils import get_stop_words
summarizer_lsa2=LsaSummarizer()
summarizer_lsa2.stop_words=get_stop_words('english')

In [115]:
summary2=summarizer_lsa2(parser.document,20)

In [116]:
for sentence in summary2:
    print(sentence)

Iwas filled emotions — happiness , gratitude , sense fulfilment lines littleknown poet nineteenth- century crossed mind : days prepare meet alike anvil , bear – hammer , strike .
plan mainly centred early ideas born INCOSPAR ; included utilization satellites television developmental education , meteorological observations remote sensing management natural resources .
Recognising immense socio- economic benefits space technology , Prof. Sarabhai decided 1969 , - steam ahead task establishing indigenous capability building launching satellites .
talked rocket launching station , envisaging facilities like launch pads , block houses , radar , telemetry — things taken granted Indian space research today .
Dr Brahm Prakash formed Project Advisory Committees advise specialized areas like rocket motors , materials fabrication , control guidance , electronics , mission launching .
Group responsible looking overall executive aspects SLV-3 : project management , including administration , planni

In [117]:
final_summary5 = " ".join(map(str, summary2[0:]))

In [118]:
final_summary5

'Iwas filled emotions — happiness , gratitude , sense fulfilment lines littleknown poet nineteenth- century crossed mind : days prepare meet alike anvil , bear – hammer , strike . plan mainly centred early ideas born INCOSPAR ; included utilization satellites television developmental education , meteorological observations remote sensing management natural resources . Recognising immense socio- economic benefits space technology , Prof. Sarabhai decided 1969 , - steam ahead task establishing indigenous capability building launching satellites . talked rocket launching station , envisaging facilities like launch pads , block houses , radar , telemetry — things taken granted Indian space research today . Dr Brahm Prakash formed Project Advisory Committees advise specialized areas like rocket motors , materials fabrication , control guidance , electronics , mission launching . Group responsible looking overall executive aspects SLV-3 : project management , including administration , plann

In [119]:
len(df),len(final_summary5)

(1390, 3590)

In [120]:
import string
translate_table = dict((ord(char), None) for char in string.punctuation)   
final_summary5.translate(translate_table)

'Iwas filled emotions — happiness  gratitude  sense fulfilment lines littleknown poet nineteenth century crossed mind  days prepare meet alike anvil  bear – hammer  strike  plan mainly centred early ideas born INCOSPAR  included utilization satellites television developmental education  meteorological observations remote sensing management natural resources  Recognising immense socio economic benefits space technology  Prof Sarabhai decided 1969   steam ahead task establishing indigenous capability building launching satellites  talked rocket launching station  envisaging facilities like launch pads  block houses  radar  telemetry — things taken granted Indian space research today  Dr Brahm Prakash formed Project Advisory Committees advise specialized areas like rocket motors  materials fabrication  control guidance  electronics  mission launching  Group responsible looking overall executive aspects SLV3  project management  including administration  planning evaluation  subsystems spe

## Model Evaluatiuon

In [121]:
rouge.get_scores(final_summary5, df2)

[{'rouge-1': {'r': 0.08461715336859048, 'p': 1.0, 'f': 0.15603137443591172},
  'rouge-2': {'r': 0.04199080622347949,
   'p': 0.9654471544715447,
   'f': 0.08048119201712463},
  'rouge-l': {'r': 0.08461715336859048, 'p': 1.0, 'f': 0.15603137443591172}}]

# Text rank

In [122]:
from sumy.summarizers.text_rank import TextRankSummarizer
text_summary=TextRankSummarizer()

In [123]:
summary_result=text_summary(parser.document,20)

In [124]:
for sentence in summary_result:
    print(sentence)

People work virtually round clock enthusiasm Prof. Sarabhai new , country — new design new method fabrication - - - way administrative procedure .
Prof. Sarabhai talking Satellite Launch Vehicle ( SLV ) , asked , breath , studies rocketassisted - system ( RATO ) military aircraft .
view , wide - ranging programme scientific technological development rocket fuels , propulsion systems , aeronautics , aerospace materials , advanced fabrication techniques , rocket motor instrumentation , control guidance systems , telemetry , tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad .
development work carried Space Science Technology Centre assistance Defence Research Development Organization ( DRDO ) , HAL , DTD&P(Air ) Air Headquarters .
time , Prof. Sarabhai hand - picked team form dream Indian SLV .
Suddenly , senior scientist worked closely Prof. Sarabhai turned enquired , “ , presentations project tea

In [125]:
textrank_final = " ".join(map(str, summary_result[0:]))

In [126]:
textrank_final

'People work virtually round clock enthusiasm Prof. Sarabhai new , country — new design new method fabrication - - - way administrative procedure . Prof. Sarabhai talking Satellite Launch Vehicle ( SLV ) , asked , breath , studies rocketassisted - system ( RATO ) military aircraft . view , wide - ranging programme scientific technological development rocket fuels , propulsion systems , aeronautics , aerospace materials , advanced fabrication techniques , rocket motor instrumentation , control guidance systems , telemetry , tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad . development work carried Space Science Technology Centre assistance Defence Research Development Organization ( DRDO ) , HAL , DTD&P(Air ) Air Headquarters . time , Prof. Sarabhai hand - picked team form dream Indian SLV . Suddenly , senior scientist worked closely Prof. Sarabhai turned enquired , “ , presentations project te

In [127]:
## Removing punctuations marks
import string
translate_table = dict((ord(char), None) for char in string.punctuation)   
textrank_final.translate(translate_table)

'People work virtually round clock enthusiasm Prof Sarabhai new  country — new design new method fabrication    way administrative procedure  Prof Sarabhai talking Satellite Launch Vehicle  SLV   asked  breath  studies rocketassisted  system  RATO  military aircraft  view  wide  ranging programme scientific technological development rocket fuels  propulsion systems  aeronautics  aerospace materials  advanced fabrication techniques  rocket motor instrumentation  control guidance systems  telemetry  tracking systems scientific instruments experimentation space launched Space Science Technology Centre Physical Research Laboratory Ahmedabad  development work carried Space Science Technology Centre assistance Defence Research Development Organization  DRDO   HAL  DTDPAir  Air Headquarters  time  Prof Sarabhai hand  picked team form dream Indian SLV  Suddenly  senior scientist worked closely Prof Sarabhai turned enquired  “  presentations project team members based work  complex Thumba  incl

In [128]:
len(df2),len(textrank_final)

(90903, 3700)

## Model evaluation

In [129]:
rouge.get_scores(textrank_final, df2)

[{'rouge-1': {'r': 0.06921131294550471, 'p': 1.0, 'f': 0.12946236438057673},
  'rouge-2': {'r': 0.04172560113154172,
   'p': 0.963265306122449,
   'f': 0.07998644217987462},
  'rouge-l': {'r': 0.06921131294550471, 'p': 1.0, 'f': 0.12946236438057673}}]

# BART MODEL

In [130]:
# Importing the model
from transformers import BartForConditionalGeneration, BartTokenizer, BartConfig

In [131]:
tokenizer=BartTokenizer.from_pretrained('facebook/bart-large-cnn')
model=BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')

In [132]:
df3=df[0:30]
df3

['This is primarily an R&D centre for advanced aerospace technology. One of my most vivid memories',
 'of LRC is of a piece of sculpture depicting a charioteer driving two horses, one representing scientific',
 'research and the other technological development, metaphorically encapsulating the',
 'interconnection between research and development. From LRC I went to the Goddard Space Flight',
 'Centre (GSFC) at Greenbelt, Maryland. This Centre develops and manages most of NASA’s earth-',
 'orbiting science and applications satellites. It operates NASA’s tracking networks for all space',
 'missions. Towards the end of my visit, Iwent to the Wallops Flight Facility at Wallops Island in East',
 'Coast, Virginia. This place was the base for NASA’s sounding rocket programme. Here, I saw a',
 'painting prominently displayed in the reception lobby. It depicted a battle scene with a few rockets',
 'flying in the background. A painting with this theme should be the most commonplace thing at a',


In [133]:
df4=' '.join(df3)
df4

'This is primarily an R&D centre for advanced aerospace technology. One of my most vivid memories of LRC is of a piece of sculpture depicting a charioteer driving two horses, one representing scientific research and the other technological development, metaphorically encapsulating the interconnection between research and development. From LRC I went to the Goddard Space Flight Centre (GSFC) at Greenbelt, Maryland. This Centre develops and manages most of NASA’s earth- orbiting science and applications satellites. It operates NASA’s tracking networks for all space missions. Towards the end of my visit, Iwent to the Wallops Flight Facility at Wallops Island in East Coast, Virginia. This place was the base for NASA’s sounding rocket programme. Here, I saw a painting prominently displayed in the reception lobby. It depicted a battle scene with a few rockets flying in the background. A painting with this theme should be the most commonplace thing at a Flight Facility, but the painting caugh

In [134]:
# Encoding the inputs and passing them to model.generate()
inputs = tokenizer.batch_encode_plus([df4],return_tensors='pt')
summary_ids = model.generate(inputs['input_ids'], early_stopping=True)

In [135]:
# Decoding and printing the summary
bart_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(bart_summary)

This is primarily an R&D centre for advanced aerospace technology. I was happy to see an Indian glorified by NASA as a hero of warfare rocketry. My impression of the American people can be summarized by a quotation from Benjamin Franklin, “Those things that hurt instruct!”


In [136]:
rouge.get_scores(bart_summary, df4)

[{'rouge-1': {'r': 0.1423728813559322, 'p': 1.0, 'f': 0.24925815805545531},
  'rouge-2': {'r': 0.09401709401709402,
   'p': 0.9777777777777777,
   'f': 0.17153995941315278},
  'rouge-l': {'r': 0.1423728813559322, 'p': 1.0, 'f': 0.24925815805545531}}]

## GPT2 Transformers

In [137]:
# Importing model and tokenizer
from transformers import GPT2Tokenizer,GPT2LMHeadModel

In [138]:
# Instantiating the model and tokenizer with gpt-2
tokenizer=GPT2Tokenizer.from_pretrained('gpt2')
model=GPT2LMHeadModel.from_pretrained('gpt2')

Downloading:   0%|          | 0.00/0.99M [00:01<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:01<?, ?B/s]

Downloading:   0%|          | 0.00/523M [00:00<?, ?B/s]

In [139]:
# Encoding text to get input ids & pass them to model.generate()
inputs=tokenizer.batch_encode_plus([df_Observations],return_tensors='pt',max_length=1000,truncation=True)
summary_ids=model.generate(inputs['input_ids'],early_stopping=True)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Input length of input_ids is 1000, but ``max_length`` is set to 20. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.


In [140]:
# Decoding and printing summary

GPT_summary=tokenizer.decode(summary_ids[0],skip_special_tokens=True)
print(GPT_summary)

This is primarily an R&D centre for advanced aerospace technology. One of my most vivid memories of LRC is of a piece of sculpture depicting a charioteer driving two horses, one representing scientific research and the other technological development, metaphorically encapsulating the interconnection between research and development. From LRC I went to the Goddard Space Flight Centre (GSFC) at Greenbelt, Maryland. This Centre develops and manages most of NASA’s earth- orbiting science and applications satellites. It operates NASA’s tracking networks for all space missions. Towards the end of my visit, Iwent to the Wallops Flight Facility at Wallops Island in East Coast, Virginia. This place was the base for NASA’s sounding rocket programme. Here, I saw a painting prominently displayed in the reception lobby. It depicted a battle scene with a few rockets flying in the background. A painting with this theme should be the most commonplace thing at a Flight Facility, but the painting caught

In [141]:
df_Observations



## Model evaluation

In [142]:
rouge.get_scores(GPT_summary, df_Observations)

[{'rouge-1': {'r': 0.07787257215465601, 'p': 1.0, 'f': 0.14449309397774932},
  'rouge-2': {'r': 0.04295051353874883, 'p': 1.0, 'f': 0.08236347280025712},
  'rouge-l': {'r': 0.07787257215465601, 'p': 1.0, 'f': 0.14449309397774932}}]

# BERT MODEL

In [143]:
!pip install bert-extractive-summarizer



In [144]:
from summarizer import Summarizer

body = 'Text body that you want to summarize with BERT'
body2 = 'Something else you want to summarize with BERT'
model = Summarizer()
model(body)
model(body2)

Downloading:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.25G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/28.0 [00:13<?, ?B/s]

TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType

In [145]:
model = Summarizer()
result = model(df_Observations, min_length=60,max_length=500)
full = ''.join(result)
print(full)

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/226k [00:04<?, ?B/s]



## MODEL EVALUATION

In [146]:
rouge.get_scores(full, df_Observations)

[{'rouge-1': {'r': 0.28371755309493557,
   'p': 0.9993606138107417,
   'f': 0.4419623887511261},
  'rouge-2': {'r': 0.18814192343604108,
   'p': 0.9518748154709182,
   'f': 0.31418408339128917},
  'rouge-l': {'r': 0.28371755309493557,
   'p': 0.9993606138107417,
   'f': 0.4419623887511261}}]

# Evaluation table

In [149]:
pip install tabulate

Note: you may need to restart the kernel to use updated packages.
Collecting tabulate
  Downloading tabulate-0.8.9-py3-none-any.whl (25 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.8.9


In [150]:
from tabulate import tabulate

In [151]:
#create table
table = [["Recall", 0.09266,0.08852,0.06921,0.100,0.077,0.28,0.28], 
        ["Prescion", 1.0,1.0,1.0,1.0,1.0,0.99,0.99], 
        ["F-score", 0.1696,0.1626,0.1294,0.1626,0.1444,0.44,0.44]] 
        

#define column names
col_names = ["score", "Lexrank","Luhn","Textrank","LSA","GPT2","BART","BERT"]

#display table
print(tabulate(table, headers=col_names, tablefmt="fancy_grid", showindex="always"))

╒════╤══════════╤═══════════╤═════════╤════════════╤════════╤════════╤════════╤════════╕
│    │ score    │   Lexrank │    Luhn │   Textrank │    LSA │   GPT2 │   BART │   BERT │
╞════╪══════════╪═══════════╪═════════╪════════════╪════════╪════════╪════════╪════════╡
│  0 │ Recall   │   0.09266 │ 0.08852 │    0.06921 │ 0.1    │ 0.077  │   0.28 │   0.28 │
├────┼──────────┼───────────┼─────────┼────────────┼────────┼────────┼────────┼────────┤
│  1 │ Prescion │   1       │ 1       │    1       │ 1      │ 1      │   0.99 │   0.99 │
├────┼──────────┼───────────┼─────────┼────────────┼────────┼────────┼────────┼────────┤
│  2 │ F-score  │   0.1696  │ 0.1626  │    0.1294  │ 0.1626 │ 0.1444 │   0.44 │   0.44 │
╘════╧══════════╧═══════════╧═════════╧════════════╧════════╧════════╧════════╧════════╛
