# Importing necessary libraries

In [1]:
import numpy as np
import os
from textblob import TextBlob
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.models.dom import Sentence
from sumy.evaluation.rouge import rouge_1,rouge_2

# Getting the articles and summaries directory paths 

In [2]:
# Dataset locations, relative to the notebook's working directory.
# Both values keep their trailing '/' because file names are appended to
# them by plain string concatenation.
article_path = 'BBC Business News/News Articles/business/'
summary_path = 'BBC Business News/Summaries/business/'

# Observations

1. Two extractive, graph-based summarizers were compared: LexRank and TextRank.
2. ROUGE-1 and ROUGE-2 were used as performance metrics.
3. Scores were calculated by comparing the reference summary with the generated summary at four different sentence counts [10, 15, 20, 25].
4. The best scores were obtained at a sentence count of 25 for both approaches.
5. Overall, TextRank scored higher than LexRank on both metrics at all four sentence counts.

# Lex Rank Summarizer

In [3]:
# Evaluate LexRank summaries against the reference summaries with ROUGE-1 and
# ROUGE-2, once for each summary length (number of sentences requested).

# The tokenizer and the summarizer do not depend on the article being
# processed, so they are created once instead of once per file.
tokenizer = Tokenizer("english")
summarizer = LexRankSummarizer()

# List the articles once and sort them: os.listdir returns names in arbitrary
# order, and a fixed order keeps the floating-point averages reproducible.
article_names = sorted(os.listdir(article_path))
if not article_names:
    # Fail early with a clear message rather than dividing by zero below.
    raise ValueError('No article files found in ' + repr(article_path))

#iterating over different number of sentence counts
for sc in [10,15,20,25]:
    r1=[]
    r2=[]

    #iterating through the articles
    for name in article_names:
        #parsing the article file
        parser = PlaintextParser.from_file(article_path+name, tokenizer)
        #summarizing the article down to `sc` sentences
        summary = summarizer(parser.document, sc)

        #acquiring the actual summary; the reference file has the same name
        #as the article. `with` guarantees the handle is closed.
        with open(summary_path+name,'r') as f:
            act=f.read()

        #wrapping each '.'-separated fragment in sumy's Sentence model, which
        #is the type passed to rouge_1/rouge_2 below.
        #NOTE: str.split('.') also splits on decimal points and abbreviations.
        final=[Sentence(fragment,tokenizer) for fragment in act.split('.')]

        #calculating the rouge scores for the summary obtained
        r1.append(rouge_1(summary,final))
        r2.append(rouge_2(summary,final))

    #averaging the rouge scores for a sentence count
    print('For sentence count:',sc)
    print('Average Rouge 1 score:',sum(r1)/len(r1))
    print('Average Rouge 2 score:',sum(r2)/len(r2),'\n\n')
    

For sentence count: 10
Average Rouge 1 score: 0.82778062914676
Average Rouge 2 score: 0.7688573473617388 


For sentence count: 15
Average Rouge 1 score: 0.9435007804415468
Average Rouge 2 score: 0.9238472052171837 


For sentence count: 20
Average Rouge 1 score: 0.981043067434811
Average Rouge 2 score: 0.9744264282003102 


For sentence count: 25
Average Rouge 1 score: 0.9927635451570361
Average Rouge 2 score: 0.9902403055537793 




# Text Rank Summarizer

In [4]:
# Evaluate TextRank summaries against the reference summaries with ROUGE-1 and
# ROUGE-2, once for each summary length (number of sentences requested).

# The tokenizer and the summarizer do not depend on the article being
# processed, so they are created once instead of once per file.
tokenizer = Tokenizer("english")
summarizer = TextRankSummarizer()

# List the articles once and sort them: os.listdir returns names in arbitrary
# order, and a fixed order keeps the floating-point averages reproducible.
article_names = sorted(os.listdir(article_path))
if not article_names:
    # Fail early with a clear message rather than dividing by zero below.
    raise ValueError('No article files found in ' + repr(article_path))

#iterating over different number of sentence counts
for sc in [10,15,20,25]:
    r1=[]
    r2=[]

    #iterating through the articles
    for name in article_names:
        #parsing the article file
        parser = PlaintextParser.from_file(article_path+name, tokenizer)
        #summarizing the article down to `sc` sentences
        summary = summarizer(parser.document, sc)

        #acquiring the actual summary; the reference file has the same name
        #as the article. `with` guarantees the handle is closed.
        with open(summary_path+name,'r') as f:
            act=f.read()

        #wrapping each '.'-separated fragment in sumy's Sentence model, which
        #is the type passed to rouge_1/rouge_2 below.
        #NOTE: str.split('.') also splits on decimal points and abbreviations.
        final=[Sentence(fragment,tokenizer) for fragment in act.split('.')]

        #calculating the rouge scores for the summary obtained
        r1.append(rouge_1(summary,final))
        r2.append(rouge_2(summary,final))

    #averaging the rouge scores for a sentence count
    print('For sentence count:',sc)
    print('Average Rouge 1 score:',sum(r1)/len(r1))
    print('Average Rouge 2 score:',sum(r2)/len(r2),'\n\n')

For sentence count: 10
Average Rouge 1 score: 0.8918947999730512
Average Rouge 2 score: 0.8547664373690037 


For sentence count: 15
Average Rouge 1 score: 0.9690153869942072
Average Rouge 2 score: 0.9570604773680071 


For sentence count: 20
Average Rouge 1 score: 0.9909734472686471
Average Rouge 2 score: 0.9871444862737312 


For sentence count: 25
Average Rouge 1 score: 0.9965085581123175
Average Rouge 2 score: 0.9950434965358156 


