In [1]:
#Extractive Summarization Model-II
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize

In [2]:
def _create_dictionary_table(text_string) -> dict:
    #removing stop words
    stop_words = set(stopwords.words("english"))
    words = word_tokenize(text_string)
    #reducing words to their root form
    stem = PorterStemmer()
    #creating dictionary for the word frequency table
    frequency_table = dict()
    for wd in words:
        wd = stem.stem(wd)
        if wd in stop_words:
            continue
        if wd in frequency_table:
            frequency_table[wd] += 1
        else:
            frequency_table[wd] = 1

    return frequency_table

In [3]:
def _calculate_sentence_scores(sentences, frequency_table) -> dict:   

    #algorithm for scoring a sentence by its words
    sentence_weight = dict()

    for sentence in sentences:
        sentence_wordcount = (len(word_tokenize(sentence)))
        sentence_wordcount_without_stop_words = 0
        for word_weight in frequency_table:
            if word_weight in sentence.lower():
                sentence_wordcount_without_stop_words += 1
                if sentence[:4] in sentence_weight:
                    sentence_weight[sentence[:4]] += frequency_table[word_weight]
                else:
                    sentence_weight[sentence[:4]] = frequency_table[word_weight]

        sentence_weight[sentence[:4]] = sentence_weight[sentence[:4]] / sentence_wordcount_without_stop_words

       

    return sentence_weight

In [4]:
def _calculate_average_score(sentence_weight) -> int:
   
    #calculating the average score for the sentences
    sum_values = 0
    for entry in sentence_weight:
        sum_values += sentence_weight[entry]

    #getting sentence average value from source text
    average_score = (sum_values / len(sentence_weight))

    return average_score

In [5]:
def _get_article_summary(sentences, sentence_weight, threshold):
    sentence_counter = 0
    article_summary = ''

    for sentence in sentences:
        if sentence[:4] in sentence_weight and sentence_weight[sentence[:4]] >= (1.25*threshold):
            article_summary += " " + sentence
            sentence_counter += 1

    return article_summary


In [6]:
def _run_article_summary(article):
    
    #creating a dictionary for the word frequency table
    frequency_table = _create_dictionary_table(article)

    #tokenizing the sentences
    sentences = sent_tokenize(article)

    #algorithm for scoring a sentence by its words
    sentence_scores = _calculate_sentence_scores(sentences, frequency_table)

    #getting the threshold
    threshold = _calculate_average_score(sentence_scores)

    #producing the summary
    article_summary = _get_article_summary(sentences, sentence_scores,threshold)

    return article_summary

In [7]:
f=open('input.txt','r',encoding='utf-8')
article_content2=f.read().replace('\n','')
f.close()
summary_results = _run_article_summary(article_content2)
print(summary_results)

 In the wake of the Axis defeat, Germany and Japan were occupied, and war crimes tribunals were conducted against German and Japanese leaders.World War II is generally considered to have begun on 1 September 1939, when Nazi Germany, under Adolf Hitler, invaded Poland. The United Kingdom and France subsequently declared war on Germany on 3 September. As such, the European Axis powers declared war on the United States in solidarity. During 1944 and 1945, Japan suffered reversals in mainland Asia, while the Allies crippled the Japanese Navy and captured key western Pacific islands.The war in Europe concluded with the liberation of German-occupied territories, and the invasion of Germany by the Western Allies and the Soviet Union, culminating in the fall of Berlin to Soviet troops, Hitler's suicide and the German unconditional surrender on 8 May 1945. The United Nations (UN) was established to foster international co-operation and prevent future conflicts,[1] great powers—China, France, th