<a href="https://colab.research.google.com/github/Divya-Devarashetti/Text-Summarization-ML/blob/main/Text_Summarization_ML_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#importing some libraries
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize
import bs4 as BeautifulSoup
import urllib.request  

#fetching the content from the URL
#the with-block closes the HTTP response as soon as the body has been read
with urllib.request.urlopen('https://en.wikipedia.org/wiki/Sports_car') as text_summ:
    summ_read = text_summ.read()

#parsing the URL content and storing in a variable
summ_parsed = BeautifulSoup.BeautifulSoup(summ_read, 'html.parser')

#returning <p> tags
paragraphs = summ_parsed.find_all('p')

#joining the text of every paragraph into a single string
summ_content = ''.join(p.text for p in paragraphs)


def _creating_dictionary_table(text_string) -> dict:
    """Build a frequency table of the stemmed, non-stop-word tokens of a text.

    The text is split with ``word_tokenize``. English stop words are dropped
    and every remaining token is counted under its Porter stem.

    Args:
        text_string: The text to analyse.

    Returns:
        A dict mapping each stem to the number of times it occurs.
    """
    #remove stop words
    stop_words = set(stopwords.words("english"))

    #reducing words to their root form
    stem = PorterStemmer()

    #creating dictionary for the word frequency table
    frequency_table = dict()
    for token in word_tokenize(text_string):
        # The stop-word test has to see the unstemmed token: the stemmer
        # rewrites many stop words (e.g. "this" -> "thi", "was" -> "wa")
        # into forms that are not in the stop-word list, so testing only
        # the stem let them through into the table.
        if token.lower() in stop_words:
            continue
        w = stem.stem(token)
        # Stems that are themselves stop words were skipped before this
        # fix; keep skipping them so nothing previously filtered reappears.
        if w in stop_words:
            continue
        frequency_table[w] = frequency_table.get(w, 0) + 1

    return frequency_table


def _calculated_sentence_scores(sentences, frequency_table) -> dict:   

    #algorithm for scoring a sentence by its words
    sentence_weight = dict()

    for sentence in sentences:
        sentence_wordcount = (len(word_tokenize(sentence)))
        sentence_wordcount_without_stop_words = 0
        for word_weight in frequency_table:
            if word_weight in sentence.lower():
                sentence_wordcount_without_stop_words += 1
                if sentence[:7] in sentence_weight:
                    sentence_weight[sentence[:7]] += frequency_table[word_weight]
                else:
                    sentence_weight[sentence[:7]] = frequency_table[word_weight]

        sentence_weight[sentence[:7]] = sentence_weight[sentence[:7]] / sentence_wordcount_without_stop_words

       

    return sentence_weight

def _calculated_average_score(sentence_weight) -> int:
   
    #calculating the average score for the sentences
    sum_values = 0
    for entry in sentence_weight:
        sum_values += sentence_weight[entry]

    #getting sentence average value from source text
    average_score = (sum_values / len(sentence_weight))

    return average_score

def _get_article_summary(sentences, sentence_weight, threshold):
    sentence_counter = 0
    article_summary = ''

    for sentence in sentences:
        if sentence[:7] in sentence_weight and sentence_weight[sentence[:7]] >= (threshold):
            article_summary += " " + sentence
            sentence_counter += 1

    return article_summary

def _run_article_summary(article, *, threshold_factor=1.5):
    """Produce an extractive summary of ``article``.

    Builds the word frequency table, splits the article into sentences,
    scores them, and keeps the sentences whose score is at least
    ``threshold_factor`` times the average sentence score.

    Args:
        article: The text to summarise.
        threshold_factor: Multiplier applied to the average sentence score
            to obtain the selection cut-off. Defaults to 1.5.

    Returns:
        The selected sentences joined into one string.
    """
    #creating a dictionary for the word frequency table
    frequency_table = _creating_dictionary_table(article)

    #tokenizing the sentences
    sentences = sent_tokenize(article)

    #algorithm for scoring a sentence by its words
    sentence_scores = _calculated_sentence_scores(sentences, frequency_table)

    #getting the threshold
    threshold = _calculated_average_score(sentence_scores)

    #producing the summary
    article_summary = _get_article_summary(
        sentences, sentence_scores, threshold_factor * threshold
    )

    return article_summary

if __name__ == '__main__':
    #running the summarizer on the fetched article text and printing the result
    summ_results = _run_article_summary(summ_content)
    print(summ_results)

 [8][9]
In the United Kingdom, early recorded usage of the "sports car" was in The Times newspaper in 1919. Examples of FR layout sports cars are the Caterham 7, Mazda MX-5, and the Dodge Viper. Examples of MR layout sports cars are the Ferrari 488, Ford GT and Toyota MR2. [39]
Although front-wheel drive with the engine at the front (FF layout) is the most common layout for cars in general, it is not as common amongst traditional sports cars. Examples of FF layout sports cars are the Fiat Barchetta, Saab Sonett, or Opel Tigra. [54] The model was named after King Alfonso XIII of Spain, a patron of the car's chief designer and an enthusiast for the marque. E.g. Vintage cars at the time.
