<a href="https://colab.research.google.com/github/Onkar-2803/Text_Summarization/blob/main/Extractive_Text_Summarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from nltk.tokenize import sent_tokenize, word_tokenize 
def get_sentences(article):
  extracts=sent_tokenize(article)
  sentences=[]
  for extract in extracts:
    #print(extract)
    clean_sentence=extract.replace("[^a-zA-Z0-9]"," ")   ## Removing special characters
    #print(clean_sentence)
    obtained=word_tokenize(clean_sentence) 
    #print(obtained)
    sentences.append(obtained)

  return sentences

In [2]:
from nltk.cluster.util import cosine_distance
def get_similarity(sent_1,sent_2,stop_words):
  
  sent_1=[w.lower() for w in sent_1]
  sent_2=[w.lower() for w in sent_2]

  total=list(set(sent_1+sent_2)) ## Removing duplicate words in total set

  vec_1= [0] * len(total)
  vec_2= [0] * len(total)


  ## Count Vectorization of two sentences
  for w in sent_1:
    if w not in stop_words:
      vec_1[total.index(w)]+=1

  for w in sent_2:
    if w not in stop_words:
      vec_2[total.index(w)]+=1


  return 1-cosine_distance(vec_1,vec_2)

In [3]:
from nltk.corpus import stopwords
import numpy as np
def build_matrix(sentences):
  stop_words = stopwords.words('english')

  sim_matrix=np.zeros((len(sentences),len(sentences)))
  ## Adjacency matrix

  for id1 in range(len(sentences)):
    for id2 in range(len(sentences)):
      if id1==id2:  #escaping diagonal elements
        continue
      else:
        sim_matrix[id1][id2]=get_similarity(sentences[id1],sentences[id2],stop_words)

  return sim_matrix

In [4]:
def pagerank(text, eps=0.000001, d=0.85):
    score_mat = np.ones(len(text)) / len(text)
    delta=1
    ### iterative approach
    while delta>eps:
        score_mat_new = np.ones(len(text)) * (1 - d) / len(text) + d * text.T.dot(score_mat)
        delta = abs(score_mat_new - score_mat).sum()
        score_mat = score_mat_new

    return score_mat_new

In [5]:
def summarizer(article,req=3):
  summarized=[]

  sentence=get_sentences(article)

  sim_matrix=build_matrix(sentence)

  score=pagerank(sim_matrix)

  ranked_sentence = sorted(((score[i],s) for i,s in enumerate(sentence)), reverse=True)
  #print(ranked_sentence[2])
  
  for i in range(req):
      #print(ranked_sentence[i][1])
      summarized.append(" ".join(ranked_sentence[i][1]))

  return summarized

In [6]:
Article='Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types systems and systems of mixed types.'
len(Article)


564

In [7]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
Summary=summarizer(Article)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [8]:
print(Summary)

['These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types systems and systems of mixed types .', 'Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given .', 'Compatibility of systems of linear constraints over the set of natural numbers .']


In [20]:
Article_2 = '''The police are entrusted with the duty of maintaining the peace and harmony of a society. Moreover, they also have the right to arrest and control people who do not follow the law. As a result, they are important as they protect our society.
Enforcing the laws of the land, the police also has the right to punish people who do not obey the law. Consequently, we, as citizens, feel safe and do not worry much about our lives and property.'''

In [21]:
Summary=summarizer(Article_2)

In [22]:
print(Summary)

['Enforcing the laws of the land , the police also has the right to punish people who do not obey the law .', 'Moreover , they also have the right to arrest and control people who do not follow the law .', 'As a result , they are important as they protect our society .']
