In [None]:
import pickle
import pandas as pd
# Load the pickled dictionary that backs the review DataFrame.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open pickle files that come from a trusted source.
# NOTE(review): '/content/dic.pickle' is an absolute, environment-specific path.
with open('/content/dic.pickle', 'rb') as handle:
    dic = pickle.load(handle)
# One row per dictionary key (orient='index'); each value is spread over the
# two named columns.
df= pd.DataFrame.from_dict(dic,orient='index',columns=['real_summary','reviews'])
# Show the frame as the cell output.
df

Unnamed: 0,real_summary,reviews
By0ANxbRW,Proposed network compression method offers lim...,This paper proposes a deep neural network com...
BkCV_W-AZ,The reviewers agree this is a really interesti...,Quality and clarityThe paper provides a gameth...
H1Y8hhg0b,"The results in the paper are interesting, and ...",The paper introduces a technique for optimizin...
BkVsWbbAW,Thank you for submitting you paper to ICLR. Th...,This paper introduces a neural network archite...
HyXBcYg0b,The authors make an experimental study of the ...,The paper proposes a new neural network model ...
...,...,...
HkjL6MiTb,Reviewers unanimous in assessment that manuscr...,The authors tackle the problem of estimating r...
BJgPCveAW,"The paper received weak scores: 4,4,5. R2 comp...",The paper seems to claims that) certain ConvNe...
B1X4DWWRb,The submission provides an interesting way to ...,The paper proposes a novel way of causal infer...
B1KFAGWAZ,The authors present a centralized neural contr...,This paper investigates multiagent reinforceme...


In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
# Make sure the NLTK "stopwords" corpus is available locally before it is read.
nltk.download("stopwords")
# English stop words; generate_summary() reads this module-level name and
# passes it to build_similarity_matrix().
stop_words = stopwords.words('english')

def read_article(file_name):
    """Split raw text into sentences, each represented as a list of tokens.

    Despite its name, ``file_name`` is the text itself, not a path: nothing
    is read from disk. The text is cut on every "." and each fragment is then
    split on single spaces, so a fragment that starts with a space (or holds
    consecutive spaces) yields empty-string tokens. Tokens are otherwise kept
    verbatim -- punctuation and digits are not stripped -- so a sentence can
    be rebuilt with ``" ".join(tokens)``.

    Args:
        file_name: The document text to segment (a ``str``).

    Returns:
        A list of sentences, where every sentence is a list of ``str``
        tokens. Empty or whitespace-only text produces an empty list.
    """
    fragments = file_name.split(".")

    # Text that ends with "." leaves a blank trailing fragment; discard only
    # that one. Dropping the last fragment unconditionally would lose the
    # final sentence of any text that does not end with a period.
    if not fragments[-1].strip():
        fragments.pop()

    return [fragment.split(" ") for fragment in fragments]

def sentence_similarity(sent1, sent2, stopwords=None):
    """Return the cosine similarity of two tokenised sentences.

    Both sentences are lower-cased and turned into word-count vectors over
    their combined vocabulary; words found in ``stopwords`` are not counted.

    Args:
        sent1: First sentence as a list of word strings.
        sent2: Second sentence as a list of word strings.
        stopwords: Optional collection of lower-case words to ignore.
            Defaults to no stop words. Inside this function the parameter
            shadows any module-level object of the same name.

    Returns:
        A float between 0.0 (no counted word in common) and roughly 1.0
        (count vectors point the same way). Returns 0.0 when either sentence
        has no counted words at all.
    """
    ignored = set(stopwords) if stopwords is not None else set()

    words1 = [w.lower() for w in sent1]
    words2 = [w.lower() for w in sent2]

    # Assign every distinct word a vector slot once, so that filling the
    # vectors is a dictionary lookup rather than a list scan per word.
    slot = {word: position for position, word in enumerate(set(words1 + words2))}

    vector1 = np.zeros(len(slot))
    vector2 = np.zeros(len(slot))

    # build the vector for the first sentence
    for w in words1:
        if w not in ignored:
            vector1[slot[w]] += 1

    # build the vector for the second sentence
    for w in words2:
        if w not in ignored:
            vector2[slot[w]] += 1

    norm1 = np.linalg.norm(vector1)
    norm2 = np.linalg.norm(vector2)

    # A sentence made only of stop words (or with no words) has an all-zero
    # vector; the cosine would divide by zero and produce nan, which would
    # then leak into the similarity matrix. Treat it as "no similarity".
    if norm1 == 0 or norm2 == 0:
        return 0.0

    return float(np.dot(vector1, vector2) / (norm1 * norm2))
 
def build_similarity_matrix(sentences, stop_words):
    """Compute the pairwise similarity of every sentence with every other.

    Args:
        sentences: List of tokenised sentences (each a list of words).
        stop_words: Words forwarded to ``sentence_similarity`` to be ignored.

    Returns:
        A square numpy array with one row and one column per sentence.
        Entry ``[i][j]`` is the similarity of sentence ``i`` to sentence
        ``j``; the diagonal is left at zero.
    """
    count = len(sentences)
    matrix = np.zeros((count, count))

    for row, first in enumerate(sentences):
        for col, second in enumerate(sentences):
            # A sentence is not compared with itself.
            if row != col:
                matrix[row][col] = sentence_similarity(first, second, stop_words)

    return matrix


def generate_summary(file_name, top_n=5):
    """Build an extractive summary by ranking sentences with PageRank.

    Args:
        file_name: The text to summarise; it is handed directly to
            ``read_article`` (it is the text, not a path).
        top_n: Maximum number of sentences to keep. When the text has fewer
            sentences, all of them are used.

    Returns:
        The selected sentences joined with ". ", ordered from highest to
        lowest PageRank score. Returns an empty string when the text yields
        no sentences.
    """
    # Step 1 - Read text and split it into tokenised sentences
    sentences = read_article(file_name)
    if not sentences:
        return ""

    # Step 2 - Generate similarity matrix across sentences
    # (stop_words is the module-level stop-word collection)
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Step 4 - Sort by score (ties fall back to comparing the token lists)
    # and pick the top sentences
    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

    # Never index past the end: short texts may have fewer than top_n sentences.
    keep = max(0, min(top_n, len(ranked_sentence)))
    summarize_text = [" ".join(tokens) for _, tokens in ranked_sentence[:keep]]

    return ". ".join(summarize_text)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Summarise every review. A review with at most two "."-separated fragments
# is kept as it is; longer ones are reduced to their two top-ranked sentences.
reviews = df['reviews']
summary = []
for review in reviews:
    article = review.split(".")
    summary.append(review if len(article) <= 2 else generate_summary(review, 2))

In [None]:
# Attach the generated summaries (one per entry of df['reviews'], in order)
# as a new column. This mutates df in place, so any earlier display of the
# frame no longer reflects its current contents.
df['auto_summary']=summary
# Show the updated frame as the cell output.
df

Unnamed: 0,real_summary,reviews,auto_summary
By0ANxbRW,Proposed network compression method offers lim...,This paper proposes a deep neural network com...,This paper proposes a deep neural network com...
BkCV_W-AZ,The reviewers agree this is a really interesti...,Quality and clarityThe paper provides a gameth...,This paper introduces the concepts of counterf...
H1Y8hhg0b,"The results in the paper are interesting, and ...",The paper introduces a technique for optimizin...,This paper presents a continuous surrogate for...
BkVsWbbAW,Thank you for submitting you paper to ICLR. Th...,This paper introduces a neural network archite...,This paper reports on a system for sequential ...
HyXBcYg0b,The authors make an experimental study of the ...,The paper proposes a new neural network model ...,The paper proposes an adaptation of existing G...
...,...,...,...
HkjL6MiTb,Reviewers unanimous in assessment that manuscr...,The authors tackle the problem of estimating r...,This paper introduces siamese neural networks ...
BJgPCveAW,"The paper received weak scores: 4,4,5. R2 comp...",The paper seems to claims that) certain ConvNe...,This paper examines sparse connection patterns...
B1X4DWWRb,The submission provides an interesting way to ...,The paper proposes a novel way of causal infer...,This paper proposes a deep learning architectu...
B1KFAGWAZ,The authors present a centralized neural contr...,This paper investigates multiagent reinforceme...,This paper investigates multiagent reinforceme...


In [None]:
# Pull the reference and generated summaries out as plain Python lists.
real_summary, auto_summary = df['real_summary'].tolist(), df['auto_summary'].tolist()

In [None]:
# Write the reference summaries to ref.txt, one item followed by a newline.
# The encoding is explicit so the output does not depend on the platform's
# default encoding and non-ASCII characters are always written.
# NOTE(review): an item that itself contains a newline would span several
# lines of the file -- confirm the summaries are single-line if a
# line-per-item layout is required.
with open('ref.txt', 'w', encoding='utf-8') as f:
    for item in real_summary:
        f.write("%s\n" % item)

In [None]:
# Write the generated summaries to hyp.txt, one item followed by a newline.
# The encoding is explicit so the output does not depend on the platform's
# default encoding and non-ASCII characters are always written.
# NOTE(review): an item that itself contains a newline would span several
# lines of the file -- confirm the summaries are single-line if a
# line-per-item layout is required.
with open('hyp.txt', 'w', encoding='utf-8') as f:
    for item in auto_summary:
        f.write("%s\n" % item)

In [None]:
import json 
def dump(hyp,ref,filename):
    """Append one hypothesis/reference pair to ``filename``.

    The pair is serialised as a JSON object with the keys "hyp" and "ref",
    followed by a comma and a newline. The file is opened in append mode, so
    repeated calls (and repeated runs) keep adding records.

    NOTE(review): the resulting file is a run of comma-terminated objects,
    not a single valid JSON document -- confirm that whatever reads it
    expects this layout.

    Args:
        hyp: The generated (hypothesis) summary.
        ref: The reference summary.
        filename: Path of the file to append to.
    """
    record = {"hyp": hyp, "ref": ref}
    serialised = json.dumps(record)

    with open(filename, "a") as outfile:
        outfile.write(serialised)
        outfile.write(',\n')

In [None]:
# Append every (generated, reference) summary pair to TextRankMeta.json.
for i, reference in enumerate(real_summary):
    dump(auto_summary[i], reference, "TextRankMeta.json")