In [68]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer 
model_name = "Callidior/bert2bert-base-arxiv-titlegen" 
import pandas as pd
from tqdm import tqdm
import torch


In [83]:
from rouge_score import rouge_scorer
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True, tokenizer=tokenizer)

In [75]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [6]:
text = "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data."

In [91]:
single_inputs = tokenizer.encode(text, return_tensors="pt").to(device)

In [92]:
greedy_output = model.generate(single_inputs, max_length=32, num_beams=5)

In [93]:
tokenizer.batch_decode(greedy_output, skip_special_tokens=True)

['transformer : distilling convolutions with recurrence for neural machine translation']

In [94]:
scores = scorer.score('The quick brown fox jumps over the lazy dog','The quick brown dog jumps on the log.')

## Batch update

In [69]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [43]:
df = pd.read_csv('ML-Arxiv-Papers.csv')

In [52]:
title = []
abstract = []
len_title = []
len_abstract = []

In [53]:
for index, row in tqdm(df.iterrows()):
    t = row['title']
    a = row['abstract']
    title.append(t)
    abstract.append(a)


117592it [00:02, 41924.38it/s]


In [77]:
inputs = tokenizer.batch_encode_plus(abstract[:10], return_tensors="pt", truncation=True, padding=True).to(device)

In [78]:
with torch.no_grad():
    generated_ids = model.generate(**inputs,max_length=12, num_beams=5)
generated_texts = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

In [79]:
generated_texts

['learning from partial observations : an information - theoretic',
 'topology design for optimal consensus in sensor networks',
 'shortest path with monitoring under partial monitoring',
 'a neural network approach to ordinal regression',
 'parametric machine learning and monte carlo optimization : from monte',
 'how many nodes are needed to make a difference?',
 'inapproximability of correlation clustering',
 'joint universal learning for variable - rate lossy coding of',
 'feature selection for classification and regression using the hilbert - schmidt',
 'maximum weight matching using max - product belief propagation']

In [81]:
title[:10]

['Learning from compressed observations',
 'Sensor Networks with Random Links: Topology Design for Distributed\n  Consensus',
 'The on-line shortest path problem under partial monitoring',
 'A neural network approach to ordinal regression',
 'Parametric Learning and Monte Carlo Optimization',
 'Preconditioned Temporal Difference Learning',
 'A Note on the Inapproximability of Correlation Clustering',
 'Joint universal lossy coding and identification of stationary mixing\n  sources',
 'Supervised Feature Selection via Dependence Estimation',
 'Equivalence of LP Relaxation and Max-Product for Weighted Matching in\n  General Graphs']

In [87]:
scores = []
for i in range(len(generated_texts)):
    scores.append(scorer.score(generated_texts[i],title[i]))
    

In [88]:
for i in range(len(generated_texts)):
    print(scores[i])

{'rouge1': Score(precision=0.75, recall=0.2727272727272727, fmeasure=0.39999999999999997), 'rougeL': Score(precision=0.75, recall=0.2727272727272727, fmeasure=0.39999999999999997)}
{'rouge1': Score(precision=0.5454545454545454, recall=0.75, fmeasure=0.631578947368421), 'rougeL': Score(precision=0.36363636363636365, recall=0.5, fmeasure=0.4210526315789474)}
{'rouge1': Score(precision=0.5, recall=0.7142857142857143, fmeasure=0.588235294117647), 'rougeL': Score(precision=0.5, recall=0.7142857142857143, fmeasure=0.588235294117647)}
{'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}
{'rouge1': Score(precision=1.0, recall=0.6363636363636364, fmeasure=0.7777777777777778), 'rougeL': Score(precision=1.0, recall=0.6363636363636364, fmeasure=0.7777777777777778)}
{'rouge1': Score(precision=0.14285714285714285, recall=0.1, fmeasure=0.11764705882352941), 'rougeL': Score(precision=0.14285714285714285, recall=0.1, fmeasure=0.11764705882