In [1]:
import warnings
warnings.filterwarnings('ignore')

import torch
import pytorch_lightning as pl
from transformers import T5TokenizerFast 
from summarization.transfer import SummaryModule, Summary
import re 
import numpy as np
import pandas as pd

In [2]:
# Seed every random number generator this notebook relies on, so that a fresh
# "Restart Kernel -> Run All" reproduces the same results. Seeding NumPy alone
# does not affect PyTorch's generator, which the model uses.
SEED = 47
torch.manual_seed(SEED)
# Kept last on purpose: np.random.seed returns None, so the cell shows no output.
np.random.seed(SEED)

In [3]:
# Identifier handed to both SummaryModule and T5TokenizerFast.from_pretrained.
model_pretrained = 't5-base'

# File name of the saved state dict, and its location under models/.
model_save_name = 'summary_t5transformer_5_epochs_pretrained.pt'
path = f"models/{model_save_name}"

In [4]:
# Build the module from the base checkpoint name, then overwrite its weights
# with the state dict stored at `path`.
model = SummaryModule(model_pretrained)
# map_location='cpu' lets a checkpoint that was saved from a GPU run be loaded
# on a machine without CUDA; load_state_dict then copies the tensors into the
# model's own parameters.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
model.load_state_dict(torch.load(path, map_location='cpu'))
# Put the model in inference mode so dropout is disabled while summarizing.
# NOTE(review): assumes SummaryModule is an nn.Module (it exposes
# load_state_dict) -- confirm against summarization.transfer.
model.eval()
tokenizer = T5TokenizerFast.from_pretrained(model_pretrained)

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Read the CSV and report its size before any rows are removed.
data = pd.read_csv('data/wikihowAll.csv')
print(data.shape)
# Discard every row that has a missing value in any column.
data = data.dropna()

(215365, 3)


In [7]:
# Hold out 10% of the rows as the test split; the fixed random_state keeps the
# partition identical from run to run.
df_train, df_test = train_test_split(
    data,
    test_size=0.1,
    random_state=47,
)

In [8]:
def clean(text):
    """Return ``text`` with every comma and newline character removed.

    The characters are deleted outright (not replaced by a space), so the
    words on either side of a removed newline end up adjacent.
    """
    unwanted = re.compile('[,\n]')
    return unwanted.sub('', text)

In [9]:
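# Index labels of df_test rows noted for manual inspection
# (156552 and 83568 are displayed further down; 83352 is presumably another
# candidate -- TODO confirm):
# 83352
# 156552
# 83568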

In [10]:
# Sanity check: (rows, columns) of the held-out test split.
df_test.shape

(21430, 3)

In [11]:
# Run both text columns of the test split through clean() (commas and newlines
# removed), overwriting each column with its cleaned version.
for column in ('headline', 'text'):
    df_test[column] = df_test[column].apply(clean)

In [12]:
# Bundle the loaded model and tokenizer into one callable; show() invokes it
# as summarize(text) to obtain the summary it prints.
summarize = Summary(model, tokenizer)

In [13]:
def show(row):
    """Print a row's source text followed by the summary generated for it.

    ``row`` must support ``row['text']`` lookup (for example a single row of
    ``df_test``). Nothing is returned; both pieces are written to stdout.
    """
    # Summarize first, so nothing is printed if summarization raises.
    generated = summarize(row['text'])
    for label, body in (('Text :\n', row['text']), ('Summary: \n', generated)):
        print(label, body, '\n')
    # The reference headline is not displayed; to compare against it, add:
    #     print('Headline: \n', row['headline'])

In [14]:
# Display the row at position 2 (the third row) of the test split.
show(df_test.iloc[2])

Text :
  As with the basic partner dance the leader steps back on his or her right foot while the follower steps back on his or her left.Continue holding hands as you step back. The leader lets go of the follower’s right hand and uses his or her left hand to guide the turn. The leader gently pulls the follower forward on his or her right foot. The follower plants his or her right foot which he or she will turn on.Almost simultaneously the leader also lifts the follower’s right hand and arm to begin the turn. As the leader turns the follower he or she steps forward and to the side on his or her left foot and completes the turn bringing them to neutral.The count is 1 when they both step back 2 when the follower steps forward and the turn begins 3 when the leader steps forward and to the side to complete the turn and 4 when they come back together to neutral. 

Summary: 
 Step back on your right foot.Step forward on his or her left foot.Let the leader turn the follower. 



In [15]:
# Display the test row whose index label (not position) is 156552.
show(df_test.loc[156552])

Text :
  The amount will serve about 3 to 4 people.; Bring it to a boil and add the salt. Use a wooden spoon to stir it into the water. The polenta and water should form a paste after about two minutes. Keep stirring it with a spoon for about ten minutes. The polenta is ready when the texture becomes creamy.Don't overcook the polenta or it may become too mushy.Taste the polenta and decide how creamy or grainy you like it. Remove it from heat when it reaches the texture you like best.Serve the polenta with vegetables chili meat or fish - the possibilities are endless. 

Summary: 
 Prepare the polenta.Put the water in a saucepan.Cook the polenta for about two minutes.Remove it from the heat. 



In [16]:
# Display the test row whose index label (not position) is 83568.
show(df_test.loc[83568])

Text :
  A right triangle is a triangle that has one right (90 degree) angle. The side of the triangle opposite the right angle is always the longest side and it is called the hypotenuse. Right triangles show up frequently on math tests and fortunately there is a very handy formula for finding the length of unknown sides! The Pythagorean Theorem tells us that for any right triangle with sides of length a and b and hypotenuse of length c a2 + b2 = c2. Remember that the longest side of the triangle is called the hypotenuse. It will be opposite the right angle and must be labeled c. Label the two shorter sides a and b. It doesn't really matter which is which the math will turn out the same!  Remember that a2 + b2 = c2. Substitute the side lengths in for the corresponding letters in the equation.If for example you know that side a = 3 and side b = 4 then plug those values into the formula as follows: 32 + 42 = c2.If you know the length of side a = 6 and the hypotenuse c = 10 then you shoul