In [1]:
import re
import string
import numpy as np
import math
import nltk
import io

from nltk import word_tokenize, pos_tag
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from itertools import islice
from rouge import Rouge



In [2]:
# nltk.download('stopwords')
# nltk.download()  # uncomment these lines once they are not downloaded

In [3]:
def preprocessing(article):
    # lines = article.split(".")   # splits the whole article into lines
#     print(article)

    stemmer = PorterStemmer()
    stopwords_english = stopwords.words('english')
#     article = re.sub(r"\n\n([A-Za-z0-9])", ". ", article)
    article_preprocessed = []
    
    lines = sent_tokenize(article)
#     print(lines)
    for line in lines:
        line_preprocessed = []
#         line = re.sub(r'[\d]|','',line)
        line = re.sub(r'(\d)\.(\d)', r'\1\2', line)
        line = re.sub(r'[^\w\s\d]|\n',' ',line)
        words = word_tokenize(line)
#         print(words)
        for word in words:
            if (word not in stopwords_english and word not in string.punctuation):
                word_stemmed = stemmer.stem(word)  
                line_preprocessed.append(word_stemmed)
        line_preprocessed = " ".join(line_preprocessed)
        article_preprocessed.append(line_preprocessed)
    return article_preprocessed

In [4]:
with open("articles/original (" + str(1) +").txt", "r", encoding="utf8") as file:
    file.readline()
    contents = file.read()
    article_preprocessed = preprocessing(contents)
    
# print(article_preprocessed)
# print(len(article_preprocessed))

In [5]:
# print(contents)

In [6]:
# def convert_list_to_string(content):  # converts list of lists to list of strings
#     content_modified = []   # list of strings
#     for line in content:
#         line_as_string = " ".join(line)
#         content_modified.append(line_as_string)
# #     print(content_modified)
#     return content_modified

In [7]:
# print(convert_list_to_string(article_preprocessed))

In [8]:
### Feature 1

def calculate_TF_IDF(content):
#     print(content)
    vectorizer = TfidfVectorizer()   # Create a TfidfVectorizer object
    vectorizer.fit(content)   # Fit the vectorizer to the documents
    feature_names = vectorizer.get_feature_names()

#     print("Feature names:")   # Print the feature names
#     print(feature_names)   
#     print(len(feature_names))
    tfidf_matrix = vectorizer.transform(content)   # Transform the documents into a TF-IDF matrix
    np.set_printoptions(threshold=np.inf)
#     print("TF-IDF matrix:")   # Print the TF-IDF matrix
#     print(tfidf_matrix.toarray())
    return tfidf_matrix.toarray()
    

In [9]:
# article_modified = (article_preprocessed)
# calculate_TF_IDF(article_modified)

In [10]:
def calculate_each_sentence_score(tf_idf_matrix):
    score_dict = {}   # dictionary that stores keys as summation of tf_idf scores for all word in current line 
                      # and values of dictionary as index of the line in the article
    for index, line in enumerate(tf_idf_matrix):
#         print(line)
#         print('at')
#         print(index)
        score = np.sum(line)
        score_dict[score] = index
        
    sorted_keys = sorted(score_dict.items(), reverse=True)   # sort the dictionary by keys in the descending order
    score_dict_reversed = dict(sorted_keys)

#     print(score_dict_reversed)
    return score_dict_reversed

In [11]:
def summary_generation(article, score_dict_reversed, number_of_sentences):
    lines = article.split(".")   # list contains strings, where these strings are original lines
    # note: this should split on different characters; for example: \n\n and dot followed by number
    score_dict = dict(islice(score_dict_reversed.items(), number_of_sentences))   # select only certain number of lines
                                                                                  # to be displayed 
        
    sort_data = sorted(score_dict.items(), key=lambda x: x[1])   # sort the dictionary by value (index of lines) in
                                                                 # the ascending order to display lines ordered as the 
                                                                 # original article 
    score_dict_ascending = dict(sort_data)
#     print(score_dict_ascending)
#     print(score_dict_reversed)
    output_list = []
    for key in score_dict_ascending:
        output_list.append(lines[score_dict_ascending[key]])
            
    output_string = ".".join(output_list)
    output_string += "."
    return output_string

In [12]:
article_modified = (article_preprocessed)
# for sentence in article_modified:
#     print(sentence)
tf_idf_matrix = calculate_TF_IDF(article_modified)
dict_scores = calculate_each_sentence_score(tf_idf_matrix)
output = summary_generation(contents, dict_scores, 8)
print(output)


Quarterly profits at US media giant TimeWarner jumped 76% to $1. But its own internet business, AOL, had has mixed fortunes. It lost 464,000 subscribers in the fourth quarter profits were lower than in the preceding three quarters. It hopes to increase subscribers by offering the online service free to TimeWarner internet customers and will try to sign up AOL's existing customers for high-speed broadband. TimeWarner also has to restate 2000 and 2003 results following a probe by the US Securities Exchange Commission (SEC), which is close to concluding.

Time Warner's fourth quarter profits were slightly better than analysts' expectations. But its film division saw profits slump 27% to $284m, helped by box-office flops Alexander and Catwoman, a sharp contrast to year-earlier, when the third and final film in the Lord of the Rings trilogy boosted results.09bn.




In [13]:
### Feature 2

def sentence_length(content):
    max_length = 0
    for sentence in content:
        # print(sentence)
        if len(sentence) > max_length:
            max_length = len(sentence)
            
    sentence_length_feature = []
    for sentence in content:
        sentence_length_feature.append(len(sentence) / max_length)
    return sentence_length_feature

In [14]:
sentence_length(article_preprocessed)

[0.6357615894039735,
 0.6291390728476821,
 0.3443708609271523,
 0.4370860927152318,
 0.31788079470198677,
 0.2119205298013245,
 0.48344370860927155,
 0.6291390728476821,
 0.7947019867549668,
 0.6158940397350994,
 0.423841059602649,
 1.0,
 0.5165562913907285,
 0.7947019867549668,
 0.6423841059602649,
 0.46357615894039733,
 0.3509933774834437,
 0.5298013245033113,
 0.7483443708609272,
 0.2913907284768212]

In [15]:
### Feature 3

def numerical_data(content):
    numerical_data_feature = []
#     temp = []
    for sentence in content:
#         sentence_removed_dots_commas = sentence.replace(",", "").replace(".", "")
        numerical_data = re.findall(r'\d+', sentence)
        numerical_data_feature.append(len(numerical_data) / len(sentence.split()))
#         print(sentence)
#         print(len(numerical_data))
#         print(len(sentence.split()))
    return numerical_data_feature

In [16]:
print(numerical_data(article_preprocessed))

[0.25, 0.0, 0.3333333333333333, 0.0, 0.1111111111111111, 0.0, 0.16666666666666666, 0.06666666666666667, 0.0, 0.13333333333333333, 0.0, 0.07692307692307693, 0.35714285714285715, 0.0, 0.125, 0.0, 0.1, 0.07142857142857142, 0.0, 0.0]


In [17]:
def generate_Y_labels(original, summarized):
    Y_list = []
    original_sentences = sent_tokenize(original)
    
    original_sentences[0] = original_sentences[0][1:] # to remove the \n
    summarized_sentences = sent_tokenize(summarized)
    
    
#     for i in range (0, len(original_sentences)):
#         original_sentences[i] = re.sub(r"\\'", "'", original_sentences[i])
        
    
#     for original_sentence in original_sentences:
#         print(original_sentence)
        
#     original_list = re.split(r"\n\n|\.(?!\d)", original)
    
#     original_list_removed_empty = [x for x in original_list if x]   # removes empty elements
            
#     original_list_no_quotation = [x.replace('"', '') for x in original_list_removed_empty]
#     original_list_no_quotation = [x.replace("'", '') for x in original_list_no_quotation]
#     original_list_no_first_space = [x.lstrip() for x in original_list_no_quotation]
#     original_list_no_first_space = [x for x in original_list_no_first_space if x]
    
#     summarized_list = re.split(r"\.(?!\d)", summarized)
    
#     summarized_list_removed_empty = [x for x in summarized_list if x]   # removes empty elements
            
#     summarized_list_no_quotation = [x.replace('"', '') for x in summarized_list_removed_empty]
#     summarized_list_no_quotation = [x.replace("'", '') for x in summarized_list_no_quotation]
#     summarized_list_no_first_space = [x.lstrip() for x in summarized_list_no_quotation]
#     summarized_list_no_first_space = [x for x in summarized_list_no_first_space if x]
    
    
#     print(original_list_no_first_space)
#     print(summarized_list_no_first_space)
    
#     print(len(original_list_no_first_space))
#     print(len(summarized_list_no_first_space))
    
    for original_sentence in original_sentences:
        added = 0
        for summarized_sentence in summarized_sentences:
            if original_sentence in summarized_sentence:
                Y_list.append(1)
                added = 1
                break
        if added == 0:
            Y_list.append(0)
    
    return Y_list, original_sentences

In [18]:
with open("articles/original (" + str(301) +").txt", "r", encoding="utf8") as file:
    file.readline()
    original = file.read()
    article_preprocessed = preprocessing(original)

with open("articles/summarized (" + str(301) +").txt", "r", encoding="utf8") as file:
    summarized = file.read()

Y = generate_Y_labels(original, summarized)
# print(Y)

In [19]:
def generate_X_labels(preprocessed_artcile):
#     print(preprocessed_artcile)
    sentence_length_feature = sentence_length(preprocessed_artcile)
#     print(len(sentence_length_feature))
#     article_modified = convert_list_to_string(preprocessed_artcile)
    numerical_data_feature = numerical_data(preprocessed_artcile)
#     print(len(article_modified))
    tf_idf_matrix = calculate_TF_IDF(preprocessed_artcile)
    tf_idf_score = []
    for index, line in enumerate(tf_idf_matrix):
#         print(line)
#         print('at')
#         print(index)
        tf_idf_score.append(np.sum(line))
    max_score = max(tf_idf_score)
    tf_idf_score = tf_idf_score/max_score
    
    matrix = np.column_stack((tf_idf_score, sentence_length_feature, numerical_data_feature))
#     matrix = np.column_stack((tf_idf_score, sentence_length_feature))
    
#     print(matrix)
#     matrix = np.array(tf_idf_score).reshape(len(tf_idf_score), 1)
#     print(len(matrix))
    # matrix = matrix[:len(matrix)-1]
    return matrix

In [20]:
X = generate_X_labels(article_preprocessed)
print(X)
print(len(X))

[[0.85459232 0.87378641 0.        ]
 [0.64306346 0.34951456 0.28571429]
 [0.9092098  0.86407767 0.        ]
 [0.79707349 0.66019417 0.        ]
 [0.86838014 0.86407767 0.07692308]
 [0.87598802 0.77669903 0.        ]
 [0.71890163 0.5631068  0.        ]
 [1.         0.86407767 0.05882353]
 [0.72100819 0.5631068  0.        ]
 [0.86809826 0.88349515 0.1875    ]
 [0.9092098  0.89320388 0.        ]
 [0.73098164 0.57281553 0.        ]
 [0.47543868 0.2038835  0.        ]
 [0.9957197  1.         0.        ]]
14


In [21]:
# m = len(Y)  # training set size
# m2 = len(X)
# print(m)
# print(m2)
nn_input_dim = 3 # input layer dimensionality (we have two input features)
nn_output_dim = 1  # output layer dimensionality (we have one output)

# Gradient descent parameters
alpha = 0.1  # learning rate for gradient descent
# print(Y)
# print(X)

In [22]:
def sigmoid(x):
    # TODO 1: Compute the sigmoid function at the given x (~1 line)
    # For example: sigmoid(2) should compute the value of sigmoid function at x = 2.
    # Hint: Use np.exp instead of math.exp to allow for vectorization.
    #----------------------------------------------------------------------------------------------
    sig = (1/(1+np.exp(-x)))
    #----------------------------------------------------------------------------------------------
    
    return sig


In [23]:
X_matrix = []
X = []
Y = []
sentences = []
for i in range (1, 301):
    with open("articles/original (" + str(i) +").txt", "r", encoding="utf8") as file:
        file.readline() # to remove the title
        original_test = file.read()
        article_preprocessed_test = preprocessing(original_test)
    
    with open("articles/summarized (" + str(i) +").txt", "r", encoding="utf8") as file:
        summarized_text = file.read()
    # print(summarized_text)
    
    X_i = generate_X_labels(article_preprocessed_test)
    Y_i, original_list_no_first_space = generate_Y_labels(original_test, summarized_text)
    if(len(X_i) != len(Y_i)):
        print('Error! features and labels are not equal in length')
    Y.extend(Y_i)
    X_matrix.extend(X_i)
    sentences.extend(original_list_no_first_space)
    # print(i)

# for article in X_matrix:
#     for x in article:
#         X.append(x)

for x in X_matrix:
    X.append(x.tolist())
    
X = np.matrix(X)
# print(X)
# print(Y)


    
# for i in range (0, len(X)):
#     print("tf-idf: %f and true value: %f at sentence\n %s\n" % (X[i], Y[i], sentences[i]))

# m = len(X)
m = 500
print(len(X))
print(len(Y))
# print(type(x))


# #     predicton = predict(model, X_test)
# #     print(predicton)

4764
4764


In [24]:
def build_model(nn_hdim, num_passes=20000, print_loss=False):
    
    np.random.seed(0)
    W1 = np.random.randn(nn_hdim, nn_input_dim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((nn_hdim, 1))
    W2 = np.random.randn(nn_output_dim, nn_hdim) / np.sqrt(nn_hdim)
    b2 = np.zeros((nn_output_dim, 1))

    model = {}

    for i in range(0, num_passes):
        DW1 = 0
        DW2 = 0
        Db1 = 0
        Db2 = 0
        cost = 0

        for j in range(0, m):
            a0 = X[j, :].reshape(-1, 1)  # Every training example is a column vector.
            y = Y[j]
            
            z1 = np.dot(W1 , a0 )+ b1
            a1 = np.tanh(z1)
            z2 = np.dot(W2 , a1) + b2
            a2 = sigmoid(z2)
            
#             if (i == num_passes -1 ):
#                 print('True value: %f, got: %f'% (y, a2))

            cost_j = -1 * ((np.log(a2) * y + (1-y)* np.log(1-a2)))

            da2 =  ( -y/a2  + (1-y)/(1-a2) )
            dz2 =  da2 * a2 * ( 1 - a2)
            dW2 = np.dot(dz2 , a1.T)
            db2 = dz2

            da1 =  np.dot(dz2,W2).T
            dz1 = np.multiply(da1 , 1 - np.square(a1) )
            dW1 = np.dot(dz1 , a0.T )
            db1 = dz1

            DW1 += dW1
            DW2 += dW2
            Db2 += db2
            Db1 += db1
            cost += cost_j
        
        DW1 /= m
        DW2 /= m
        Db1 /= m
        Db2 /= m
        cost /= m

        W1 -= alpha * DW1
        b1 -= alpha * Db1
        W2 -= alpha * DW2
        b2 -= alpha * Db2

        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i: %f" % (i, cost))

    return model

In [25]:
# Helper function to predict an output (0 or 1)
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    a0 = x.T
    
    # TODO 6 (aka TODO 2): Apply forward propagation on every test example a0 (a column vector 2x1) with its
    #  corresponding label y. It is required to compute z1, a1, z2, and a2  (SAME AS TODO2).
    # -----------------------------------------------------------------------------------------------
    z1 = np.dot(W1 , a0) + b1
    a1 = np.tanh(z1)
    z2 = np.dot(W2 , a1) + b2
    a2 = sigmoid(z2)
    # ------------------------------------------------------------------------------------------------
    # Applying a threshold of 0.5 (i.e. predictions greater than 0.5 are mapped to 1, and 0 otherwise)
#     prediction = np.round(a2)
    prediction = a2
    
    return prediction

In [26]:
model = build_model(nn_hdim=8, num_passes=10001, print_loss=True)

Loss after iteration 0: 0.802750
Loss after iteration 1000: 0.669059
Loss after iteration 2000: 0.668345
Loss after iteration 3000: 0.668035
Loss after iteration 4000: 0.667802
Loss after iteration 5000: 0.667587
Loss after iteration 6000: 0.667381
Loss after iteration 7000: 0.667179
Loss after iteration 8000: 0.666977
Loss after iteration 9000: 0.666775
Loss after iteration 10000: 0.666570


In [27]:
def test(file_number, compression_ratio):
    with open("articles/original (" + str(file_number) +").txt", "r", encoding="utf8") as file:
        file.readline()
        original_test = file.read()
        article_preprocessed_test = preprocessing(original_test)
        # print(original_test)

    with open("articles/summarized (" + str(file_number) +").txt", "r", encoding="utf8") as file:
        summarized_text = file.read()
        # print(summarized_text)
    
    X_test = generate_X_labels(article_preprocessed_test)
    predicton = predict(model, X_test)
    Y_test, original_sentences = generate_Y_labels(original_test, summarized_text)
    
    highest = np.argsort(predicton[0]) [::-1]
        
    output_sentences = []
    output_indices = []
    
    num_sentences_summarized = math.ceil(compression_ratio * len(original_sentences))
#     print(num_sentences_summarized)
    for i in range (0, num_sentences_summarized):
        output_sentences.append(original_sentences[highest[i]])
        output_indices.append(highest[i])
        
    output_sentences = ''.join(output_sentences)
    
#     correct = 0
#     missed = 0
    
#     Y_true_indices = [i for i, x in enumerate(Y_test) if x == 1]
# #     print(Y_test)
#     for true_index in Y_true_indices:
#         if true_index in output_indices:
#             correct += 1
#         else:
#             missed += 1
            
# #     missed = num_sentences_summarized - correct
#     wrong = num_sentences_summarized - correct
    
#     precision_nn = correct / (correct + wrong)
#     recall_nn = correct / (correct + missed)
    
#     print('correct: %f , wrong: %f , missed: %f' % (correct, wrong, missed))
#     print('Precision for document: %i is : %f' % (file_number, precision_nn))
#     print('Recall for document: %i is : %f \n' % (file_number, recall_nn))   
    
    rouge = Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
    scores_nn = rouge.get_scores(output_sentences, summarized_text)
    print('article number: %d' % (file_number))
    rouge_1_nn = scores_nn['rouge-1']['f']
    rouge_2_nn = scores_nn['rouge-2']['f']
    rouge_l_nn = scores_nn['rouge-l']['f']
    print('nn accuracy')
    print('Rouge 1 score is: %f' % (rouge_1_nn))
    print('Rouge 2 score is: %f' % (rouge_2_nn))
    print('Rouge l score is: %f' % (rouge_l_nn))
    
    
    
    ### classified using TF_idf score
    
#     tf_idf_matrix = calculate_TF_IDF(article_preprocessed_test)
#     dict_scores = calculate_each_sentence_score(tf_idf_matrix)
#     output = summary_generation(original_test, dict_scores, num_sentences_summarized)

    output_tf_idf = extractive_summary(original_test, num_sentences_summarized)
    
    scores_tf_idf = rouge.get_scores(output_tf_idf, summarized_text)
    
    rouge_1_tf_idf = scores_tf_idf['rouge-1']['f']
    rouge_2_tf_idf = scores_tf_idf['rouge-2']['f']
    rouge_l_tf_idf = scores_tf_idf['rouge-l']['f']
    print('t-idf accuracy')
    print('Rouge 1 score is: %f' % (rouge_1_tf_idf))
    print('Rouge 2 score is: %f' % (rouge_2_tf_idf))
    print('Rouge l score is: %f' % (rouge_l_tf_idf))
    
#     tf_idf_score = calculate_each_sentence_score(calculate_TF_IDF(article_preprocessed_test))
#     score_dict = dict(islice(tf_idf_score.items(), num_sentences_summarized))
#     score_list = list(score_dict.values())
    
# #     print(score_list)
#     correct_tf_idf = 0
#     missed_tf_idf = 0
    
#     for true_index in Y_true_indices:
#         if true_index in score_list:
#             correct_tf_idf += 1
#         else:
#             missed_tf_idf += 1
            
#     wrong_tf_idf = num_sentences_summarized - correct_tf_idf
    
#     precision_tf_idf = correct_tf_idf / (correct_tf_idf + wrong_tf_idf)
#     recall_tf_idf = correct_tf_idf / (correct_tf_idf + missed_tf_idf)
    
    
    
#     print('classified using tf-idf scores')
#     print('correct: %f , wrong: %f , missed: %f' % (correct_tf_idf, wrong_tf_idf, missed_tf_idf))
#     print('Precision for document: %i is : %f' % (file_number, precision_tf_idf))
#     print('Recall for document: %i is : %f \n' % (file_number, recall_tf_idf)) 
    
    return rouge_1_nn, rouge_2_nn, rouge_l_nn, rouge_1_tf_idf, rouge_2_tf_idf, rouge_l_tf_idf

In [28]:
# highest = np.argsort(predicton[0]) [::-1]
# print(highest)
# lines = original_test.split('.')
# output = []
# for i in range (0, 6):
#     output.append(lines[highest[i]])
# print(output)
# test(1, 0.35)
# test(16, 0.35)
# test(17, 0.35)
# test(8, 0.35)

In [29]:
def extractive_summary(text, num_sentences):
    # Preprocess the text
    text = text.lower()
    text = ''.join(c for c in text if c not in '1234567890')
    sentences = sent_tokenize(text)
    words = [word_tokenize(sent) for sent in sentences]
    words_without_stopwords = [[word for word in sent if word not in stopwords.words('english')] for sent in words]
    
    # Calculate TF-IDF scores
    flat_words = [word for sent in words_without_stopwords for word in sent]
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(flat_words)
    feature_names = tfidf_vectorizer.get_feature_names()
    word_scores = {}
    for i, feature in enumerate(feature_names):
        word_scores[feature] = tfidf_matrix[:, i].sum()
    
    # Calculate sentence scores
    sentence_scores = []
    for sent in words_without_stopwords:
        score = 0
        for word in sent:
            score += word_scores.get(word, 0)
        sentence_scores.append(score)
    
    # Select top N sentences with highest scores
    top_sentences_idx = sorted(range(len(sentence_scores)), key=lambda i: sentence_scores[i], reverse=True)[:num_sentences]
    summary = [sentences[i] for i in top_sentences_idx]
    return ' '.join(summary)


In [30]:
# precision_nn = []
# recall_nn = []
# precision_tf_idf = []
# recall_tf_idf = []

rouge_1_list_nn = []
rouge_2_list_nn = []
rouge_l_list_nn = []

rouge_1_list_tf_idf = []
rouge_2_list_tf_idf = []
rouge_l_list_tf_idf = []

for i in range(300, 330):
    rouge_1_nn, rouge_2_nn, rouge_l_nn, rouge_1_tf_idf, rouge_2_tf_idf, rouge_l_tf_idf = test(i, 0.35)
    
    rouge_1_list_nn.append(rouge_1_nn)
    rouge_2_list_nn.append(rouge_2_nn)
    rouge_l_list_nn.append(rouge_l_nn)
    
    rouge_1_list_tf_idf.append(rouge_1_tf_idf)
    rouge_2_list_tf_idf.append(rouge_2_tf_idf)
    rouge_l_list_tf_idf.append(rouge_l_tf_idf)
    
print('Using nn')
print('Average Rouge 1 score is: %f' % (np.average(rouge_1_list_nn)))
print('Average Rouge 2 score is: %f' % (np.average(rouge_2_list_nn)))
print('Average Rouge l score is: %f' % (np.average(rouge_l_list_nn)))

print('Using tf_idf only')
print('Average Rouge 1 score is: %f' % (np.average(rouge_1_list_tf_idf)))
print('Average Rouge 2 score is: %f' % (np.average(rouge_2_list_tf_idf)))
print('Average Rouge l score is: %f' % (np.average(rouge_l_list_tf_idf)))

# print('Neural network accuracy: ')
# print('Average precision score is: %f' % (np.average(precision_nn)))
# print('Average recall score is: %f' % (np.average(recall_nn)))
      
# print('Classical approach accuracy using tf-idf: ')
# print('Average precision score is: %f' % (np.average(precision_tf_idf)))
# print('Average recall score is: %f' % (np.average(recall_tf_idf)))

article number: 300
nn accuracy
Rouge 1 score is: 0.595041
Rouge 2 score is: 0.466667
Rouge l score is: 0.396694
t-idf accuracy
Rouge 1 score is: 0.481013
Rouge 2 score is: 0.331915
Rouge l score is: 0.362869
article number: 301
nn accuracy
Rouge 1 score is: 0.608696
Rouge 2 score is: 0.500000
Rouge l score is: 0.504348
t-idf accuracy
Rouge 1 score is: 0.530973
Rouge 2 score is: 0.366071
Rouge l score is: 0.300885
article number: 302
nn accuracy
Rouge 1 score is: 0.680851
Rouge 2 score is: 0.557940
Rouge l score is: 0.365957
t-idf accuracy
Rouge 1 score is: 0.625551
Rouge 2 score is: 0.488889
Rouge l score is: 0.361233
article number: 303
nn accuracy
Rouge 1 score is: 0.608696
Rouge 2 score is: 0.517073
Rouge l score is: 0.367150
t-idf accuracy
Rouge 1 score is: 0.475728
Rouge 2 score is: 0.313725
Rouge l score is: 0.388350
article number: 304
nn accuracy
Rouge 1 score is: 0.383673
Rouge 2 score is: 0.123457
Rouge l score is: 0.220408
t-idf accuracy
Rouge 1 score is: 0.517544
Rouge 2 s