In [1]:
seed = 2020

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchtext import data
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator
from torchtext.vocab import Vectors
import time
import heapq
import math
import random
from tqdm import tqdm
from torchtext.vocab import GloVe
from gensim.models.word2vec import Word2Vec
from gensim.corpora.dictionary import Dictionary
from nltk import word_tokenize

In [3]:
import pandas as pd

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(0)

In [5]:
tokenize = word_tokenize
Article = data.Field(sequential=True, tokenize=tokenize, lower=True, init_token='<bos>', eos_token='<eos>',
                  pad_token='<pad>', unk_token='<oov>')
Title = data.Field(sequential=True, tokenize=tokenize, lower=True, eos_token='<eos>', 
                  pad_token='<pad>', unk_token='<oov>')
Len = data.Field(sequential=False, use_vocab=False)
ID = data.Field(sequential=False, use_vocab=False)

In [6]:
pmi_dct = dict()

In [7]:
train_path = 'train_data.csv'
valid_path = 'valid_data.csv'
test_path = 'test_data.csv'


class MyDataset(data.Dataset):
    def __init__(self, csv_path, article_field, title_field, cat, **kwargs):

        csv_data = pd.read_csv(csv_path)
        fields = [("id", ID), ("article", article_field), ("title", title_field), ("art_len", Len), ("title_len", Len)]
        examples = []
    
        
        for id, text, label in tqdm(zip(csv_data.index, csv_data['article'], csv_data['title'])):
            examples.append(data.Example.fromlist([self.getID(id, cat), text, label, len(word_tokenize(text))+2, len(word_tokenize(label))+1], fields))
        super(MyDataset, self).__init__(examples, fields)

    def getID(self, id, cat):
        if cat == 'train':
            return id
        elif cat == 'valid':
            return id + 100000
        elif cat == 'test':
            return id + 110000
        
    def shuffle(self, text):
        text = np.random.permutation(text.strip().split())
        return ' '.join(text)


# class MyDataset(data.Dataset):
#     def __init__(self, csv_data, article_field, title_field, cat, **kwargs):
#         if cat == 'train':
#             csv_data = csv_data[:200000]
#         elif cat == 'valid':
#             csv_data = csv_data[200000:220000]
#         elif cat == 'test':
#             csv_data = csv_data[220000:]
        
#         fields = [("id", ID), ("article", article_field), ("title", title_field), ("art_len", Len), ("title_len", Len)]
#         examples = []
    
        
#         for id, text, label in tqdm(zip(csv_data.index, csv_data['article'], csv_data['title'])):
#             examples.append(data.Example.fromlist([id, text, label, len(word_tokenize(text))+2, len(word_tokenize(label))+1], fields))
#         super(MyDataset, self).__init__(examples, fields)

        
        
#     def shuffle(self, text):
#         text = np.random.permutation(text.strip().split())
#         return ' '.join(text)

In [8]:
train = MyDataset(train_path, article_field=Article, title_field=Title, cat='train')
valid = MyDataset(valid_path, article_field=Article, title_field=Title, cat='valid')
test = MyDataset(test_path, article_field=Article, title_field=Title, cat='test')

100000it [01:29, 1112.46it/s]
10000it [00:08, 1131.51it/s]
10000it [00:09, 1082.34it/s]


In [9]:
from collections import defaultdict
vectors = Vectors(name='glove-wiki-gigaword-300.txt')
Article.build_vocab(train, valid, test, vectors=vectors)
default = Article.vocab.stoi['<oov>']
Article.vocab.stoi.default_factory = lambda: default
Title.vocab = Article.vocab

In [10]:
from torchtext.data import Iterator, BucketIterator
train_iter, val_iter = BucketIterator.splits(
        (train, valid), 
        batch_size=16, 
        device=device, 
        sort_key=lambda x: len(x.article),
        sort_within_batch=True,
        repeat=False 
)


## GCN

In [11]:
import math

import torch

from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, in_features, out_features, device, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features)).to(device)
        self.device = device
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features)).to(device)
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj):
        support = torch.mm(input, self.weight)
        output = torch.mm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'


In [12]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import math
import numpy as np


class GCN(nn.Module):
    def __init__(self, input_dim, nfeat, nhid, nout, dropout, device):
        super(GCN, self).__init__()
        self.input_dim = input_dim
        self.nfeat = nfeat
        self.gc1 = GraphConvolution(nfeat, nhid, device)
        self.gc2 = GraphConvolution(nhid, nout, device)
        self.device = device
        self.dropout = dropout

    def forward(self, input_seq, seq_embed, input_length, id=None):
        pad_len = len(input_seq)
        input_seq = input_seq[:input_length]
        adj, uni_words = self.load_data(input_seq, id)
        uni2idx = [list(input_seq).index(ele) for ele in uni_words]
        seq2idx = [list(uni_words).index(ele) for ele in input_seq]
        x = seq_embed[uni2idx]
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, adj)
        x = torch.tanh(x) # [node_num, embded_dim]
        x = x[seq2idx]  # [seq_len, embed_dim]
        if pad_len > input_length:
            diff = pad_len - input_length
            pad_embed = torch.tensor(seq_embed[-1,:].unsqueeze(0).expand(diff, self.nfeat), device=self.device)
            x = torch.cat((x,pad_embed),0)   # [pad_len, emb_dim]
        return x
        
    def unranked_unique(self, nparray):
        n_unique = len(np.unique(nparray))
        ranked_unique = np.zeros([n_unique])
        i = 0
        for x in nparray:
            if x not in ranked_unique:
                ranked_unique[i] = x
                i += 1
        return ranked_unique

    def load_data(self, word_seq, id):
        if id is not None:
            id = int(id)
        word_seq = torch.tensor(word_seq, device=self.device)
        words = torch.unique(word_seq, sorted=False)
        word_num = len(words)
        if id is None:
            adj = self.get_pmi(word_seq, words, 3, self.device)
        elif id in pmi_dct.keys():
            adj = pmi_dct[id]
        else:
            adj = self.get_pmi(word_seq, words, 3, self.device)
            adj = torch.tensor(self.normalize(adj), dtype=torch.float32, device=self.device)
            pmi_dct[id] = adj
        words = torch.tensor(words, dtype=torch.int64, device=self.device)
        return adj, words

    def get_pmi(self, word_seq, uni_words, window_size, device):
        word_len = len(word_seq)
        word_num = len(uni_words)
        seq2idx = {int(ele): list(uni_words).index(ele) for ele in uni_words}
        window_num = word_len - window_size + 1
        if window_size > word_len:
            window_size = word_len
        win_num_matrix = torch.zeros((word_num, word_num), device=device)

        for i in range(window_num):
            words = list(torch.unique(word_seq[i:i+window_size], sorted=False))
            indicies = [seq2idx[int(ele)] for ele in words]
            for ele in indicies:
                win_num_matrix[ele,indicies] += 1

        pos_matrix = win_num_matrix / window_num
        diag = torch.mul(pos_matrix, torch.eye(word_num, device=device)).inverse()
        pos_matrix = torch.mm(diag, pos_matrix)
        pos_matrix = torch.mm(pos_matrix, diag)
        pmi_matrix = torch.clamp(torch.log2(pos_matrix), min=0)
        diag = torch.diag(pmi_matrix)
        diag = 1 - diag
        pmi_matrix = pmi_matrix + torch.diag_embed(diag)
        return pmi_matrix

    def normalize(self, mx):
        """Symmetrically normalize adjacency matrix"""
        rowsum = mx.sum(1)
        r_inv = torch.pow(rowsum, -0.5).flatten()
        r_inv = torch.clamp(r_inv, min=0)
        r_mat_inv = torch.diag(r_inv)
        mx = torch.mm(r_mat_inv, mx)
        mx = torch.mm(mx, r_mat_inv)
        return mx


## GAT

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GAT(nn.Module):
    def __init__(self, nfeat, nhid, nout, dropout, alpha, nheads, device):
        """Dense version of GAT."""
        super(GAT, self).__init__()
        self.dropout = dropout
        self.device = device
        self.nfeat = nfeat
        self.nhid = nhid
        self.nout = nout
        self.nheads = nheads
        self.attentions = [GraphAttentionLayer(nfeat, nhid, dropout=dropout, alpha=alpha, device=device, concat=True) for _ in range(nheads)]
        for i, attention in enumerate(self.attentions):
            self.add_module('attention_{}'.format(i), attention)

        self.out_att = GraphAttentionLayer(nhid * nheads, nout, dropout=dropout, alpha=alpha, device=device, concat=False)

#     def forward(self, x, adj):
#         x = F.dropout(x, self.dropout, training=self.training)
#         x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
#         x = F.dropout(x, self.dropout, training=self.training)
#         x = F.elu(self.out_att(x, adj))
#         return F.log_softmax(x, dim=1)

    def forward(self, input_seq, seq_embed, input_length, id=None):
        pad_len = len(input_seq)
        input_seq = input_seq[:input_length]
        adj, uni_words = self.load_data(input_seq, id)
        uni2idx = [list(input_seq).index(ele) for ele in uni_words]
        seq2idx = [list(uni_words).index(ele) for ele in input_seq]
        x = seq_embed[uni2idx]
        # x: [node_num, feature_dim]
        x = F.dropout(x, self.dropout, training=self.training)
        x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
        x = F.elu(self.out_att(x, adj))
        x = torch.tanh(x) # [node_num, embded_dim]
        x = x[seq2idx]  # [seq_len, embed_dim]
        if pad_len > input_length:
            diff = pad_len - input_length
            pad_embed = torch.tensor(seq_embed[-1,:].unsqueeze(0).expand(diff, self.nfeat), device=self.device)
            x = torch.cat((x,pad_embed),0)   # [pad_len, emb_dim]
        return x

        
    def unranked_unique(self, nparray):
        n_unique = len(np.unique(nparray))
        ranked_unique = np.zeros([n_unique])
        i = 0
        for x in nparray:
            if x not in ranked_unique:
                ranked_unique[i] = x
                i += 1
        return ranked_unique

    def load_data(self, word_seq, id):
        if id is not None:
            id = int(id)
        word_seq = torch.tensor(word_seq, device=self.device)
        words = torch.unique(word_seq, sorted=False)
        word_num = len(words)
        if id is None:
            adj = self.get_pmi(word_seq, words, 3, self.device)
        elif id in pmi_dct.keys():
            adj = pmi_dct[id]
        else:
            adj = self.get_pmi(word_seq, words, 3, self.device)
            adj = torch.tensor(self.normalize(adj), dtype=torch.float32, device=self.device)
            pmi_dct[id] = adj
        words = torch.tensor(words, dtype=torch.int64, device=self.device)
        return adj, words

    def get_pmi(self, word_seq, uni_words, window_size, device):
        word_len = len(word_seq)
        word_num = len(uni_words)
        seq2idx = {int(ele): list(uni_words).index(ele) for ele in uni_words}
        window_num = word_len - window_size + 1
        if window_size > word_len:
            window_size = word_len
        win_num_matrix = torch.zeros((word_num, word_num), device=device)

        for i in range(window_num):
            words = list(torch.unique(word_seq[i:i+window_size], sorted=False))
            indicies = [seq2idx[int(ele)] for ele in words]
            for ele in indicies:
                win_num_matrix[ele,indicies] += 1

        pos_matrix = win_num_matrix / window_num
        diag = torch.mul(pos_matrix, torch.eye(word_num, device=device)).inverse()
        pos_matrix = torch.mm(diag, pos_matrix)
        pos_matrix = torch.mm(pos_matrix, diag)
        pmi_matrix = torch.clamp(torch.log2(pos_matrix), min=0)
        diag = torch.diag(pmi_matrix)
        diag = 1 - diag
        pmi_matrix = pmi_matrix + torch.diag_embed(diag)
        return pmi_matrix

    def normalize(self, mx):
        """Symmetrically normalize adjacency matrix"""
        rowsum = mx.sum(1)
        r_inv = torch.pow(rowsum, -0.5).flatten()
        r_inv = torch.clamp(r_inv, min=0)
        r_mat_inv = torch.diag(r_inv)
        mx = torch.mm(r_mat_inv, mx)
        mx = torch.mm(mx, r_mat_inv)
        return mx


In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class GraphAttentionLayer(nn.Module):
    """
    Simple GAT layer, similar to https://arxiv.org/abs/1710.10903
    """
    def __init__(self, in_features, out_features, dropout, alpha, device, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.device =device
        self.out_features = out_features
        self.alpha = alpha         # 学习因子
        self.concat = concat
        # 建立就是0的矩阵，大小为(输入维度，输出维度)
        self.W = nn.Parameter(torch.empty(size=(in_features, out_features))).to(device)
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        # self.a.shape [16, 1], 因为GAT的注意力机制打分用的是拼接方法，所以这里的a维度是[2*out_features, 1]
        self.a = nn.Parameter(torch.empty(size=(2*out_features, 1))).to(device)
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, h, adj):
        # adj: 邻接矩阵， 维度： N*N
        # h: [N, in_features]
        Wh = torch.mm(h, self.W) # h.shape: (N, in_features), Wh.shape: (N, out_features)
        a_input = self._prepare_attentional_mechanism_input(Wh)  # a_input: [N, N, 2*out_features]
        # self.a: [2*out_features, 1]
        # e: [N, N]
        e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))
        # 这里采用了Local Attention 机制，即只对每个节点的一节邻节点进行聚合。所以对于一个邻接矩阵，只对值为1的位置赋值。
        zero_vec = -9e15*torch.ones_like(e)
        # attention 是一个N*N的矩阵，
#         print(adj.shape, e.shape, zero_vec.shape)
        
        attention = torch.where(adj > 0, adj, zero_vec)

        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, Wh)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

    def _prepare_attentional_mechanism_input(self, Wh):
        # Wh: [N, out_features]

        N = Wh.size()[0] # number of nodes

        # Below, two matrices are created that contain embeddings in their rows in different orders.
        # (e stands for embedding)
        # These are the rows of the first matrix (Wh_repeated_in_chunks): 
        # e1, e1, ..., e1,            e2, e2, ..., e2,            ..., eN, eN, ..., eN
        # '-------------' -> N times  '-------------' -> N times       '-------------' -> N times
        # 
        # These are the rows of the second matrix (Wh_repeated_alternating): 
        # e1, e2, ..., eN, e1, e2, ..., eN, ..., e1, e2, ..., eN 
        # '----------------------------------------------------' -> N times
        # 
        
        Wh_repeated_in_chunks = Wh.repeat_interleave(N, dim=0)  # Wh_repeated_in_chunks: []
        Wh_repeated_alternating = Wh.repeat(N, 1)
        # Wh_repeated_in_chunks.shape == Wh_repeated_alternating.shape == (N * N, out_features)

        # The all_combination_matrix, created below, will look like this (|| denotes concatenation):
        # e1 || e1
        # e1 || e2
        # e1 || e3
        # ...
        # e1 || eN
        # e2 || e1
        # e2 || e2
        # e2 || e3
        # ...
        # e2 || eN
        # ...
        # eN || e1
        # eN || e2
        # eN || e3
        # ...
        # eN || eN

        all_combinations_matrix = torch.cat([Wh_repeated_in_chunks, Wh_repeated_alternating], dim=1)
        # all_combinations_matrix.shape == (N * N, 2 * out_features)

        return all_combinations_matrix.view(N, N, 2 * self.out_features)

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'

In [15]:
class Encoder(nn.Module):
    def __init__(self, gnn, input_dim, emb_dim, hid_dim, n_layers, device, dropout=0.5, bidirectional=True):
        super(Encoder, self).__init__()
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.GNN = gnn
        self.embedding = nn.Embedding(input_dim, emb_dim)
        weight_matrix = Article.vocab.vectors
        self.embedding.weight.data.copy_(weight_matrix)
        self.device = device
        self.gru = nn.GRU(emb_dim*2, hid_dim, n_layers, dropout=dropout, bidirectional=bidirectional)
        
    def forward(self, input_seqs, input_lengths, ids, hidden):
        # input_seqs = [seq_len, batch]
        embedded = self.embedding(input_seqs)
        # embedded = [pad_len, batch, embed_dim]
        gnn_outputs = torch.zeros(embedded.shape, device=self.device)
        for i in range(embedded.shape[1]):
            gnn_input = embedded[:,i,:]
            if ids is not None:
                gnn_output = self.GNN(input_seqs[:,i], gnn_input, input_lengths[i], ids[i]) # pad_len, emb_dim
            else:
                gnn_output = self.GNN(input_seqs[:,i], gnn_input, input_lengths[i])
            gnn_outputs[:,i,:] = gnn_output
        inputs = torch.cat((embedded, gnn_outputs),-1)
        packed = torch.nn.utils.rnn.pack_padded_sequence(inputs, input_lengths, enforce_sorted=False)
        
        outputs, hidden = self.gru(packed, hidden)        
        outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # outputs = [seq_len, batch, hid_dim * n directions]
        # output_lengths = [batch]
        return outputs, hidden


In [16]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout=0.5, bidirectional=True):
        super(Decoder, self).__init__()
        
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(output_dim, emb_dim)
        weight_matrix = Article.vocab.vectors
        self.embedding.weight.data.copy_(weight_matrix)
        self.gru = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout, bidirectional=bidirectional)
        
        if bidirectional:
            self.fc_out = nn.Linear(hid_dim*2, output_dim)
        else:
            self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, token_inputs, hidden):
        # token_inputs = [batch]
        batch_size = token_inputs.size(0)
        embedded = self.dropout(self.embedding(token_inputs).view(1, batch_size, -1))
        # embedded = [1, batch, emb_dim]
        output, hidden = self.gru(embedded, hidden)
        # output = [1, batch,  n_directions * hid_dim]
        # hidden = [n_layers * n_directions, batch, hid_dim]
        
        output = self.fc_out(output.squeeze(0))
        output = self.softmax(output)
        # output = [batch, output_dim]
        return output, hidden

In [17]:
class Seq2Seq(nn.Module):
    def __init__(self, 
                 encoder, 
                 decoder, 
                 device, 
                 predict=False, 
                 basic_dict=None,
                 max_len=100
                 ):
        super(Seq2Seq, self).__init__()
        
        self.device = device

        self.encoder = encoder
        self.decoder = decoder

        self.predict = predict  
        self.basic_dict = basic_dict  
        self.max_len = max_len  

        self.enc_n_layers = self.encoder.gru.num_layers
        self.enc_n_directions = 2 if self.encoder.gru.bidirectional else 1
        self.dec_n_directions = 2 if self.decoder.gru.bidirectional else 1

        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"
        assert self.enc_n_directions >= self.dec_n_directions, \
            "If decoder is bidirectional, encoder must be bidirectional either!"
        
    def forward(self, input_batches, input_lengths, target_batches=None, target_lengths=None, ids=None, teacher_forcing_ratio=0.5):
        # input_batches = target_batches = [seq_len, batch]
        batch_size = input_batches.size(1)
        BOS_token = self.basic_dict["<bos>"]
        EOS_token = self.basic_dict["<eos>"]
        PAD_token = self.basic_dict["<pad>"]

        encoder_hidden = torch.zeros(self.enc_n_layers*self.enc_n_directions, batch_size, self.encoder.hid_dim, device=self.device)
        
        # encoder_output = [seq_len, batch, hid_dim * n directions]
        # encoder_hidden = [n_layers*n_directions, batch, hid_dim]
        encoder_output, encoder_hidden = self.encoder(
            input_batches, input_lengths, ids, encoder_hidden)

        decoder_input = torch.tensor([BOS_token] * batch_size, dtype=torch.long, device=self.device)
        if self.enc_n_directions == self.dec_n_directions:
            decoder_hidden = encoder_hidden
        else:
            L = encoder_hidden.size(0)
            decoder_hidden = encoder_hidden[range(0, L, 2)] + encoder_hidden[range(1, L, 2)]
            
        if self.predict:
            assert batch_size == 1, "batch_size of predict phase must be 1!"
            output_tokens = []

#             return beam_search(3, decoder_inpuy, decoder_hidden)
            prev_beam = Beam()
            prev_beam.add(0, False, [decoder_input], decoder_input, decoder_hidden)
            while True:
                cur_beam = Beam()
                for prob, complete, seq_list, decoder_input, decoder_hidden in prev_beam:
                    if complete:
                        cur_beam.add(prob, complete, seq_list, decoder_input, decoder_hidden)
                    else:
                        # decoder_output_t: [batch_size, vocab_dim]
                        # decoder_hidden: [num_layer*num_direction, batch_size, hidden_dim]
                        decoder_output_t, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
                        # value: [batch_size, beam_width] log_softmax, 
                        # index: [batch_size, beam_width]
                        value, index = torch.topk(decoder_output_t, 3)
                        seq_len = 0
                        for m, n in zip(value[0], index[0]):
                            seq_len += 1
                            cur_prob = (prob+m.item())/ seq_len
                            decoder_input = torch.LongTensor([n.item()]).to(self.device)
                            cur_seq_list = seq_list + [decoder_input]
                            if int(n) == EOS_token or len(cur_seq_list) == self.max_len:
                                cur_complete = True
                            else:
                                cur_complete = False
                            cur_beam.add(cur_prob, cur_complete, cur_seq_list, decoder_input, decoder_hidden)
                    
                best_prob, best_complete, best_seq, _, _ = max(cur_beam)
                if best_complete or len(best_seq) >= self.max_len: 
                    best_seq = [i.item() for i in best_seq]
                    return best_seq
                else:
                    prev_beam = cur_beam
#         if self.predict:
#             assert batch_size == 1, "batch_size of predict phase must be 1!"
#             output_tokens = []

#             while True:
#                 decoder_output, decoder_hidden = self.decoder(
#                     decoder_input, decoder_hidden
#                 )
#                 # [1, 1]
#                 topv, topi = decoder_output.topk(1)
#                 decoder_input = topi.squeeze(1)  
#                 output_token = topi.squeeze().detach().item()
#                 if output_token == EOS_token or len(output_tokens) == self.max_len:
#                     break
#                 output_tokens.append(output_token)
#             return output_tokens

        else:
            max_target_length = max(target_lengths)
            all_decoder_outputs = torch.zeros((max_target_length, batch_size, self.decoder.output_dim), device=self.device)

            for t in range(max_target_length):
                use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
                if use_teacher_forcing:
                    # decoder_output = [batch, output_dim]
                    # decoder_hidden = [n_layers*n_directions, batch, hid_dim]
                    decoder_output, decoder_hidden = self.decoder(
                        decoder_input, decoder_hidden
                    )
                    all_decoder_outputs[t] = decoder_output
                    decoder_input = target_batches[t]  
                else:
                    decoder_output, decoder_hidden = self.decoder(
                        decoder_input, decoder_hidden
                    )
                    # [batch, 1]
                    topv, topi = decoder_output.topk(1)
                    all_decoder_outputs[t] = decoder_output
                    decoder_input = topi.squeeze(1)  
     
            loss_fn = nn.NLLLoss(ignore_index=PAD_token)
            loss = loss_fn(
                all_decoder_outputs.reshape(-1,self.decoder.output_dim ),  # [batch*seq_len, output_dim]
                target_batches.reshape(-1)                                                 # [batch*seq_len]
            )
            return loss

### Beam Search

In [18]:
class Beam:
    """保存每一个时间步的数据"""
 
    def __init__(self, beam_width=1):
        self.heapq = list()
        self.beam_width = beam_width
 
    def add(self, prob, complete, seq_list, decoder_input, decoder_hidden):
        heapq.heappush(self.heapq, [prob, complete, seq_list, decoder_input, decoder_hidden])
        # 保证最终只有一个beam width个结果
        if len(self.heapq) > self.beam_width:
            heapq.heappop(self.heapq)
 
    def __iter__(self):
        for item in self.heapq:
            yield item

In [19]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [20]:
def train(
    model,
    data_loader, 
    optimizer, 
    clip=1, 
    teacher_forcing_ratio=0.5, 
    print_every=1 
    ):
    model.predict = False
    model.train()

    if print_every == 0:
        print_every = 1

    print_loss_total = 0  
    start = time.time()
    epoch_loss = 0
    step = 0
    for batch in tqdm(data_loader, position=0, leave=True):
        step += 1
        ids = batch.id
        input_batchs = batch.article
        target_batchs = batch.title
        input_lens = list(batch.art_len)
        target_lens = list(batch.title_len)
        optimizer.zero_grad()
        loss = model(input_batchs, input_lens, target_batchs, target_lens, ids, teacher_forcing_ratio)
        print_loss_total += loss.item()
        epoch_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        if step % 500 == 0:
            print_loss_avg = print_loss_total / 500
            print_loss_total = 0
            print('\tCurrent Loss: %.4f' % print_loss_avg)

    return epoch_loss / len(data_loader)

In [21]:
def evaluate(
    model,
    data_loader, 
    print_every=None
    ):
    model.predict = False
    model.eval()
    if print_every == 0:
        print_every = 1

    print_loss_total = 0  
    start = time.time()
    epoch_loss = 0
    with torch.no_grad():
        for i, batch in enumerate(data_loader):
            ids = batch.id
            input_batchs = batch.article
            target_batchs = batch.title
            input_lens = list(batch.art_len)
            target_lens = list(batch.title_len)
        

            loss = model(input_batchs, input_lens, target_batchs, target_lens, ids, teacher_forcing_ratio=0)
            print_loss_total += loss.item()
            epoch_loss += loss.item()

            if print_every and (i+1) % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('\tCurrent Loss: %.4f' % print_loss_avg)

    return epoch_loss / len(data_loader)

In [22]:
def summary(
    model,
    sample, 
    idx2token=None
    ):
    model.predict = True
    model.eval()

    # shape = [seq_len, 1]
    input_batch = sample['src']
    # list
    input_len = sample['src_len']

    output_tokens = model(input_batch, input_len)
    output_tokens = [idx2token[t] for t in output_tokens]

    return " ".join(output_tokens)


In [23]:
INPUT_DIM = len(Article.vocab.stoi)
OUTPUT_DIM = len(Article.vocab.stoi)
ENC_EMB_DIM = 300
DEC_EMB_DIM = 300
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
LEARNING_RATE = 1e-4
N_EPOCHS = 200
CLIP = 1

bidirectional = True
gcn = GCN(INPUT_DIM, ENC_EMB_DIM, HID_DIM,ENC_EMB_DIM,0.5, device).to(device)
gat = GAT(ENC_EMB_DIM, HID_DIM,ENC_EMB_DIM,0.5, 0.05, 4, device).to(device)
enc = Encoder(gcn, INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, device, ENC_DROPOUT, bidirectional)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT, bidirectional)
model = Seq2Seq(enc, dec, device, basic_dict=Article.vocab.stoi).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


In [24]:
best_valid_loss = float('inf')
count = 0

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    train_loss = train(model, train_iter, optimizer, CLIP)
    valid_loss = evaluate(model, val_iter)
    end_time = time.time()
    if count == 5:
        break
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'gcn+seq2seq(node).pt')
    else:
        count += 1

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        
    print(f'\tTrain Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')

  8%|██████▏                                                                      | 500/6250 [50:57<9:49:54,  6.16s/it]

	Current Loss: 7.6234


 16%|███████████▊                                                              | 1000/6250 [1:41:57<9:04:00,  6.22s/it]

	Current Loss: 6.9809


 24%|█████████████████▊                                                        | 1500/6250 [2:32:58<9:33:24,  7.24s/it]

	Current Loss: 6.7732


 32%|███████████████████████▋                                                  | 2000/6250 [3:24:00<8:03:18,  6.82s/it]

	Current Loss: 6.6000


 40%|█████████████████████████████▌                                            | 2500/6250 [4:15:10<6:28:32,  6.22s/it]

	Current Loss: 6.4892


 48%|███████████████████████████████████▌                                      | 3000/6250 [5:06:01<5:52:48,  6.51s/it]

	Current Loss: 6.3800


 56%|█████████████████████████████████████████▍                                | 3500/6250 [5:56:58<5:11:01,  6.79s/it]

	Current Loss: 6.3108


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [6:48:04<3:18:46,  5.30s/it]

	Current Loss: 6.2432


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [7:39:21<2:51:16,  5.87s/it]

	Current Loss: 6.1709


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [8:30:41<1:52:00,  5.38s/it]

	Current Loss: 6.1156


 88%|█████████████████████████████████████████████████████████████████         | 5500/6250 [9:21:59<1:16:17,  6.10s/it]

	Current Loss: 6.0524


 96%|████████████████████████████████████████████████████████████████████████   | 6000/6250 [10:13:30<20:51,  5.01s/it]

	Current Loss: 5.9838


100%|███████████████████████████████████████████████████████████████████████████| 6250/6250 [10:39:13<00:00,  6.14s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 01 | Time: 699m 44s
	Train Loss: 6.455 | Val. Loss: 6.174


  8%|██████▏                                                                      | 500/6250 [28:44<5:51:15,  3.67s/it]

	Current Loss: 5.7456


 16%|████████████▏                                                               | 1000/6250 [57:33<4:47:35,  3.29s/it]

	Current Loss: 5.7506


 24%|█████████████████▊                                                        | 1500/6250 [1:25:57<4:51:16,  3.68s/it]

	Current Loss: 5.6896


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:34<4:26:57,  3.77s/it]

	Current Loss: 5.6510


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:20<3:38:54,  3.50s/it]

	Current Loss: 5.6467


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:51:57<3:13:46,  3.58s/it]

	Current Loss: 5.6230


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:48<2:10:18,  2.84s/it]

	Current Loss: 5.5674


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:49:26<2:30:11,  4.01s/it]

	Current Loss: 5.5558


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:18:07<1:52:37,  3.86s/it]

	Current Loss: 5.5042


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:46:44<1:00:22,  2.90s/it]

	Current Loss: 5.4973


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:15:33<47:00,  3.76s/it]

	Current Loss: 5.4997


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:43:55<14:22,  3.45s/it]

	Current Loss: 5.4755


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:58:03<00:00,  3.44s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 02 | Time: 390m 13s
	Train Loss: 5.594 | Val. Loss: 5.873


  8%|██████▏                                                                      | 500/6250 [28:37<6:32:40,  4.10s/it]

	Current Loss: 5.2168


 16%|████████████▏                                                               | 1000/6250 [57:20<4:20:02,  2.97s/it]

	Current Loss: 5.2038


 24%|█████████████████▊                                                        | 1500/6250 [1:25:57<4:05:15,  3.10s/it]

	Current Loss: 5.2126


 32%|███████████████████████▋                                                  | 2000/6250 [1:55:04<3:59:21,  3.38s/it]

	Current Loss: 5.1947


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:41<2:57:25,  2.84s/it]

	Current Loss: 5.1912


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:52:25<3:28:44,  3.85s/it]

	Current Loss: 5.1884


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:55<2:37:29,  3.44s/it]

	Current Loss: 5.1830


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:49:26<1:35:36,  2.55s/it]

	Current Loss: 5.1446


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:18:04<1:30:59,  3.12s/it]

	Current Loss: 5.1142


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:46:40<1:05:00,  3.12s/it]

	Current Loss: 5.1479


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:15:18<41:04,  3.29s/it]

	Current Loss: 5.1182


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:44:04<15:50,  3.80s/it]

	Current Loss: 5.1340


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:58:32<00:00,  3.44s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 03 | Time: 391m 5s
	Train Loss: 5.169 | Val. Loss: 5.699


  8%|██████▏                                                                      | 500/6250 [28:40<5:16:42,  3.30s/it]

	Current Loss: 4.8738


 16%|████████████▏                                                               | 1000/6250 [57:21<5:33:17,  3.81s/it]

	Current Loss: 4.8601


 24%|█████████████████▊                                                        | 1500/6250 [1:26:09<4:33:08,  3.45s/it]

	Current Loss: 4.8621


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:43<4:42:43,  3.99s/it]

	Current Loss: 4.8670


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:13<4:15:00,  4.08s/it]

	Current Loss: 4.8577


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:52:03<2:40:11,  2.96s/it]

	Current Loss: 4.8721


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:43<2:33:44,  3.35s/it]

	Current Loss: 4.8603


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:49:18<2:12:02,  3.52s/it]

	Current Loss: 4.8754


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:18:06<1:30:19,  3.10s/it]

	Current Loss: 4.8500


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:46:50<1:18:01,  3.75s/it]

	Current Loss: 4.8594


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:15:25<40:51,  3.27s/it]

	Current Loss: 4.8318


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:43:56<10:13,  2.45s/it]

	Current Loss: 4.8215


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:58:05<00:00,  3.44s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 04 | Time: 390m 38s
	Train Loss: 4.857 | Val. Loss: 5.598


  8%|██████▏                                                                      | 500/6250 [28:40<5:50:44,  3.66s/it]

	Current Loss: 4.5498


 16%|████████████▏                                                               | 1000/6250 [57:29<5:28:50,  3.76s/it]

	Current Loss: 4.5633


 24%|█████████████████▊                                                        | 1500/6250 [1:26:04<4:35:59,  3.49s/it]

	Current Loss: 4.5650


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:32<3:28:09,  2.94s/it]

	Current Loss: 4.5674


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:21<4:18:49,  4.14s/it]

	Current Loss: 4.5971


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:51:57<2:20:23,  2.59s/it]

	Current Loss: 4.5990


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:49<2:54:47,  3.81s/it]

	Current Loss: 4.6315


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:49:25<2:29:52,  4.00s/it]

	Current Loss: 4.5866


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:18:01<1:58:51,  4.07s/it]

	Current Loss: 4.6251


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:46:31<1:05:03,  3.12s/it]

	Current Loss: 4.6169


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:15:16<38:50,  3.11s/it]

	Current Loss: 4.6262


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:43:55<21:31,  5.16s/it]

	Current Loss: 4.6322


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:58:04<00:00,  3.44s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 05 | Time: 390m 51s
	Train Loss: 4.597 | Val. Loss: 5.535


  8%|██████▏                                                                      | 500/6250 [29:02<5:09:21,  3.23s/it]

	Current Loss: 4.2969


 16%|████████████▏                                                               | 1000/6250 [57:43<5:19:44,  3.65s/it]

	Current Loss: 4.2992


 24%|█████████████████▊                                                        | 1500/6250 [1:26:06<4:22:46,  3.32s/it]

	Current Loss: 4.3448


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:31<5:50:36,  4.95s/it]

	Current Loss: 4.3120


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:07<3:29:42,  3.36s/it]

	Current Loss: 4.3762


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:52:04<3:36:59,  4.01s/it]

	Current Loss: 4.3805


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:34<2:54:05,  3.80s/it]

	Current Loss: 4.3868


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:49:02<2:02:41,  3.27s/it]

	Current Loss: 4.3505


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:17:46<1:17:43,  2.67s/it]

	Current Loss: 4.3905


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:46:33<1:04:45,  3.11s/it]

	Current Loss: 4.4170


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:15:17<40:18,  3.22s/it]

	Current Loss: 4.4104


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:43:40<11:57,  2.87s/it]

	Current Loss: 4.4098


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:57:46<00:00,  3.43s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 06 | Time: 390m 36s
	Train Loss: 4.367 | Val. Loss: 5.516


  8%|██████▏                                                                      | 500/6250 [28:25<5:04:29,  3.18s/it]

	Current Loss: 4.0666


 16%|████████████▏                                                               | 1000/6250 [57:07<5:45:22,  3.95s/it]

	Current Loss: 4.1389


 24%|█████████████████▊                                                        | 1500/6250 [1:26:03<4:43:12,  3.58s/it]

	Current Loss: 4.1073


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:43<4:36:04,  3.90s/it]

	Current Loss: 4.1376


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:19<3:34:24,  3.43s/it]

	Current Loss: 4.1546


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:51:54<3:01:37,  3.35s/it]

	Current Loss: 4.1831


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:13<2:48:29,  3.68s/it]

	Current Loss: 4.1509


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:48:50<2:24:15,  3.85s/it]

	Current Loss: 4.1536


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:17:17<1:22:28,  2.83s/it]

	Current Loss: 4.1890


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:46:02<1:15:22,  3.62s/it]

	Current Loss: 4.2061


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:14:45<42:47,  3.42s/it]

	Current Loss: 4.1810


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:43:00<16:29,  3.96s/it]

	Current Loss: 4.2148


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:57:05<00:00,  3.43s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 07 | Time: 389m 59s
	Train Loss: 4.160 | Val. Loss: 5.500


  8%|██████▏                                                                      | 500/6250 [28:46<5:14:10,  3.28s/it]

	Current Loss: 3.8850


 16%|████████████▏                                                               | 1000/6250 [57:23<4:33:19,  3.12s/it]

	Current Loss: 3.9215


 24%|█████████████████▊                                                        | 1500/6250 [1:25:55<4:58:43,  3.77s/it]

	Current Loss: 3.9129


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:33<3:50:08,  3.25s/it]

	Current Loss: 3.9271


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:06<2:50:37,  2.73s/it]

	Current Loss: 3.9781


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:51:35<3:11:49,  3.54s/it]

	Current Loss: 3.9286


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:19<3:18:35,  4.33s/it]

	Current Loss: 3.9828


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:48:54<2:19:20,  3.72s/it]

	Current Loss: 3.9443


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:17:31<1:21:19,  2.79s/it]

	Current Loss: 3.9993


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:45:55<1:19:44,  3.83s/it]

	Current Loss: 4.0157


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:14:39<40:17,  3.22s/it]

	Current Loss: 4.0182


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:42:56<15:27,  3.71s/it]

	Current Loss: 4.0149


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:57:19<00:00,  3.43s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 08 | Time: 390m 21s
	Train Loss: 3.964 | Val. Loss: 5.499


  8%|██████▏                                                                      | 500/6250 [28:40<5:09:57,  3.23s/it]

	Current Loss: 3.6583


 16%|████████████▏                                                               | 1000/6250 [57:27<4:17:09,  2.94s/it]

	Current Loss: 3.6950


 24%|█████████████████▊                                                        | 1500/6250 [1:26:16<3:56:37,  2.99s/it]

	Current Loss: 3.7249


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:52<3:41:14,  3.12s/it]

	Current Loss: 3.7362


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:31<3:22:13,  3.24s/it]

	Current Loss: 3.7303


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:52:16<4:14:55,  4.71s/it]

	Current Loss: 3.7865


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:21:14<3:28:38,  4.55s/it]

	Current Loss: 3.8120


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:49:58<2:16:01,  3.63s/it]

	Current Loss: 3.7763


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:18:41<1:32:59,  3.19s/it]

	Current Loss: 3.8327


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:47:15<1:00:54,  2.92s/it]

	Current Loss: 3.8557


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:15:49<37:27,  3.00s/it]

	Current Loss: 3.8493


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:44:11<11:26,  2.75s/it]

	Current Loss: 3.8316


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:58:28<00:00,  3.44s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 09 | Time: 391m 32s
	Train Loss: 3.777 | Val. Loss: 5.509


  8%|██████▏                                                                      | 500/6250 [28:22<5:45:35,  3.61s/it]

	Current Loss: 3.5092


 16%|████████████▏                                                               | 1000/6250 [57:08<5:17:02,  3.62s/it]

	Current Loss: 3.5179


 24%|█████████████████▊                                                        | 1500/6250 [1:25:52<3:57:08,  3.00s/it]

	Current Loss: 3.5607


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:26<4:11:46,  3.55s/it]

	Current Loss: 3.5579


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:03<3:57:52,  3.81s/it]

	Current Loss: 3.5775


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:51:53<3:45:14,  4.16s/it]

	Current Loss: 3.5874


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:37<2:41:14,  3.52s/it]

	Current Loss: 3.6348


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:49:02<1:34:38,  2.52s/it]

	Current Loss: 3.5960


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:17:37<1:29:46,  3.08s/it]

	Current Loss: 3.6734


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:46:02<1:20:47,  3.88s/it]

	Current Loss: 3.6592


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:14:36<49:10,  3.93s/it]

	Current Loss: 3.6508


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:43:01<20:58,  5.03s/it]

	Current Loss: 3.6533


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:57:42<00:00,  3.43s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 10 | Time: 390m 51s
	Train Loss: 3.600 | Val. Loss: 5.542


  8%|██████▏                                                                      | 500/6250 [28:52<5:09:37,  3.23s/it]

	Current Loss: 3.3257


 16%|████████████▏                                                               | 1000/6250 [57:16<3:40:27,  2.52s/it]

	Current Loss: 3.3683


 24%|█████████████████▊                                                        | 1500/6250 [1:26:04<4:15:55,  3.23s/it]

	Current Loss: 3.3947


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:38<3:39:57,  3.11s/it]

	Current Loss: 3.4011


 40%|█████████████████████████████▌                                            | 2500/6250 [2:23:18<4:06:52,  3.95s/it]

	Current Loss: 3.4071


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:51:48<2:57:12,  3.27s/it]

	Current Loss: 3.4261


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:20:20<2:39:33,  3.48s/it]

	Current Loss: 3.4349


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:49:07<2:08:07,  3.42s/it]

	Current Loss: 3.4520


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:17:48<1:25:15,  2.92s/it]

	Current Loss: 3.4920


 80%|████████████████████████████████████████████████████████████▊               | 5000/6250 [4:46:33<58:05,  2.79s/it]

	Current Loss: 3.4823


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:15:17<35:31,  2.84s/it]

	Current Loss: 3.5101


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:43:49<11:07,  2.67s/it]

	Current Loss: 3.4768


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:58:10<00:00,  3.44s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 11 | Time: 391m 5s
	Train Loss: 3.435 | Val. Loss: 5.551


  8%|██████▏                                                                      | 500/6250 [29:00<6:20:05,  3.97s/it]

	Current Loss: 3.1743


 16%|████████████▏                                                               | 1000/6250 [57:45<6:51:54,  4.71s/it]

	Current Loss: 3.1743


 24%|█████████████████▊                                                        | 1500/6250 [1:26:22<5:46:13,  4.37s/it]

	Current Loss: 3.2009


 32%|███████████████████████▋                                                  | 2000/6250 [1:55:19<4:37:07,  3.91s/it]

	Current Loss: 3.2443


 40%|█████████████████████████████▌                                            | 2500/6250 [2:24:05<3:18:24,  3.17s/it]

	Current Loss: 3.2428


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:52:45<3:21:35,  3.72s/it]

	Current Loss: 3.2801


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:21:25<2:43:13,  3.56s/it]

	Current Loss: 3.2642


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:50:04<2:25:43,  3.89s/it]

	Current Loss: 3.2951


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:18:48<1:33:38,  3.21s/it]

	Current Loss: 3.3140


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:47:18<1:05:31,  3.15s/it]

	Current Loss: 3.3338


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:15:56<41:53,  3.35s/it]

	Current Loss: 3.3628


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:44:13<16:02,  3.85s/it]

	Current Loss: 3.3395


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:58:33<00:00,  3.44s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 12 | Time: 391m 37s
	Train Loss: 3.274 | Val. Loss: 5.578


  8%|██████▏                                                                      | 500/6250 [28:24<5:46:52,  3.62s/it]

	Current Loss: 3.0221


 16%|████████████▏                                                               | 1000/6250 [57:11<3:56:22,  2.70s/it]

	Current Loss: 3.0351


 24%|█████████████████▊                                                        | 1500/6250 [1:25:42<5:18:23,  4.02s/it]

	Current Loss: 3.0626


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:12<3:39:26,  3.10s/it]

	Current Loss: 3.0730


 40%|█████████████████████████████▌                                            | 2500/6250 [2:22:51<3:07:11,  3.00s/it]

	Current Loss: 3.0952


 48%|███████████████████████████████████▌                                      | 3000/6250 [2:51:25<2:46:14,  3.07s/it]

	Current Loss: 3.1097


 56%|█████████████████████████████████████████▍                                | 3500/6250 [3:19:58<2:28:04,  3.23s/it]

	Current Loss: 3.1265


 64%|███████████████████████████████████████████████▎                          | 4000/6250 [3:48:49<1:41:49,  2.72s/it]

	Current Loss: 3.1395


 72%|█████████████████████████████████████████████████████▎                    | 4500/6250 [4:17:30<2:04:50,  4.28s/it]

	Current Loss: 3.1900


 80%|███████████████████████████████████████████████████████████▏              | 5000/6250 [4:45:44<1:17:24,  3.72s/it]

	Current Loss: 3.1545


 88%|██████████████████████████████████████████████████████████████████▉         | 5500/6250 [5:14:03<50:41,  4.06s/it]

	Current Loss: 3.1933


 96%|████████████████████████████████████████████████████████████████████████▉   | 6000/6250 [5:42:40<16:39,  4.00s/it]

	Current Loss: 3.2007


100%|████████████████████████████████████████████████████████████████████████████| 6250/6250 [5:56:56<00:00,  3.43s/it]
  0%|                                                                                         | 0/6250 [00:00<?, ?it/s]

Epoch: 13 | Time: 389m 49s
	Train Loss: 3.120 | Val. Loss: 5.614


  8%|██████▏                                                                      | 500/6250 [28:47<4:34:26,  2.86s/it]

	Current Loss: 2.8646


 16%|████████████▏                                                               | 1000/6250 [57:09<5:35:11,  3.83s/it]

	Current Loss: 2.8834


 24%|█████████████████▊                                                        | 1500/6250 [1:25:37<4:03:10,  3.07s/it]

	Current Loss: 2.9144


 32%|███████████████████████▋                                                  | 2000/6250 [1:54:01<4:07:52,  3.50s/it]

	Current Loss: 2.9429


 39%|████████████████████████████▋                                             | 2422/6250 [2:18:11<3:38:24,  3.42s/it]


KeyboardInterrupt: 

In [25]:
from rouge import Rouge

model.load_state_dict(torch.load('gcn+seq2seq(node).pt'))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [37]:
dct = Article.vocab.stoi
r1 = 0
r2 = 0
rl = 0
rouge = Rouge()
for i in range(10000):
    sample = test[i].article
    label = ' '.join(test[i].title)
    sample = [dct['<bos>']] + [dct[ele] for ele in sample] + [dct['<eos>']]
    test_sample = {}
    test_sample["src"] = torch.tensor(sample, dtype=torch.long, device=device).reshape(-1, 1)
    test_sample["src_len"] = [len(sample)]
    predict = ''.join(summary(model, test_sample, Article.vocab.itos))[6:-6]
    score = rouge.get_scores(label, predict)[0]
    r1 += score['rouge-1']['r']
    r2 += score['rouge-2']['r']
    rl += score['rouge-l']['r']
    print(label)
    print(predict)
    print(r1/(i+1),r2/(i+1),rl/(i+1))
    print(i)
    
print(r1/10000)
print(r2/10000)
print(rl/10000)



boeing to cooperate with airship builder cargolifter
fed expands china < > on <
0.0 0.0 0.0
0
us says palestinian pm must control security forces
abbas calls rules to remove its stability in indonesia
0.0 0.0 0.0
1
angola zambia urged to overcome crisis
s. africa <oov> regional development in <oov> countries
0.0 0.0 0.0
2
oecd hails first office
0.0 0.0 0.0
3
asia foundation cooperates with yunnan in offering legal aid service
<oov> china < < unk <oov> committee for < unk > < unk > law
0.0 0.0 0.0
4
upton 's torrid offensive pace not slowing
griffey makes < < unk unk > as a < > < unk > contributed
0.0 0.0 0.0
5
as afghan leader visits u.s. villagers press claims of wrongful u.s. attack
un official leader defends first in of # nd # # years
0.023809523809523808 0.0 0.028571428571428574
6
parents in prayer death get probation some jail
man executes man in to # years after
0.03645833333333333 0.0 0.04285714285714286
7
karadzic asks to summon holbrooke to testify
former commander commander 

wartime us president picks up his peace prize
clinton 's < president on the unk > of china unk > contributed reporting
0.06529410241695635 0.004090909090909091 0.07311133311133312
54
iraq tries desperately to prove it is disarming in advance of crucial u.n. report
iraq 's unk > takes <
0.06710432677855832 0.004017857142857143 0.07478196406767837
55
drc delegation asks tanzania to mediate dialogue
president hails s. africa of president president
0.06592705788770642 0.003947368421052632 0.07346999978578928
56
man ordered to stay away from tom cruise
<oov> 's # s first of of his time of the time
0.06479038447584941 0.003879310344827586 0.07220327565155153
57
assailants kill # # in algeria reports say
top-seeded algerian republic dies # in hospital in indonesia 's largest # # # #
0.06708207287456383 0.006234866828087168 0.07267440657271167
58
giuliani revamps child welfare system
the s <oov> <oov> in the city city of a year
0.06596403832665444 0.006130952380952381 0.07146316646316647
59
wa

with giants opener a week away bonds is on the witness stand
<oov> of < a
0.09822632951648959 0.01976524523694335 0.10112654640956525
105
u.s. troops arrive in kuwait for joint exercises
us navy seizes < of first # , # # # # # dollars from kuwait
0.09789243858642893 0.019580523318841078 0.10103105617124307
106
barcelona names cruyff honorary president
albert albert champion retires # # at world # # # # #
0.09698602711803607 0.019399222176999957 0.10009558342891675
107
german shares close mixed
ecb stock exchange ends
0.096096247052733 0.019221247661614635 0.0991772753240643
108
irish stocks down six percent
stocks rise lower on economic growth of asian bank
0.09623274581690006 0.019046509046509046 0.09928576474031017
109
ross meets netanyahu in bid to revive < unk > process
arafat chief 's israeli state policy of china unk > contributed reporting for < article
0.09716758594467573 0.01951841951841952 0.09959249959249956
110
amputee athletes gird for world competitions
harvard world pres

floods kill nine as storms hit algeria
mysterious disease reported of southern central korea
0.09485058563484974 0.014562870028457074 0.09797759220836141
155
wiesenthal center asking check of eva peron link to nazis
world group of <oov> . in
0.09552032712762139 0.014470112894517858 0.09862741646181133
156
indian government distances itself from celebrity guru
pak unk > 's first
0.0949157680951681 0.01437852990151458 0.09800319230698974
157
palestinians stage demonstrations in territories
army of < of major bank in israel city
0.09501762559841302 0.014288098895844677 0.09817298355034201
158
thousands stranded on rooftops as heavy rains cause floods
floods damages becomes more more
0.09567376543842293 0.014198798277745648 0.09912190240315237
159
us to increase tsunami aid to # # # million dollars
uae announces emergency to for dprk to combat # years
0.09694287248538926 0.014110606984095053 0.09988650066289814
160
tour of flanders cycling results
swiss wins super-g of nordic # # # #
0.097

usoc takes troubled roadshow to chicago after contentious senate hearing
clinton of <oov> on on of commerce policy
0.09031182090001898 0.010922152521342805 0.09384373639181327
207
hk holds symposium to mark # # th anniversary of basic law promulgation
china kong 's published
0.08987970692442081 0.010869893418369874 0.09339472329902948
208
russia building communication system in far east
russian < prime deputy president of < china 's economic development
0.08945170832001881 0.010818132021139541 0.09294998652141505
209
execution photos make site popular
national council chief s
0.08902776657442629 0.010766861253266842 0.09250946525828038
210
iraqi beheaded for smuggling hashish into saudi arabia
armed defector dies from kuwait # years years of < # years
0.0886078242792639 0.010716074171883508 0.09207309985611868
211
thai bourse down # . # # points
hk bourse up
0.08975677033116095 0.010665763964503772 0.09320577700859387
212
russian ministers ask pensioners for forgiveness over payments
r

< unk > offers a < unk > wimbledon final
dprk unk < < < #
0.09399928450256047 0.010328158400373823 0.09719044951603086
257
afghan suicide attack kills # #
unknown # dies kills #
0.0959529552187668 0.011253532306163886 0.09874569874569869
258
briton faces execution in china on drugs charges
australian publisher makes second first month of china 's economic security
0.09593355574065265 0.011210249489601717 0.09871555794632712
259
former yunlin county prosecutor gets life sentence for corruption
legislator becomes < former china president 's <
0.09604492142747008 0.011167298342131978 0.0988846827927287
260
sioux souls to be given peace on normandy beaches
south carolina 's <oov>
0.09567833775789959 0.01112467506601697 0.09850726033932135
261
weather forecast for major chinese cities
chinese vice leader expresses chinese policy policy
0.09585772484953169 0.011082375921279263 0.09889316429240377
262
ronaldo reports to brazilian police in drug investigation
<oov> police investigate man in in

portuguese president to visit beijing as scheduled
president retains national policy chief china
0.09982980616099965 0.011159035514040674 0.10533242175399031
305
mayor 's proposal to move gypsies to former chicken farm stirs
<oov> s accuses government 's of china rights law
0.09986655308266125 0.011122686864157806 0.1053512448463588
306
hong kong launches new smoking cessation programs
hk exchange development surplus # in asia of year year
0.09954231102719807 0.011086574244468982 0.10500919535010438
307
senegal 's prime minister resigns
mauritian premier minister resigns of election election
0.10014480932715628 0.011590069689201012 0.10574810841801129
308
art deco furniture sets world record
china s the china of
0.09982176155513318 0.011552682367622944 0.1054069854876306
309
beijing hikes grain prices to cut subsidy
world economy develops down in
0.0995007912607437 0.01151553547898107 0.10506805627384401
310
prince frederik linked to blond rock singer
crown 's < 's founder 's second ti

germany to grant india developmental aid
swiss bank bank development <oov> < unk > program in indonesia
0.10304232569595376 0.012561470832147523 0.10807652509333182
356
genocide verdicts in srebrenica killings
military war crimes trial
0.10275449797054606 0.012526382924795156 0.10777463535843423
357
longer life < unk > bring new problems for both poor and rich countries who warns
central health bank chief to focus in asia
0.10246827374221586 0.012491490493249765 0.10747442746049989
358
thai airways domestic flight makes safe emergency landing after landing-gear problem
airport airways carrier makes to
0.10329475075959858 0.012456791908546294 0.10828699849533183
359
caritas says it will continue to fund workers pensions
insurance insurance chief governor 's first in # years
0.10300861571594319 0.012422285559769158 0.10798703451058021
360
haitians see hope despite cholera toll above # # #
philippine official says toll
0.1034146692636892 0.012387969853802944 0.1083793355202195
361
# talib

karzai urges funds for shattered education health
president 's national national secretary system for health in china
0.10375073543138874 0.012276958342553363 0.10848906156787934
405
un inspections move up a gear in iraq as number of experts doubles
un experts sees it has military to to be reserves in afghanistan
0.10411007023376861 0.012246793825741193 0.10866923139650417
406
charles < unk > # # < unk > and friendly curmudgeon
vanuatu county founder < #
0.10483529064986231 0.012216777174207515 0.10938327739798331
407
nba star webber pleads guilty to criminal contempt
chris simon simon chris < unk
0.10457896964582843 0.012186907303365932 0.10911583662194912
408
defiant ahmadinejad sworn in as police break up protests
guinea 's accuses major in china 's city of <
0.10456780142718006 0.01215718313921138 0.10912070314509344
409
arafat chairs first meeting of new cabinet
arafat chief bank chief in central china
0.10466096284185152 0.012127603618191402 0.10926071765487827
410
zambian opposi

spaniard sued for music file-sharing networks
world internet <oov> watchdog office on internet in beijing
0.10087505916246674 0.011209861325028892 0.1057341637210347
456
funds transfer may ease n. korean reactor shutdown
north korea 's < nuclear of <
0.10065480794158799 0.01118538564527992 0.10550330310155645
457
retransmits indian groups march in mexico city
< 's first in
0.10098017873038627 0.011161016613372992 0.10581811072007158
458
years of drought < unk > the east
state leader sees a nation 's national state says it has not not affect the year
0.10089652616792891 0.011136753533778705 0.10574335085204657
459
top two officials at cia 's clandestine service quit amid infighting
<oov> official 's official of of unk > scandal
0.10091868361899872 0.011112595717002611 0.10582385799305546
460
mozambique opposition loses bid to annul elections
s. korea 's < president < unk > president <oov> < > president
0.10070024490986669 0.011088542479519922 0.10559480202337353
461
costa rediscovers fe

bucks tix sell out in # # minutes
baseball west # <oov> list from
0.10088474625845742 0.012357036181114093 0.1060938132296401
507
elders negotiate truce after # # killed in market area
moderate dprk leader becomes of in south 's main military leader
0.10086514772142528 0.012332759096278898 0.10608184110148756
508
lakers # # grizzlies # #
boston beats sports
0.1006673729219715 0.012308577215697959 0.10587383749148464
509
indonesia to send junior shuttlers to sea games
world badminton world championships title at
0.10047037219218291 0.01228448998044219 0.10566664798563047
510
china provides free food aid to mongolia
china 's < of
0.10076242224649505 0.012260496835949139 0.10594854906378354
511
india keeps rates unchanged warns of inflation
india central central korea currency index up
0.10084447823209086 0.012236597231980428 0.10606690796749285
512
former white house press secretary tony snow dies at # #
new york president 's top
0.10064828274914905 0.012212790622579687 0.105860552115416

ferguson wants players fans to stick together in face of media criticism
<oov> makes new china
0.10241902547988732 0.011766322683373808 0.10748606392613558
558
lebanese army helicopter joins battle against militants in palestinian refugee camp
army helicopter military makes first
0.10295042007724466 0.012191739964296355 0.10800841024055319
559
car bomb explodes in jerusalem as israel and palestinians try to
major central korea bank reports
0.10276690774199111 0.012170007807497253 0.1078158818800531
560
nato soldier killed in afghanistan
s. military officer <oov> first of of south korean military exercise
0.10258404847554628 0.012148352989334447 0.10762403867386083
561
sea levels average global temperature to rise
world 's environment on china of economic issues
0.10240183879796982 0.012126775097701525 0.10743287697106535
562
calm returns to caracas after riots _ for now
nicaraguan government defends president of nation of nation city
0.10222027525400179 0.01210527372341482 0.1072423931

johannesburg securities exchange ends weaker
shenzhen bourse stock market on regional bank says
0.10103382816422712 0.011472564983508184 0.10688730503086949
605
bangkok plans bounty for those bringing dogs for neutering
anti-corruption hospital files against chinese university city
0.1008673803418808 0.01145366454696204 0.10671121391879226
606
ex-wife confesses to deadly kuwait wedding blaze
badminton badminton di 's zhou of dies # th th unk > event
0.10070148004526586 0.011434826282904537 0.10653570205379426
607
stocks mixed in early trading
monday 's sports exchange index
0.10053612457721123 0.011416049885067257 0.10636076658244156
608
vazquez loses arbitration hearing stewart settles
orioles unk > wins at the . #
0.10037131125823219 0.011397335049190096 0.10618640467001132
609
chinese fm meets indian brazilian south african mexican counterparts
foreign fm rates on economic issues of
0.10044084617083271 0.011378681473004843 0.10624642224478567
610
king accepts resignation of belgian 

mexico says it has detected large increase in heroin shipments from venezuela
indonesian police drug seize to rise to china in #
0.09831407853375844 0.01102959651780255 0.10386476666964481
655
mexico city law recognizing gay civil unions signed published in official gazette
archbishop rights civil <oov> of of of # in # # # #
0.09839860224047442 0.011012808699662821 0.10414155436986498
656
french and pakistani president hold talks
chirac praises < unk > policy for afghanistan
0.0982490602917807 0.010996071908325947 0.1039832845303971
657
bhutto to take control of largest province
prime leader 's <oov> of party 's <
0.09828965352350788 0.01097938591149996 0.10404227369326013
658
boeing could benefit when air force retires older tankers
military develops war against in asia war in
0.09814072980604803 0.010962750478300717 0.10388463388463398
659
oil prices fall after u.s. government suspends oil purchases
opec petroleum price prices to on # . # million
0.09816035216808293 0.010946165379241

beijing private cars exceed one million
beijing petrochemical company improves
0.09736990999276111 0.010482520880057272 0.1033410994049293
704
asian regional summit in philippines to go ahead despite bombings
asian world issues issues
0.09758609992194983 0.010467673116771072 0.10366686744165508
705
south korea to penalize china over chemical import dumping
south <oov> develops controls of <
0.09768380935157461 0.010452867355644099 0.10375597606856458
706
german court refuses compensation over beer ban
daewoo korea < 's unk > ruling party in # # years
0.09754583786943963 0.010438103418701099 0.10360942807976717
707
myanmar set up industrial zone supervisory committee
<oov> securities <oov> environment development of < unk > < unk > in myanmar
0.09750900110436082 0.010423381128970913 0.10360433720800445
708
polish foreign minister condemns killing of bhutto
interior foreign fm <oov> in south china
0.09757287172654783 0.010408700310479404 0.1036596228497638
709
zimbabwe to host internatio

libya joins italy in trying to reduce tension between ethiopia eritrea
libya libya makes economic of in nuclear deal
0.09721677776008462 0.010168451377300191 0.10310762352595035
752
china denies report on organ transplants
china leader of <oov> china of china leader
0.09725362553493862 0.010154965367515974 0.10330244100138011
753
eu finance ministers discuss worries that slowing us economy will drag down eu
economic bank of financial reform <oov> in <oov> 's stability
0.09712481278588572 0.010141515082260985 0.10316561657621273
754
stocks lower at close in tokyo
xinhua index up lower
0.09732702864198906 0.010128100379771222 0.10335984195111192
755
kazakh finance stable despite outside disturbances
chinese bank ministry 's <oov> on in indonesia
0.09719845925144481 0.010114721119031762 0.10322330319027821
756
cambodian second prime minister to visit myanmar
hun sen minister 's of of state china
0.09723513674583605 0.010101377159771826 0.10327559057770151
757
storm slams mexican coast ; t

little saigon fight divides san jose s vietnamese
<oov> professor 's unk <oov> > unk unk > <oov> unk > contributed < > < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk >
0.09696057834974306 0.009917833147587707 0.10293349626267348
801
dollar seen changing little against mark and yen next week
us 's 's 's
0.0968398304314993 0.009905482172310512 0.10280531009049082
802
tehran stock exchange index up
stock exchange exchange exchange closes lower
0.09713397657938715 0.010141918139757887 0.10329933333664693
803
frankfurt stocks jump # . # # percent
stock index up lower
0.09701331325444382 0.010129319483683653 0.1031710111834337
804
judge rules top us generals can face abu ghraib questioning
former army colonel military of military in iraq

chinese premier calls for more asia-europe cooperation
chinese exchange official says of
0.09563329419445629 0.010090327848852679 0.10171402345698137
851
net properties niche profitable for teamsters
bank bank governor 's china
0.095521180133267 0.010078498625114281 0.10159478075656286
852
forward nowitzki sprains ankle
injury retires at injury # - #
0.09540932863428192 0.010066697104475974 0.10147581731305401
853
greece says ex-king can attend olympics but must not offend democracy
former president <oov> chief chief <oov> < unk > relations
0.09529773877623013 0.010054923189733897 0.101357132146606
854
australian fm arrives in pyongyang in effort to prompt nuclear freeze
s. fm reaffirms < < unk unk > korea
0.0953162123420419 0.010043176784138413 0.1014056134675295
855
at least # # die as shiite groups clash in karbala
major provincial of < city
0.0952049915575121 0.010031457791391462 0.10128728719743904
856
un says iran nuclear probe needs to end soon
dprk chief says agency <oov> on <o

nkorea declares past accords with south dead
south korea war declares military
0.09498895849849352 0.009705012174849556 0.10105157547018029
902
malaysia shares to see positive trend next week
asean bourse rates in china kong
0.09488388221696864 0.009694276541912775 0.1009397927539522
903
ravens defense and special teams again prove to be the catalyst
weaver <oov> wins first title of the #
0.09491715969518194 0.009683564634131656 0.10096637861831248
904
algeria rounds up # # # illegal migrants for deportation reports say
mauritanian mauritanian smugglers in to # , # # # people from the year
0.09504891306669466 0.009842684489774064 0.10096531197524591
905
construction of potala palace square
< 's > china unk unk > in china city 's legacy contributed contributed
0.09494411823420658 0.009831832577436937 0.10085399410096228
906
special teams unit vital to vikings
<oov> 's <oov> is the < unk > in a round at
0.09483955422734071 0.00982100456799042 0.10074292142023435
907
carruth judge questio

kamsky and karpov draw # th game in fide world chess championship
top-seeded challenger world chess chess championship
0.09705876071086779 0.010768535747847228 0.10258704724113543
953
eu urges speedy transition of joint au-un operation for darfur
world council body <oov> in indonesia
0.09695712850069935 0.01075725979418456 0.10247962624925991
954
former rector of st. patrick 's cathedral dies while delivering
archbishop s leader dies
0.09711721518636807 0.010746007430383113 0.10263393626364352
955
israel okays armored vehicles for palestinians
lebanon president declares first first
0.0970157342927564 0.010734778582493476 0.10252669077120502
956
syria lebanon stress to continue consultations
eritrea eritrea military military cooperation on regional economic program
0.09691446525904789 0.010723573176875006 0.10241966917332276
957
afghanistan 's kandahar gets new currency amid hopes it will boost local economy
central central central economy city <oov> financial economic system
0.09692926

china b shares fall as shenzhen index declines # . # %
china s exchange markets exchange exchange policy policy
0.09696386237922643 0.01105905356180845 0.10226238533815818
1002
radical palestinian guerrilla leader won t attend plo meeting in west bank
dprk vice official leader in of of china
0.09711628881111962 0.01104803856822099 0.10244510635446906
1003
gary neville admits headbutt was mistake
edwards wins <oov> title of the
0.09701965568792448 0.011037045495018782 0.10234317092526064
1004
north korea says uranium enrichment in final stage
dprk develops < on <oov> development program
0.09692321467829434 0.011026074276832878 0.10224143815098105
1005
goldfields reports rise in final earnings
south china produces up in indonesia #
0.09696882930409259 0.011015124848553997 0.10228177152208945
1006
big bonus for angels first-round pick
anaheim unk > < # <oov> title title
0.09687263006867187 0.011004197145331225 0.10218030151065881
1007
earnhardt believes tough times will pass
<oov> of pres

us shuttle discovery lifts off on the way to space station
first first satellite space launched in beijing
0.09551089063517089 0.010711994988123339 0.10057247543002544
1052
clinton expresses frustration at mideast impasse
clinton sees economic of china policy says china is little balanced
0.09550652460127518 0.010701831805022651 0.10057193228445616
1053
south korean government seeks to lower interest rates
s. korea 's foreign minister currency to strengthen economic ties
0.09551078381966259 0.010691687888619787 0.10057139016854671
1054
bay state public school buildings < unk > in nation
s. national elections passes passes
0.09542033800165155 0.010681563184179807 0.10047615210967498
1055
religious violence erupts again in central nigeria
religious < in unk >
0.09570849283797922 0.010671457637174905 0.10075952377276895
1056
internet tv may not be a cable killer
new <oov> > launches <oov> unk > software for chinese cities
0.09561803112452177 0.010661371193283435 0.10066428792799317
1057
a

british high commissioner ends tour of duty
s. china bank first second time of china 's economic zone
0.09531848311218698 0.010543726856295444 0.1005346622753696
1102
sand or bleach rust from < unk >
china develops tree on a of of unk > 's <oov> > < unk > contributed contributed reporting from china < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed < unk > contributed <
0.09526949636426113 0.0105530471520174 0.10070239744152804
1103
# pakistani policemen killed in attack on post
gunmen officer police shot of
0.09518327962546995 0.010543496883101546 0.10061126405017824
1104
fire kills four in tokyo
explosion at south central city 's city
0.09509721879398218 0.0105339638841114 0.1005

cheney steers rally to the right
clinton declares clinton of on on nation of the america
0.09480771470711395 0.01027350693316215 0.09988771244201322
1150
dollar dips gold higher
currency dollar down in
0.0949424302325418 0.010264588958393779 0.10001801824718509
1151
taiwan china to cooperate on solar cell industry development
coastal development <oov> china on < of development ties
0.09514918730374804 0.010255686452792397 0.1002565108592864
1152
safrica 's safety minister admits to corrupt police
s. s. korea minister chief says on <
0.0951750545591174 0.01024679937614353 0.10029342648493446
1153
nadal wins # th barcelona open title
nadal loses # title at masters title
0.09546370942839214 0.010237927688371978 0.1006394927823501
1154
regional land reform meeting ends with call for < unk > land
southern bank province urged for to more more poverty
0.09547724524299656 0.010229071349541206 0.10066056588547954
1155
arab league chief rejects outside pressure in iraqi government setup
arab al 

nkorea 's kim jong-il a no-show at his own anniversary
china 's top supreme chief of <oov> dies in china
0.09534547049468657 0.010234365650904496 0.10048017021372575
1200
e-readers with color open door for pictures
scholars develops most asian in chinese cities
0.0952661481398657 0.01022585120360757 0.10039657606213363
1201
two armed hijackers take colombian plane with # # onboard
man becomes of in china in central korea
0.09518695765928394 0.010217350911667747 0.10031312088668715
1202
ryanair profits climb despite soaring fuel costs
ryanair airways s profit falls
0.0952740116811616 0.010208864739814203 0.10039591729791082
1203
palestinian militants promise to maintain calm during wednesday 's election
fatah issues control on of control in s. africa
0.09519494611130172 0.010200392652893195 0.10031260118397065
1204
u.s. dollar up on taipei forex
u.s. securities ends higher
0.0954476866203305 0.010399231464955472 0.1005610981979143
1205
taliban wish to live in peace with all countries :


burglars break # # safe deposit boxes at malaysian bank branch
weihai discovers huge huge in of of of the # years
0.09489638015474698 0.010228050312260834 0.1001316772116774
1249
south korea defeats holland china breezes past hong kong in uber cup semifinals
guangdong <oov> < of unk > wins hong 's # st # st # st round
0.0948704837677328 0.010219874412730649 0.10011824927892093
1250
biotech counterattack on resistant bacteria
indonesian unk < < of chinese of unk > industries
0.09479470862095346 0.010211711573742845 0.10003828262614224
1251
putting energy hogs in the home on a strict low-power diet
<oov> china s slalom is
0.09471905442412908 0.010203561764027168 0.09995844361367126
1252
leader of lebanon s hezbollah says he expects israeli prime and defense ministers to resign
coup 's ruling party to resign
0.09490933694319542 0.010354914585586955 0.10014454799143813
1253
agnew 's < unk > words launched baltimore riots
s. korean chief of official president dies at china 's <
0.0949785870

car bomb kills five near iraq oil city
major casualties reported reported
0.09434935429014729 0.010169409461374936 0.09964745928949181
1298
viking ship to retrace leif ericsson 's # , # # # -year-old voyage to america
dprk <oov> of of china of the unk > classic
0.09427677786377026 0.01016158683871234 0.09957080739773067
1299
dortmund 's evanilson out for the season
cottbus champ di < retires in the of the year
0.09428117695841762 0.010153776241603414 0.09957967773109991
1300
venus williams puts u.s. ahead of belgium in fed cup
results <oov> second of of the
0.09433677257263287 0.010145977642339508 0.09965680547477801
1301
workers demand better living standards in belarus
president president praises economic of of <
0.09426437290066615 0.010138191013297038 0.09958032289191171
1302
ac milan s kaka has successful arthroscopic knee surgery
filippo inzaghi defends to unk > in the round of #
0.0941920842711411 0.010130416326937147 0.09950395761362037
1303
tel aviv stock exchange index closes

lists for fifth inter-korean family reunion exchanged
taiwan korea dprk military official of <oov> to the article
0.09543042323849597 0.01060790429230989 0.10055448628465694
1348
lawyers for marcos victims to ask swiss banks for money
dprk businessman becomes bank president in # #
0.09535973403609709 0.010600046585426697 0.10048000148000164
1349
irish pm 's party of
0.09528914948092602 0.010592200510974124 0.10040562694152642
1350
liberian president to ask nigeria to hand over charles taylor to war crimes court
un <oov> <oov> chief in <oov> of democracy
0.09521866934077741 0.010584366043140562 0.10033136242455784
1351
thai reinforcements close border with cambodia
armed patrol military exercise on south of south korea
0.09514829338413235 0.010576543156190717 0.10025720768514575
1352
harvard hopes others follow decision to end early admissions
china s economic economic
0.09507802138015588 0.010568731824465319 0.1001831624800607
1353
china 's central bank signals further changes to yuan 

african bloc demands reparations from colonial powers
african african < commission on the <
0.09458213772454836 0.010379158004525064 0.09963722694409399
1397
senate parliamentarian loses job
senate senator concedes to in senate of senate race of senate campaign
0.09457409712098068 0.010371739020962145 0.09965535616000244
1398
two germans to go on trial for providing iraq with technology to make long-range cannons
army korean <oov> <oov> > unk unk > facilities unk > facilities
0.09450654419446569 0.010364330635947171 0.0995841737627453
1399
nato links karadzic to criminal rings
un accuses prison of uzbekistan s torture case
0.09443908770324907 0.010356932826785181 0.09951309298204383
1400
china busts terror cell it blames for xinjiang attacks
police tighten security on < in indonesia
0.09437172744097858 0.010349545570845962 0.09944211360045892
1401
sensitive taiwanese government e-mail addresses for sale in rival china lawmaker says
senator says < china of china unk > interests policy
0

manila to resume negotiations with abu sayyaf
vanuatu chief says <oov> of # # # # # in china
0.09489481040585607 0.011059413773683904 0.10000716556310488
1447
unique coral reef spurs mexico tourism battle
largest coastal of mexico becomes largest of > 's <
0.09489833365609357 0.011051781328015386 0.10002441389604959
1448
bush expected to discuss stalled hemisphere trade talks with brazilian counterpart
clinton makes central central economic crisis in # years
0.0948328865294342 0.011044159409858133 0.09995543154163852
1449
west sacramento to control port
world 's bank bank <oov> <oov> in the elections
0.09476752961246009 0.011036547997446102 0.09988654426972836
1450
clinton says nkorea-us talks are positive
dprk <oov> china policy on < unk unk > says says it has not not a balanced
0.09474277479138522 0.011028947069073205 0.09986694511487909
1451
united states defeats south korea at volleyball world cup
lee becomes first of # world title title
0.09476359875918193 0.011021356603093113 0.0

lloyds banking group says returns to profit in first half
cadbury retains full-year in in # china
0.09565389984374789 0.010983997022315646 0.1009275451403537
1498
indonesian police say more bombs likely in jakarta
chinese official 's most dead of # # years
0.09559013057718539 0.010976674357634103 0.10086026011026013
1499
putin rejects negotiations with chechen separatists
president hails israel on currency policy policy
0.09552644627966561 0.01096936145000077 0.10079306473377095
1500
eu commission insists eu newcomers must not come off badly in new budget deal
eu eu commissioner rules to to <oov> # . # <oov> # dollars for year of year
0.09554606915164986 0.010962058279927533 0.10079253672795618
1501
de clarin de buenos aires
de clarin de buenos la
0.0960147677084352 0.01145376682398613 0.10122447782128423
1502
malaysia new zealand sign free trade deal
australia s. asian financial bank <oov> to in south korea
0.09595092810224608 0.011446151287534012 0.10115717431209453
1503
overseas inv

dprk accuses u.s. of impeding korean reunification
dprk korea <oov> <oov> of china dprk
0.09532457353669892 0.011285783216990202 0.10051438769966853
1548
leaders and stars jet to south africa for world cup final
fifa international championship in
0.09526307381183653 0.011278502066527628 0.10044953970760422
1549
krajicek fights into milan indoor quarter-finals
top-seeded top-seeded round round
0.0952016533902944 0.011271230305040505 0.10038477533641943
1550
lyondell buys occidental unit for dlrs # # # million
sumitomo discovers < < > to
0.09514031211878005 0.01126396791438004 0.10032009442447587
1551
un chief ban arrives in new delhi for talks
chief general general chief starts to china in nepal
0.09522214206733345 0.011256714876444187 0.10043947252575712
1552
futuristic stadium gets the go-ahead
< world 's # in
0.09516086655763761 0.011249471173177492 0.10037483966055394
1553
hamburg sv thwarts moenchengladbach
top-seeded ajax wins second second
0.09509966985888672 0.011242236786570948

standard & amp ; poor 's to provide financial services to china market
computer technology software unit in indonesia in china
0.09508719229068116 0.010988548876948641 0.10020579767454764
1599
nhra plans to add two texas races in ' # #
ohio state system becomes no .
0.09502779991573383 0.010981685323621378 0.10014320816944174
1600
zhengzhou airport opens to traffic
shenzhen bank bank to promote cooperation in central korea
0.09503783943583083 0.01097483033902486 0.10015872426921113
1601
a-rod to meet with texas rangers
dodgers s sports # for the year
0.09497855195021895 0.010967983907122785 0.10009624222038442
1602
criville fractures wrist in australian gp qualifying
dprk retains first world 's second title of china tour
0.09491933838915273 0.010961146011918844 0.10003383807934926
1603
human rights activists sees troubled times ahead for women with < unk > pakistan
islamic group becomes in
0.09486019861445544 0.010954316637456588 0.09997151170048363
1604
toyota plans joint auto venture

bortolami wins second stage of tour
former world cup slalom results in #
0.09425595024872732 0.010857582749364337 0.09904508622690435
1649
nigeria oil firm insolvency raises corruption questions
nigeria bank state state of state state governor 's <
0.09425942938243494 0.01085100638186018 0.09907162290566282
1650
italy holds out on eu-south korea free-trade deal
argentina government proposes economic development in north korea
0.09427803747602911 0.010844437976060023 0.09908731804918239
1651
jamaica to build solar energy plant ease costs
coastal develops <oov> energy of < unk > system
0.09428822082366074 0.010837877517514312 0.09909459197117994
1652
china 's diplomatic news summary
china china launches first
0.09438236337455333 0.010831324991808439 0.09923621152460325
1653
drive-by shooting leaves three injured as hell 's angels bandidos clash
philippine fugitive player shot dies at least #
0.09432533475620013 0.010824780384562633 0.09917625006748869
1654
boeing makes pledge to twa memo

roddick into # rd rd at wimbledon
defending wins first #
0.09575053174401395 0.010721868907749649 0.10083049439266287
1696
president weizman to visit cairo for talks with mubarak
lebanon vice <oov> bank of china 's economic peace
0.09569414156041912 0.010715554497321056 0.10077111247605941
1697
gonzalez can slug and steal as well
baseball president defends a of his role of his politics
0.09563781775726408 0.010709247519983022 0.10071180046165326
1698
getting rid of glasses for good
chinese develops baseball world on <
0.09558156021740687 0.010702947962618326 0.10065255822608758
1699
venus joins serena in final
top-seeded davenport <oov> round
0.095525368823981 0.010696655812140597 0.10059338564629564
1700
photo budget likely hurt us shuttle safety analysis
us national supercomputer launches launches
0.09558675227355562 0.010690371055494216 0.10068116861595117
1701
pennekamp aims for blue < unk > glory
flower < wins first round in world tour race
0.09559586816247961 0.010684093679654231

arctic mosque plan on ice over saudi funding
military bank becomes uae 's first government says government says of state is a own image from
0.09516100475226937 0.010586726695163797 0.10031270167217497
1746
china 's wen says india nuclear development must follow international rules
chinese vice-premier <oov> state hails china of < unk > program
0.09515857230727899 0.010580670215361073 0.10030732203215033
1747
internet leader google prime stop for presidential hopefuls
chinese 's chairman resigns of china economic industry
0.09510416489029369 0.010574620661207065 0.10024997079027946
1748
obama vows we will not rest until oil spill cleaned up
president 's major major of < unk unk > < > > contributed unk > < > > contributed
0.09504981965321352 0.010568578020829231 0.10019268509268502
1749
norwegian wins world cup nordic
veerpalu svindal champion 's world 's slalom at china # # # m cup qualifier
0.09507168345314505 0.010562542282382155 0.10023064853161932
1750
jal to recruit # # # vietname

prison guards call in sick en masse to protest planned cuts
provincial national national company <oov> <oov> # , # # # jobs in from state city
0.09566632784130792 0.01068282233637805 0.10085105489946883
1796
israeli army closes rafah crossing in gaza strip
lebanon bank seizes seizes <oov> unk > on < of year in iraq
0.0956559033413534 0.010676880833410098 0.10084131200649545
1797
survey shows china manufacturing gaining in sept
chinese exchange exchange up
0.09560273163299245 0.010670945935781743 0.10078525791421836
1798
drawing back on line-item veto clinton cuts
clinton releases <oov> on # of # of # # # million yuan in central america
0.0956190634487519 0.010665017632484087 0.10077977115477106
1799
police arrest more than # # # pro-democracy protesters in katmandu
chinese women s <oov> police of of president s # # th anniversary
0.09569410518518748 0.010705366502945412 0.10083486289760572
1800
astronaut back in texas after attempted murder charge
john john <oov> jr. <oov> at at age at

uncertainty on currency market as romania 's currency falls
economic exchange exchange in indonesia kong says
0.09616041643002853 0.0109567705336609 0.10134866638116898
1845
rey wins hamburg marathon
del del defends champion wins title at
0.09618569890237672 0.010950838335212789 0.10138403075598516
1846
det-digest- # # # # the detroit news
a # of daily 's daily daily daily daily daily daily daily daily daily daily #
0.09620129105665032 0.010944912556892868 0.10143739437570594
1847
russians edges rising chinese stars in figure skating
dorfmeister beaten < unk >
0.09614926223509454 0.01093899318828449 0.10138253369729831
1848
hang seng china enterprises index up
shenzhen stock exchange index up
0.09631350587712963 0.01106821535412866 0.10154394854394842
1849
unheralded u.s. baseball team keeps victory string intact
us 's first team team of a world s # # # m of world 's largest championship
0.09632150026137272 0.011062235767227455 0.10157913099566247
1850
bhp billiton posts record half-ye

basketball star shaquille o'neal to become father for sixth time
chris carter # to the the year at
0.0968479045115142 0.011130180264642086 0.10186398663455615
1895
western envoy reiterates decision to sack serb president
<oov> commission president 's <oov> of president president
0.09686274483596781 0.01112431301094433 0.10191571885035237
1896
israel to renew pinpoint attacks against gaza rocket launchers but maintain truce
lebanon intensifies military system of the of north bank
0.09681171072383084 0.011118451939811062 0.10186202247582636
1897
jordan 's stock market index down
shenzhen stock market closed closed
0.09697136753756236 0.011244245277388834 0.10207167912539149
1898
indonesians return to rumbling volcano
villagers indonesian of of <oov> in south korea
0.09692032997570049 0.01123832725355863 0.1020179571890097
1899
tennis star 's father remains jailed in tax evasion case
former dictator boxer wins at court 's first round of congress
0.09691716782995266 0.011232415455950235 0.

man hospitalized in puerto rico with symptoms that could be sars health officials say
former national chief <oov> resigns after death in # years
0.09647072581564052 0.011139682313340902 0.10165258830469506
1945
us investigation says figures in administration s september benchmark report were unreliable
u.s. chief says <oov> of of china
0.09654958009103054 0.011133960853498406 0.10174712435883455
1946
china says military budget to jump # # . # percent
china bank bank of china #
0.09667113232575451 0.011128245267844658 0.10195156628678176
1947
australian shares close down # . # # percent
seoul stock market declines in asia-pacific bank
0.09662153195001016 0.011122535547337812 0.1018992566067988
1948
tokyo stocks close # . # # percent lower on profit-taking
indonesian stocks rise lower in thin trade concerns
0.09670018757465118 0.011116831682954562 0.1019752057059748
1949
australian < unk > speeding at < unk > in makeshift car
australia champion <oov> # in china # , # # # in china
0.09669

princess anne meets british troops in afghanistan
queen world prince crown 's #
0.09661478886439007 0.010971965933859528 0.10171995812025857
1995
pataki proposes extending and easing < unk > program
gop passes overhaul to curb environment control in china
0.09656640890001131 0.010966471709556143 0.10166902173662298
1996
atomic agency chief says world must not tolerate north korean nuclear moves
china human official chief of <oov> < unk > in asia
0.09656357740952537 0.010960982984976785 0.10166363638585846
1997
asian stocks up despite signs of deepening slump
asian securities index up # #
0.09668202150953728 0.010955499751867742 0.10181287918906713
1998
record number of treaty signs expected during millennium summit
world commission committee ends < unk > policy in # # years
0.09663368049878251 0.010950022001991808 0.1017619727494726
1999
impoverished afghanistan pledges aid for u.s. hurricane victims
un official sees first . in indonesia # years in iraq
0.09658538780488007 0.0109445497

u.s. rules force western union to block muslims money transfers
national security body says it
0.0961664939940619 0.010748266419791156 0.1012927254129598
2045
daniel sedin helps canucks beat stars # - #
cilic cilic upset in at least at
0.09611951475908678 0.010743015678990086 0.10124324191251381
2046
hingis out of sydney international ; baghdatis advances
sorenstam <oov> out at the round at the # #
0.09612140952727082 0.01073777006586558 0.10126356120008442
2047
downtown new york city office vacancies fall for the first time since sept. # #
hong kong banking chief company 's for for $ . # billion in a
0.09614958168164801 0.010732529572910057 0.10129548072447025
2048
one u.s. supply warship and a british frigate cross egypt 's suez canal
small military military becomes makes a of on central china
0.09615145993448623 0.010727294192630589 0.10130026883685399
2049
american man imprisoned in afghanistan for running private jail freed leaves country
man american of official in hospital of ho

queen elizabeth ii cancels visit to belgium
s. elizabeth 's queen 's iraq of
0.09596761782954301 0.01070315373938447 0.10089065541117045
2095
intel produces trimmer chip lowers costs
taiwan maker develops maker # . <oov>
0.09592185358641972 0.010698049708035217 0.10084254351064056
2096
un security council holds closed-door consultations on kosovo
un committee chief recommends of < unk > proposal to protect development development
0.09591279792547437 0.010692950542302121 0.10083419784325387
2097
taiwan wins big in international badminton championship
taekwondo taekwondo gold <oov> in #
0.09594650629552734 0.01068785623523099 0.10088144215109414
2098
palestinian arsenal worries israel
china army airlines seizes another of of < unk > in south korea
0.09590081748300566 0.01068276677988088 0.10083340336911743
2099
thai rice exports up in may
thailand develops < of < of war war
0.09585517216292808 0.010677682169324059 0.10078541031658572
2100
vietnam resumes work on its first oil refinery af

# # # hostages seized in colombia
foreigners reports of military
0.09626400967155635 0.010751322635730656 0.10121132233201179
2145
russia to seek extradition of citizens held at us base
russia court court of china
0.09640547962513271 0.010746315033198875 0.10139706461317992
2146
students can now prepare for sat with cd-roms
chinese unk develops < on chinese economic system
0.09636059811692733 0.010741312093239285 0.10134985927583673
2147
man runs onto court in french open final
del defending < at at world # # # # at french # # # #
0.09634484167294552 0.010736313809342944 0.1013691739727103
2148
norway threatens to suspend aid to zambia
vanuatu development <oov> chief of indonesia 's affairs says
0.09630003011867903 0.010731320175013017 0.10132202551969974
2149
elderly in low medium income families to get denture subsidies
chinese ministry issues # <oov> #
0.09625526023019987 0.010726331183764754 0.10127492090532517
2150
opec ministers ready to trim excess oil production
opec says econo

at least # # # die in china mine blast
earthquake reports reports china # 's largest of province 's province
0.0966397868241724 0.01076400949133749 0.101507632356517
2196
mugabe launches marketing campaign to reverse negative image of zimbabwe
south bank <oov> economic
0.09659581967821054 0.010759112307765453 0.10146145054015825
2197
despite moi 's support lawmakers reject anti-corruption bill
indonesia government passes # <oov> # . # # billion
0.09655189252055787 0.010754219578203031 0.101415310726361
2198
caterpillar agrees to buy bucyrus for $ # . # billion
< world unk > on economic of development in indonesia
0.09650800529668489 0.010749331296576575 0.10136921285784901
2199
liberal group jeers hillary clinton
hillary senator <oov> obama to to in the #
0.09651464005625529 0.010744447456823474 0.10137994924455604
2200
pelosi calls for us-chinese climate cooperation
chinese house speaker stresses policy policy in tibet
0.09647080961117979 0.010739568052892128 0.10133390930393635
2201


hong kong gold closes higher
hong kong stock markets close higher
0.09625204654620657 0.010857255065784305 0.10104850833460437
2243
sri lanka tigers seek india 's help for separation again
sri lanka military minister wins # rd # years
0.09630815798303331 0.010908098159296206 0.10111485643779608
2244
five communist guerrillas killed in philippine clashes
s. central dprk korea military forces on the year
0.09626527812640685 0.010903241481576127 0.10106983646609627
2245
israeli missiles miss leader of uprising ; aide injured
israeli military chief <oov>
0.09633369589315077 0.010898389126666658 0.10113611602263116
2246
jackson s moves expressed as much as his music
<oov> s < <oov> of <
0.09636498280185785 0.010893541088798924 0.10120233661158906
2247
wall street up at start of business
stock stock markets soar on wall trading
0.09638565517182462 0.01088869736221431 0.10123144480636677
2248
remains removed from mountaintop
former china becomes executive chief of
0.09634281710285937 0.010883

ahmadinejad un visit off because of late visa
indonesian leader 's top #
0.09610708077221447 0.010706230526884809 0.10086829861044497
2291
adb assures of its support to manage bangladesh 's food situation
asian <oov> official defends china
0.09606516752285894 0.01070156143376362 0.1008243089468556
2292
calm at latest israeli embassy protest in london
anti-war activists denounce in philippines in
0.09609594411359294 0.010696896411342626 0.1008675415933478
2293
haeggman takes early lead in madeira islands
vanuatu eritrea takes < to unk > to the # # # # #
0.09608519580305482 0.010692235454300645 0.10087200502233158
2294
u.s. bonds little changed in asia ahead of fed meeting
economic growth economic outlook shows improved growth says china says says it will remain in the china says it says not remain not remain
0.09606149435308252 0.010687578557325775 0.10086157430451831
2295
seeking answers sept. # # widow turns down compensation
< leader s china s leader 's < of history unk > contributed

peruvian president recognizes # # -year-old girl as daughter
paraguay s ruling presidential leader in #
0.09594178181891276 0.010704360759463159 0.10083879083879059
2339
huckabee to spend caucus eve trading jokes with jay leno on tonight show
the dole is < < unk > 's nomination for the of the <
0.09590079857165992 0.010699788200403158 0.10079571574659119
2340
judge 's office still waiting for government to deliver davidian
state issues system for <oov> of china in indonesia
0.09590729315429845 0.010695219546175829 0.10080012027065802
2341
argentine soccer chief charged with fraud
fifa 's former executive of chief president 's president
0.09591378219311912 0.010690654791781388 0.10081807034431849
2342
tandycrafts reports loss for fiscal year
biogen posts maker sees <oov> on economic reforms
0.09587286334406062 0.01068609393222858 0.10077505922215794
2343
bomb defused outside office of < unk > sinn fein party
n. ireland military raid in n. china province
0.09583197939380729 0.01068153696

johnson out of lions tour
<oov> 's < hero title title of < unk > event
0.09512095313790014 0.010484807106380825 0.10001315251419873
2388
chinese firm signs # . # bln dlr financing deal with embraer
taiwan president < to to invest
0.09508115357591776 0.010480420157800749 0.09997130600687061
2389
eurozone data to confirm inflation upturn on rising oil prices
asian economy economic report of says of stability in china
0.09504138730507881 0.010476036878771975 0.09992949450289451
2390
imf warns delays loan money to maldives over fiscal issues
world world 's development system system to boost economic development
0.09504346030369709 0.01047165726469222 0.09994744084417972
2391
united states topple puerto rico # # - # # in olympic tune-up
us <oov> # baseball baseball team winner in beijing
0.09509660646413108 0.0104672813109669 0.10001014563279478
2392
ruling halts double-amputee sprinter s olympic bid
wbc president winner <oov> of
0.0950568835708712 0.010462909013009102 0.09996837030045025
2

political chaos deepens ukraine 's financial gloom
economic financial financial financial <oov> crisis says <oov> is n't be remain
0.0953107576930709 0.010442542437595601 0.10037407219523682
2437
u.s. military chief visits philippines reiterating aid
philippines philippine military committee resigns for indonesia
0.09538882392022187 0.010438260952381335 0.10039149042839053
2438
court oks suing agencies that spread credit errors
us court of < unk
0.09543169735304144 0.010433982976581178 0.10043231358805102
2439
buddhist culture fair opens in central china
< unk > < > central city in beijing city
0.09547453565809959 0.010429708505882047 0.10044969369016862
2440
dollar rises against yen in asian trading
us consumer currency prices up on # nd
0.09543543879665074 0.010425437535977918 0.10040855949946831
2441
jones nets season-high # # as sun top fever # # - # #
wednesday <oov> < unk > < unk > < unk > #
0.09543048500808614 0.010421170062569823 0.10043568111517326
2442
arabs sail to gaza defy

starbucks alliance seeks to secure premium coffee supply in latin america
< america unk > <oov> banking of commerce .
0.0956921753908932 0.010422342319784866 0.10098746247720888
2486
u.s. stocks expected to gain as blizzard impedes trading
us stock securities index closes down #
0.09565371390560747 0.010418153275444117 0.10094687266110068
2487
patrons at movie theater survived storm 's wrath
village village of china in china in china province
0.09561528332549273 0.010413967597149442 0.10090631546035295
2488
taiwanese yeh chang-ting extends lead after second round
lee takes world title 's
0.0955768836133138 0.010409785280845366 0.10086579083567009
2489
israel raids hezbollah targets in south lebanon
israel army reports of #
0.09561880377244134 0.010405606322482924 0.10090558778836552
2490
chad extends state of emergency
s. < leader declares second article of <
0.09563059397959525 0.010401430718019647 0.10092242228076871
2491
the abcs of keeping christmas simple
a oldest of the 's world 

district prosecutor adjourns criminal case against < unk >
military leader says of <oov> in south korea
0.09596604099587201 0.010269197199433453 0.10099311969879314
2537
family in us sues conagra says relative died after eating tainted peanut butter
< unk becomes founder in indonesia s company industry
0.09597200597031677 0.010265152616054393 0.10099710472888858
2538
somali pirates gather at coastal town local official warns
gulf <oov> oil first <oov> in gulf
0.09593422171599775 0.010261111217386655 0.10095734208923153
2539
mbeki to attend burundi peace summit in tanzania
s. african president <oov> economic ties
0.09589646720135155 0.01025707299967025 0.10091761074641799
2540
options and hurdles in speeding vaccines
< unk > becomes < in indonesia in indonesia
0.0959024525058007 0.010253037959151104 0.10094347583529299
2541
british spirits group allied domecq faces lawsuit as russian government takes vodka fight to united states
chinese publisher reports company wins in in #
0.095864740

biotech corn story clarification
burma < 's largest largest < > panel on the < unk > panel
0.09573350425603105 0.01019039656170244 0.10072064077087232
2587
pakistani premier to visit tajikistan for regional economic summit
pakistani pm stresses regional of #
0.09582527707529613 0.010186460525950528 0.10081048731106639
2588
u.s. businessman detained in shanghai
scholar vice president kissinger in beijing china
0.09584343609683353 0.010182527529608461 0.10082672154100697
2589
china to complete beijing-based nuclear reactor next year
east develops develops regional
0.09580644519135424 0.010178597569157049 0.10078780732968276
2590
new clashes reported in kurdish enclave
lebanon <oov> military official <oov> of the unk > of democracy 's democracy
0.0957694828282403 0.01017467064108253 0.10074892314475618
2591
blast victims families compensated in n. china
# china 's national of of national national park
0.09577539938369069 0.010170746741876558 0.10077434456531999
2592
china to step up lunar

jackson could face new charges over manhandled accusation
hong court 's mayor of # .
0.09566391371711788 0.01007688521110577 0.10046498502129877
2636
us preps first cut on humvee replacement contract
fed military development of develops development system
0.095627649913586 0.010073065315271386 0.10042690125138926
2637
india wants to reopen consulate in pakistan
pak issues on indian in in philippines
0.09564554665210193 0.010069248314394057 0.1004520015793223
2638
australian pm defends next year 's budget
australian cabinet pm hails cabinet on of of china #
0.09568507485412764 0.010065434205184059 0.10050864854842104
2639
two killed nine injured in train blast in ethiopia
unknown <oov> causes reported of central china 's main province
0.09564884423131276 0.010061622984356653 0.10047059150618384
2640
us stocks flat as traders await fed decision amid tumbling dollar and soaring oil
philippine stock markets index on on economy 's economic economy
0.09561264103516161 0.010057814648632066 0.

tuvalu fears it will be the next atlantis
fiji chief leader 's chief in china city
0.09525975950661449 0.010004566472351964 0.10023746567289532
2686
manninen wins world cup nordic combined opener
ahonen wins second straight round at # st event
0.09526565658682448 0.010000844535420286 0.10024151092789464
2687
sichuan company places orders for taiwan tft-lcd panels
tsmc manufacturer makes china to to in myanmar
0.09523022867437123 0.009997125366757058 0.10020423256756444
2688
bank <oov> agency reports in terror of terrorist threat
0.09527743759390574 0.01003987736476198 0.10024959241501971
2689
hk shares end up modestly
hk stock index rates for regional election
0.09529511864379918 0.010036146455299044 0.10026542576709781
2690
shenzhen stock indices up june # #
shenzhen stock index up aug. #
0.09550736661854765 0.01010671252273764 0.10047582741676332
2691
bowater predicts lower # q earnings
vanuatu moo-hyun says it has # than
0.09552494915706924 0.010102959565989502 0.10049156500140514
2

rocker 's chances diminished
james ucla <oov> wins at the of the round
0.09541942916670298 0.010040628007675068 0.10044882344553023
2732
us court accepts complaint against bin laden could transmit via television
us court chief 's first office of the article
0.09546580912027121 0.010082676058879282 0.10049336382547781
2733
indonesia 's widjaja takes volvo open title
lu retains motorcycle wins titles title in indonesia
0.09552231156666233 0.01007898952284313 0.10050232420433504
2734
u.s. justice department finds no antitrust violations in orbitz air travel service
justice department agency <oov> < unk > rules to china s ruling group
0.09557174392017204 0.010105763771311876 0.10054993637778713
2735
connecticut discovers # # school bus drivers have < unk >
china of man becomes national city in # years years of china 's # years
0.09558554062803216 0.010102071493719143 0.10054973545108717
2736
u.n. agencies meet to speed further emergency supplies to indonesia after earthquake
indonesian mil

india billionaire mukesh ambani wins family gas feud
<oov> <oov> ruling ruling party in iran of china of state state says
0.09548223011407964 0.00996776096630197 0.10043867578901702
2782
former sfor officer arrested with stolen explosives
swiss army executive dies dies
0.09544793333602142 0.00996418059239166 0.10040259867846063
2783
lyon competes to host # # # # olympics
southern world 's soccer chief of french president
0.09541366118760633 0.009960602789665486 0.10036654747606262
2784
un climate chief says biofuels necessary in future
china economic council 's <oov> in china 's province
0.09541929559174253 0.009957027555354768 0.10038179894604865
2785
stocks up for # th straight day on signs of growth
economic index rates sharply in the #
0.09543631670665657 0.009953454886694791 0.10039703947131279
2786
explosion rocks belfast city hall
man of man of police
0.09540208560310325 0.009949884780924814 0.10036102905543355
2787
at sundance festival films take diversity to a new frontier
a w

colombo stocks close mixed
stock exchange rates in malaysia
0.09518840342063131 0.00992189718875308 0.10028004394835889
2830
oil gasoline break out to # # -month highs
opec price price rises on the # # # dollars
0.09522541316518617 0.00995762784338668 0.10029507823469674
2831
henry aims to score more goals after poor debut season at fc barcelona
del del < < unk > # at la de # # # # at french # # # #
0.0952094493765645 0.009954112972986614 0.10029889610722637
2832
villarreal aims for the impossible in champions league
pires advances # in
0.09526406848405337 0.00995060058308789 0.10035171936195211
2833
lebanon blames israel for violence fears retaliation
israeli pm warns on attack on palestinians
0.09523046563802724 0.009947090671065636 0.1003163219300784
2834
china 's tax revenue close to # trillion yuan in first three quarters
china securities sector in in china
0.095314422925649 0.009943583234298687 0.10045725411557556
2835
zambia to export aids drugs
anti-corruption dictator suharto 

stock futures inch higher as rally continues
economic economic gains from
0.09492562900422565 0.009875957525657219 0.10016467873982607
2880
court upholds life sentence of u.s. serviceman
marine marine files from murder in south china
0.09489269158958157 0.009872530753441515 0.10012992347308775
2881
dominican republic wins little league opener
puerto american team wins team of the # nd event
0.09489446311521821 0.009869106358452461 0.1001337324178113
2882
u.s. police officer accused of using taser on partner during fight about soda
police man defends a chinese in in beijing 's ruling of <
0.09491934945486852 0.009865684338217214 0.10016205573591129
2883
bharti mtn extend talks to forge global telecom giant
indian telecom <oov> <oov> <oov> on < unk unk > bank in india
0.09491311157877431 0.009862264690266359 0.10016199956407909
2884
bekele sets new # # # # m world record
world emir 's second
0.09496684923934993 0.009858847412133904 0.10021391848314906
2885
argentina edges brazil for narr

army investigator warned of problems at iraq prison
national <oov> official military says on a unk > of military in south china
0.0946339530405178 0.009748253202942455 0.09993374161881763
2928
greece russia agree to strengthen military cooperation
rok kyrgyzstan military cooperation
0.0947723032271934 0.009858691796843611 0.10007028300393066
2929
central china becomes leading livestock producer
central <oov> bank > < > on
0.09478870883607432 0.009855328203600062 0.10009300439037308
2930
china spends # # # mln dollars on free education for rural students
china 's china <
0.09484164583851767 0.009851966904758452 0.10017255429792525
2931
turkey s human rights situation < unk > european mps say ahead of critical eu report
china council chief body commission ends in china
0.09480930978470298 0.009848607897971968 0.10013840068241282
2932
tech stocks fall sharply on wall street
china index up on the year
0.09483380104471728 0.009845251180896995 0.10016107561969445
2933
baseball owners expecte

dalian to carry out new medical care measures
china chi national city < unk > improves system for <
0.09456252571951498 0.009903009048574423 0.09988842966043895
2979
mongolian president arrives in beijing for olympics
maldivian vice-premier leader speaks in # nd time in #
0.0945643497632186 0.009899687005954975 0.0998968535350916
2980
protest strike against killing of communists cripples indian state
military <oov> chief takes india in pakistan 's <
0.09453263804297608 0.009896367191398987 0.09986335358420793
2981
# # die in mine explosions in romania
# people killed # dead # in # # # in
0.09462285048869964 0.009926572901358292 0.09996396928867182
2982
norway give fresh aid for afghanistan
mongolia says spokesman 's spokesman in pakistan situation with
0.09459114041816054 0.009923246301860516 0.09993046929896382
2983
braswell hopes to land two recruits
national <oov> 's > of china 's > < unk unk > dies in < # > unk > <
0.09455945159389985 0.009919921931240128 0.09989699175481007
2984
b

mcclaren says it 's not end for beckham
reid retains career as a #
0.09462221796308529 0.009878134384333994 0.09999897210147626
3029
weakened tropical storm rick nears mexico resorts
coastal coastal volcano becomes
0.09459099981133237 0.009874875349565162 0.09996598002885948
3030
jordan 's stock market index ends lower
stock exchange rates in malaysia
0.09462576531271386 0.00987161846455541 0.09999897277950959
3031
somali faction leader confirms visit of u.s. military delegation
honduran military chief <oov> says of of in in iraq
0.09466050788926753 0.009868363727178373 0.10004842910236501
3032
haze reaches danger level in northern thailand as forest fires continue
mysterious foot-and-mouth becomes china of # 's largest disease in south korea
0.09465677447642772 0.009865111135310483 0.10004291984205879
3033
after three mauresmo set for final
fish stops first second
0.09462558608286052 0.009861860686830972 0.10000995677127064
3034
businessman argues against the minimum wage
kmt presiden

0.09517043755200329 0.010125227865248923 0.1004636505353164
3077
opening men 's races called off
international international chief world cup event of china unk > bank contributed contributed reporting from
0.09513952802373048 0.010121939385916268 0.10043102187323932
3078
world bank ready to double loans to mideast
china 's economic council chief of central bank
0.09514922298216433 0.010118653041959802 0.10043899881418958
3079
china 's agbank launches world-record ipo
bank develops huge investment of < unk unk > region
0.09511834040411105 0.010115368831300289 0.10040639933388636
3080
nadal djokovic win fish loses at indian wells
top-seeded verdasco wins at at round of french open
0.09512352949259481 0.01011208675186119 0.10041437908750937
3081
russia skeptical about report on corruption in u.n. oil-for-food program
russia < > watchdog defends economic policy in indonesia
0.09516475514706436 0.010108806801568664 0.10045388860523066
3082
yemen extradites # wanted militants to saudi
yemen 

indonesian baseball team aims at asian cup final
asian president president world world
0.09574092764812746 0.010168293758755952 0.10088592242961646
3125
dubai company agrees to delay u.s. ports takeover
uae government rules rules
0.09571031014648111 0.010165041985887786 0.10085365958266103
3126
cambodia 's ranariddh urges commanders to forego amnesty
vanuatu prime ruling party proposes government 's <oov> power system
0.09571168153070539 0.010161792292158283 0.10085338667358729
3127
top ugandan defense officials in somalia for peacekeeping deployment talks
military army <oov> official 's of of < unk > to visit
0.09568109294600398 0.010158544675574019 0.1008211548465903
3128
imf chief says more stimulus may be needed in # # # #
imf official sees # . #
0.09584221719745893 0.010235171338616967 0.10094868802395561
3129
western wall length may stop peace
east bank peace is little not not be a of of war
0.09583822202535286 0.010231902360227118 0.1009483850255449
3130
kinkel demands tough sta

afghanistan qualifies for t # # world cup
afp wins first cup
0.09575352572785999 0.010200628904591379 0.10084999632247353
3175
germany says exclusion from iraq contracts unacceptable
germany <oov> agency of development policy says says says not be not be balanced
0.09576835217328941 0.010197418130620781 0.10088120501107205
3176
davenport crashed out in wimbledon tennis
sampras loses out at # nd title of international title
0.09576968371760243 0.010194209377275715 0.10088442398718911
3177
daimler reports strong rise in # nd-quarter revenue
daimler-benz profits seen profits in q nd outlook
0.09577887853241286 0.01019100264264933 0.10089762710731179
3178
cabinet urged to better explain financial reforms
cppcc financial watchdog <oov> in # .
0.09580117028968779 0.010187797924837177 0.10091830919522354
3179
china says smuggling still a threat to nation 's economy despite crackdown
world of world of of # of
0.09577105360616385 0.010184595221937197 0.10088658385438883
3180
chicago merc chicag

bolivia seeks coastline it lost in # # th-century war
s. world world world world
0.09587419981171473 0.010196847988719254 0.10119047158445225
3224
cuba elections could settle question of castro 's return
presidential presidential < > of the < > policy
0.09587892297082799 0.010193687155492745 0.10121076798714358
3225
cashiered general tells army he ll retire
general executive general general to to retire # years
0.0959180749073794 0.010190528281258009 0.10128269936779005
3226
civic groups launch campaign to protest rejection of u.n. bid
china international commission urged calls to to for financial reforms
0.09591933944427304 0.010187371364194422 0.10128574410500917
3227
panel on twa crash may end without two key security measures
committee department agency procedures for # nd unk > in asia
0.09588963385757614 0.010184216402483615 0.10125437657818817
3228
spanish royal family politicians mourn deaths of plane crash victims
dutch spanish military federation of a years of #
0.09592874611

fin swimming results at world games
china wins first women title at #
0.09571082252404274 0.010161540987628623 0.10111954841425895
3272
asian shares mixed in cautious trade
asian stocks close higher in tokyo trading
0.09576885656900005 0.010158437279324523 0.10117593043542572
3273
# workers die at sardinia refinery inhaled fumes
# reports damages reported reported from iran china 's largest province
0.09576737267108862 0.010155335466414806 0.10117557137269735
3274
hollywood writers contract talks to continue despite past deadline
< < < unk <
0.09573813965134775 0.010152235547163763 0.10114468749865196
3275
pla officers leave for international exhibition
chinese army officer officer < unk > in # st # th anniversary of # # years
0.0957089244729372 0.010149137519837806 0.10111382247347692
3276
arsonists torch mosque in west bank village
longest-living knocks militia in of of # of west of # # # # # s bank
0.09573356195425581 0.010146041382705456 0.10118466430107295
3277
eight investors bid

north korean workers moving into nuclear power plant as dispute heats up
s. korea korea < of dprk of
0.09565088007652989 0.010150751090800695 0.10106987392439923
3322
egypt stresses just and comprehensive peace
egyptian official minister stresses
0.09569731482981614 0.010147697314900934 0.10111467841479502
3323
sales of new homes in u.s. hit all-time high in what could be final throes of boom
major bank sees # in
0.09572868405843873 0.010144645375858859 0.10114441836113643
3324
czech farmers protest low milk price
romanian food company <oov>
0.09569990213298521 0.010141595272017651 0.10111400813312646
3325
overseas investment in jiangsu booming
east central bank city china improves economic cooperation
0.09567113750956081 0.010138547001722484 0.10108361618598696
3326
china 's leaders order communist party to take stronger role in business
chinese vice-president official on china of state-owned development
0.0956799502687226 0.010135500563320524 0.10109080259939261
3327
britain blasted 

windows # # is watching
china chief chief defends a < of life in beijing
0.09568065595103102 0.010006213549311986 0.1010660510647421
3370
after setbacks sears holdings ends bid to privatize its canadian subsidiary
marine 's <oov> files of # in china
0.0956522809047822 0.010003246107571384 0.10103607892622944
3371
third candidate for extreme-right republican party presidential race
former ruling chairman chief executive of # president
0.09562392268334585 0.010000280425357458 0.10100612455951546
3372
zimbabwe opposition fears new crackdown
opposition declares party 's campaign of china election
0.09563262928598862 0.009997316501105722 0.10101323596302479
3373
us pair still perfect in women s beach volleyball
flyweight wins ibf wins # at world 's # event
0.0956042936921261 0.009994354333253543 0.10098330611533204
3374
eu lawmakers call for ban of communist symbols alongside nazi swastika
eu 's < of unk > < > < > of <
0.09562534297322045 0.009991393920240139 0.10105213017552694
3375
valder

dini lauded for surviving no-confidence motion
berlusconi s pm minister 's party in # #
0.09546750227002618 0.009980023431875183 0.10098900226657825
3420
heinen makes the most of his pga chance
< <oov> first world of # # # th title
0.09546882678718865 0.009977107001883402 0.10099601892284167
3421
uefa has no plans to probe marseille doping claims
world world soccer soccer
0.09544093639081495 0.009974192275911481 0.10096651380483909
3422
boxer felix trinidad stops vargas
mayweather defends his at
0.09541306228556062 0.009971279252466414 0.1009370259211344
3423
rights group condemns olympic torch arrests
china media chief defends controversial ties with
0.09538520445715608 0.009968367930056935 0.10090755525663188
3424
hong kong stocks lower at midday
hk stocks index up at #
0.09545465808496581 0.00996545830719352 0.10097539698987085
3425
imran right to resign says icc boss
coastal dictator founder founder of # st china #
0.09542680437674143 0.009962550382388385 0.10094593232777867
3426
<

us backs bolstering bosnian peacekeepers self-defense reach
un official agency agency on on of of dprk
0.09521844103919817 0.009971518490327147 0.1008406262054132
3470
court rules that british historian must remain in custody on holocaust denial charges
austrian court publisher <oov>
0.09526302098129517 0.00996864650919514 0.1008835868545476
3471
people 's daily editorial on international children 's day
world women leader makes a day in beijing
0.09527158331329019 0.009965776181953793 0.10089053082608386
3472
zimbabwe reportedly plans to explore methane gas with foreign investment
s. government minister defends economic on of # # # million dollars
0.09524415913847348 0.009962907507174878 0.10086148922250698
3473
rice due in jerusalem on new mideast peace bid
us official 's strategic policy of israel < unk > bank < unk > contributed < unk >
0.09521675074735449 0.009960040483431807 0.10083246433352208
3474
asean + # fms support indonesia 's territorial integrity
s. asian economic minist

flyers edge bruins # - # in overtime
phillies beats phillies in
0.09482475220801821 0.009900920386232755 0.1004145073063493
3520
syphilis makes comeback in europe amid spread of risky sex online dating sites
world asian < < unk in beijing kong
0.09483331985361504 0.009898109222011791 0.1004265580262672
3521
liechtenstein 's controversial ruler announces withdrawal from public life
founder 's founder of <oov> governor in china
0.09484188263537671 0.00989529965368309 0.10043860190501568
3522
lufthansa to ban smoking on all flights
airline carrier airline company to china in #
0.09485044055744385 0.00989249167988806 0.10045063894841867
3523
browns get better in week #
weaver 's sports cup
0.09482353263104458 0.009889685299269653 0.100422142313256
3524
britain orders thousands more turkeys slaughtered after bird flu outbreak
glaxosmithkline airways announces more more # million million in asia
0.09482500071594786 0.009886880510472356 0.10042911277771621
3525
kiwi union star signs for bradf

former nfl player stingley dies at # #
o'brien resigns as dies #
0.09507020133321355 0.010012083113646621 0.10054595094941561
3571
fda extends some deadlines for implementing tobacco rules
fda watchdog issues rules
0.09518353181142983 0.010009280963320943 0.100657748892055
3572
france eyes more renewable energy less nuclear dependence
china development economy improves from # # years
0.0951568995977165 0.010006480381070434 0.10062958500036726
3573
ny woman has # # arrests since # # # #
<oov> of woman in a central 's 's largest death in us china 's city
0.09514893030179174 0.010003681365579225 0.10062474688801283
3574
lawsuit targets election initiative
presidential county election election in south korea
0.09516227152454211 0.010000883915532922 0.10064321498638494
3575
< unk > takes # # th stage of spanish vuelta menchov still overall leader
frenchman retains giant slalom
0.09513566759065212 0.0099980880296186 0.10061507877867279
3576
dollar little changed in tokyo amid light trading o

reichel is here to stay with islanders
national world title 's < of < unk unk > < unk > < unk > dies in beijing
0.09518469221992447 0.010002713293377251 0.10074499653217922
3621
bulgaria 's # # # # budget includes # -pct growth target tax reform
romania <oov> <oov> deficit falls to #
0.09519785050053094 0.009999952400941873 0.10076319185929336
3622
eu to import more energy from russia
world bank says says 's 's is not to to be built to the china
0.09518997765730967 0.009997193032177816 0.10076047273651459
3623
iraqi documents arrive in new york as new inspectors reach baghdad
lebanon military <oov> reconstruction
0.09516371835312834 0.009994435185824112 0.10073267674403556
3624
euro mps seek russian cooperation in cyber-attacks probe
foreign foreign ministry condemns of in unk > zone
0.09516811642062915 0.009991678860621182 0.10073936933180609
3625
us experts to help assess safety of taiwan 's dams after quake
us national military development in china 's dprk of shenzhen unk > region
0

sri lanka wins toss bats against india
sri lanka <oov> world 's
0.09537793765880943 0.010168322749762843 0.10097605605943723
3671
# # macedonians hurt in coach crash in austria
national soccer federation dies
0.09535197034662354 0.010165554352607993 0.10094856462027048
3672
in the coen brothers < unk > world the only certainty is
a china china a world of a year 's the <
0.09540024887204016 0.010162787462473913 0.10098913387323176
3673
# # suspected taliban # police killed in eastern afghanistan
pakistan army in pakistan of the casualties
0.09541316231258032 0.010160022078130384 0.10100700531072658
3674
# # # # olympic organizers look for volunteers from outside beijing
university olympics games in # , # # # in the beijing 's history of the article
0.09546721685141227 0.010191262550905647 0.10102486702491481
3675
bangladesh court suspends arrest warrant for former prime minister
indonesian supreme president dictator president defends corruption of corruption
0.09544125350715026 0.010188

greenspan backs bush tax cuts with spending cuts
greenspan reaffirms economic economic on of economy and the and economy
0.09541908595060188 0.01008973407523865 0.10103361121393782
3721
defense lawyers boycott chun trial
former <oov> <oov> former former leader of president 's president
0.09539345632773037 0.0100870239667038 0.10100647352626284
3722
three convicted of killings in grenadian coup ordered released from
imelda defector loses second jail sentence for jail # years
0.0953678404694254 0.010084315313651518 0.10097935041307105
3723
aqueduct to begin its new season
new world s a crown of a world s <
0.09536908400218529 0.010081608114909598 0.10099059277345872
3724
guinness not only irish stout around
the unk 's world of world cup title
0.09534348843482024 0.0100789023693071 0.10096348848124899
3725
two un armored vehicles strike mines in eritrea
netherlands ships of military in operation of china
0.09535144564210898 0.010076198075674336 0.10097472906466083
3726
turkey authorizes t

first female vp at raytheon takes leave
world s champion becomes first vice vice vice secretary dies in
0.09547301437155335 0.010047022954487294 0.10109472446151725
3770
brazil s defense minister defends air traffic controllers in deadly plane crash
australia <oov> minister slams military unk > test at china unk > in china
0.0954855764416715 0.010044359374700846 0.10111612516601266
3771
thai human rights groups ask for liberal policy towards burmese
myanmar rights rights body
0.09552652911157829 0.010041697206830529 0.10117767226597749
3772
army assault sparks riots in sign of festering instability
indonesia 's maluku <oov> president defends on <oov> policy
0.09550121736565577 0.010039036449753997 0.10115086313183176
3773
nissan bristles as rival scouts sites in mississippi
south korean < develops control in chinese industry
0.09550903161271122 0.01003637710235009 0.10115718078398227
3774
# # killed # # missing in geological disasters in china in september
dprk reports causes disease o

overweight chinese warned off slimming pills
chinese china city city 's <oov> of # # million yuan in year
0.09573072013307306 0.010220882027751442 0.10145119521360703
3820
west indies scores # # # - # and draws tour match with mcc
< <oov> west africa <oov> <oov> <oov> > unk > contributed reporting for
0.09572579924264658 0.010218207804300957 0.10145372266412965
3821
charter aircraft catches fire at norwegian airport # missing # # rescued
tourism weather <oov> no no #
0.09574435557731152 0.010215534979868757 0.10147949987504669
3822
london casino sues australian bank over money laundering scam
australian businessman tycoon 's bank in #
0.09579403390632223 0.010212863553357286 0.10152767842782892
3823
prices down on taipei futures market
taiwan securities index up on # . # # dollars
0.09579803837095092 0.010210193523670133 0.10153848351656862
3824
eu lawmakers to vote on consumer credit rules
european commission agency says to of a # to be better a < unk > bank
0.09578933527676092 0.0102

ecevit warns islamists kurds pose very serious problem
erbakan leader pm in indonesia
0.09571248843731194 0.01013453666529843 0.10137992304430943
3869
# # th annual pata conference opens in bangkok
development economic cooperation cooperation in asia
0.09573081811394055 0.010131918598477119 0.10140539968521764
3870
france to start smoking ban in public places starting feb. # prime minister says
< of china unk > < in beijing china province
0.09573192069190699 0.010129301883963049 0.10141149333199316
3871
belgrade calm day after massive rioting over kosovo independence
senior military officials find < in in central iraq
0.09570720292255716 0.010126686520708732 0.10138530910959914
3872
china builds national astronomical center
chinese computer giant < in beijing
0.09568249791405882 0.010124072507667765 0.101359138405131
3873
jordan 's official rejects claims on prisoners tortured in israel
israeli ministry official <oov> for death in china
0.09572232178556488 0.01012145984379482 0.1013974

us banks return to old ways of high pay
bank financial system helps <oov> attract attract stability of stability of the economy
0.09577001148540727 0.010240550006948384 0.10145469686178057
3920
animal rights group demand closure of kentucky fried chicken in india
activist rights <oov> group 's of of in china china
0.0958475815997659 0.010237938953912446 0.10155631473611464
3921
hamas plo delegations head to yemen
chad relations <oov> < in china unk > ties to china states 's issues
0.09584135702414234 0.010235329232027687 0.10155003555237438
3922
s. korea expects asem to help overcome financial crisis
afp world currency currency system to # years
0.09584878787097616 0.010232720840276405 0.10156056233812993
3923
spanish central bank maintains rates
lebanon central central currency 's chief of economic situation
0.09585267636097364 0.01023011377764194 0.10156653416938137
3924
belgium pledges dlrs # # million to east zaire peacekeeping
dutch government says <
0.09582826151727497 0.01022750

contingent of pakistani troops leaves for haiti
s. military produces < < unk in
0.09567801257612885 0.010163322518282992 0.10143834822534781
3970
major foreign exchange rates in pakistan
martin martin wins < # of < #
0.09565392445614493 0.010160763776460665 0.10141280986980265
3971
bbc to launch persian-language tv channel
china tv world 's first <oov> list from
0.09566131083307516 0.01015820632270369 0.10141874674121724
3972
< unk > flattens jacobs ; guerrero tops casamayor
valuev gasquet wba wba champion at < unk > open
0.09571272972818512 0.01021156918528535 0.10147710471469286
3973
hong kong stocks open lower
hong kong stocks index closes down
0.09581443721756168 0.010309629167880247 0.1015773620468401
3974
kyrgyzstan continues economic reform in # # # #
indonesia kyrgyzstan president defends resignation on gdp situation
0.09582177765085706 0.010307036202797782 0.10158325305236153
3975
bell from the exodus ship sold for record price at sotheby 's
china flag maker <oov> wins first t

us clings to hopes for # # # # economic recovery
bank bank economy makes economy recovery for economy
0.09595002328213464 0.010351562567814257 0.10169836263250225
4020
judge sentences california man to # # years in prison for attending terrorist training camp
virginia carolina <oov> # . in china states in # years
0.09602561999439667 0.010376614668396877 0.10173523524248919
4021
white house pushes senate to act quickly on health care bill
congress unveils rules of on china economy
0.09603726093967699 0.010374035345834512 0.10174545694460568
4022
# # chinese firms qualified for < unk > services
china telecommunications ministry company says < unk > on < unk >
0.09607552205773373 0.01041664075002744 0.10180300860374801
4023
rat poison found in food linked to # # animal deaths
china develops < 's of china unk > 's <
0.09605165236281256 0.01041405276474793 0.10177771593080298
4024
eurostar train sets new british rail speed record
cable <oov> launches launches on < unk unk > bank from india


flames claim hay off waivers
byrd re-sign 's <
0.09591448201271568 0.010370386110324205 0.10167522983030976
4069
# nd indictment is issued against delay
u.s. becomes national party on of in congress
0.09589092158972067 0.010367838729800913 0.10165025433784346
4070
president of burundian election panel says he was target of attack
s. leader leader 's official of elections elections
0.09589807018461513 0.010365292600446836 0.10166622104028178
4071
keiko the killer whale moves to his winter home
new <oov> women <oov> a world of women
0.09587452536011609 0.010362747721340416 0.10164126002357657
4072
armenia azerbaijan announce no progress in karabakh talks in romania
nam dprk 's issues <oov> on <
0.09585099209419559 0.010360204091561001 0.10161631126068418
4073
australian shares close flat
australian stock shares close lower . . # <oov> on year of the # #
0.09588005582970272 0.01037653853888162 0.10165272443583494
4074
dietary supplements show promise in migraine prevention
new china < <
0

chester county pa. resumes its growth
us bank # <oov> in # years of # years
0.09580151905353526 0.01031213410193227 0.1016008765441505
4121
philippine official says report points to call between bombing suspect and iraqi diplomat
dprk chief says u.s. us
0.0958187068167206 0.010309632977968669 0.10161665772050812
4122
efforts to repair aging system compound metro 's problems
national china issues code system in beijing
0.09583011283903885 0.01030713306696528 0.10162665783814553
4123
sri lanka achieves # st series victory in australia
sri lankan cricket championship world championship
0.09584728533693646 0.010304634368039956 0.10165050592109386
4124
passenger throughput drops # # . # pct macao airport in # # # #
china airlines launches first unk > to be stable from shanghai
0.09582405526293332 0.010302136880311395 0.10162586934670678
4125
philippine public sector debt projected to fall # . # percent this year
bank loans banking system improves to be year # years
0.09587352847464573 0.010

davenport makes it # # straight ; ljubicic 's bad back ; low bonus
chinese <oov> world champion title title at
0.09560661058576628 0.010360812779069323 0.10150396920144589
4170
us leaders defend iraq war strategy brace for tough battle of baghdad
military government policy <oov> to in asia of china s <
0.09560548462227712 0.010358329362775203 0.10150142968124158
4171
dutch nurse acquitted of being a mass murderer
retired dictator of man 's
0.09563050128064705 0.010355847136711753 0.1015250334603738
4172
# # -kilogram < unk > removed from yemeni woman 's ovary
army professor becomes uae in china in indonesia
0.09560759028369434 0.010353366100023513 0.1015007102611739
4173
president of oil-rich bashkortostan re-elected for third mandate
ceausescu vice dictator vice leader of #
0.09561890754179576 0.010350886251855844 0.1015163188734866
4174
australian stocks soar on wall street gains
aussie shares soar in asia kong 's trading
0.09562594324401277 0.01034840759135492 0.1015219423603464
417

storms rock cruise industry
new 's unk <oov>
0.095481265664786 0.010487146556911608 0.10137077532060365
4221
britain 's prince charles invites sierra leone 's president
charles fiji prime minister postpones <oov> #
0.09549248420070652 0.01048466321650031 0.10138059922956329
4222
czech roma attacked in village
armed official chief of
0.09546987707849991 0.010482181051913071 0.10135659814073053
4223
aussie swim team to wear face masks to prevent jet lag
aussie asia eyes first < of unk > event
0.09547357914572656 0.010479700062314985 0.1013589069011969
4224
tunis stock exchange index up
shenzhen exchange index up # # . # percent
0.0955397235898473 0.010544828927826573 0.10145323749587243
4225
gold opens lower in hong kong
hk closes higher # # # . # us yuan in us $ #
0.09553531936778278 0.010542334291221929 0.10145880805714617
4226
french still have billions tucked away ahead of euro switch
bank financial financial giant giant in # #
0.09551272350227479 0.010539840834672444 0.1014348111772

top indonesian militant faces trial
military military military <oov> <
0.09498974982588901 0.010428843212963982 0.10088610605020039
4272
kirkuk bombing hits school
man <oov> <oov> woman of a # in # # people in southern philippines
0.09496752480253245 0.010426403146699835 0.10086250143951948
4273
famed jaffna library not reopening soldiers guard complex
sri lankan government declares <
0.09494531017684765 0.010423964221987157 0.10083890787193128
4274
palmsource loses out with new windows-based treo
starbucks s company to <oov> company in taiwan
0.09492310594153969 0.010421526438025045 0.10081532533968808
4275
livorno fires arrigoni hires < unk >
alianza 's sacks < # - # # as premier wins < unk > as a < unk > #
0.09493598340098754 0.01044370124079725 0.10085020602116115
4276
clinton heads to new york state to help cuomo 's re-election bid
<oov> <oov> <oov> in # .
0.09491379172651325 0.0104412599829102 0.10082663187295611
4277
us aid offer boosts deal at un climate talks
world china <oov>

taiwan offers to cooperate with us on intelligence case
taiwan premier meets < < unk > of state state secretary of # # years
0.09503704984616915 0.010490911182370733 0.10096261097097904
4322
new zealand stocks close virtually unchanged
asian exchange index index down # # # percent
0.09501507087996976 0.010488484977194422 0.10093926161599037
4323
first private company wins oil contract in indonesia
< company unk < < unk > <
0.0950220038115582 0.010486059893962701 0.10097372652659939
4324
senior hamas official killed in israeli missile strike
israeli official reports casualties
0.09511561869740852 0.010483635931897523 0.10100817550336162
4325
kuerten loses in french open farewell ; djokovic ivanovic blake advance to # nd round
ferrero upsets <oov> # st
0.09513985821238485 0.010481213090221559 0.10103105320719721
4326
man charges s african police set dog on him
s. african official <oov> <oov> on on of of president s <
0.09517563920632838 0.010499796241288765 0.1010943547198573
4327
shangh

zimbabwe sets economic vision for new millenium under western pressure
imf financial economic issues on economic governance in asia
0.09508333422461546 0.010452729201684071 0.1010661292198395
4372
two koreas hold politically charged friendly soccer match
<oov> south < < unk > korea faces the history in china
0.09506159592232359 0.01045033946021135 0.10104302310890675
4373
thai bourse up # . # # points
hk bourse exchange exchange down
0.09508558184325563 0.010447950811191873 0.10107707041791043
4374
facing us threat to nato role belgium government agrees to limit reach of war crimes law
u.s. defense <oov> says administration 's has no unk > on its development in the year
0.09506385296257847 0.0104455632538767 0.10105397236708366
4375
amnesty international criticizes us and urges china to fight for human rights worldwide
us china chief defends < on on # # years
0.09508782740786918 0.010443176787517579 0.10108800161717114
4376
russian eu officials try to smooth troubled ties
world <oov> <

hk firm selected as # g services supplier for europe
economist ping <oov> defends china s unk > in china 's affairs
0.09475628547268769 0.010367515222566036 0.10076213483048474
4420
snl cast revels in < unk > parties
new legacy s the # of
0.09473485709514977 0.01036517069176039 0.10073934827805814
4421
philippine police autopsy says u.s. peace corps volunteer died from blows to the head
veteran 's dictator leader of dead in south
0.09471343840713368 0.01036282722110885 0.10071657202929529
4422
s. africa 's zuma accepts nomination for anc top job
s. korea leader leader 's presidency of the year
0.09474226046495807 0.010360484809892505 0.10075031602295956
4423
naacp holds language funeral
dprk leader defends <oov> on of the unk > <
0.094720849784627 0.010358143457393094 0.10072754758996001
4424
# powers meet again on iran sanctions
us <oov> committee nuclear china
0.09469944877925315 0.010355803162893005 0.10070478944545257
4425
la 's mayoralty race is far from settled
new 's 's < of nat

us soldiers guard karzai amid security threats
national national military official of the # years of # years
0.09489665404081193 0.010304382891987375 0.10098030110134225
4470
gunmen kill # # police trainees in southwestern pakistan
man shot in dead of china police in
0.094931337257708 0.01030207869187736 0.10102161013189076
4471
hemophiliac children sue for costly blood-clotting treatment
china court becomes boxing 's ruling party in beijing
0.0949101140658328 0.010299775522037907 0.10099902537666343
4472
a 's put end to angels streak
anaheim # 's title #
0.09493360308816945 0.010297473381778175 0.10103232912602045
4473
czech tourist drowns at australian beach
oldest oldest climber champion at world 's dies in china
0.09493473524390394 0.010295172270407946 0.10103458136780483
4474
mobutu stays in presidential palace of togo
dprk 's 's of of president 's <
0.09494145223781728 0.010292872187237612 0.10105669160431782
4475
hillary clinton creates fund-raising panel for n.y .
new first pre

french foreign ministry searched
anti-corruption spy official of official 's death of # # years
0.0949783408218555 0.010239574902078694 0.10111068752213964
4520
palestinian prime minister to visit hamas mourning tent in gaza
arafat leader <oov> <oov>
0.09495733720822837 0.010237310511344046 0.10108832779469114
4521
dozens of colombian rebels killed in clashes with government troops
armed military forces dead
0.09493634288207134 0.010235047121887636 0.10106597795436509
4522
european foreign ministers meet iranian president
asian nations nuclear policy policy 's <oov> on iran s program
0.09491535783722561 0.010232784733045485 0.10104363799460507
4523
farmers bid good riddance to freedom to fail policy
grain develops environment
0.09489438206753784 0.010230523344154205 0.1010213079088604
4524
jamaica slum seethes as kingpin eludes assault
china < > city warns to
0.09487341556686008 0.010228262954550989 0.10099898769058624
4525
armed leader gunned becomes hospital of china # th years in ch

clean-air standard method upheld
us justice <oov> defends on on of <
0.09504962430464647 0.010305016242268409 0.10098317073074782
4570
senate struggles on stimulus in nighttime session
us economy election says of
0.09502883479801816 0.010302762301707982 0.10096108342306394
4571
netanyahu says hebron decision could be made this week
rabin confirms resignation cabinet reshuffle in west bank
0.09500805438367352 0.010300509346907696 0.10093900577525658
4572
# # # # rhodes scholars include guantanamo translator boxer frisbee aficionado
world china national national scoreboard dies at age
0.0949872830556491 0.01029825737722101 0.10091693778099001
4573
brazil wants help lifting us ethanol tariffs
brazil s < chief warns china 's development crisis
0.0949908073896503 0.010296006392001945 0.10091916601559768
4574
peres says last year s lebanon war was a mistake on israel s part
bundesbank 's military policy is
0.09497004890901445 0.010293756390605091 0.10089711200204532
4575
prosecutor accuses p

more evacuations in california as firefighters deal with hundreds of blazes
coastal <oov> system system for south west city city
0.09508325538845268 0.010322765234906316 0.10097129019864999
4618
ugandan government to de-mine war-ravaged northern region
lao army says says on on development 's stability
0.09506267459724305 0.010320530870136856 0.10094943494103124
4619
china us open high-level trade talks in washington
senior chinese issues <oov> strengthen economic cooperation in india
0.09506614753308244 0.010318297472415553 0.10095163396205917
4620
# northeast nuclear plants alerted after problem found at minnesota plant
northwest nuclear plant <oov> <oov> in < unk > contributed
0.0950888506599684 0.010316065041114729 0.10097787164883117
4621
ecowas agrees on security measures
nam commission <oov> body to to # . # <oov> usd from dprk korea
0.09506828201392471 0.01031383357560724 0.10095602915009683
4622
thai stock market closes # . # percent lower
thai thai bank loses sharply in asia o

els takes the lead at doral
lee 's < of world cup golf event
0.09516032300944602 0.010267963286210857 0.10102731714046516
4667
palestinians and israelis demonstrate against barrier
palestinian barrier of israel barrier bank barrier barrier of west 's economy
0.09515778992105961 0.01026576410795294 0.10103245157671693
4668
bc-sports-field < unk > india wins second match against malaysia
australia world champion junior cup title
0.09513741352064825 0.010263565871527254 0.1010108172187776
4669
crashed silk air plane 's black box found
national military military loses world of world in indonesia
0.09511704584487847 0.010261368576328896 0.10098919212410434
4670
crisis at builder huarte deepens as more # # # # losses revealed
nabisco de # 's <oov> <oov> in <oov> of south # # # dollars
0.0951578414869738 0.01029210162112124 0.10099135863073683
4671
u.s. bonds fall as dollar drops vs yen ; fed move debated
us securities bonds in on year of year
0.0951642275684018 0.010289899159828468 0.1010003

< unk > creator hopes biotech pets will become ambassadors for genetically modified products
lee develops china to to chinese china market
0.09496594283500277 0.010234844832980905 0.10083828668036204
4709
against three u.s. golds canadian golden girl triumphs again
american < champion <oov>
0.09494578449434579 0.0102326722910932 0.10081688182222569
4710
tech stocks up blue chips lower
major markets markets
0.09492563470986058 0.010230500671337026 0.10079548604934321
4711
beijing welcomes swedish investment says mayor
chinese 's chairman chairman concludes < unk > relations
0.09490549347610079 0.010228329973125411 0.10077409935593151
4712
venezuela 's u.n. envoy says both sides must work toward stabilizing
venezuelan president <oov> president 's situation situation
0.09491566565458638 0.010226160195871884 0.10079514854995868
4713
israeli critics round on netanyahu after obama meeting
israeli prime prime praises israel
0.09493795289410821 0.01022399133899047 0.10082679326924819
4714
mada

czech president meets with cheney gates
vice president defends <oov> cabinet of < unk > secretary of state secretary secretary contributed contributed contributed reporting for # # years
0.09478810070862441 0.010238030222737282 0.1006721237449699
4758
swisscom sees annual profit double in # # # # confirms debitel talks
switzerland ericsson posts second # in
0.09483821525329346 0.010235879376051834 0.10068598814474336
4759
nigeria upset fancied malaysia in games table tennis
top-seeded flyweight badminton champion at indonesia unk > forum
0.09481829544332637 0.010233729432893662 0.1006648400690986
4760
arena names six veterans to squad for < unk > friendly
un team <oov> <oov> team in beijing
0.09479838399951215 0.010231580392693559 0.10064370087546796
4761
americans focusing their hunt for saddam around his tigris river hometown ; one soldier killed sunday
u.s. dictator s military is replaced of his democracy of iraq
0.09479597689250686 0.01022943225488279 0.10064356572936772
4762
despi

us in talks with macedonia over icc immunity agreement
pentagon commission <oov> issues of china 's < unk > contributed
0.0946359531592235 0.010135799007698508 0.10054431664659974
4806
duncan thwarting < unk > style
heavyweight heavyweight # <oov> at
0.09461627014067957 0.010133690896424028 0.10052340476709754
4807
mellon chile 's < unk > form mutual fund joint ventures
boj bank sees huge financial in to asia
0.09459659530804479 0.010131583661885367 0.10050250158457162
4808
london share prices up at midday
stock exchange index higher
0.09457692865621359 0.010129477303535703 0.1004816070935977
4809
vfb stuttgart leverkusen stunned in german cup
bundesbank cup cup champion 's world # # # # # qualifier
0.09457459159628366 0.010127371820828669 0.1004904151450971
4810
hong kong shares close # . # pct higher
hk securities index up on . #
0.09458957332427002 0.010125267213218356 0.10050416748331854
4811
czech opposition demands emergency parliamentary session to discuss u.s. missile defense
d

kerry 's hopeful tone on black opportunity
us < is the of of unk > 's character > is not a own < > < unk > unk unk > in the article
0.09463511633477643 0.010158862512139768 0.10047743516839336
4857
tanzania rejects accusation on sex abuses on refugees
environment ministry issues report on for # years
0.09464136553907057 0.010156771781019754 0.10048248200206934
4858
a swimmer 's path back to the top
baseball of of the <oov> . at <oov> <oov> event
0.09464475437560803 0.010154681910282919 0.10049610014706206
4859
wage talks for german municipal hospital doctors break down
economic bank system ends down on the unk > in < unk > < unk > says says says contributed contributed reporting
0.09463463501561604 0.01015259289939827 0.10049012040447443
4860
groups condemn suu kyi detention
junta <oov> accuses < unk > military unk > in democracy
0.09461517087842648 0.01015050474783525 0.10046945193051217
4861
benfica stays top ahead of fc porto and sporting
< < < # > title de # de
0.09459571474622858 

director excavates british life in the queen
the director director 's lee of the unk > < unk > < unk > < unk > < unk > < unk > < unk > <
0.0944075343428384 0.010094471625390905 0.10031588421208863
4906
israeli aircraft blasts gaza city building killing # after # killed in earlier fighting
israel military military casualties on central < unk > area
0.09438829890389323 0.010092414887080922 0.1002954449528767
4907
iraqi pm aides say agreement reached on us pact
military <oov> <oov> military policy system to curb terrorism in
0.09436907130175351 0.01009035898671688 0.1002750140209246
4908
russian schoolchildren hold < unk > protest
ancient war on in 's central 's town
0.09434985153163096 0.010088303923786795 0.10025459141114437
4909
liu < unk > scores # # points as rockets edge olympians
chinese unk unk # <oov> at at world 's <
0.09439172694365873 0.010086249697779102 0.10028508324755017
4910
fox 's north shore tv series canceled
american president director says <oov> to be
0.0943725103868

bush defends iraq war during farewell visit
s. korean leader defends economic on on economy stability
0.09426471002591603 0.010037905161436418 0.10019363191854615
4955
thai protesters call for talks as clashes leave # # dead
indonesian leader meets in in with
0.09424569354215047 0.010035880165438549 0.10017341936419502
4956
maradona can t count on messi for argentina
uruguay de la <oov> president # st
0.09422668472941506 0.010033855986300702 0.10015321496335512
4957
tudjman urged to keep u.n. troops in croatia
eu <oov> president stresses <
0.09420768358306914 0.010031832623528712 0.10013301871109391
4958
kohl to visit ukraine next month
germany chancellor says < unk > bank economic ties to be balanced
0.09420549117374459 0.010029810076628808 0.10012963167775162
4959
britain reopens visa section in guyana after four months dealing with administrative problems
britain issues agency agency on
0.09422681641237113 0.010027788345107615 0.10015984138714937
4960
red cross calls for stronger so

iran kills kurd separatists at border
iran s top <oov> in < unk > dies from
0.09399898185113709 0.010075859373006407 0.09990774892446717
5004
< unk > sorry for late villa return
top-seeded retains defends defends champion s title of
0.09398020458748324 0.010073846616439685 0.09988779132380307
5005
love em or hate em cowboys still america s team
johnson johnson takes a of of #
0.0939614348242343 0.010071834663850022 0.09986784169501861
5006
lebanon warns u.n. against video
lebanon army says <oov> military has has
0.09397119834420892 0.010069823514755804 0.09988118011853532
5007
clinton views economy with careful pride and calm concern
us economy says <oov> 's economic growth of the # years
0.09397058702309989 0.010067813168675796 0.09987938892484208
5008
military investigation blames u.s. pilot for dropping bomb on
china defector spy wins second first time of the years
0.09395183041890366 0.010065803625129155 0.09985945291906866
5009
former president reiterates that # # # # consensus do

chronology of major anti-israeli attacks in palestinian uprising
palestinian woman 's > bank makes
0.09379184999735572 0.009996200014439908 0.09971406263508216
5055
van den hoogenband magnini highlight euro swimming in # # # freestyle
cycling < > wins china 's world 's event
0.09377330306241459 0.009994223308880399 0.09969434460806316
5056
texas instruments in talks to sell chip operations
u.s. national supercomputer launches development of china in china
0.09377453412151653 0.009992247384936374 0.0996966017781903
5057
hungarian pm outlines austerity program
thai pm minister announces economy of # economic economy
0.09377796099975126 0.009990272242144332 0.099701603438246
5058
iranian mourners opposition bury slain physicist
north of of of iranian state president 's state state state says of < unk > < unk > in <
0.09376883868485389 0.009988297880041143 0.09969986578754853
5059
joseph < unk > # # writer whom buckley < unk >
former scholar founder of unk > #
0.09383499213079079 0.0100521

the band drummer helm sues over tv ad
founder 's former vice chairman of the mayor president
0.09388630142156128 0.01004519923090642 0.09974423332799254
5105
friend says steffi will lay low knows she did no wrong eds
s. korea court defends china
0.09386791757558095 0.010043232283729818 0.09972470244228115
5106
chinese and french companies to jointly produce corn products
dprk korea < < unk unk > #
0.0938495409276609 0.010041266106696981 0.09970517920374508
5107
research institutes in beijing make profits
dprk development development sees economic of of stability of china # years
0.09383117147357446 0.010039300699355682 0.09968566360789388
5108
pakistani fm warns against knee-jerk blame over blasts
pakistani pm expresses of casualties in pakistan
0.09384076559713289 0.010037336061254046 0.09969411203827536
5109
something wilder a mild take on older dad
a 's 's a <oov> <oov> of
0.09385035596638745 0.010035372191940554 0.09972352035131815
5110
# mainland fishermen nabbed off kinmen on smu

poverty excessive consumption major causes of
development of developing china province province in china province
0.09394806408351655 0.01011235353148836 0.09976257401144374
5153
u.n. organization shows a world free of violence against women video
un council body in in beijing
0.09392983943480977 0.0101103918722194 0.09974322142676645
5154
boeing taking $ # # # million in charges for aviation slump
monsanto posts <oov> first year # <oov> dollars from us city
0.0939292535642656 0.010108430973873353 0.09974327122866196
5155
spaniards say they are sick of news about their favorite sport
boxing national champion results
0.09391103963105554 0.010106470836007565 0.09972392989237562
5156
fort worth church officials say they wo n't ever accept female
south of s. <oov> of <oov> on the of china
0.09389283276024687 0.010104511458179722 0.09970459605563806
5157
associated press in new book examines its history
dprk korea causes revolutionary on china 's disease in beijing
0.09389401654920594 0.010

china enhances forest protection
east < of guangdong china province
0.09382865246203487 0.010051879168370505 0.09963747649252538
5203
mideast mediators meet in egypt
plo arafat confident about <oov> of new development for peace process
0.09381062582371363 0.010049947971604247 0.09961833384574487
5204
colombia keeps alive gold cup hopes ; united states honduras top groups
honduran honduran national results at
0.09379260611072406 0.010048017516749923 0.09959919855303535
5205
dick van dyke says he 'll retire
brian gibson clarke says of
0.09381300315199335 0.010046087803380085 0.09961848044307703
5206
in final major nicklaus misses cut ; woods on target
world s a world title title in a # #
0.09381419113141887 0.010044158831067608 0.0996313545187728
5207
red sox down yankees
jays blue sox for for
0.09383457619743318 0.010042230599385698 0.09966022160371832
5208
lawsuit criticizes yahoo s employee retention plan
<oov> 's # chief executive resigns on <oov> and <oov> #
0.09381656572215535 0.01

liberals in control after komorowski 's presidential win
kyrgyz leader president <oov> of in in presidential presidential race
0.09419703874940302 0.010066800085304256 0.10011074648087862
5253
cox news service commentary budget
hk population news budget #
0.09425523151082084 0.010064884424013047 0.10016781389353686
5254
yeltsin requests acting pm to make cabinet work proposals
yeltsin chief president of of of state elections
0.09426108097210112 0.01006296949166449 0.10018046588226842
5255
italy gets france and dutch in tough euro # # # # draw
afp 's international scoreboard at world cup
0.09424315038793295 0.010061055287842603 0.10016140929754667
5256
blagojevich defense rests its case in corruption trial
president governor governor makes himself
0.09422522662407065 0.010059141812131717 0.10014235996143074
5257
# die after receiving human antibody injections in eastern china
chinese hospital causes reported of china # -year-old dies at least #
0.09423900137973572 0.010057229064116479 0

advisory panel votes for use of embryonic cells in research
china panel panel <oov> policy to < unk > on myanmar unk > areas
0.09450845580925193 0.01005779379006249 0.10038041389232683
5302
supernatural themes in harry potter continue to anger certain
lee <oov> < <oov> in < unk > in hong
0.09450949116826225 0.010055897524264966 0.10038842232538957
5303
law made to combat mafia used in bp lawsuits
panel regulators probe rules against oil shrimp industry
0.09449167599556324 0.01005400197336501 0.10036949896585604
5304
liaoning reports initial success in water pollution reduction
east chinese china 's industry of state-owned company
0.09447386753796891 0.010052107136958421 0.10035058273913801
5305
armed group attacks soldiers in mexico
armed colonel chief s.
0.09450317338542737 0.010050213014641301 0.10037878123494748
5306
in advance of speech bush seeks iraq advice
president <oov> world economy on housing security secretary says
0.09448536947182799 0.010048319606010056 0.1003598703869378

new zealand lost to australia # - # in women 's soccer
south korea national national national boxing championship at
0.09446969508593082 0.009963848396917874 0.10041081487068305
5352
safin haas fail to reach semifinals of president 's cup
defending wins world 's title title of world # # # # cup qualifier
0.09449207387173579 0.009961987386757822 0.10043356634758845
5353
# idiots sweeps bollywood awards in sri lanka
ruby wins khan 's of of # nd century title
0.09449310242936947 0.009960127071652919 0.10043556028685334
5354
us russia tied in freestyle world cup wrestling
us champ wins title at the chess cup tournament
0.0945169502859402 0.00995826745121385 0.10045829864793163
5355
severe storms bring flooding to las vegas
powerful central becomes <oov>
0.0944993066513899 0.009956408525051592 0.10043954593211159
5356
thai leg of olympic torch relay route gets off to peaceful start
ancient women city suffers of in indonesia s <
0.09450240702549585 0.009954550292777413 0.10044153763893858
53

chile president helps schoolboy who fainted
president paraguayan vice vice leader president dies in chile
0.0943433803876621 0.00999225233901667 0.1002045019823017
5401
air raid sirens sound across yugoslavia
military raids sirens on <oov> < unk > military forces from the < unk > < unk > of the < unk > < unk > of
0.0943327739942972 0.009990402949355553 0.10020137942656435
5402
creative game expected when barcelona plays arsenal in champions league final
ajax cup cup <oov> <oov> to to the round of world league
0.09433073856856422 0.009988554244146568 0.10020339825182055
5403
freedom more < unk > than freedom 's symbol glenn says in flag
senate <oov> < unk > in confrontation of indonesia s <
0.09438056384609758 0.010023708998217957 0.10025886478313381
5404
residents sift through the ashes as firefighters fear worsening
floods destroys in indonesia in indonesia province
0.09436310536221927 0.010021854816013328 0.10024031893319242
5405
police collect jackson 's dna sample during searches o

greek youths firebomb bank clash with police
police raid police at # of central city
0.0943403562965416 0.010042265126158873 0.10024086097089294
5449
chicago # # milwaukee # #
phillies 's first second straight round of st. st. st. petersburg
0.0943230493150159 0.010040422846737453 0.10022247152657614
5450
south africa # # # - # at close on # th day
south africa <oov> <oov> title world 's # nd # nd century
0.09436688832529072 0.010055255691209639 0.10026522847114817
5451
austrian railroad union strikes
czech insurer giant operator to sell to to to to the year
0.09434958282587293 0.01005341170520355 0.10024684130289746
5452
china to launch crackdown on copyright piracy ahead of us sanctions move
china issues on on on property of property property system
0.09438728917298955 0.010051568395393281 0.1003201367115328
5453
southern hemisphere rugby powers meet on super union tournament
australia african exchange chief <oov> regional policy for regional issues
0.09436998627854903 0.010049725761

judge rules out quick trial for parmalat accused
man s <oov> chief in in # # # # # million in # # # # #
0.0943535972290846 0.01003478610921698 0.10024780996564815
5497
us flight canceled after man found sitting on plane hours before takeoff
national airlines reports china <oov> in south
0.094336438909894 0.010032961270862875 0.1002295797765291
5498
more victims fall in indonesia 's aceh as troops hunt senior rebel
indonesian forces military says casualties of the unk > < unk > < unk > in southern korea
0.09432938784019321 0.010031137096086355 0.10022534223055575
5499
calls to arm nz police after machete attack
< < <oov> in china in south korea
0.09431224016016408 0.010029313584525532 0.10020712275369144
5500
flame for beijing games lit at olympia pro-tibetan protesters detained
a chinese at <oov> in beijing 's sun unk > < unk > contributed contributed reporting reporting for the < article
0.09432106329769534 0.010027490735818784 0.10020026940531745
5501
moya mantilla advance in croatia

nigeria listed as most corrupt country in world
anti-corruption world bank official says in
0.09411526342367114 0.009990702040327162 0.09996062823166575
5545
french prime minister wins senate seat ; good fallback if needed
prime leader becomes <oov> party of the democracy
0.09412083125070853 0.00998890094026581 0.0999651422702034
5546
us inspectors arrive at turkish bases for war preparation
kuwait navy launches # <oov> of
0.09410386642892578 0.009987100489483498 0.09994712403980141
5547
highest u.n. court rules serbia not directly responsible for genocide in bosnia
un court chief of of <oov> military military < unk > dies # years years
0.09409892189842259 0.009985300687629203 0.09994413002453624
5548
strong # . # magnitude quake hits off mexican coast
quake rocks north of china of # . #
0.09414953470528774 0.010009241560092178 0.0999561521032105
5549
prosecutors question former olivetti director-general
senior chinese state of chief official of # years
0.094132573881165 0.010007438418

bush named defendant in lawsuit
former carolina general general dies # . # million dollars from the # # # #
0.09419299998557003 0.00999286092802106 0.10002454763691121
5594
starbucks takes a # -hour coffee break
taiwan develops image on chinese in central
0.09417616778400006 0.00999107521305894 0.10000667334319482
5595
christopher calls for chechnya peace ahead of us-russian talks
yeltsin expresses <oov> economic ties in china
0.09415934159715282 0.009989290136194002 0.09998880543657641
5596
british government to introduce gambling legislation within weeks to allow giant casinos offering unlimited jackpot machines
royal pm 's government <oov> to # . # billion pounds dollars from # # # # # dollars
0.09416236978034573 0.009987505697084285 0.1000034230457907
5597
turkish president strongly opposes us war on iraq
lebanon military leader praises 's military unk > in #
0.09414555206829352 0.009985721895388074 0.09998556210222118
5598
ephron wins lifetime award from writers guild
philanthropi

jakarta police ban soccer final after fan riot
indonesia launches campaign system in beijing
0.09413497880455536 0.009938359895359707 0.0999777465489782
5645
obama makes pitch for budget priorities
us economy secretary energy to boost to economic crisis
0.0941183088950805 0.009936599959128902 0.09996004197193747
5646
germany 's bayer has fourth quarter yearly loss on falling sales and charge for restructuring
taiwan files # company company in in #
0.09410164488854808 0.009934840646104977 0.0999423436642229
5647
soupy sales unsure of comeback
president <oov> a <oov> at
0.09408498678182325 0.009933081955956968 0.09992465162250504
5648
# # < unk > sutherland charged for drunk driving
frank gibson becomes wbo at <
0.09409783309684712 0.009931323888354143 0.09993646436853054
5649
li peng says china uncertain of pol pot 's fate
china premier premier stresses official official says
0.09413174151175023 0.009929566442966008 0.09995417159479696
5650
russian tv on sidelines after attacks in subwa

israeli shelling kills lebanese woman wounds two
man lebanese military of in central china 's main city
0.09411588504714019 0.00987306530358162 0.09998889233723743
5695
raymond < unk > who turned posters into high art dies at # #
top-seeded upsets del del dies
0.0941344709897333 0.00987133227474125 0.1000152239341591
5696
why beer can shortage means brazil will boost rate
drought chile chile <oov>
0.09411795037355397 0.009869599854194614 0.09999767124480596
5697
major will leave eu summit before concluding dinner with bc-eu-summit
afp praises china economic of financial economic ties
0.09410143555509924 0.009867868041621496 0.09998012471537189
5698
scores more bodies recovered after nigerian clashes
s. military agency agency in to of control in s. korea
0.09408492653131764 0.009866136836701914 0.0999625843426148
5699
indonesian shares end # . # # pct higher
indonesian stock index up
0.09411227525495712 0.009864406239116105 0.09998890207909217
5700
norfolk southern sparks bidding war fo

fight over jeans leads to jackpot at atlantic city casino
a the world super super # <oov> <oov> #
0.09438129913146087 0.010009642064089075 0.10033823125385494
5745
increase in autumn grain production urged
economic issues zone
0.09436487642411244 0.01000790034805217 0.10032077201751356
5746
at least # # killed as gunmen rampage in india city
police forces of in
0.09439195282000247 0.010006159238040331 0.10034681224506793
5747
vere c. bird overcame poverty to lead antigua found family
former vanuatu becomes founder founder becomes <
0.094375533972756 0.010004418733737315 0.10032935758995486
5748
bush welcomes intelligence bill deal pushes for swift passage
clinton stresses <oov> to to control on banking bill
0.09437844450791048 0.0100026788348271 0.10033364813646095
5749
polygamist sect children in texas leave shelter for foster care for now
texas texas first of of in texas in texas korea 's first week
0.09438878491989766 0.01001542977457645 0.1003658826413432
5750
ugandan army < unk > 

arafat and netanyahu to meet tuesday on hebron deal
netanyahu prime president hails <oov> state policy
0.09471608316869437 0.010016965321565345 0.10074057847078302
5793
kunming hosts jade fair
coastal provincial china city in central china city
0.09469973871948494 0.010015236768446869 0.10072319441927813
5794
uk grocer sainsbury says # q like-for-like sales up # . # percent
altria morris <oov> <oov> in in china #
0.09470496650783561 0.010013508811792547 0.10073457182994883
5795
zimbabwe rights violations persistent and serious amnesty
china leader official official resigns from
0.09468862961521739 0.010011781451293705 0.10071719481221035
5796
sex brings red hot television ratings says survey
central world city of the < unk > economic economic policy
0.09467229835795364 0.010010054686641878 0.1006998237886139
5797
nokia cuts # , # # # jobs in new restructuring
china company plans for to to achieve development in north korea
0.09467164941719682 0.010008328517528817 0.1006997031085331
579

sri lankan military plane goes missing
air aircraft makes military military in southern asia
0.09461311213690506 0.009982875023924578 0.10066783955091416
5842
u.s. state department calls on europeans to move swiftly on membership for turkey
us military <oov> chief 's to to to stability in asia
0.09461247832081543 0.009981166797534446 0.10066962655836799
5843
iaaf ratifies gatlin 's # # # -meter mark
higgins 's ibf st world title
0.09462480581240582 0.009979459155652919 0.10068091775428044
5844
ruling csu expects absolute majority in bavarian poll
ruling election ruling leader resigns of of china
0.09463000170604037 0.009977752097980038 0.10069220508731368
5845
marriott to ban smoking in every rooms in u.s. canada
mastercard launches # st chinese in # # # #
0.094630920125451 0.00997604562421606 0.1007034885594497
5846
clijsters pulls out of fed cup against poland
defending champ champion defends <oov> title in china open
0.09461473836756362 0.00997433973406144 0.10068626840066731
5847
t

schumer calls for new federal housing aid as realty group sees prices falling slump worsening
clinton <oov> < system in <oov> < unk > system
0.09486158496537349 0.010016032344588446 0.1008403096138678
5891
u.s. says nigerian elections are vital for growth and consolidation of democracy
military chief <oov> < in # in central
0.0948454876321026 0.010014332695454797 0.10082319773373649
5892
lights dim on broadway as a second strike hits us entertainment
insurance of of of of <oov> > 's p of p unk > century unk > < > contributed
0.0948293957611097 0.010012633623059911 0.10080609166014745
5893
sino-french cooperation on goose liver production
world china bank 's stable stable stable
0.09481330934961503 0.010010935127110283 0.10078899139014573
5894
inter seeks return to winning ways against bremen
blatter coach coach of the year of
0.09479722839484067 0.010009237207312605 0.10077189692077834
5895
leiter gives weary mets just what they need
< baseball becomes baseball #
0.09478115289401061 0.

government urged to change skills definition rules on employment of foreign nationals
unep urged urged for strengthen policy on economic stability
0.09495534224333445 0.01002292688201669 0.10090999815689343
5940
father of hong kong movie star stalker commits suicide
lee lee wins china s women title title in beijing
0.09493936187607707 0.010021240088532675 0.10089301565972801
5941
psv eindhoven win # # # # dutch league title
newcastle <oov> <oov> league league league #
0.09497146265410808 0.010019553862705899 0.10091810517417193
5942
indian leaders urge calm army called out after attacks
pak declares # th term in of of years of the # years
0.0949554849517773 0.010017868204249858 0.10090112702727184
5943
< unk > becomes tourist sport in norway
world national official of xinhua election
0.09493951262461973 0.010016183112878243 0.10088415459211167
5944
iraqi government to treat wounded kurdish people in bombing incident
china 's agency says replaced in stable of stable casualties
0.0949403

former thai pm criticizes the military junta that ousted him
former pm becomes prime prime president of indonesia s # # years
0.0952375120955788 0.0100998288984351 0.10115607937344531
5988
school board inquiry over press aide job
former <oov> official executive of resigns on china s daily city
0.09522161267786668 0.010098142783427015 0.10113919188106243
5989
after sex scandal rooney admits he s hurting
former coach champion of < of china s la team dies in beijing
0.09521855834040135 0.010096457231301589 0.10113621977981929
5990
survey shows foreign businesses satisfied with post-wto china
most china 's financial financial remains remains of the china
0.09521935631130583 0.010094772241777007 0.10114318255069334
5991
powell says u.s. wants large sunni turnout in iraq
us <oov> says china
0.0952451832166435 0.010093087814571636 0.10116802099845727
5992
south africa 's gold fields gets go-ahead for mine acquisition
s. korea becomes african company 's securities in #
0.09524783018492754 0.01

un protests over sudan 's decision to remove special envoy
un official chief of china of dprk 's office
0.09530612162492944 0.010032922219880244 0.10122175721195344
6037
dollar higher stocks lower in early trading
stocks mostly exchange rates on regional trading
0.0953376513755652 0.010031260864983758 0.101252307390543
6038
madden scores again with new nfl cd-rom
the of the china of
0.09532186699619838 0.010029600060204787 0.10123554376349159
6039
taliban militants overrun district in e. afghanistan
afghanistan <oov> # , # # , # # in # of the year
0.09532973577220583 0.01002793980527014 0.10126016956323278
6040
guatemala arrests national police chief drug czar
mexico guard official says chief chief to death penalty in corruption
0.09532900411963001 0.010026280099906805 0.10125996099495022
6041
u.s. pressing china hard to maintain democracy in hong kong
china us economy of hong of of currency in #
0.09536287322369758 0.010024620943841952 0.10128457460392011
6042
# rd iraqi candidate fro

nuclear cleanup could derail an experimental cancer treatment study says
un agency department department of <oov> unk > in china province
0.09534615096681165 0.009985014681064058 0.1012521516988093
6086
justice dept . microsoft seek to overturn antitrust ruling
justice defends independent independent watchdog of chinese in chinese state
0.09534691539667912 0.009983374566957444 0.1012560524623279
6087
cetra to set up office in mongolia
chinese bank chief spokesman spokesman on # # th years in china
0.09534494239913219 0.00998173499156461 0.10125584618010383
6088
iaea team begins routine inspection of iraq 's nuclear facilities
dprk agency says says has stable in iraq to develop its own security
0.09534191746227241 0.009980095954620183 0.10125290323874969
6089
taipei shares open higher
manila securities exchange summary of # # # # dollars
0.0953262645452699 0.009978457455858958 0.10123627987588009
6090
stocks trade mixed on uneven economic reports
u.s. stocks fall in economic of china ec

italians kiwis one win away from louis vuitton cup final
baseball champ champion makes his of of the america s <
0.09530014498038646 0.009997628916281549 0.10129978078459254
6135
< unk > retains ibf bantamweight title
boxer challenger wba title of title of china s title
0.09530091080326727 0.009995999841991786 0.1013065523932079
6136
enraged muslim pilgrims kill afghanistan 's aviation minister
provincial military forces 's hospital in china 's death
0.0953034865934771 0.009994371298518016 0.10131041251826603
6137
turkish top court defers kurdish party 's defense hearing until march
military ruling ruling party chief of <oov> < unk > < unk > < unk > state < unk > contributed
0.09529571913314569 0.009992743285600845 0.10130871821600033
6138
# suspects arrested in wendy 's execution-style killings
police police find old of unk > city 's store city says no casualties in china
0.09530055696390577 0.00999111580298104 0.10130385174258218
6139
watchman questioned in france over stolen paintin

out of office mccall joins investment firm
<oov> 's chief president executive in beijing
0.0950941982706008 0.009969051308558593 0.10106549918495028
6184
new allegations target dna bullet analysis at fbi lab
university panel < < unk > < > policy policy on <
0.09507882578462107 0.009967439758072244 0.10104916140622655
6185
polio immunization slows as nigerians raise doubts
international development world < unk > to to in china province
0.09506345826792725 0.009965828728533199 0.10103282890882778
6186
pietersen out of one-day series
ayala stops ankle in out injury against <oov>
0.09506829610595764 0.009964218219688898 0.10103670207804097
6187
former mexican president says he 'll fight drug accusations
pinochet of former of president ruling ruling chief
0.09509332950455096 0.009962608231286944 0.10107423586884001
6188
ugandan gov t to set up anti-corruption tribunals
government issues military agency on <oov> china in indonesia
0.09507796709267624 0.009960998763075105 0.10105790723622793


high court rejects $ # # . # million tobacco ruling
judge <oov> <oov> <oov> case of ruling case case
0.09505487483778115 0.009924557820748478 0.10104556795185686
6233
obama s new security goals prize nonmilitary moves
president president s < >
0.09507170645368528 0.00992296607129848 0.10106945799709313
6234
judge sympathizes with umpires but wo n't let them strike
military <oov> world <oov> 's world in world elections
0.09505646083045666 0.00992137483235183 0.1010532505791975
6235
dollar down gold down
dollar exchange rates sharply on year of trading in europe
0.09505725344536278 0.009919784103662982 0.10105308170785245
6236
southern china braces for more floods with heavy rain forecast in coming days
southern korea province 's most most < of <
0.09505982700382154 0.009918193884986538 0.10105978322454838
6237
bangladesh gets new president
s. indonesian prime president <oov> resigns on <oov> party
0.09506239973728961 0.00991660417607726 0.10106362041268357
6238
alcohol ads still being p

dozens arrested in malaysian vigils over security law
thai activists group leader in
0.09518424367150057 0.009999243834171825 0.10119797745435262
6282
southeast europe foreign ministers to meet on rhodes next week
rok vanuatu chief says says korea to have been in the # years
0.09518133769334201 0.009997652611410182 0.10119513457654851
6283
right to die ruling evokes passionate reactions
< unk > rights slams state government of china s <
0.09516619348686733 0.00999606189500423 0.10117903352092775
6284
flamini set to sign contract with ac milan after medical tests
di joins di <oov> at
0.09515105409878478 0.009994471684712309 0.10116293758813727
6285
europeans scramble to save failing banks
central bank <oov> <oov> <oov> bank to be loans
0.09515359267950887 0.009992881980292918 0.10117335650480316
6286
us banks borrow record amount from fed
us banking banking system increases in # # years
0.09515613045279633 0.009991292781504704 0.10117998560568617
6287
bush promises pay raise to troops
b

new jets defense is like old times
new <oov> <oov> <oov> 's # . # million dollars for the < unk >
0.09505314794412693 0.010009716513010239 0.10105132583316204
6330
credit suisse sees first-quarter profit of # # # million swiss francs
swiss banking group posts up in first round of year
0.09506972198898732 0.01000813569865253 0.1010511597993918
6331
home sales post modest gain in march but pace is still significantly below last year
u.s. growth in first year of the # years
0.0950862907996633 0.01000655538352563 0.10106678412280892
6332
sadc brings more private involvement
s. s. african economic economic council concludes
0.09507127875501542 0.010004975567393088 0.10105082788912992
6333
obama offers support to lebanese president
uae leader congratulates # president of state of state state
0.09507205676941874 0.010003396250018597 0.10105742714958264
6334
us ice storm cancels hundreds of flights in texas as death toll climbs to # #
< <oov> unk unk >
0.09505705170995388 0.010001817431166007 

perry says war with north korea not imminent but sanctions will come
us <oov> <oov> policy to be needed in south china
0.09502233313845361 0.009996580772689936 0.10105477543477932
6380
some catholic leaders say abuse scandal is exaggerated
commission <oov> < unk > bank of financial policy
0.09500744402326426 0.009995014401525303 0.10103894109202866
6381
rouse kennedy continue winning ways
lee champion world world championships at world cup
0.09499255957331545 0.009993448521155333 0.10102311171068884
6382
ioc executive board nominates # # candidates for membership
world chief 's < #
0.09500900810721688 0.009991883131349386 0.1010386156092304
6383
world summit on intellectual property knowledge economy to be held in beijing
china national national chief of < unk > published in
0.09500978978174981 0.009990318231876974 0.10104019313397619
6384
chinese man arrested for killing six men in gay sex games
china activist dies of
0.09499491195685446 0.009988753822507749 0.10102437099286533
6385
a

military says # us soldiers killed in separate attacks in baghdad
u.s. soldiers killed <oov> military in central
0.09513887751122901 0.010019731354052338 0.10124514440338522
6431
allies bomb iraqi military site for fifth time in a week
army navy <oov> military base on a year in south iraq
0.09516648335534778 0.010018173802155238 0.10125766935868054
6432
us offers to raise philippine import quote on wool clothes
us takes <oov> to of monetary currency
0.09519609911573929 0.010016616734420988 0.10128633840070038
6433
mother teresa 's powerful speech broke all the rules
china leader is most of in the world city
0.09519857231107656 0.010015060150623875 0.10128786517190634
6434
u. of california regents nominate texas < unk >
state national official of china president president says he is out
0.0951979058285716 0.010013504050538323 0.10128766506855459
6435
greek arsonists target foreign car dealership
vandals jetliner explodes damages on central
0.0951831166556916 0.010011948433938891 0.10127

former fatah guerrilla shot dead in lebanese refugee camp
palestinian vice chief dies
0.09499957129743263 0.010027080786703097 0.10107805728466297
6479
milosevic 's wife says her husband 's life is in danger
leader chief chief dies president 's president of
0.09500420027887106 0.010025533636450559 0.1010881774218921
6480
raikkonen and other michelin drivers roars past schumacher in second malaysian gp practice as track heats up
button loses <oov> at
0.09498954366050037 0.010023986963566194 0.10107258220784984
6481
un peacekeeping mission begins plan for withdrawal from car chad
dprk military agency concludes on south china
0.094974891563684 0.010022440767829101 0.1010569918049179
6482
barbados swimmer who crossed english channel in intensive care unit
man champ first first champion in china # , # # # m of his year
0.09496988309798941 0.010020895049018517 0.10105425835974954
6483
suicide car bomber hits u.s. intelligence headquarters in north of iraq
car military casualties casualties
0

water rises to alert level in china 's second biggest freshwater lake
east yangtze river river <
0.09484851457843171 0.010011054108243104 0.10096056035519875
6526
thousands left homeless after fire in bangladesh slums
# <oov> <oov> landslide hits
0.09483398508784066 0.010009520552160346 0.10094509458308551
6527
us defends herbicide to colombia
us <oov> defends < of < drug rating system
0.09485349622846469 0.010007987465845112 0.10096792425155188
6528
iraqi oil minister in turkey for pipeline talks
kuwait 's < of economic ties with
0.09483897042506063 0.010006454849081584 0.10095246208857307
6529
senate debates patients rights
congress launches rules of < control control in beijing
0.09482444906991977 0.010004922701654071 0.10093700466060056
6530
nato allies draft options to respond to attacks from croatia
military develops bosnia on in fighting against
0.09480993216099907 0.01000339102334702 0.10092155196545961
6531
us cites rising rights abuse in northern caucasus
u.s. department sees

new yorkers hold funeral for firefighter killed in iraq
new us man s death
0.0947023342082047 0.010063099788256025 0.10084381515030821
6576
neutrinos thought to account for missing mass in universe
< < > of of of of # .
0.09468793738026184 0.010061569976795361 0.10082848468281805
6577
kenyan mp dies in flood havoc
vanuatu 's leader of of # . # million in asia
0.09468874480732062 0.010060040630393658 0.10083215872375394
6578
turkey condemned for violating freedom of expression
court 's supreme of condemns 's of property of iran
0.09468955198896084 0.010058511748838888 0.10083854549945809
6579
france probes < unk > theft from kiev mayor 's daughter
customs de france 's # of < unk > dies at least #
0.09473360697036586 0.010082308763717756 0.10087387368481501
6580
lyle < unk > # # nuclear physicist who worked on a-bomb project
dprk 's unk unk > #
0.09479517889273438 0.01014154876542488 0.10094970566997381
6581
< unk > fight between airport business town goes to court
<oov> insurance carrie

coca-cola enterprises profit skids dragging shares down
starbucks stock markets plunge as financial exchange remain say
0.09459616345446564 0.010090972696217579 0.10075385548050877
6625
spanish bank santander reports # # percent rise in nine-month net profit
vanuatu ricard beats third-quarter net loss to to
0.09460075132779378 0.010089449990212414 0.10076020877572178
6626
anc involvement in kwazulu\/natal killings denied
president leader president 's of xinhua
0.09458647843230075 0.010087927743684019 0.10074500657162165
6627
british jobless rate hits decade high
major economy 's says economic crisis says financial economy
0.09457220984300639 0.01008640595642445 0.10072980895409689
6628
corn tops $ # a bushel on supply demand worries
rice rice boosts economy to grow to boost economy
0.0945579455579622 0.010084884628225894 0.10071461592107214
6629
prodi determined to join single currency
italy 's minister says < of china unk > relations
0.09454368557522083 0.010083363758880663 0.10069942

evans says no worries about us current account deficit
us <oov> < china of < of unk > interests
0.0944425471998671 0.01008945300533751 0.10058775387146994
6674
bush signs executive order to create office of homeland defense
president 's secretary of china 's secretary state state of reporting in china
0.09443992295327082 0.01008794170320969 0.1005914105889847
6675
mountain man ledger 's steady climb to the top
no 's unk > takes the way for the unk > 's a article for a <
0.0944433986363426 0.010086430853770837 0.10060357574867156
6676
four cases recorded of chinese people buying taiwanese real estate
chinese provincial ministry <oov> < unk > on # # years
0.09444286938990246 0.010084920456817593 0.10060348536595987
6677
centennial technologies still has a viable business ceo says
dr. executive chief <oov>
0.09442872911899515 0.010083410512146711 0.10058842270907022
6678
five face death sentence in egypt
military agency says most in china in indonesia
0.0944333056565522 0.0100819010195550

european water police organization to increase operations in # # # #
< l < exercise
0.09432553677479875 0.01007639635350105 0.10038195011337087
6724
record sales in july for u.s. automakers
computer companies service sees to to in the year
0.09432803239988592 0.010074898227370585 0.10038561024567635
6725
wall street rally stumbles but analysts say momentum still strong
stock stocks index in # . #
0.09431401009686824 0.010073400546647028 0.10037068745539157
6726
animated oscar race to feature only # nominees
first world wins winners in #
0.09432476405890299 0.010071903311131771 0.1003805411978427
6727
british tourism industry bounces back
uk business becomes overseas bank says china 's # years
0.09431074640931777 0.010070406520626327 0.10036562359623803
6728
a quake in the bay area raises concerns
< hits north korea of the national city of <
0.09431159176646349 0.010068910174932326 0.10036928397906178
6729
sweden beats us # - # to win bronze at hockey worlds
sweden world world title at 

drama in london 's churches
world world s history is the a century in china
0.09452711554442979 0.010033032990447971 0.10057273776550489
6773
muslim cleric says detainees questioning their actions some have
al-qaida human defector in the
0.09451316320265202 0.010031552099969676 0.10055789308096386
6774
craig ferguson says he will be the entertainer at white house correspondents dinner
president s world # . . <oov> <oov> on president 's presidency
0.09449921497903888 0.010030071646590106 0.10054305277797079
6775
health care reform is rife with contraction
economic insurance of chinese care system in china # years
0.09450002666341559 0.010028591630115768 0.10054297264623434
6776
congolese tribal rebels attack zambian villages
pla damages <oov> claims in # of south korea
0.09448608449365113 0.010027112050353284 0.10052813892350695
6777
court blocks dismantling work on ghost fleet headed for uk
retired anti-doping department chief <oov> to
0.09447214643722782 0.01002563290710939 0.10051330

police chief of northern iraqi province mosul replaced due security complaints
national officer officer official says official of his party in # years
0.09454061329647948 0.010055740973474173 0.10055857163824994
6824
world bank announces new strategy for mexico
bank development development of philippines 's philippines
0.09454769160435533 0.010054267820679934 0.1005731396763926
6825
obama vows swift engagement with iran
obama says <oov> of economic of in # nd # years
0.09454715863223065 0.010052795099452356 0.10057468324918221
6826
rangers best pitcher big bats fizzle vs. giants
texas 's <oov> <oov> giants at
0.09455772095033763 0.010051322809601821 0.10058924466053999
6827
shenzhen a-share market down
shenzhen stock indices up # . # points
0.09456479364354406 0.010049850950938823 0.1005989206631767
6828
bell hits for cycle as phillies rout expos # # - #
ucla . #
0.09462415458151718 0.010048379523273972 0.10065739812720845
6829
early strike for new zealand as west indies face # # # -ru

switzerland bans u.s. military planes from airspace
world war currency summary
0.09464951019034591 0.010043957289962102 0.10061268070922004
6875
two jackson jurors say they regret acquittal ; contradict early impression of harmony on panel
judge panel panel of
0.09470845311455846 0.010042496775596833 0.10064652114147599
6876
wall street sputters mulls economic hit from katrina
u.s. sees earnings recovery on economy economy
0.0946946833481853 0.010041036685923148 0.10063188803284825
6877
hingis enters bausch and lomb championship top eight
stanford loses wins first second time of
0.0946809175852331 0.010039577020755839 0.10061725917864955
6878
with sports-track-worlds iaaf congress turns down proposal to halve doping ban to two years
committee committee committee < unk > on # # years
0.09468169070767711 0.010038117779909799 0.10062339869662608
6879
sablikova breaks pechstein s # , # # # -meter record
veerpalu 's champion 's title in
0.09466793083400937 0.010036658963200032 0.10060877532

smithsonian official david < unk > # # dies
former china chief of # unk > at us dies #
0.09459124552044634 0.010017656026253525 0.10054355664462601
6923
levens stays with packers
ucla of major president of #
0.09457758613481161 0.010016209433325547 0.10052903771947877
6924
u.n. haitian police find # # skulls in an upscale haitian suburb
police forces find < of in in china
0.09461807449950484 0.010014763258125817 0.10057640162228731
6925
seven soldiers killed in clash with kurds
kuwait navy colonel says < # of
0.09460441518457781 0.010013317500473426 0.10056188214753312
6926
nepal 's supreme court to rule on reinstating parliament
ruling ruling court chief 's # in china
0.09462684526321745 0.01001187216018756 0.10056798712165402
6927
house panel questions how u.s. agency handled mad cow case
hong unk 's ruling chief < unk > rating in china 's # # th # century
0.09461318862513644 0.010010427237087519 0.10055347305221808
6928
authorities investigating sect 's money deals
swiss spy organiz

police battle their own in the interest of rugby
football football official declares a death in < unk > contributed
0.094467277888515 0.00994726091005011 0.10041069148546451
6972
us consumer confidence edges higher in december
consumer confidence rates for
0.094525427117381 0.009993631152726232 0.10046798848983998
6973
marlins take third straight from slumping cubs # - #
seattle baseball 's < #
0.09454054891994482 0.009992198374066343 0.10048225831227872
6974
california air regulators pass strict emission rules for ocean-going ships headed into ports
california korea <oov> <oov> of unk > system
0.09454491524034048 0.009990766006180152 0.10048833269366415
6975
without the hebron and oslo accords israel would be at war
east korea < unk > economic policy of <
0.0945313642993572 0.009989334048891034 0.10047392989408072
6976
new life jazz orchestra swings with spirituality
national china city china 's top cities
0.0945178172422779 0.009987902502022462 0.10045953122255677
6977
leitner and re

albania 's parliament votes to dismiss attorney general
cambodian parliament declares national commission of democracy 's <
0.09452195012425557 0.00992673175603372 0.1004418007465141
7020
actors work double time to keep up with the stage version of the # # steps
lee s character <oov> at <oov> at
0.09450848929399008 0.0099253180944336 0.10042749687286748
7021
wall street heads for moderately lower open
u.s. economic markets exchange rise on the #
0.09449503229708078 0.009923904835414032 0.10041319707265776
7022
williams reaches quarterfinals at j & amp ; s cup
u.s. wins cup cup s # s
0.09451717138701572 0.009922491978803067 0.10042737514824537
7023
us president condemns bombing in philippines
president <oov> president 's death
0.09453218673628447 0.00992107952442886 0.1004486666250926
7024
key lawmaker shifts position backs kennard for fcc chairman
former president defends congress 's # of # congress
0.09451873211249621 0.009919667472119661 0.10043436991763101
7025
schroeder bush to hol

word for word schumer meets his match
president is <
0.09457682116575845 0.010007626165833958 0.10056483300321947
7069
russian teachers launch new strike for back-pay
indonesian president economic council system for < unk > contributed reporting in beijing
0.0945743245253621 0.010006210860196024 0.10056148952194664
7070
mugabe to be sworn in saturday
zimbabwean president leader 's in in south korea
0.09457862679847785 0.010004795954814207 0.10056747024215806
7071
arabs fearful of the damage they 'll suffer
< currency banking policy is n't not to to the <
0.09457810798950156 0.010003381449518744 0.10056896093081477
7072
china holds qatar to # - # draw
uzbekistan cup results results of # nd world
0.09458240851141426 0.01000196734413996 0.10057493890394544
7073
england beat scotland in six nations
world 's international cup world 's cup tournament
0.09456903997310877 0.01000055363850828 0.10056072336487774
7074
us stocks opens higher after steep losses
stocks mostly mostly on on financial

india 's bharti s africa 's mtn in fresh merger talks
malaysia electronics group group group to in china
0.09454883833959102 0.010028741311378531 0.10057332546645349
7116
more than # # # treated after us cruise ship accident
national park suffers <oov> at the unk > in australia
0.09453555527716624 0.010027332384529504 0.1005591960304509
7117
poking fun at elderly people is america 's mindless pastime
world 's china official published of # # years
0.09453788363168708 0.010025923853502037 0.10056262920982575
7118
# # # african immigrants aboard # boats arrive in spain 's canary islands
two <oov> small # of spain 's largest largest # years since north #
0.09457476635268584 0.01003531952106799 0.10056127337579475
7119
turkey committed to transformation as part of eu bid foreign minister says
vietnam stresses economic reforms in china 's currency system
0.09456148524520759 0.010033910264008438 0.10054715158484183
7120
software innovation to enjoy more government support
< unk < < unk > on d

relatives survivors mourn jonestown tragedy on # # th anniversary with < unk > < unk > memories
new world < 's unk > becomes # dies at age #
0.09445994568554804 0.01005229010373191 0.1004540339045353
7163
georgia holds tense parliamentary elections
georgia election of <oov> in
0.09447467563032326 0.010050887132328738 0.10046792727035464
7164
tropical storm < unk > gathering strength heading toward hainan
hainan volcano 's major of of china s city island island contributed
0.09447312087979341 0.010049484552488894 0.10046786197210313
7165
kenya s victor kigen wins belgrade marathon
mario clarin winner in in china #
0.09445993919695822 0.010048082364048474 0.10045384385267071
7166
general assembly to meet on global nuclear test-ban treaty
nuclear <oov> agency says not be a unk > says it is no a policy
0.09445606178728604 0.010046680566843668 0.1004505611005879
7167
kenyan under- # # soccer team leaves for djibouti
< 's team world 's title in #
0.09447775852856274 0.010045279160710756 0.10

paralegal sought in jewel theft from law firm 's vault had troubled history of criminal activity addiction
< s > unk of obama s > < < unk > and the unk > < > < unk > contributed
0.09436097658762341 0.009999406268438447 0.1003501230091265
7212
siemens to pay # # # million dollar corruption settlement
swiss bank insurance firm <oov> for < unk > for < unk >
0.09434789633026444 0.009998020157228516 0.10033621254017597
7213
gm backs global outlook except for north america
general motors says sees china # . # <oov> dollars in central bank
0.09433481969875643 0.009996634430248999 0.1003223059272113
7214
berlusconi speaks out for former socialist premier
argentine prime president <oov> in china 's elections
0.09432174669159196 0.00999524908734015 0.10030840316862936
7215
microsoft will likely continue to dominate software market
computer computer development on
0.09430867730726447 0.00999386412834232 0.10029450426282797
7216
ekeus determined to end iraq-un crisis
u.n. bank council warns
0.0942

rangers consider parting with ivan rodriguez if right deal comes along
usc ucla loses for a loss in st. #
0.09434210291767488 0.009991398457183243 0.10032657509446162
7261
vanderbilt 's < unk > is grateful for a second life
china national boxing becomes
0.09432911350518448 0.009990022799953835 0.1003127617149911
7262
serbian deputy prime minister arrested on suspicion of passing
former chief former president leader 's > # dies at #
0.0943161276690742 0.00998864752148468 0.10029895213876382
7263
two injured in boat crash
hainan tourists east asia
0.09430314540786716 0.009987272621619369 0.10028514636420927
7264
five presidential hopefuls named in central africa ; not patasse
<oov> of s. president president 's <
0.09429016672008739 0.009985898100201585 0.10027134438975782
7265
turkey prepares for major anti-war protests
< military < > < unk in china of # # years
0.09427719160425965 0.009984523957075095 0.1002575462138407
7266
major earthquake jolts new zealand 's kermadec islands
quake o

brazil reaches semifinals at u # # world cup
brazil champion win # - #
0.09426690039266378 0.010021997665718317 0.10026101023152739
7312
chinese general meets us army chief of staff
chinese leader leader general resigns of # st # years
0.09429502906365192 0.010020627417199623 0.10029857367010662
7313
thailand raises annual export growth goal
shenzhen korea china achieves first currency year in year
0.09428213842399866 0.010019257543321674 0.10028486231348734
7314
nkorea says us recognized it as nuclear power
south korea military agency policy on policy of north china
0.09426925130830374 0.010017888043930842 0.1002711547051886
7315
hingis chang continue to advance
sorenstam < < unk <oov> <oov> her < of her time of <
0.09425636771512234 0.010016518918873589 0.10025745084367362
7316
congress agrees to satellite export control shift
state state 's system
0.09424348764301041 0.010015150167996454 0.10024375072740638
7317
dow chemical # q profit rises # percent on sales increase
opec 's stock

girl # found in ravine near dead mother more than a week after california crash
# man -year-old woman of the in in # years of china s famed city
0.09443871220557906 0.010098636491781162 0.10048920995443157
7363
# # # # world figure skating championship program
international team federation <oov> title of national team
0.09442588956984171 0.010097265325930275 0.10047556579829382
7364
un official glad about new somali force but wary
senior chief praises < on < of china 's <
0.09441307041567801 0.010095894532375303 0.10046192534678713
7365
indonesia does not object if u.s. cancels f- # # sale
indonesian national <oov> development in < unk > policy
0.09440025474167019 0.010094524110964637 0.10044828859840288
7366
oil prices steady on lack of new market cues
crude price reserves reserves from iran 's says 's economy will remain remain
0.09438744254640123 0.010093154061546754 0.10043465555163328
7367
ensure the program that helps ensure our prosperity
chief 's chief of
0.09437463382845492 0.

lebed optimistic over chechen peace process
dprk confirms < 's <
0.09448822443969564 0.010091227289651535 0.10046704115383533
7413
the hague wants to amend tough eu action project against drugs
philippines world agency issues <oov> in # # # # dollars from
0.09447548159081638 0.010089866368911191 0.10045349199117129
7414
haitian senate votes to dismiss prime minister
zimbabwean prime leader declares economic economic of in # # # #
0.09447397914364035 0.0100885058151937 0.10045680193022317
7415
ji commander visited australia to set up terror network experts warn
indonesian official <oov> says to be held on # years
0.09447472419161883 0.010087145628350612 0.10045674034171971
7416
england # # # - # at tea in first innings of third test
first england 's # nd world 's second
0.0945125410257801 0.010085785808233551 0.10048171439744533
7417
sri lanka landmine attack leaves at least six sailors wounded
sri navy aircraft rocks ne southern philippines
0.09451905734898153 0.010084426354694228 0.10

repair mission set for hubble telescope
china unk world world < unk > # dies in
0.0943664835258093 0.010091631147247086 0.10032771283466514
7463
# # killed in rio de janeiro flooding
hainan disease of in of central korea s < of national test
0.09436500554185855 0.010090279287749799 0.10032766893475427
7464
philippine military releases report on < unk > operation
philippine navy hospital in philippines after
0.09437468966469872 0.010088927790390067 0.10033655441529697
7465
palestinian film among movies submitted for foreign film oscar
dprk winner 's first title at
0.09436205076156966 0.010087576655022398 0.10032311708378294
7466
thousands demonstrate to back niger president
liberian <oov> <oov> < unk > < in # # years
0.09434941524325664 0.010086225881501373 0.10030968335091152
7467
beijingers eat more fresh water fish
hk kong reports most
0.09433678310840014 0.01008487546968165 0.10029625321523727
7468
iraq olympic spokesman says talks under way to allow beijing team
anti-corruption com

phish swim toward the mainstream
< < unk > classic ; # . a a time at
0.09425934233903673 0.0100748933828265 0.10024575080155813
7514
stadium breakthrough boost gunners as wenger vows to fight fa charges
< 's > makes first currency in china city
0.09424680118119491 0.01007355292335566 0.10023241315509704
7515
a year after metrolink tragedy emotional and physical scars remain for victims
<oov> 's 's a a chinese of central america 's city
0.09424635715960757 0.01007221282053228 0.1002357080316229
7516
canadian soldier killed in car crash in afghanistan
air air helicopter in vietnam 's first
0.09425282307949284 0.010070873074214039 0.10024454428576429
7517
serbian reformists agree to form new government fail to end deadlock over premiership
former parliament leader hails government 's state state of # # # #
0.09425051828548349 0.010069533684258697 0.10024598950013128
7518
< unk > chides < unk > in criticism of kelly firing
<oov> baseball player is of of of the year 's top article
0.0942490

archdiocese places liens on home of priest accused of stealing
san angeles becomes taiwan s largest city of # dies #
0.09415478560490274 0.010009627680055677 0.10017449355126402
7563
law to segregate omaha schools divides nebraska
south korea ruling becomes ruling of s.
0.09414233949973355 0.010008304530329299 0.10016125171470734
7564
chinese strongmen are climbing up tough road
chinese women becomes most in china 's most cities
0.09414458226626955 0.010006981730364942 0.10016453465791185
7565
< unk > meeting on drug control ends in yangon
chinese <oov> military unk > <
0.09419821718337458 0.01003208983374404 0.10019534856020806
7566
trading stocks adrenaline highs lows
hong 's 's sports stocks of < unk unk > and
0.09419778257366604 0.010030764240478482 0.100196790917839
7567
german export booms despite economic slowdown
germany gdp bank says it is no half in china
0.0941853373652404 0.010029438997481986 0.10018355313333406
7568
u.s. files to be presented at fujimori 's trial
former mi

new white house spokesman debuts
new house of president president of of of secretary of commerce state of the article
0.0940623287700658 0.01001525773089117 0.10011698245008266
7612
diana 's lawyer threatens to halt divorce proceedings
prince founder becomes founder s prince of the state state says
0.09404997490497911 0.010013942356878707 0.1001038333848804
7613
hewlett plans to cut prices on computers
economist vice executive calls of financial to to # # # # #
0.09404772580478452 0.010012627328335454 0.10010710274359544
7614
hewitt beats henman in straight sets to reach final
roddick wins world <oov> #
0.09403537710129124 0.010011312645125326 0.10009395842863436
7615
clinton accuses obama of political plagiarism says he represents change you can xerox
president <oov> obama clinton of the party 's nation of nation s nation 's top city
0.09404764763075149 0.010009998307112312 0.10010269844547012
7616
asian clay shooting championship kicks off
world cup championships championships opens 

baystars beat swallows # - #
yano defends < 's
0.09429319174718576 0.010103865074727637 0.10025757765276067
7661
militants battle nigerian forces in oil region
oil military forces <oov> of of of power in north korea
0.094316476894123 0.010102546548683694 0.10027349369668204
7662
iowa man charged with threatening to kill kobe bryant accuser
<oov> leader describes chinese leader 's ruling court
0.09430417046446563 0.01010122836672275 0.10026040999447736
7663
moroccan militant leader deported from italy
s. china defector official official in # years years
0.09429186724587926 0.010099910528710131 0.10024732970615452
7664
< unk > < unk > disgusted but not surprised at latest spying revelation
state of the central korea is not be be in beijing
0.09429142597581185 0.010098593034511239 0.10024729744295258
7665
the afp sunday economics news advisory
president <oov> < china unk > policy policy
0.09427912762887357 0.010097275883991542 0.10023422227698897
7666
authorities drop charges against prin

vietnam to spend # # bln usd on social security in ten years
vietnam achieves # . # million usd in corruption of corruption
0.09425317868515924 0.010068478332873217 0.10016408074725958
7710
new airport in central china to open
central central city city
0.09427337407174052 0.010067172772923415 0.10021592669114608
7711
< unk > to close us plant cut # , # # # jobs
us develops < of development in south china
0.09429356422160805 0.010065867551508541 0.10021913997693746
7712
mtv to launch pan african music channel
international international organization <oov>
0.0942813405290722 0.01006456266849694 0.10020614812576077
7713
diplomats welcome sri lanka lifting censorship on foreign press
sri lanka military agency praises
0.09432096705654736 0.010095662530756368 0.10024500669372892
7714
philips to sell its travel agency to u.s. < unk > international
roche mitsubishi < of < of unk > in # # # # dollars
0.09433651458729245 0.010104323419091298 0.10027521513419543
7715
nbc to buy one-third of paxso

recount in new jersey house race likely to confirm democrat as
presidential <oov> 's party party of < unk > < unk > campaign
0.09444682349477032 0.010107070694924943 0.10041648109649298
7759
lopez looking for fresh start following horrible season in al
romney makes himself a his to to keep his presidency
0.09443465408058467 0.010105768405182007 0.1004035424956559
7760
few tears for british in basra one month on
iraqi army 's forces iraq of war war war is a than a year
0.09442248780203784 0.010104466450994274 0.10039060722865055
7761
british fm urges saddam to go into exile to avert war
british pm <oov> success on of recovery in indonesia
0.09442463756673049 0.01010316483223207 0.10039198820300098
7762
patrick open to corporate < unk > cut
mexico governor s < rating on financial policy rules
0.09442678677764553 0.010101863548765785 0.10039336882161357
7763
for the love of the game boone signs on the dotted line
<oov> the boxing 's s . . #
0.09443302378422369 0.01010056260046588 0.100398

bush keeps bird flu a top priority visits lab in vietnam
us vice official launches first year of # # months
0.09438020919744965 0.010095833249637396 0.10037958073719468
7806
unqualified imports rise slightly in # # # #
dprk sees first of < of enterprises reserves up in south china
0.09437879438240558 0.010094540238206858 0.10037836781585399
7807
long island train killer sentence to life behind bars
former police of beauty dies at age
0.09436670848224135 0.010093247557935607 0.10036551362609655
7808
watkins glen is dropped in indycar scheduling
taiwan 's ibf first top president #
0.09435462567705798 0.010091955208696435 0.10035266272806503
7809
cyprus president says ready to attend peace talks at any time
eritrea <oov> says no . of its economic program
0.09435854903825666 0.010090663190362201 0.10035581819308513
7810
soggy field delays bangladesh-nz third day play
new test <oov> <oov> <oov> test test in
0.09434647037094505 0.010089371502805831 0.10034297182618894
7811
heirloom jewelry i

jet-setting county chief returns to a storm of criticism
taiwan marine president declares bid of china to step
0.0943694826306484 0.010120552708628718 0.10033206617136994
7853
zambia tanzania to create joint investment promotion network
zambian central economic bank issues in s. africa
0.09435746869269415 0.010119264286896237 0.1003192931521247
7854
foreign minister faces formal probe
minister acknowledges no of policy on spying in pakistan
0.09435960128465168 0.010117976193173363 0.10032066685604005
7855
ic card producer gemplus to expand production in china
rok < bank world unk > in china 's largest currency
0.09437073257910675 0.010129415931471292 0.10033103945562795
7856
chinese premier meets malaysian chief of armed forces
chinese premier leader leader leader in china kong
0.09439053777984752 0.010146306708631595 0.10036069105831028
7857
guo inks all-chinese final in # # # # women 's table tennis world cup
world cup champion champion champion defends china title in world cup
0.094

rumsfeld avoids discussing the politics of sandinista leader daniel ortega
former chief <oov> <oov> at presidential presidential presidential election
0.09445533975183416 0.010154612215443415 0.1004787945730489
7902
a mediocre honeymooners
world < < > classic on <oov> of the unk > < unk > < > contributed contributed reporting from < article
0.09444338943050928 0.010153327471995105 0.10046608217495008
7903
results of women 's # # # m breaststroke swimming
us champion women wins
0.0944630676861158 0.010152043053592576 0.10048499854659146
7904
nigeria recalls envoy after kadhafi partition comment
nigerian <oov> chief warns in
0.09445111941041556 0.010150758960112485 0.10047228857966171
7905
families friends painfully accept loss
tourism world cup #
0.09443917415691734 0.010149475191431554 0.1004595818275965
7906
supersub huckerby has canaries singing
reid defends # of the # title
0.09442723192447464 0.01014819174742657 0.10044687828917621
7907
north ireland unionists set paramilitary dead

n. ireland 's paisley slams pope visit to britain
<oov> chief <oov> of of of china leader says
0.09429534274601488 0.010126396790139902 0.10031422015288378
7952
us says it is too early to remove north korea from terror list
china defends <oov> nuclear policy on nuclear issue
0.0942834876614353 0.01012512367010091 0.10030160835754146
7953
six killed in plane crash
south african national national traffic suffers injuries reported reported say say say it 's no injuries
0.09427163555739237 0.01012385087014238 0.10028899973298364
7954
ira-linked men accused of training colombian rebels set to go to trial
ira marine colonel <oov> <oov> to in in # # years
0.09427121291477696 0.010122578390143619 0.10029210569078491
7955
hall unexpected hero with the bat for south africa
lee < wins world title title
0.09425936533240738 0.010121306229983994 0.10027950142967007
7956
doldrums for < unk > italian fashion
central economic bank crisis crisis crisis crisis
0.0942475207275654 0.010120034389542931 0.10

european stocks close firmer
asean exchange rates index
0.09420117544845125 0.010093107655170245 0.10021945684138235
8003
chongqing police arrest amphetamine dealers
shenzhen customs seize large of unk > toothpaste of hong
0.09418940765639022 0.010091846804744864 0.10020693723403178
8004
un unk > china defends policy of its stability in
0.09417764280407241 0.010090586269295859 0.10019442075423737
8005
top-seeded davydenko advances to semis at st. poelten
gasquet upset # #
0.09416588089039638 0.010089326048705213 0.10018190740082732
8006
as complaints mount government blasts demoralizing media
prime pm <oov> prime prime ministry of china of < unk > region
0.0941541219142612 0.010088066142854974 0.10016939717263042
8007
israeli and palestinian police hold first joint workshop look to future cooperation
israel forces guard launches < < unk > operation
0.09414236587456658 0.01008680655162725 0.10015689006847601
8008
kenya hockey union names team for south africa
world boxing world champion

accor plans to build new hotels in south africa ahead of world cup
royal < company 's largest < unk > bank says < unk > bank unk > < > contributed
0.0942629340562815 0.010103096196694725 0.10026809258485547
8052
netflix to take jaws field of dreams on location
<oov> classic of < of unk > event at
0.09426502595807623 0.010101841777003058 0.10027116334564702
8053
iniesta toure sidelined for # weeks
deportivo world < unk > in world 's world title
0.09425332328570404 0.010100587668775 0.10025871503238251
8054
italian man dies month after robbery in colombia killed his wife
hainan 's recovering recovering of # nd # -year-old leader dies in china
0.09426072060826617 0.010099333871894567 0.10026883909727646
8055
british balloonists on course in round-the-world bid
second world world < first round in s. china
0.09426281200587108 0.010098080386245828 0.10027190862202547
8056
greece aims to improve national healthcare system
cambodian minister defends office on financial ministry
0.0942511139651

chinese president arrives in toronto
lu reaffirms national commission secretary says june #
0.09413139290801348 0.010082621616718006 0.10014755982133065
8101
canada gains # # , # # # jobs unemployment at # pct
canadian trade sector index falls in july #
0.09413520243622425 0.010081377309471713 0.10015062688787127
8102
rebel group disavows legitimacy of colombian president-elect
colombia military commander resigns after president 's president
0.09412358654253766 0.01008013330931013 0.1001382687157479
8103
first chinese graduate of yale honored
china 's university 's
0.0941119735152036 0.010078889616119592 0.10012591359314262
8104
chechen rebel representative says they are not behind moscow theater raid
russian communist president 's unk > in in unk > < unk > in russia
0.09410036335316126 0.010077646229786491 0.10011356151892684
8105
macedonian opposition stages protest rally
east 's < unk >
0.09408875605535033 0.010076403150197272 0.10010121249197249
8106
# # accused of aiding health < 

ukrainian journalists freer than ever but murky media ownership still big problem experts warn
chinese election watchdog body says on < unk > to become control in # # # #
0.09408975971790984 0.010020780218185634 0.1001075218508844
8151
body recovered off penghu might be victim of ship sinking
coastal flag develops off in china china province
0.0940935509898689 0.010019551127026774 0.10011276527306104
8152
more than # # # couples join during first month same-sex civil unions are offered in new jersey
dprk state agency rules to on < unk > law
0.09408201143247498 0.010018322337337416 0.10010048752406998
8153
merck kgaa s q # net profit jumps to $ # # # million
chinese company reports first # <oov> in china 's
0.09408409961146684 0.010017093849006658 0.10010183769250494
8154
us customs holds anti-drug trafficking seminar in phnom penh
vietnam <oov> issues committee on <oov> on < unk >
0.09407256404260815 0.01001586566192365 0.10008956429406299
8155
injury forces button to skip testing
< < 

former academia sinica president wu < unk > dies
former dprk leader becomes president in beijing
0.09399508914820215 0.010003246257879713 0.1000449039074684
8200
life term urged in canada trial of rwandan militia boss
rwandan leader <oov> <oov> war of s. africa
0.09401410949822066 0.010002026647265487 0.10005012363911307
8201
cheney treated for irregular heartbeat
us national official stresses in of china of president president
0.09400264855594366 0.010000807334008476 0.10003792686675674
8202
pakistan campaigns end on angry note with < unk >
former leader pm president dies of of state # # # million dollars
0.09399119040765551 0.00999958831799994 0.10002573306777249
8203
corporations winners in supreme court decision
fed supreme <oov> of # of china s # .
0.09399327693059316 0.009998369599131203 0.10003095322740556
8204
china russia makes progress in talks over oil gas projects
chinese < 's < 's < of
0.09398182271697744 0.009997151177293629 0.10001876325016605
8205
cdc officials to consi

winnie mandela could be ejected from government
dprk leader stresses nation
0.09388239480357764 0.00994262787066677 0.09992444871573358
8250
new pentagon report touts progress in iraq
us policy issues improved situation in south korea
0.09388616572034891 0.009941422995743035 0.09992748743983491
8251
uefa fines juventus basel for champions league crowd trouble
argentina 's < soccer <oov> <oov> # years at the world #
0.09387478971577841 0.009940218412804014 0.09991537942003123
8252
for some local tv stations a gold mine in attack ads
< of a < of the <
0.09388072409343061 0.009939014121743582 0.09993356267912742
8253
government to check major development projects
state ministry chief says china on < of power of xinhua
0.09386935150420064 0.009937810122455665 0.09992145685687677
8254
for guard no holiday from labor
new economy is most a year 's economy in hk
0.09385798166995837 0.009936606414834244 0.0999093539672381
8255
< unk > chief named nigerian oil minister
president <oov> <oov> stat

us treasury to unveil new banking sector plan monday
senate announces economic program to in in south china
0.09388627285003759 0.00993735842672577 0.09990052887967243
8299
turkey on alert on anniversary of sept. # # attacks
dprk army <oov> < unk
0.09387496261357813 0.009936161298858437 0.09988849412134457
8300
engaging quest for tasmanian tiger of myth and nightmare the literate traveler
dr. develops <oov> 's > is a unk > to china open in
0.09386365510182028 0.009934964459386159 0.09987646226225985
8301
sarkozy says eu could send peacekeepers to georgia
france says says < 's < of < of stability with
0.09386329925884875 0.009933767908204732 0.09988163878647939
8302
france calls fatah hamas to keep calm
president says no military
0.09385199587502663 0.009932571645210006 0.09986961065078738
8303
about # , # # # siemens workers demonstrate against massive job cuts
s. china 's bank enterprises enterprises in # #
0.09386745285592334 0.00994642684428945 0.09987478675340102
8304
malaysia on t

s. african leaders to meet burundi rebels
s. president 's mbeki s.
0.09384389511326607 0.009971142897699656 0.09991519593937456
8349
mccain romney look to floriday primary to gain front-runner status
republican senator says # of the of of the house of obama 's <
0.09383265766923383 0.009969948891844346 0.09990323148051462
8350
aid groups assess long-term needs for north korea blast victims aim to get kids back to school
construction of < in in in south
0.09382142291616041 0.009968755171909977 0.09989126988670709
8351
newcastle takes first place with second come-from-behind win over leeds
newcastle wins first round at
0.0938580778397907 0.009967561737793862 0.09992719814363434
8352
bangladeshi pm to attend hague peace conference
myanmar pm says # . <oov> <oov> from # years
0.09386014308198262 0.00996636858939336 0.09993233699265439
8353
iran confirms detention of three americans
military military says in iran in indonesia in indonesia
0.09386220782980179 0.009965175726605881 0.099944313

tough job even tougher for trade center workers as temperature
< damages unk > in central korea 's <
0.09380041440360128 0.009995666001470829 0.09989260398011189
8397
chavez orders investigation of alleged coup plot
pinochet 's <oov> in in south china
0.09378924635807162 0.009994475899553759 0.09988071058756753
8398
cars hit copter on emergency landing in sao paulo
hainan reports casualties on
0.09380784287636233 0.009993286080994288 0.0998985819315452
8399
fda managers attempted to discredit safety officer group charges
china national official resigns on on of china # # # years
0.09379667660533789 0.009992096545691229 0.09988669065884771
8400
breakthrough unlikely as lebanon s political leaders resume all-party talks
mauritanian <oov> leader faces <
0.09378551299231654 0.009990907293543445 0.09987480221673169
8401
wife of poisoned ex-russian spy exposed to radiation friend says
journalist defector 's of china dies in
0.09379135276738078 0.009989718324449842 0.09987991733521799
8402
ka

newman works to protect open space
drought development develops environment to in north china
0.09381550652438293 0.009978040981562816 0.09989015547964188
8446
fiat joins crowd eyeing india 's small car segment
mitsubishi < <oov> > bank of <
0.09380440146915989 0.009976859868757234 0.09987833136085877
8447
chinese experts tell govt to come clean about aids crisis
senior world chief of china state official of state state system
0.09379329904266336 0.009975679035538065 0.0998665100410149
8448
mississippi democrats head to polls with obama riding high
hillary obama becomes obama of the state state race of obama s presidency
0.09379130256667287 0.009974498481806048 0.09986784076303502
8449
redgrave delivers a feast to < unk > kosovo
minister 's minister says
0.0937802043176412 0.009973318207461971 0.09985602348214957
8450
top clinton aide sees swift end to nominating epic
presidential clinton 's first in of of # .
0.09378389809375128 0.009972138212406662 0.09986111116783046
8451
cmgi chief

morocco rejects un report on rights abuse in western sahara
lebanon chief of of of unk > economic situation
0.09372484528738258 0.00997929104163645 0.09979266933257377
8496
< unk > scheme wins international energy award
us < becomes world largest first century of thailand 's # # # # dollars
0.09372166122305912 0.009978116731087894 0.0997907324843743
8497
bosnian police make raids in search of serbian pm 's killer
serbian pm 's <oov> 's brother 's resignation
0.09375475668591086 0.010010560097246643 0.0998378214674918
8498
leaflet seeks resignation of central bank governor
central central < bank resigns of <
0.09379414688260328 0.010009382384294026 0.09987313466496621
8499
noriega fights extradition to france
roh <oov> president president president in vietnam in # years
0.09378311357512385 0.010008204948417742 0.0998613862665819
8500
aussies exact swift revenge over india as martyn hits century
khan knocks khan wins in # nd title
0.09377208286310608 0.01000702778952002 0.099849640631876

iran-eu still short of agreement on tehran 's nuclear program
china <oov> council body
0.09375801288551401 0.010040916034920357 0.09980486220315046
8547
contador expected to seal third tour victory
de de champion quits at
0.09374704575334819 0.010039741521405922 0.09979318775441924
8548
italian professor solves da vinci mystery identifies mona lisa
famed filmmaker author president 's dies
0.09373608118659342 0.010038567282631488 0.09978151603655323
8549
clinton to testify in lewinsky probe monday
clinton 's first first president president 's speech of congress
0.09373681372300009 0.010037393318500669 0.09978655353238068
8550
nba shutdown looms as talks break off
national commission system rules on the of the year
0.09372585291690526 0.01003621962891712 0.09977488531985351
8551
american troops in baghdad seriously wound a woman tareq maher and riyadh mohammed contributed reporting
military defector becomes man 's military is not not out from
0.09371489467384236 0.010035046213784546 0.09

britain 's tony blair in new row over queen mum 's funeral
blair becomes first party on on south china
0.09370555551819416 0.010047263278656344 0.09976237626610555
8598
saudi confirms pardoning indian who faced eye-gouging
<oov> <oov> of in
0.09369465952336647 0.010046094992228593 0.09975077598979554
8599
irish prime minister says u.s. action against terrorists is right
netanyahu defends pm on of cpc of development development
0.09368376606219644 0.01004492697746377 0.09973917841091055
8600
eu warns expansion threatened if latest treaty reform rejected by
bank bank of agency of
0.09367287513380047 0.010043759234267135 0.09972758352850984
8601
israeli officials denounce anti-semitic tv series
international tv organization is out in china
0.09367859224035903 0.010042591762543985 0.09973259684471682
8602
somali islamic courts announces establishment of islamic army
dprk military leader <oov> of in in # years
0.0936806183350674 0.010041424562199662 0.09973553354894221
8603
episode at guant

south korea keep olympic hopes alive
south korean league wins # nd world cup
0.09368939065965547 0.010037718232159912 0.09976901876468985
8646
india can help u.s. health costs ge chief says
world world 's development bank improves system
0.09367855701133682 0.010036557533936952 0.09975748210664584
8647
pakistan raises new objections in mine talks
aga refutes < of
0.09366772586819758 0.010035397104114552 0.09974594811634561
8648
us sheds # # # , # # # jobs unemployment rate holds at # . # percent
us bank economic economy in in <oov> > says contributed contributed
0.09366740695086126 0.010034236942599625 0.09974726200802131
8649
chavez threatens to cut off oil sales to us calls exxon mobil outlaws
argentine president <oov> <oov> in central bank city
0.09365657960061842 0.010033077049299129 0.09973573186560909
8650
dollar dips amid u.s. non-farm employment data
u.s. dollar rates sharply
0.09371510288083101 0.010060812477286957 0.09979355251611007
8651
serbia russia negotiate terms for $ #

asu women fall to trojans
s. korean president < > #
0.09368257936607712 0.010008755841495545 0.09977584613298861
8696
mac urges china to respond positively to president 's remarks
kmt chairman chairman defends of
0.09367180877750893 0.010007605145261757 0.0997643750078871
8697
britain takes new measure to increase pressure on zimbabwe
oecd minister expresses of of china currency policy
0.09366104066522274 0.010006454713586247 0.09975290652012898
8698
defector 's story of ordeal in nkorea camp to be filmed
north china china becomes top # # years of # years
0.09366072434915883 0.010005304546377788 0.09975786103005277
8699
world bank provides ethiopia with # # million dollars to combat food insecurity
china bank bank world
0.09370742464517662 0.010004154643545198 0.0997847056998957
8700
big brother africa winner will be goodwill ambassador
s. korea national president world <oov> on # # years
0.0936966561523422 0.01000300500499733 0.09977323882955556
8701
guinean troops deploy in town afte

host united arab emirates defeats australia # - # to advance to world youth championship quarterfinals
top-seeded champion <oov> world 's world # title
0.09376426610348464 0.00998864658488955 0.09986531111980619
8746
german minister indicates curb on us film imports
publisher world publisher shows on chinese of chinese countries
0.09376624905330261 0.009987504764292283 0.09987022559531343
8747
irish economy grows # . # percent in first quarter
hk 's bank minister 's economy policy
0.09377186008242637 0.009986363204712412 0.09987786034683604
8748
zambia 's opposition leader denies resignation
s. china 's vice vice minister resigns
0.0937774698290292 0.009985221906060444 0.09988549335327258
8749
court orders dresden bridge to go up despite unesco protest
publisher develops second branch in beijing
0.09376675362861449 0.00998408086824693 0.0998740791727957
8750
young liberals in hungary protest the execution of two iranian youths
key parliamentary court ruling <oov> china in china
0.09377

epa offers schools universities rewards for recycling batteries
taiwan unveils development launches # . # million yuan in in # years
0.09376010358460561 0.009969814472183144 0.09989045611622267
8796
senate bill would open gas and oil fields in gulf
senate congress bill passes on control in mexico s # # # # -
0.09377380279018654 0.009968681281176985 0.09991010112834033
8797
maker of rifle distances itself from killing spree
army military forces <oov> unk
0.09376314546517345 0.009967548347743507 0.0998987464174495
8798
gator fans blitz souvenir stores
china <oov> > < < > < unk unk > in the century
0.09375249056227968 0.0099664156717949 0.09988739428717479
8799
recognize then pressurize afghanistan 's taliban government
rice 's > on the of china unk > contributed reporting in asia
0.09375057834620887 0.009965283253243395 0.09988551335762658
8800
four terror suspects released in spain
netherlands arrests two # of the years of the year
0.09373992729209092 0.009964151092001263 0.099874165310

serial killer feared at work in new orleans
rare <oov> becomes the
0.09362865296830614 0.009965644384224056 0.09974940559938507
8844
supreme court orders officials suspended over crackdown during anti-musharraf protest
supreme court says chief says of democracy in democracy
0.09364318988547253 0.009978648494060794 0.09977042808187603
8845
cox news service business budget
financial financial services rates for july #
0.09363260514602577 0.009977520580814036 0.09975915076435801
8846
china reaffirms support for peaceful reunification of korean peninsula
premier reiterates wen stresses stability to china stability
0.09363615028558883 0.009976392922520545 0.09976402169474824
8847
us dismisses syrian elections in advance
<oov> election elections china in china 's < unk > says contributed
0.09364440325387689 0.009975265519093885 0.09977329439902255
8848
au chief warns against rise in somali piracy
s. <oov> council on of < of the unk > < unk > < > contributed reporting
0.09363382196537363 0.00

crash investigation continues as memorials for singer are planned
mysterious disease <oov> dies from # # in # # years
0.0937956802580967 0.009995806557885113 0.0999616496010771
8893
vietnam warns eu anti-dumping measures on shoes would be painful
eu proposes < < < unk
0.09380387261182448 0.009994682802229364 0.09997851731894096
8894
next canadian pm paul martin wants closer us ties economic discipline
economist world 's bank party in central
0.0937933281117557 0.009993559299216524 0.09996727872661644
8895
white house seeks release of intelligence budget reports
state korea chief <oov> <oov> on china of china s house department contributed contributed reporting reporting for <
0.09379527458618521 0.009992436048761402 0.09996407104905118
8896
dutchman deported from kenya free in somalia ; says no links with islamists
liberian dictator of <oov>
0.09378473342248705 0.009991313050778848 0.09995283660636192
8897
preparations for < unk > summit at final stage
world bank chief concludes on eco

russian pilot received conflicting orders before collision
german cargo carrier plane found
0.09358808315958135 0.009941038077359968 0.09974655761786125
8942
chinese president ends state visit to libya
chinese president expresses new tour tour with <
0.09360557107514939 0.00995589900141853 0.09976735007404378
8943
greece iran sign bilateral agreements
algeria algeria military <oov> accord to be cooperation
0.09359510650599621 0.00995478598867382 0.09975619665312996
8944
turkish consortium to buy albanian albtelecom for euro # # # million minister of economy says
dutch <oov> < company to < company 's company of china < unk > group
0.09359954851659617 0.009953673224758253 0.09976536968970105
8945
thousands attend deputy 's funeral
founder becomes <oov> becomes china s party of dies in china
0.09358908695981552 0.00995256070958839 0.09975421898335371
8946
three killed in road accident in nairobi
# th in indonesia in indonesia #
0.09361055826052568 0.009951448443080838 0.09977100997363272


italy 's left-wing leader calls for parliamentary debate on iraq
roh <oov> < unk > chief 's policy policy
0.09366901401857562 0.009979062523065043 0.09981337214882631
8990
vietnam stock market closes higher
taiwan 's stock market ends higher
0.09371420207306644 0.010000194744759543 0.09985787688946812
8991
israeli politicians strike deal to avert early elections
israeli army forces cabinet has a currency of < unk > in # years
0.0937117239644659 0.009999082747123075 0.09985471561898432
8992
britain u.s. wanted peaceful solution in iraq blair 's office says
british minister says <oov> china to # years in iraq
0.09373466017483233 0.010010324911717692 0.09986585029592239
8993
beijing to host week-long hi-tech fair
world world bank published published in china
0.09372423942328426 0.010009212035129396 0.09985474792234864
8994
new swine flu cases hit europe belgium sees first
irish netherlands reports < unk > to to in asia
0.09371382098848843 0.010008099405956972 0.09984364801706602
8995
savi

british artist beryl cook dies at # #
robin herriot dies of age
0.09370205146634616 0.010010645157008726 0.0998686807805246
9039
eu seeks central un role in iraq ; tells arafat to back palestinian government
european <oov> agency sees < on < of development in south china
0.09370090461111633 0.010009537907240226 0.09986768978507372
9040
must credit the san francisco chronicle food community mourns in katrina 's wake
<oov> <oov> < of the of unk > 's new < unk > classic
0.09370634104533951 0.010008430902384305 0.09988122158472393
9041
bank of england says growth outlook uncertain
uk economy sees china of economy in china # years
0.09370703701558773 0.010007324142359714 0.09988399928885035
9042
originality left behind in road trip
the the women of chinese women s # # -year-old team 's role at #
0.09369667577752763 0.010006217627085237 0.09987295506071138
9043
german business confidence dips in march
oecd sees second consecutive round in central china
0.09370013662044885 0.0100051113564797 

arson attack on turkish association
police police find control
0.09369705384338003 0.010044942782962883 0.09990369884384721
9087
russia hails us invite to middle east leaders
china hails middle state east korea state of peace unk > contributed reporting
0.09371213494987424 0.01004383760716984 0.09992021290492721
9088
precede vatican city pope arrives for beatification ceremony of missionary priest
clinton president dies of #
0.09372382778431321 0.010042732674539789 0.09993122278249544
9089
georgians asked to turn off heaters during major soccer match
coastal coastal carrier <oov> <
0.09371351826635212 0.010041627984992484 0.09992023045791261
9090
north south yemen to hold ceasefire talks
lebanon bank <oov> process process in philippines
0.09370321101621283 0.010040523538447721 0.09990924055135103
9091
interstates changed america but now struggle with mid-life crisis
china the war of chinese city is a good to the world 's economy
0.09369290603314716 0.010039419334825325 0.09989825306201

three killed # # # injured as police open fire in northern india
indian official becomes # in pakistan india s <
0.09357138820691546 0.010015394794110639 0.09977532511111019
9136
u.s. official warns of looming crisis for overcrowded asian cities
s. asian development chief of asia-pacific development of regional issues
0.09359397833733711 0.010014298778046498 0.099791764668441
9137
ex-taiwan leader s graft trial to open
new presidential <oov> <oov>
0.09358373717546631 0.010013203001837062 0.09978084533758769
9138
oracle posts plunge in profits revenue
china technologies earnings outlook for second day in china
0.09358565483125794 0.0100121074654036 0.09978360454488117
9139
nashville 's suspended defenseman karalahti works out with former finnish club
dowie degeneres inc. wins <oov> .
0.09357541682066488 0.010011012168667422 0.0997726884958116
9140
bomb discovery raises concerns school killers had help
largest military unk of <
0.0935651810498466 0.01000991711154987 0.09976177483485166
9

fiba orders replay of korac cup game
< 's soccer chief <oov> unk > < > dies at < # # # # # # s <
0.09351624740060781 0.010058459503923707 0.09969907315537753
9186
new research shows dinosaurs coexisted with their ancestors
science science finds a on in china of the < unk > unk > < unk > < unk > < unk > contributed unk > < unk > < unk > contributed reporting
0.09350606931534436 0.010057364765187973 0.09968822214610942
9187
international rugby board orders stricter refereeing on tackles and < unk >
interpol < <oov> unk > on <oov> policy policy
0.0935442604542201 0.0100698734859666 0.09972401311426976
9188
white house condemns slaying of baghdad governor but presses for jan. # # election
official official official praises in of china of iraq economy
0.0935449629285994 0.010068777743476288 0.0997287065995519
9189
dinar firms up slightly after baghdad accepts resolution # # # #
kuwait kuwait exchange exchange currency reserves to # years
0.09354687416221734 0.01006768223942412 0.09973339906

georgia ukraine urge european union to expand into former soviet states
african union chief praises to unk > to be up on #
0.09354197871898053 0.01009351967459571 0.0997384998243855
9233
# # # # economic growth forecast for venezuela
s. korean official official reaffirms stable in # years
0.09354388116969975 0.010092426710905985 0.09974123523317549
9234
venus williams ousted at australian open
davenport beats wins second title title title title
0.09353375298854236 0.010091333983890945 0.09973043605222777
9235
pension transferred to ira still protected
state government files of
0.09352362700034397 0.010090241493473723 0.09971963920952427
9236
possible anthrax finding at mail center closes dozen postal facilities
blackwater department guard <oov> < on # . # million
0.09351350320439243 0.010089149239577481 0.09970884470430566
9237
turkish president vetoes law allowing pro-islamic ex-premier to serve prison sentence at home
indonesian prime president minister <oov> president of corruption


strong demand in asia seen driving chip recovery
china develops economic industry in world # years
0.09348101696743671 0.010084485076045573 0.09964972947231572
9282
movie studios win $ # # # million court judgment against < unk >
dprk < 's <oov> <oov> unk > unk > unk > unk > to unk > published from < article
0.09348710475104642 0.010089067916833092 0.09967130963932645
9283
world court says united states violated rights of # # mexicans on death row
us justice supreme chief justice justice <oov> death in # # years
0.09350396128257565 0.010097772281183363 0.09967254171272136
9284
middle east economic briefs
syria hails state state process process of palestinians
0.09349389193503285 0.010096684862242895 0.09966180807695647
9285
reyna one of u.s. bright spots
results <oov> <oov> < unk > < unk > in # # # #
0.09348382475597233 0.010095597677483312 0.09965107675273155
9286
asean decides to remove tariffs for # , # # # farm products
asian economic system develops agricultural in > area
0.093473

emergency state declared in southern mozambique
water shortage in in unk > city 's central region
0.09345919842056086 0.010057631336346712 0.09963731033898927
9331
georgia on my mind goods sold from < unk > vending machines
professor unk < <
0.09350275791928361 0.010056553694502037 0.09966235009287275
9332
british defense secretary calls on saddam to make his choice
us warns on of 's 's of arms development
0.09350464439380597 0.010055476283564123 0.09966697777583441
9333
british defense secretary to visit france
henry henry congratulates hails state to visit # # # #
0.09351410486915941 0.010065111476249331 0.09968690785702762
9334
mexico 's fox faces high expectations
mexico s presidency presidency is little of nation s president s president of nation s legacy
0.0935107828784922 0.010064033379475954 0.09968813152918422
9335
credit insurance offer has strings attached
bank credit rules for to be a to be built to grow
0.09350969286568882 0.010062955513632591 0.09968935493922833
9336
anna

lawrence summers america 's man to save world finances talks
central america chief says economic
0.09355089422092183 0.010041336136302228 0.0996713805649073
9381
syria urges for eu role in solving mideast conflict
rok issues council 's regional policy
0.09354092396682176 0.01004026597365315 0.09966075801555584
9382
angolan rival armies start to disengage
vanuatu military official <oov> in china to promote development stability
0.09354161227415693 0.010039196039086477 0.09966079416666244
9383
nigeria willing to receive un mission on bakassi
dprk 's <oov> of in in s. china
0.0935316451337974 0.010038126332529302 0.09965017500905278
9384
myanmar business organization to help trading companies to operate globally
myanmar chief < <oov> banking on economic development
0.09353499782449272 0.010037056853908748 0.09965287582143197
9385
dole attacks clinton for dreamy foreign policy
clinton presidential concedes 's has than # . # percent from <
0.09353471808582058 0.010035987603151966 0.09965291

< unk > criminals extradition treaty to take effect
s. s. dprk <oov> military agency <oov> in dprk
0.09363376585342341 0.010061621139234528 0.09973788607476106
9430
volcano 's eruption eases but high alert continues
pla earthquake official warns on on <
0.09362383860937618 0.010060554385508994 0.09972731165935873
9431
armstrong pushed bush for cancer research during their bike ride
us <oov> # <oov> of obama 's # # # # m # at
0.09361391347011938 0.01005948785795832 0.09971673948596115
9432
us military says it s investigating atheist soldier s claims of harassment in iraq
pentagon <oov> department rules on trial for <oov> article
0.09360399043498369 0.010058421556510582 0.09970616955385536
9433
political strategists launch nonpartisan political web site
< of unk < economic ties in philippines
0.09359406950330007 0.010057355481093887 0.09969560186232873
9434
mine blast derails train in eastern turkey
weather 's largest east of central korea province
0.09358415067439976 0.01005628963163637

house committee widens probe of vytorin safety
chinese company chief calls of < of drug and china and china 's <
0.09368179033265965 0.010099792086074592 0.09981563090203958
9478
unemployment rate falls to # . # percent in january ; job losses
fed economy slows down on high in central asia
0.09368362886860675 0.010098726707162558 0.09981682240839075
9479
afghanistan 's president foresees constitution ratified this year with strong bill of rights
afghan national national president < unk > dies # # # # # # - # #
0.09367995203077772 0.010097661552990302 0.09981801366339578
9480
anna nicole smith s former companion stern says book s claim of gay sex is absurd
california founder <oov> a first
0.09367007226363674 0.010096596623486716 0.0998074865579683
9481
top gambian opposition member gets one year in jail
anti-corruption ruling party chief of president president in indonesia
0.09367191145364491 0.010095531918580728 0.0998101431553997
9482
lehmann martyn in fine form for australia
world wo

s. korea to pursue multilateral bilateral nuclear talks with dprk
dprk <oov> military issues on dprk korea in china
0.0937377578747981 0.010048906180739062 0.09990603479037068
9526
worries over myanmar refugee flood at crammed border camp
drought intensifies in in unk > towns towns
0.093727919739001 0.010047851509645366 0.0998955492703465
9527
supreme court affirms ban on aiding groups tied to terror
ruling supreme court calls of to on on military system
0.09376006079055531 0.010058457371708697 0.09992004688647232
9528
red shirts ready to negotiate with thai government
taiwan military forces forces forces of democracy in china 's ambon
0.09375022237913971 0.010057401919728454 0.09990956209666263
9529
hang seng china enterprises index higher march # #
hong enterprises exchange index ends # # #
0.09379284642463556 0.010071335372769835 0.0999515398994014
9530
world leaders race to consolidate mideast truce
islamic < < < < unk
0.09378300663797749 0.010070278791215831 0.09994105400558066
95

new general strike cripples greece
< traffic < > shipping in in cyprus
0.0938107032440347 0.010094620709488281 0.10006599393797261
9575
albanian opposition threatens to boycott presidential elections
kmt leader <oov> says of nation of the president says says says contributed contributed reporting reporting for # # # #
0.0938009078275949 0.010093566661173621 0.10005554536389534
9576
tiznow to go for second straight classic win
<oov> unk world < < unk unk > race in # st round
0.09379111445655422 0.010092512832956752 0.10004509897160427
9577
u.s. treasury secretary predicts # percent growth in # nd and # rd quarters
u.s. treasury secretary secretary to
0.09385091981746978 0.010154096243246662 0.1001181707850533
9578
# million identified themselves as multiracial in census
world unk > leads most than than
0.09384112327051597 0.0101530363167077 0.10010772003653713
9579
no . # stanford # # washington st # #
kerr # - # in
0.09387307806403745 0.01015197661142467 0.10012336477925328
9580
horse 

three killed ten wounded in blast
lebanese bomb killed at southern china port town
0.09387305949782704 0.01015426231509056 0.10012677953125411
9624
< unk > staying cool ahead of second world cup
world 's cup < unk > 's world cup < unk > dies in the < unk > of <
0.09389447305906765 0.010169610380177514 0.10016832048496996
9625
cox news service commentary budget
new international daily daily news service budget
0.09392923746703612 0.010185866436715011 0.10020985280859258
9626
< unk > < unk > # # archaeologist who worked sites in turkey
< discovers of unk of turkey of south china
0.09395410286959804 0.010184808494625615 0.10024395766689782
9627
sunni anger over saddam hanging spills into streets ; mob breaks locks off revered shiite shrine in samarra
ali dictator of 's
0.09394434545939247 0.01018375077227702 0.10023354703675275
9628
finnish pm seen winning close election race
latvian president 's election of in in south korea
0.09394612809341651 0.01018269326960077 0.10023611883872194
962

uaw focusing on talks with chrysler
general <oov> announces <oov> < unk > to financial financial sector
0.0940017474088159 0.010135331905556115 0.10026032720276314
9674
# die of encephalitis meningitis in eastern nepal
disease reports # dead #
0.09401270216828171 0.010134284434296757 0.10027580257200634
9675
armstrong takes control after time-trial victory
former <oov> de champion at # < unk > in # # #
0.09400298710140476 0.010133237179524173 0.10026544029004168
9676
malta 's ruling nationalists re-elected in tightest election ever
lesotho s election of <oov> presidential
0.09401049523113872 0.010132190141171256 0.10027230133843769
9677
philippine president wins release of more than # # # filipinos from saudi jails
philippines declares president in of of currency in iraq
0.09402374161268547 0.01013114331917093 0.10029146054748572
9678
< unk > leak in east china cuts water supplies to # , # # # students residents
dprk provincial agency agency reports
0.09401402841623788 0.01013009671345

putin s party wins in regional elections across russia
dprk korea ruling official official
0.09390177658907485 0.010153773501014554 0.10012832593481102
9724
british prosecutors testify that gang tried to buy radioactive material for bomb
senior senior official <oov> in # years
0.09389212187217283 0.010152729518544781 0.10011803102159544
9725
merkel sarkozy urge european effort for more transparency on financial markets
eu financial chief says financial markets to improve economic ties
0.09390303046455772 0.010163108708592336 0.10013058414087174
9726
kenyan death squads blamed for activists deaths
boxer boxer < dies unk >
0.09389337760369583 0.010162063981134627 0.10012029111207436
9727
us military official visits philippines
us military <oov> committee concludes on china in asia
0.09390656794644621 0.01017386765427872 0.10013284141848922
9728
customs officials search vehicles of three tour teams during # # th stage
tour of cycling tour of france de china de france < unk > < unk > in be

pre-historic animal fossils found in north china
rare ancient china of
0.09390116331653993 0.010139432059714393 0.10015125720569978
9772
u.s. may protect prairie dog
southern develops company to unk > of <
0.0938915560765853 0.01013839467153558 0.10014101050453283
9773
broadway stock is in a free fall
< company 's <oov> <oov> <oov> securities bonds in late #
0.09389125096505921 0.010137357495610105 0.10014213276546445
9774
nuggets backup eduardo najera a star in mexico
north korea national < in national history of history history
0.09389187583709634 0.010136320531872828 0.10014650214047383
9775
strong quake shakes eastern aegean sea
moderate tremor quakes in <oov> of no
0.09388227249498352 0.010135283780258644 0.10013625906978338
9776
extortion case filed against former bangladeshi pm sheikh hasina
fiji pm defends # <oov> . #
0.09388971618430358 0.01013424724070247 0.10014647217480796
9777
bush to warn of energy crisis
president president <oov> in asia 's economy
0.0938801150271112 0.0

iran refuses immediate tougher nuclear probes but agrees to more iaea talks
u.n. agency <oov> china to < unk > on u.s. military issues
0.09390610026877035 0.010219101085121573 0.1002610882299437
9822
new zealand overwhelms ireland # # - # # in rugby test
first zealand new leader loses second time of
0.09392198930579511 0.010218060867177239 0.10026360644164667
9823
northeast china 's harbin airport gets new name
< < < > < >
0.09391242981578943 0.010217020860982107 0.10025340149442614
9824
a stylish facelift with a few wrinkles
dprk s > unk unk unk > in # st china province
0.09390287227153787 0.010215981066471525 0.10024319862433716
9825
chinese president expresses sympathy with bolivian landslide victims
vice president praises chinese guests in # # years
0.09391593010708796 0.01021494148358087 0.10024571788773144
9826
khartoum southern sudanese rebels agree to continue peace talks
dprk <oov> cease-fire holds <oov> 's of military power in the of of < unk > < unk > < unk > < > contributed

philippines adopts < unk > housing scheme
indonesian currency currency sector improves to combat control of indonesian article
0.09379728289758402 0.010242328343657145 0.10013026783441725
9872
hong kong stocks open higher dec. #
hong kong index closes higher
0.09384854912374388 0.010266610060454425 0.10018089268069694
9873
britain freezes assets of zimbabwe 's ruling party
philip publishes first of <oov> in indonesia
0.09385351201931182 0.010265570403739443 0.10018521432628442
9874
tibetan activists to step up efforts for detained scholar
burma denies accusation in in # china
0.09384400882854436 0.010264530957566523 0.10017507001539679
9875
afghan leader driven from kabul makes stand in north
< military unk war
0.09383450756208404 0.010263491721871722 0.1001649277586371
9876
canadian firm resumes diamond mining in sierra leone
marine national <oov> military < unk > military in south
0.09383513172612919 0.010262452696591111 0.10016603589625124
9877
china devises new rules to invigorate 

leeds finance director quits
chief 's <oov> resigns at <oov> .
0.0937876890696154 0.010243020459148182 0.10011818971384252
9921
congress takes another swing at baseball anti-monopoly exemption
us senate takes rules rules rules in
0.09379263409166393 0.010241988208774389 0.10012825540065964
9922
jordanian premier meets with turkish ambassador
lebanon prime prime minister <oov> china 's democracy
0.0937831829999578 0.010240956166431708 0.10011816589487561
9923
pakistan taliban chief mehsud dead
pakistan says it 's is dead from the to > military forces says says no injuries reported in # years
0.09378380937950441 0.010239924332057256 0.10011927349640873
9924
special envoy of president clinton meets general abacha
senior official official <oov> <oov> in china
0.09377436108115869 0.010238892705588178 0.10010918692845625
9925
worldwide server workstation shipments rise in # rd quarter
supercomputer supercomputer electronics sales
0.09376491468636862 0.010237861286961646 0.10009910239265203
9

macao follows hk 's move to provide deposits guarantee to banks
hksar issues chief defends executive 's # . # # yuan yuan in china
0.09375358085001943 0.01020987639826465 0.10010706244214088
9970
world bank finances education project in sri lanka
china 's <oov> development bank of <oov> development
0.09375671426549777 0.010208852543832412 0.10011373709158176
9971
chinese vice-premier hails achievements in building qinghai-tibet railway
china <oov> underscores <oov> china of control in tibet
0.09375845440355508 0.010207828894725442 0.10011802300412218
9972
mccain hammers dishonest obama on national security
mccain acknowledges concedes in his than
0.093765764230331 0.010206805450881975 0.10012469521623993
9973
most monks in cyberspace are traditional
china unk < > women in
0.0937730725914775 0.010205782212240283 0.1001313660905708
9974
tropical storm karen makes landfall in nova scotia expected to
drought tropical hits <oov> of the north of china s < of the unk > < unk > contributed
0.0

In [35]:
dct = Article.vocab.stoi
sample = test[500].article
print(sample)
print(test[500].title)
sample = [dct['<bos>']] + [dct[ele] for ele in sample] + [dct['<eos>']]

test_sample = {}
test_sample["src"] = torch.tensor(sample, dtype=torch.long, device=device).reshape(-1, 1)
test_sample["src_len"] = [len(sample)]

print(summary(model, test_sample, Article.vocab.itos), end="\n\n")

['the', 'pakistani', 'government', 'saturday', 'handed', 'over', '#', '#', 'more', 'buses', 'to', 'the', 'afghan', 'government', 'as', 'part', 'of', 'its', 'contribution', 'to', 'the', 'reconstruction', 'in', 'afghanistan', ',', 'according', 'to', 'the', 'associated', 'press', 'of', 'pakistan', '.']
['pakistan', 'hands', 'over', '#', '#', 'more', 'buses', 'to', 'afghanistan']




<bos> military military <oov> operation from asia in india <eos>



In [None]:
dct = Article.vocab.stoi
r1 = 0
r2 = 0
rl = 0
rouge = Rouge()
for i in range(10000):
    sample = test[i].article
    label = ' '.join(test[i].title)
    sample = [dct['<bos>']] + [dct[ele] for ele in sample] + [dct['<eos>']]
    test_sample = {}
    test_sample["src"] = torch.tensor(sample, dtype=torch.long, device=device).reshape(-1, 1)
    test_sample["src_len"] = [len(sample)]
    predict = ' '.join(summary(model, test_sample, Article.vocab.itos))
    score = rouge.get_scores(label, predict)[0]
    r1 += score['rouge-1']['r']
    r2 += score['rouge-2']['r']
    rl += score['rouge-l']['r']
#     print('--------------------------')
    print(r1/(i+1),r2/(i+1),rl/(i+1))
    print(i)
    
print(r1/10000)
print(r2/10000)
print(rl/10000)

In [39]:
total = pd.read_csv('total.csv')

In [48]:
print('Article:',total.iloc[10,:]['article'])
print('Title:',total.iloc[10,:]['title'])

Article: new zealand share prices closed #.## percent higher monday in subdued trading ahead of a us holiday , dealers said .
Title: new zealand stocks close #.## percent higher


Article: australia 's current account deficit shrunk by a record #.## billion dollars -lrb- #.## billion us -rrb- in the june quarter due to soaring commodity prices , figures released monday showed .