# Loading/Processing Corpus

## Issues:

* For finding nuggets, get sent_id from update_id (maybe from matches file) and +1 that number
* Try alternative using char matching from start, end columns. See below in test func how. Can use Spacy or splitlines sentences and char match with their lens
* Maybe add matches to filepath sources?
* Functionality for multiple nuggets/matches source files

In [16]:
from bs4 import BeautifulSoup as bs
import gzip
import pandas as pd
import numpy as np
from tqdm import tqdm
from tqdm import tqdm_notebook
# from tqdm.notebook import tqdm
import os
from collections import OrderedDict
import pickle
import warnings
import copy
import spacy
from sentence_transformers import SentenceTransformer

### Meta File Variables

In [5]:
corpus_path = "/nfs/trects-kba2014-filtered" # directory of corpus of gzipped html files
topics_path = corpus_path + "/test-topics.xml"
doc_tags = ['topic_id','streamid', 'docid', 'yyyymmddhh', 'kbastream', 'zulu', 'epoch', 'title', 'text', 'url'] # doc fields
topic_tags = ['id', 'title', 'description', 'start','end','query','type'] # topic fields
test_file_addr = corpus_path + "/1/2012-02-22-15.gz"
proj_dir = '/nfs/proj-repo/AAARG-dissertation'
csv_dir = proj_dir + '/' + 'load_data'
# csv file addresses
corp_csv_name = 'corpus_loaded.csv.gz'
corp_csv_path = csv_dir + '/' + corp_csv_name
topics_csv_name = 'topics_loaded.csv.gz'
topics_csv_path = csv_dir + '/' + topics_csv_name
# nugget/update dataframes
nugget_dir = "/nfs/TemporalSummarization/ts13/results"
updates_sampled_path = nugget_dir + "/updates_sampled.tsv"
nuggets_path = nugget_dir + "/nuggets.tsv"
nug_matches_path = nugget_dir + "/matches.tsv"
# saving nugget and update files
nugget_csv = 'nugget_df.csv.gz'
update_csv = 'update_df.csv.gz'
nugget_csv_path = csv_dir + '/' + nugget_csv
update_csv_path = csv_dir + '/' + update_csv
# supervised input/labels
# supervised_csv = 'supervised_df.csv.gz'
# supervised_csv_path = csv_dir + '/' + supervised_csv

In [249]:
def convert_to_list(item):
    if type(item) is not list:
        item = [item]
    return item

def file_exists(path):
    """Check if path or list of paths has item that does not exist"""
    exists = []
    path = convert_to_list(path)
    for p in path:
        exists.append(os.path.exists(p))
    return all(exists)

In [272]:
class FilePathHandler:
    """Paths will be in the format:
    
    proj_dir/dataset_dir/corpus_name/file_purpose/instance_identifier+split_identifier+sfile_type
    """
    def __init__(self, proj_dir, dataset_dir="dataset", compression='gzip'):
        self.proj_dir = proj_dir
        self.dataset_dir = proj_dir + '/' + dataset_dir
        self.create_dir_if_not_exists(self.dataset_dir)
        self.path_df_path = self.dataset_dir + '/' + 'file_path_df.csv.gz'
        self.corpus_sources_pickle_path = self.dataset_dir + '/' + 'corpus_sources.pickle'
        self.compression = compression
        self.file_purposes = ["topics", "corpus", "nuggets", "embed_labels", "updates"]
#         self.path_df_cols = ["corpus_name", "file_purpose", "split_identifier", "num_splits",
#                             "instance_identifier", "file_type", "path", "exists"]
        self.path_df_cols = {"corpus_name":str, "file_purpose":str, "split_identifier":str, "num_splits":int,
                            "instance_identifier":str, "file_type":str, "path":str, "exists":bool}
        self.corpus_sources_keys = ['corpus_name', 'dir_path', 'nuggets_path', 'matches_path', 'topics_path']
        
        # load meta files
        self.load_corpus_sources()
        self.load_path_df()


    def paths_in_corpus_name(self, corpus_name, selection=None):
        def get_identity_paths(ident_df, exists=True):
            ident_paths = list(ident_df[ident_df['exists']==exists]['relative_path'])
            if len(ident_paths) == 0:
                ident_paths = None
            return ident_paths
        
        if selection is None:  # get all file purposes if none selected
            selection = self.file_purposes
        
        paths = {}
        name_df = self.path_df[self.path_df['corpus_name'] == corpus_name]
        for file_purpose in selection:  # loop for each file purpose/stage of generation
            purp_df = name_df[name_df['file_purpose'] == file_purpose]
            idents = purp_df['instance_identifier'].unique()  # segregate any unique identifiers/instances
            if len(idents) == 0:
                # case where corpus_name or file_purpose not in paths_df
                # will create same dict with None entries
                idents.append(None)  
            for ident in idents:
                ident_df = purp_df[purp_df['instance_identifier'] == ident]
                paths[file_purpose][ident]['exists'] = get_identity_paths(ident_df, exists=True)
                paths[file_purpose][ident]['not_exists'] = get_identity_paths(ident_df, exists=False)
        return paths
        
    
    def get_path(self, corpus_name, file_purpose, inst_identifier, file_type, add_path=True, exists=False,
                split_identifier=None, num_splits=None, warn=False):
        # do check here make sure filename compatible, or elsewhere
        path = self.dataset_dir + '/' + corpus_name + '/' + file_purpose + '/' + str(inst_identifier)
        if split_identifier is not None:
            path += '_' + str(split_identifier)
        path += file_type
        
        if add_path:
            self.add_path_to_df(corpus_name, file_purpose, split_identifier, num_splits, inst_identifier,
                            file_type, path, exists, save=True, warn=warn)
        return path
            
    def add_path_to_df(self, corpus_name, file_purpose, split_identifier, num_splits, inst_identifier,
                       file_type, path, exists, save=True, warn=False):
        if not (self.path_df['path'] == path).any():  # check if row exists
            # create appropriate dir if needed
            new_dir_path = self.dataset_dir + '/' + corpus_name + '/' + file_purpose
            self.create_dir_if_not_exists(new_dir_path)
            # add to path_df
            if num_splits is None:
                num_splits = 1
            
            row = pd.DataFrame({'corpus_name': pd.Series([corpus_name], dtype=str),
                                  'file_purpose': pd.Series([file_purpose], dtype=str),
                                  'split_identifier': pd.Series([split_identifier], dtype=str),
                                  'num_splits': pd.Series([num_splits], dtype=int),
                                  'instance_identifier': pd.Series([inst_identifier], dtype=str),
                                  'file_type': pd.Series([file_type], dtype=str),
                                  'path': pd.Series([path], dtype=str),
                                  'exists': pd.Series([exists], dtype=bool)})
            
            self.path_df = self.path_df.append(row, ignore_index=True)
            if save:  # save new path_df
                self.save_path_df()
        else:
            if warn:
                warnings.warn("Path already exists in dataframe: " + str(path))
            
    def update_path_exists(self, path, save=True):
        self.path_df.loc[self.path_df['path'] == path, 'exists'] = True
        if save:
            self.save_path_df()
        

    def create_dir_if_not_exists(self, dir_path, warn=True):
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
            warnings.warn("Created new directory at " + str(dir_path))
            return True
        return False
    
    def search_path_df(self, search_dict, df_slice=None):
        if df_slice is None:
            df_slice = self.path_df
        for col_name, value in search_dict.items():
            df_slice = df_slice[col_name == value]
        return df_slice
    
    def source_dict_correct(self, source_dict):
        # check has all appropriate keys
        for key in self.corpus_sources_keys:
            if key not in source_dict:
                raise Exception(str(key) + " is missing from corpus_source dict")
        false_paths = []
        for path_type, path in source_dict.items():
            if path_type == "corpus_name":  # dict entry not a path, don't check
                continue
            if not file_exists(path):
                false_paths.append(str(path_type) + " does not exist at " + str(path))
        if len(false_paths) > 0:
            error_str = "\n".join(false_paths)
            raise FileNotFoundError(error_str)
        else:
            return True
        
    
    def create_corpus_source_dict(self, corpus_name, dir_path, topics_file_path, nuggets_file_path,
                                 matches_file_path):
        s_dict = {"corpus_name":corpus_name, "dir_path":dir_path, 
                  "topics_path":topics_file_path, "nuggets_path":nuggets_file_path,
                 "matches_path":matches_file_path}
        self.source_dict_correct(s_dict)
        return p_dict
    
    def add_corpus_source(self, corpus_source_dict, overwrite=False):
        """Add a corpus directory to load from and its meta files"""
        # check paths exist
        self.source_dict_correct(corpus_source_dict)
        corpus_name = copy.deepcopy(corpus_source_dict["corpus_name"])
        # store new entry
        if corpus_name in self.corpus_sources and overwrite==False:
            warnings.warn(str(corpus_name) + " is already present in corpus source dictionary. \n Proceeding with dict entry")
        else:
            del corpus_source_dict["corpus_name"]  # remove corpus_name from dict to add as a key
            self.corpus_sources[corpus_name] = corpus_source_dict
            # create folder for outputting new source files
            new_corpus_dir = self.dataset_dir + '/' + corpus_name
            self.create_dir_if_not_exists(new_corpus_dir)
            self.save_corpus_sources()
            
    def get_corpus_sources(self, corpus_names=None):
        """Retrieve file paths from corpus_load dicts
        Parameters:
            corpus_names: list of corpus names retrieve, if None then retrieve all
        
        Returns:
            A dictionary where keys are the corpus names and values are target file paths
        """
        if corpus_names is None:
            corpus_names = self.corpus_sources.keys()
        corpus_paths = {}
        for name in corpus_names:
            corpus_paths[name] = self.corpus_sources[name]
        return corpus_paths
    
    def save_corpus_sources(self):
        with open(self.corpus_sources_pickle_path, 'wb') as handle:
            pickle.dump(self.corpus_sources, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
    def load_corpus_sources(self):
        if os.path.exists(self.corpus_sources_pickle_path):
            with open(self.corpus_sources_pickle_path, 'rb') as handle:
                self.corpus_sources = pickle.load(handle)
            return True
        else:
            self.corpus_sources = {} # create empty dictionary
            return False
        
    def save_path_df(self):
#         self.force_path_df_col_type()
        self.path_df.to_csv(self.path_df_path, compression=self.compression)
        
    def load_path_df(self):
        """File containing info about file paths to systematically load files"""
        if os.path.exists(self.path_df_path):
            self.path_df = pd.read_csv(self.path_df_path, compression=self.compression)

            for col in self.path_df.columns:
                if "Unnamed" in col:
                    del self.path_df[col]
            return True
        else:
            path_df_dict = {}
            for col_name, data_type in self.path_df_cols.items():  # ensure columns don't infer wrong typing
                path_df_dict[col_name] = pd.Series([], dtype=data_type)
            self.path_df = pd.DataFrame(path_df_dict)  # create empty dataframe
#             self.force_path_df_col_type()
            return False
        
#     def force_path_df_col_type(self):
#         """stop path_df autoconverting ints to floats in these columns"""
#         # make sure these columns are type object
#         obj_cols = ["split_identifier", "num_split", "instance_identifier"]
#         for col in obj_cols:
#             if col in self.path_df.columns:
#                 self.path_df[col] = pd.Series([], dtype=object)  # handles Nones better

In [4]:
# test_path_f = FilePathHandler(proj_dir)
# del test_path_f.corpus_sources['tr14_init_filtered']
# print(test_path_f.corpus_sources)

In [5]:
# # def supervised_path_generator(identifier, base="supervised_df", handle=".csv.gz"):
# #     fn = base + "_" + identifier + handle
# #     path = csv_dir + '/' + fn
# #     return path

# def create_dir_if_not_exists(dir_path, warn=True):
#     if not os.path.exists(dir_path):
#         os.makedirs(dir_path)
#         warnings.warn("Created new directory at " + str(base_dir))
#         return True
#     return False

# def save_path_generator(corpus_dir, file_purpose_dir, file_name, file_type, identifier=None, part=None):
#     proj_dir = '/nfs/proj-repo/AAARG-dissertation'
#     base_dir = proj_dir + '/' + 'dataset'  # base folder for storing corpus files
#     create_dir_if_not_exists(base_dir)
#     # select appropriate corpus directory (e.g. trects-filtered-2014)
#     path = base_dir + '/' + corpus_dir
#     create_dir_if_not_exists(path)
#     # ensure is from pre-selected file_purposes
#     file_purposes = ["corpus", "nuggets", "topics", "embed_labels", "updates"]
#     if file_purpose_dir not in file_purposes:
#         raise ValueError("File purpose must be in defined file purposes")
#     path += '/' + file_purpose_dir
#     create_dir_if_not_exists(path)
    
#     # create file name
#     path += '/' + file_name
#     if identifier is not None:
#         path += '_' + identifier
#     if part is not None:
#         path += '_' + part
#     path += file_type
    
#     return path
        

## Markup Loading Functions

In [7]:
# open and get beautifulsoup object from markup file
def open_markup_file(addr, gz=True, xml=False, verbose=False):
    markup = None
    f = None
    
    if verbose:
        print(addr)

    if gz:
        f = gzip.open(addr)
        if verbose:
            print("gz file opened")
    else:
        f = open(addr)
        
    if xml == False:
        markup = bs(f, "lxml")  # using lxml parser for speed
    else:
        markup = bs(f, "xml")
        
    f.close()
    return markup


# parse markup and return 2D list [entry:tags]
def parse_markup(markup, entry_list, tag_list, find_tag, topic_id=None):
    for e in markup.find_all(find_tag):
        entry = OrderedDict.fromkeys(tag_list)
        if topic_id is not None:
            entry['topic_id'] = topic_id
        for c in e.children:  # children use direct children, descendants uses all
            if str(c.name).lower() in entry:
                entry[c.name] = str(c.string)
            elif c.name is None and c.string != '\n':  # inner body of <doc> tag
                entry['text'] = str(c.string)
        entry_list.append(list(entry.values()))
        
            
# recursively find gz html files from a directory address
def search_dir(path):    
    # separate the subdirectories and html files 
    # (help maintain sequential order of insertion)
    gz_paths = []
    for f in os.scandir(path):
        if os.path.splitext(f.path)[-1].lower() == ".gz":
            gz_paths.append(f.path)
    
    return gz_paths


def list_to_dataframe(markup_list, tags):
    return pd.DataFrame(markup_list, columns=tags)

## Dataframe Loading

### Control Functions

In [237]:
def topic_id_as_int(topic_id):
    try:
        topic_id = int(topic_id)
        return topic_id
    except ValueError:  # non-standard topic_id, e.g. 'TS14.18'
        split = topic_id.split(".", 2)  
        try:
            match = split[0].upper()
            if match == "TS14" or match == "14":
                topic_id = int(split[1])  # extract int '18'
                return topic_id
            else:
                raise ValueError()
        except ValueError:
            return None  # no discernable topic_id

def convert_df_topic_id(df, col_name="query_id", remove_null=True, in_place=False):
    conv_df = df
#     print("df[" + col_name + "] unique entries: " + conv_df[col_name].unique())
    conv_df[col_name] = conv_df[col_name].apply(lambda x: topic_id_as_int(x))
    if remove_null:
        conv_df = conv_df[conv_df[col_name].notna()]
        conv_df = conv_df.astype({col_name:int})
    if in_place:
        df = conv_df
        return df
    else:
        return conv_df

def find_duplicates(df):
    seen = set()
    seen_twice = set()
    for docid in df['docid']:
        if docid not in seen:
            seen.add(docid)
        else:
            seen_twice.add(docid)
    return seen_twice

def load_df_control(save_path, load_func, save=True, force_reload=False, compression='gzip', 
                    name=None, verbose=True, path_handler=None):
    df = None
    save_path = convert_to_list(save_path)  # allows loading groups of saved files same way as singular paths
    if name is not None and verbose:
        print("Loading " + name)
    if not file_exists(save_path) or force_reload:
        if len(save_path) > 1:
            raise ValueError("There should only be one path to save to if no save paths already exist")
        df = load_func()
        if verbose:
            print("df loaded")
        if save:
            # in case of loading df from original file, should only be one save_path
            df.to_csv(save_path[0], compression=compression)
            if path_handler is not None:
                path_handler.update_path_exists(save_path[0])
            if verbose:
                print("saved at: " + str(save_path[0]))
    else:
        df_list = []
        for path in save_path:
            df_list.append(pd.read_csv(path, compression=compression))
        if len(df_list) > 1:
            df = pd.concat(df_list)
        else:
            df = df_list[0]
        if verbose:
            print("loaded from file")
    for col in df.columns:  # bug loading from certain files produces nonsense columns
        if "Unnamed" in col:
            del df[col]
    if verbose:
        print(display(df[0:4]))
    return df

### Dataframes from Corpus Files

#### Topics

In [278]:
# load topics into dataframe
def __load_topics(path, verbose=True):
    topics_list = []
    path = convert_to_list(path)
    for p in path:
        parse_markup(open_markup_file(p, gz=False, xml=True), 
                        topics_list, topic_tags, "event")
    df = list_to_dataframe(topics_list, topic_tags)
    
    df = convert_df_topic_id(df, col_name='id', remove_null=True)
    # drop any duplicates found over the files
    prev_size = len(df)
    df = df.drop_duplicates(subset=['id'], keep='first')  # no duplicate documents
    if verbose:
        num_removed = len(df) - prev_size
        print(str(num_removed) + " duplicate documents removed from topics df")
    return df

def load_topics(saved_path, load_path=None, save=True, force_reload=False, verbose=True, path_handler=None):
    topics = load_df_control(saved_path, lambda: __load_topics(load_path, verbose=verbose), 
                             save=save, force_reload=force_reload, name="topics", verbose=verbose, path_handler=path_handler)
    return topics

# topics = load_topics()

#### Main Corpus Files

In [212]:
# load all formatted gzipped html files into dataframe

def __load_corpus(corpus_dir, doc_tags=None, topic_ids=None, split_every=None, split_start_doc=None,
                 drop_duplicates=True, verbose=True):
    if doc_tags is None:
        doc_tags = ['topic_id','streamid', 'docid', 'yyyymmddhh', 'kbastream', 'zulu', 'epoch', 'title', 'text', 'url'] # doc fields
    df = pd.DataFrame(columns=doc_tags)
    
    for topic_id in topic_ids:
        print("Loading topic " + str(topic_id) + "...")
        topic_list = []
        topic_path = corpus_dir + '/' + str(topic_id)
        gz_paths = search_dir(topic_path)
        
        if split_every is not None and split_start_doc is not None:
            end_split = split_start_doc + split_every
            if end_split >= len(gz_paths):  # last section
                end_split = len(gz_paths) - 1
            gz_paths = gz_paths[split_start_doc:end_split]
        
        for gz_path in tqdm(gz_paths, position=0, leave=True):
            parse_markup(open_markup_file(gz_path, verbose=False),
                             topic_list, doc_tags, "doc", topic_id=topic_id)
        topic_df = list_to_dataframe(topic_list, doc_tags)
        df = df.append(topic_df)
    df['epoch'] = pd.to_numeric(df['epoch'])
    if drop_duplicates:
        prev_size = len(df)
        df = df.drop_duplicates(subset=['streamid'], keep='first')  # no duplicate documents
        if verbose:
            num_removed = len(df) - prev_size
            print(str(num_removed) + " duplicate documents removed from corpus")
    return df

def load_corpus(save_path, corpus_dir=None, doc_tags=None, topic_ids=None, split_every=None, split_start_doc=None,
                save=True, force_reload=False, verbose=True, path_handler=None, drop_duplicates=True,
               compression='gzip'):
    
    corpus = load_df_control(save_path, 
                             lambda: __load_corpus(corpus_dir, doc_tags=doc_tags, 
                                                   topic_ids=topic_ids, split_every=split_every,
                                                   split_start_doc=split_start_doc, 
                                                   drop_duplicates=drop_duplicates, verbose=verbose), 
                             save=save, force_reload=force_reload, name="corpus", verbose=verbose, path_handler=path_handler)
    # remove duplicate documents from corpus and save if required
    if drop_duplicates:
        if corpus['streamid'].duplicated().any():  # if there are any duplicates
            prev_size = len(corpus)
            corpus = corpus.drop_duplicates(subset=['streamid'], keep='first')  # get rid of them
#             if save:
#                 save_path = convert_to_list(save_path)
#                 for path in save_path:
#                     df.to_csv(path, compression=compression)
            if verbose:
                num_removed = len(corpus) - prev_size
                print(str(num_removed) + " duplicate documents removed from corpus")
    
    if verbose:
        print("Corpus loaded succesfully: " + str(len(corpus)) + " documents loaded.")
    return corpus

# corpus = load_corpus(doc_tags=doc_tags)

#### Nuggets (Evaluation Technique)

In [104]:
# import re

# def create_nugget_df(corpus_df, topic_ids=None, nuggets_tsv=None, nuggets_tsv_path=None, spacy_model=None):
#     """Dataframe containing nugget data and its appearances in corpus"""
#     def create_entry(row, reg_cols, multi_col_vals=None):
#         entry_dict = {}
#         for col in reg_cols:
#             entry_dict[col] = row[col]
#         if multi_cols is not None:
#             for k,v in multi_col_vals.items():
#                 entry_dict[k] = v
#         return entry_dict
    
#     def find_nugget_in_text(text, nugget_text):
#         """Retrieve the sentence that the nugget appears in the text
#         Only when spacy model is not None
#         """
#         text = nlp(text)
#         sentences = list(text.sents)
#         sentences = [s.text for s in sentences if len(s) != 0]
# #         target_sent = None
#         target_sents = []
#         for sent in sentences:
#             if nugget_text in sent:
# #                 target_sent = sent
# #                 break
#                 target_sents.append(sent)
#         print("nugget_text: " + str(nugget_text))
#         print("target_sents: " + str(target_sents))
# #         if target_sent is None:
#         if len(target_sents) == 0:
#             raise Exception("Nugget not found in sentences but identified by df.str.contains")
#         #return target_sent
#         return target_sents
    
#     # the data we are going to store in our processed nugget_df
#     entry_list = []
#     reg_cols = ['query_id', 'nugget_id', 'importance', 'nugget_len', 'nugget_text']
#     multi_cols = ['docid', 'streamid', 'epoch', 'yyyymmddhh']  # columns with multiple entries
#     num_cols = ['query_id', 'importance', 'nugget_len', 'epoch']  # comvert these columns to numerical vals
    
#     if nuggets_tsv is None:  # load nuggets_tsv in function instead
#         nuggets_tsv = pd.read_csv(nuggets_tsv_path, "\t")
       
#     # convert topic_ids to int standard
#     nug_tsv = convert_df_topic_id(nuggets_tsv, col_name='query_id', remove_null=True)
    
#     # target only selected topic_ids if not None
#     if topic_ids is not None:
#         nug_tsv[nug_tsv['query_id'].isin(topic_ids)]
    
#     pbar = tqdm(total=len(nug_tsv), position=0, leave=True)
#     for index, row in nug_tsv.iterrows():
#         # find where nugget appears in text
#         nug_text = row['nugget_text']
#         topic_id = 0
#         try:
#             topic_id = int(row['query_id'])  # make sure pattern match in correct topic
#         except ValueError:
#             pbar.update()
#             continue  # topic_id is unknown string in tsv file, e.g. "TS13.07"
#         appears = corpus_df[corpus_df['topic_id'] == topic_id]
#         appears = appears[appears['text'].str.contains(re.escape(nug_text))]  # make sure no accidental regex pattern
        
#         # gather information on docs it appears in
#         dups = find_duplicates(appears)  # get docids where nugget appears
#         for docid in dups:
#             upd = appears[appears['docid'] == docid]  # get docs with this docid
#             for i, r in upd.iterrows():  # gather info on each doc with this docid (e.g. streamid, epoch etc.)
#                 multi_col_vals = {}
#                 for multi_col in multi_cols:
#                     multi_col_vals[multi_col] = r[multi_col]
                    
#                 entry = create_entry(row, reg_cols, multi_col_vals=multi_col_vals)
                
#                 if spacy_model is not None:  # find sentence in document using spacy tokenized sentences
#                     found_sent = find_nugget_in_text(r['text'], nug_text)
#                     entry['sentence_in_doc'] = found_sent
                
#                 entry_list.append(entry)
#         pbar.update()
#     pbar.close()
    
#     print("len entry_list: " + str(len(entry_list)))
    
#     # form multi-index nugget dataframe
# #     reg_cols.extend(multi_cols)  # get new multiindex order
#     nugget_df = pd.DataFrame(entry_list)
#     nugget_df[num_cols] = nugget_df[num_cols].apply(pd.to_numeric, errors='coerce', axis=1)  # convert appropriate cols to numerical values
#     nugget_df.rename(columns={'query_id':'topic_id'}, inplace=True)  # topic_id matches other dataframes
#     return nugget_df

# def load_nugget_df(save_path, corpus_df, topic_ids=None, spacy_model=None, nuggets_tsv=None, nuggets_tsv_path=None, 
#                    path_handler=None, save=True, force_reload=False, verbose=True):
#     nugget_df = load_df_control(save_path, lambda: create_nugget_df(corpus_df, nuggets_tsv=nuggets_tsv,
#                                                                    nuggets_tsv_path=nuggets_tsv_path,
#                                                                    spacy_model=spacy_model, topic_ids=topic_ids), 
#                                 save=save, force_reload=force_reload, name="nugget_df", verbose=verbose,
#                                path_handler=path_handler)
#     return nugget_df


In [328]:
def create_nugget_df(corpus_df, nuggets_tsv=None, matches_tsv=None, nuggets_tsv_path=None,
                     matches_tsv_path=None, topic_ids=None, spacy_if_not_found=True, verbose=True):
    def check_load_tsv(tsv, path):
        if tsv is None:
            if path is None:
                raise Exception("Must either pass the tsv file or the path to load it")
            else:  # load tsv
                path = convert_to_list(path)
                tsv = []
                for p in path:
                    tsv.append(pd.read_csv(p, "\t"))
                tsv = pd.concat(tsv, ignore_index=True)
        return tsv
    
    # perform check if tsvs or their paths have been passed
    nuggets_tsv = check_load_tsv(nuggets_tsv, nuggets_tsv_path)
    matches_tsv = check_load_tsv(matches_tsv, matches_tsv_path)
    
    def parse_update_id(update_id):
        """Separate update_id into component streamid and sent_id"""
        update_id = update_id.split("-")
        sent_id = int(update_id[-1])
        streamid = "-".join(update_id[:-1])
        return streamid, sent_id
    
    def find_nugget_spacy(text, match_start, nlp):
        if nlp is None:
            nlp = spacy.load("en_core_web_sm")
        spacy_text = nlp(text)
        spacy_sents = list(nlp(text).sents)
        nug = None
        char_count = 0
        for s in spacy_sents:  # find sentence by where characters start
            s = str(s)  # convert from spacy tokens to string
            s_len = len(s)
            if char_count + s_len > match_start:
                nug = s
                break
            char_count += s_len
        return nug
    
    def find_nugget_in_text(text, sent_id, match_start, nlp):
        """Retrieve sentence at index sent_id"""
        split = text.splitlines()
        if split[0] == "":
            sent_id += 1  # first entry is empty, adjust offset
        nug = None
        try:
            nug = split[sent_id]
        except IndexError:  # increment has pushed offset out of bounds
            try:
                sent_id -= 1
                nug = split[sent_id]
            except IndexError as e:  # sent_id does not match text indexing
                if spacy_if_not_found:
                    nug = find_nugget_spacy(text, match_start, nlp)
                    sent_id = None  # set to a null value to differentiate
        return nug, sent_id
    
    nlp = None  # spacy model, load if needed
    entry_list = []  # list of dicts to build dataframe
    
    # what columns from each dataframe to extract to put into nugget_df
    nug_tsv_cols = ['importance', 'nugget_len', 'nugget_text']
    mat_tsv_cols = ['query_id', 'match_start', 'match_end']
    corp_cols = ['docid', 'streamid', 'epoch']
    # reference what columns to convert from string into numerical values
    num_cols = ['query_id', 'importance', 'nugget_len', 'epoch', 'sent_id', 'match_start', 'match_end']
    
    # convert topic_ids to int standard
    # set to new var to allow passing same unchanged nuggets/matches_tsv each time
    nug_tsv = convert_df_topic_id(nuggets_tsv, col_name='query_id', remove_null=True)
    mat_tsv = convert_df_topic_id(matches_tsv, col_name='query_id', remove_null=True)
    
    # target only selected topic_ids if not None
    if topic_ids is not None:
        for topic_id in topic_ids:
            nug_tsv = nug_tsv[nug_tsv['query_id'] == topic_id]
            mat_tsv = mat_tsv[mat_tsv['query_id'] == topic_id]
    
    missed_streamids = []  # store streamids not found for debug purposes
    missed_nuggetids = []  # debug purposes
    missed_sentid_streamids = []  # streamid where sent_id indexing out of bounds
    pbar = tqdm(total=len(mat_tsv), position=0, leave=True)
    for index, row  in mat_tsv.iterrows():
        entry = {}
        
        # get streamid and sentid of nugget occurence
        streamid, sent_id = parse_update_id(row['update_id'])
        
        # find occurence in corpus
        occur = corpus_df[corpus_df['streamid'] == streamid]
        if len(occur) == 0:
            missed_streamids.append(streamid)
            continue
        elif len(occur) > 1:
            if verbose:
                print("Number of entries with streamid: " + str(len(occur)))
                print(display(occur))
            raise Exception("There should be one entry in corpus with given streamid " + str(streamid))
        occur = occur.iloc[0].to_dict()
        
        # get text of the occurence
        occur_text = occur['text']
        match_start = int(row['match_start'])
        
        # get text of the nugget
        nug_row = nug_tsv[nug_tsv['nugget_id'] == row['nugget_id']]
        if len(nug_row) != 1:
            missed_nuggetids.append(row['nugget_id'])
            continue
        nug_row = nug_row.iloc[0].to_dict()
        
        # add columns from each dataframe
        for col in mat_tsv_cols:
            entry[col] = mat_tsv.at[index, col]
        for col in nug_tsv_cols:
            entry[col] = nug_row[col]
        # adding these columns here to control order of columns in final df
        found_sent, sent_id = find_nugget_in_text(occur_text, sent_id, match_start, nlp)
        entry['sent_in_text'] = found_sent
        entry['sent_id'] = sent_id
        for col in corp_cols:
            entry[col] = occur[col]
        
        if sent_id is None or found_sent is None:  # sent_id indexing was wrong
            missed_sentid_streamids.append(streamid)
        
        entry_list.append(entry)
        
    if verbose:
        print("Nugget entries were generated for " + str(len(entry_list)) + " nuggets. There were "
             + str(len(missed_streamids)) + " found in matches.tsv but not in corpus")
        print("There were " + str(len(missed_nuggetids)) + " nugget_ids found in matches.tsv but not in nuggets.tsv")
        print(str(len(missed_sentid_streamids)) + " out of " + str(len(entry_list)) + 
              " streamids had out of bounds sent_ids")
        
    nugget_df = pd.DataFrame(entry_list)
    if len(nugget_df) > 0:
        nugget_df[num_cols] = nugget_df[num_cols].apply(pd.to_numeric, errors='coerce', axis=1)  # convert appropriate cols to numerical values
        nugget_df.rename(columns={'query_id':'topic_id'}, inplace=True)  # topic_id matches other dataframes
    
    if verbose:
        print("nugget_df entries: " + str(len(nugget_df)))
    
    return nugget_df

In [313]:
def load_nugget_df(save_path, corpus_df, topic_ids=None,path_handler=None, save=True, force_reload=False, 
                   verbose=True, nuggets_tsv=None, matches_tsv=None, nuggets_tsv_path=None, matches_tsv_path=None,
                  spacy_if_not_found=True):
    
    nugget_df = load_df_control(save_path, 
                    lambda: create_nugget_df(corpus_df, nuggets_tsv=nuggets_tsv, verbose=verbose,
                                             matches_tsv=matches_tsv, nuggets_tsv_path=nuggets_tsv_path, 
                                             matches_tsv_path=matches_tsv_path, topic_ids=topic_ids,
                                            spacy_if_not_found=spacy_if_not_found), 
                                save=save, force_reload=force_reload, name="nugget_df", verbose=verbose,
                               path_handler=path_handler)
    return nugget_df

In [103]:
nugget_df = load_nugget_df("/nfs/proj-repo/AAARG-dissertation/l_d/nugget_df.csv.gz")
print(display(nugget_df[0:5]))

Loading nugget_df
loaded from file


Unnamed: 0.1,Unnamed: 0,topic_id,nugget_id,importance,nugget_len,nugget_text,docid,streamid,epoch,yyyymmddhh
0,0,1,VMTS13.01.052,3,2,Hundreds injured,dd95d5dbbff443c3ddae4e34a5d2e9c1,1330041420-dd95d5dbbff443c3ddae4e34a5d2e9c1,1330041420,2012-02-23-23
1,1,1,VMTS13.01.052,3,2,Hundreds injured,dd95d5dbbff443c3ddae4e34a5d2e9c1,1330041420-dd95d5dbbff443c3ddae4e34a5d2e9c1,1330041420,2012-02-23-23
2,2,1,VMTS13.01.054,1,3,"February 22, 2012",f66f6668504592a391345e012800469c,1329944400-f66f6668504592a391345e012800469c,1329944400,2012-02-22-21
3,3,1,VMTS13.01.054,1,3,"February 22, 2012",f66f6668504592a391345e012800469c,1329944400-f66f6668504592a391345e012800469c,1329944400,2012-02-22-21


None


Unnamed: 0.1,Unnamed: 0,topic_id,nugget_id,importance,nugget_len,nugget_text,docid,streamid,epoch,yyyymmddhh
0,0,1,VMTS13.01.052,3,2,Hundreds injured,dd95d5dbbff443c3ddae4e34a5d2e9c1,1330041420-dd95d5dbbff443c3ddae4e34a5d2e9c1,1330041420,2012-02-23-23
1,1,1,VMTS13.01.052,3,2,Hundreds injured,dd95d5dbbff443c3ddae4e34a5d2e9c1,1330041420-dd95d5dbbff443c3ddae4e34a5d2e9c1,1330041420,2012-02-23-23
2,2,1,VMTS13.01.054,1,3,"February 22, 2012",f66f6668504592a391345e012800469c,1329944400-f66f6668504592a391345e012800469c,1329944400,2012-02-22-21
3,3,1,VMTS13.01.054,1,3,"February 22, 2012",f66f6668504592a391345e012800469c,1329944400-f66f6668504592a391345e012800469c,1329944400,2012-02-22-21
4,4,1,VMTS13.01.054,1,3,"February 22, 2012",ddd856e0a350c52b7c078c9bcdd609d9,1329930660-ddd856e0a350c52b7c078c9bcdd609d9,1329930660,2012-02-22-17


None


#### Update Dataframe (Temporal Information)

In [11]:
def create_update_df():
    """Data Frame containing information about docs which have updates/multiple instances in corpus"""
    def create_entry(row, col_tags):
        entry = {}
        for col in col_tags:
            entry[col] = row[col]
        return entry
    
    col_tags = ['docid', 'streamid', 'epoch', 'yyyymmddhh', 'zulu']
    entry_list = []
    dups = find_duplicates(corpus)
    for docid in tqdm(dups, position=0, leave=True):
        d = corpus[corpus['docid'] == docid]
        for index, row in d.iterrows():
            entry = create_entry(row, col_tags)
            entry_list.append(entry)
             
    update_df = pd.DataFrame(entry_list)
    update_df = update_df.set_index(col_tags)
    return update_df

def load_update_df(save=True, force_reload=False, verbose=True):
    update_df = load_df_control(update_csv_path, create_update_df, 
                                save=save, force_reload=force_reload, name="update_df", verbose=verbose)
    return update_df

# update_df = load_update_df()

## Supervised Learning Input Data (Embeddings) / Labels (Model Summary)

In [12]:
# preprocess corpus into cleaned sentences
# create sentence embeddings of corpus text
# create embeddings from where nuggets appear in article
# match them together in df

In [12]:
class SupervisedTrainingGenerator:
    """Currently not matching nuggets to correct sentence_id"""
    def __init__(self, spacy_model_selector="en_core_web_sm"):
        self.sent_model = self.init_sent_model()
        self.spacy_model_selector = spacy_model_selector
        self.nlp = None
        
    def generate(self, corpus_df, nugget_df, topic_ids=None, save=True, force_reload=False, verbose=True):
        if topic_ids is not None:
            self.supervised_df = {}
            for topic_id in topic_ids:
                print("Processing topic " + str(topic_id))
                t_corpus = corpus_df[corpus_df['topic_id'] == topic_id]
                t_nugget = nugget_df[nugget_df['topic_id'] == topic_id]
                t_path = supervised_path_generator("topic" + str(topic_id))
                self.supervised_df[topic_id] = load_df_control(t_path, 
                                                lambda: self.__generate(t_corpus, t_nugget), 
                                                save=save, force_reload=force_reload, 
                                                verbose=verbose, name="supervised_df" + str(topic_id))
                self.sent_model = self.init_sent_model()
        else:
            # maybe check here load different csv with different topic_ids
            self.supervised_df = load_df_control(supervised_csv_path, 
                                                lambda: self.__generate(corpus_df, nugget_df), 
                                                save=save, force_reload=force_reload, 
                                                verbose=verbose, name="supervised_df")
        return self.supervised_df
    
    def __generate(self, corpus_df, nugget_df):
        # create df each row being a sentence, it's embedding, sent_id, is_nugget, nugget_text, topic_id, streamid, docid etc.
        supervised = []
        for index, article in tqdm_notebook(corpus_df.iterrows(), total=corpus_df.shape[0], position=0, leave=True):
            # preprocess sentences
            sentences = self.preprocess_text(article['text'])
            sent_ids, sentences, embeddings = self.sent_embeddings(sentences)
            
            # if nuggets in article, get the index of the sentence
            streamid = article['streamid']
            article_nugs = self.nugget_matching_sent(streamid, nugget_df, sentences)
            
            # create dictionary for later creating dataframe
            for sent_id, sent, emb in zip(sent_ids, sentences, embeddings):
                t_id = article['topic_id']
                epoch = article['epoch']
                is_nugget = False
                nugget_text = None
                nugget_id = None
                
                nug_dict_index = None
                # check if nugget
                try:
                    nug_dict_index = article_nugs['sent_id'].index(sent_id)  # throws if not in list
                    is_nugget = True
                    nugget_text = article_nugs['nugget_text'][nug_dict_index]
                    nugget_id = article_nugs['nugget_id'][nug_dict_index]
                except ValueError:
                    pass # current sentence is not nugget
                
                s_dict = {"topic_id":t_id, "streamid":streamid, "epoch":epoch, "sent_id":sent_id, 
                          "sentence":sent, "embedding":emb, "is_nugget":is_nugget, 
                          "nugget_id":nugget_id, "nugget_text":nugget_text}
                supervised.append(s_dict)
                
        supervised_df = pd.DataFrame(supervised)
        return supervised_df
                
                
    def nugget_matching_sent(self, streamid, nugget_df, sentences):
        # find sent index of where nugget appears in text
        matches = {"sent_id":[], "nugget_text":[], "nugget_id":[]}
        for index, nug in self.nuggets_in_article(streamid, nugget_df).iterrows():
            match = None
            nug_text = nug['nugget_text']
            for i in range(len(sentences)):
                match = None
                if sentences[i] in nug_text:
                    match = i  # only take first appearance in article if multiple exist
                    break
            if match is not None:
                matches["sent_id"].append(match)
                matches["nugget_text"].append(nug_text)
                matches['nugget_id'].append(nug['nugget_id'])
        return matches
        
                
    def nuggets_in_article(self, streamid, nugget_df):
        # find streamid in nugget_df
        nug_rows = nugget_df[nugget_df['streamid'] == streamid]
        return nug_rows
                
            
    def preprocess_text(self, text, use_spacy=False):
        # remove first char if \n
#         if text[:1] == "\n":
#             text = text[1:]
        
        sentences = None
        if spacy:
            if self.nlp is None:
                self.nlp = spacy.load(self.spacy_model_selector)
            text = self.nlp(text)
            sentences = list(text.sents)
            sentences = [s.text for s in sentences if len(s) != 0]
        else:  # split by newline
            sentences = text.splitlines()
        return sentences
        
    def sent_embeddings(self, sentences):
        # use sentence-transformers embeddings
        result = self.sent_model.encode(sentences, show_progress_bar=False)
        sent_ids = []
        tokens = []  # sentences as text
        embeddings = []
        for i, (tok, emb) in enumerate(zip(sentences,result)):
            sent_ids.append(i)
            tokens.append(tok)
            embeddings.append(emb)
#         embeddings = np.stack(embeddings)
        return sent_ids, tokens, embeddings
#         # we normalize embeddings, so that euclidian distance is equivalent to cosine distance
#         self.normed_embeddings = (embeddings.T / (embeddings**2).sum(axis=1) ** 0.5).T

    def init_sent_model(self):
        sent_model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')
        return sent_model

In [14]:
# super_gen = SupervisedTrainingGenerator()
# super_df = super_gen.generate(corpus, nugget_df)

In [15]:
# print(display_df[0:5])

In [16]:
# sli = pd.DataFrame.copy(super_gen.supervised_df[1][0:5], deep=True)
# # testees = sli['nugget_text']
# # testees[2] = "poop"
# # sli['nugget_text'] = testees
# # print(sli['nugget_text'].unique())
# e_df_test = pd.DataFrame(columns=sli.columns)
# print(list(e_df_test[e_df_test['epoch']==None]['topic_id']))

## Filter the Larger Trects Dataset

In [13]:
class TrectsFilter:
    def __init__(self):
        self.base_dir = '/nfs/trects-kba2014'
        self.updates_dir = "/nfs/TemporalSummarization/ts14/results"
        self.updates_csv_paths = self.generate_update_paths()
        self.save_dir = '/nfs/mine-trects-kba2014-filtered'
        self.proc_history_path = self.save_dir + '/' + 'process_history.pickle'
        self.proc_history = None
        self.streamids = set()

    def generate_update_paths(self, verbose=True):
        up_dir = "/nfs/TemporalSummarization"
        ts_dirs = ["ts13", "ts14", "ts15"]
        target_files = ['updates_sampled.extended.tsv', 'updates_sampled.tsv',
                                 'matches.tsv']
        
        up_paths = []
        wrong_paths = []
        for ts_dir in ts_dirs:
            for target_file in target_files:
                full_path = up_dir + '/' + ts_dir + '/results/' + target_file
                if os.path.exists(full_path):
                    up_paths.append(full_path)
                else:
                    wrong_paths.append(full_path)
        if verbose:
            print("Attempted to find streamids in these files, but no path exists:")
            print(wrong_paths)
            print("")
        return up_paths
        
        
    def create_filtered_dataset(self, force_reload=False, verbose=True, no_soup=True):
        """ Outline of Process
        1. Find streamids
            1.1 open updates_sampled.tsv file (or updates_sampled.extended.tsv)
            1.2 Scrape update_id column
            1.3 transform into streamid (drop last hyphenated numbers (these are sentenceids))
            1.4 Put streamids into datastructure for comparing (e.g. set)
        2. Create a new directory for each topic folder there is in target dir
        3. Opening up documents
            3.1 Go for each topic folder
            3.2 Open up each document
            3.3 Parse into html tree
            3.4 if streamid matches, store locally in memory buffer
            3.5 when buffer is size of however many docs are in other html.gz files, print those docs to a new file
            3.6 name this file something like a number, not dates like other files, save with same file extension/format
        4. Quick test
            4.1 Open up a topicid using load corpus
        5. Final check
            5.1 Add in final check that will only run this if it hasn't already been generated
        """
        # get streamids for docs that we will filter for
        for update_csv_path in self.updates_csv_paths:
            self.get_streamids(update_csv_path)
        if verbose:
            print("Number of streamids searching for: " + str(len(self.streamids)))

        # get topicids from folder names
        topic_ids = [int(tid) for tid in os.listdir(self.base_dir) if tid.isdigit()]
        topic_ids.sort()
        
        # create dir to save filtered corpus to
        self.create_dir(self.save_dir)
        
        # load history of files already processed if exists
        self.load_process_history_dict(topic_ids)
        
        
        for topic_id in tqdm_notebook(topic_ids, position=0, leave=True):
            # create save directory
            topic_save_dir = self.save_dir + '/' + str(topic_id)
            self.create_dir(topic_save_dir)
            
            # get paths for files in target topic dir
            topic_dir = self.base_dir + '/' + str(topic_id)
            gz_paths = search_dir(topic_dir)
            
            # remove already processed files
            if not force_reload:
                if verbose:
                    prev_printed = [x for x in gz_paths if x in self.proc_history[topic_id]]
                    print("Previously processed " + str(len(prev_printed)) + " of " + str(len(gz_paths))
                         + " paths for topic " + str(topic_id))
                gz_paths = [x for x in gz_paths if x not in self.proc_history[topic_id]]
            
            if verbose:
                if len(gz_paths) > 0:
                    print("Processing topic " + str(topic_id))
            
            # process each file
            for gz_path in tqdm_notebook(gz_paths):
                if no_soup == True:
                    matches = self.process_file(gz_path, verbose=verbose)
                    if verbose:
                        print("len matches: " + str(len(matches)))
                    save_path = self.get_file_save_path(topic_id, gz_path)
                    self.write_docs_to_file(matches, save_path, no_soup=True, verbose=verbose)
                else:
                    # get file markup
                    markup = open_markup_file(gz_path, verbose=verbose)  # 50MB file proving hard for beautifulsoup
                    # get docs in file that are in streamids
                    matches = self.retrieve_matching_docs(markup, verbose=verbose)
                    save_path = self.get_file_save_path(topic_id, gz_path)
                    # write file and save results
                    self.write_docs_to_file(matches, save_path, verbose=verbose)
                self.proc_history[topic_id].add(gz_path)
                self.save_process_history_dict(verbose=verbose)
                
        print("Finished filtering corpus")
        
    def process_file(self, filepath, verbose=True):
        if verbose:
            print("Processing file at: " + str(filepath))
        matching_docs = []
        doc_buffer = []
        found_sid = False
        save_doc = False
        with gzip.open(filepath, 'rt') as f:
            for line in f:
                doc_buffer.append(line)  # add cur line to buffer
                buff_len = len(doc_buffer)
                if buff_len > 1:
                    if not found_sid:
                        if self.is_tag(line, tag="streamid"):
                            found_sid = True
                            sid = self.get_inner_tag(line, tag="streamid", remove_whitespace=True)
                            if sid in self.streamids:
                                save_doc = True
                    if self.is_tag(line, tag="doc", start_tag=False, end_tag=True):
                        if save_doc:  # if has matching streamid save doc file
                            matching_docs.append(doc_buffer)
                        doc_buffer = []
                        save_doc = False
                        found_sid = False
                        
                elif self.is_tag(line, tag="doc", start_tag=True, end_tag=False):
                    # reset variables
                    doc_buffer = []
                    doc_buffer.append(line)
        return matching_docs
                
                
    def is_tag(self, line, tag="streamid", start_tag=True, end_tag=True):
        start, end = self.create_tags(tag)
        start_true = False
        end_true = False
        
        if re.search(start, line, re.IGNORECASE):
            start_true = True
        if re.search(end, line, re.IGNORECASE):
            end_true = True
        
        if start_tag == True:
            if end_tag == True:
                return start_true and end_true
            else:
                return start_true
        else:
            return end_true
        
    def create_tags(self, tag):
        start_tag = "<" + tag + ">"
        end_tag = "</" + tag + ">"
        return start_tag, end_tag
            
    def get_inner_tag(self, line, tag="streamid", remove_whitespace=False):
        start_tag, end_tag = self.create_tags(tag)
        no_tags = line.replace(start_tag,'').replace(end_tag,'')
        no_tags = line.replace(start_tag.upper(), '').replace(end_tag.upper(),'')  # repeat for uppercase
        if remove_whitespace == True:
            no_tags = no_tags.rstrip()
        return no_tags
        
    def save_process_history_dict(self, verbose=True):
        with open(self.proc_history_path, 'wb') as handle:
            pickle.dump(self.proc_history, handle, protocol=pickle.HIGHEST_PROTOCOL)
            if verbose:
                print("saved proc_history")
        
    def load_process_history_dict(self, topic_ids):
        if os.path.exists(self.proc_history_path):
            with open(self.proc_history_path, 'rb') as handle:
                self.proc_history = pickle.load(handle)
            return True
        else:
            self.proc_history = self.create_process_history_dict(topic_ids)
            return False
        
    def create_process_history_dict(self, topic_ids):
        """Create a dictionary to keep track of what files have already been searched"""
        proc_history = {}
        for topic_id in topic_ids:
            proc_history[int(topic_id)] = set()  # sets have faster indexing
        return proc_history

                
    def get_file_save_path(self, topic_id, gz_path):
        filename = self.get_filename_from_gz_path(gz_path)
        save_path = self.save_dir + '/' + str(topic_id) + '/' + filename
        return save_path
        
                
    def get_filename_from_gz_path(self, gz_path):
        split = gz_path.split("/")
        filename = split[-1]
        return filename  # return with file extension on
                

    def write_docs_to_file(self, doc_list, save_path, no_soup=False, verbose=True):
        # transform docs into string
        if len(doc_list) > 0:  # don't write empty files
            out = ""
            if no_soup == True:
                out += "<html>\n"
                for doc in doc_list:
                    out += "".join(list(map(str, doc)))
                    out += "\n"
                out += "</html>"
            else:
                out = "\n".join(list(map(str, doc_list)))
            # write
            with gzip.open(save_path, "wt") as f:
                f.write(out)
                if verbose:
                    print("File written to: " + str(save_path))
        
            
    def retrieve_matching_docs(self, markup, verbose=False):
        """Retrieve docs with matching streamids from markup"""
        matches = []
        doc_count = 0
        match_count = 0
        for doc in markup.find_all("doc"):
            d_streamid = str(doc.find("streamid").string)
            if d_streamid in self.streamids:  # matching doc
                matches.append(doc)
                match_count += 1
            doc_count +=1
        if verbose:
            print("doc count: " + str(doc_count) + "\nmatch_count: " + str(match_count))
        return matches
    
    def get_streamids(self, path):
        # read tsv file
        updates_csv = pd.read_csv(path, "\t")
        # take column with streamids
        updateids = list(updates_csv['update_id'])
        for updateid in updateids:
            streamid = self.parse_streamid(updateid)
            self.streamids.add(streamid)
        return self.streamids
        
    def parse_streamid(self, updateid):
        """Convert updateid in format: epoch-docid-sentid into epoch-docid"""
        split = updateid.split("-")
        split = split[:-1]  # remove sentid from end
        streamid = "-".join(split)
        return streamid
    
    def create_dir(self, dir_path):
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
            print("Created new directory at " + str(dir_path))

In [129]:
trectsfilter = TrectsFilter()
trectsfilter.create_filtered_dataset(verbose=True, force_reload=False, no_soup=True)

Attempted to find streamids in these files, but no path exists:
['/nfs/TemporalSummarization/ts13/results/updates_sampled.extended.tsv', '/nfs/TemporalSummarization/ts15/results/updates_sampled.extended.tsv']

Number of streamids searching for: 53141


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

Previously processed 241 of 241 paths for topic 1


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 2


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 3


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 4


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 5


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 6


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 8


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 9


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 10


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 435 of 435 paths for topic 11


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 649 of 649 paths for topic 12


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 313 of 313 paths for topic 13


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 126 of 126 paths for topic 14


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 15


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 175 of 175 paths for topic 16


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 97 of 97 paths for topic 17


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 505 of 505 paths for topic 18


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 337 of 337 paths for topic 19


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 313 of 313 paths for topic 20


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 241 of 241 paths for topic 21


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 264 of 264 paths for topic 22


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 432 of 432 paths for topic 23


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Previously processed 106 of 288 paths for topic 24
Processing topic 24


HBox(children=(IntProgress(value=0, max=182), HTML(value='')))

Processing file at: /nfs/trects-kba2014/24/2013-02-14-16.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-14-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-16-05.gz
len matches: 60
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-16-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-14-19.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-14-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-18-11.gz
len matches: 33
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-18-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-15-15.gz
len matches: 326
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-15-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-16-19.gz
len matches: 62
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-16-19.gz
saved proc_history
Proc

len matches: 41
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-08-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-16-22.gz
len matches: 59
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-16-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-16-11.gz
len matches: 37
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-16-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-12-00.gz
len matches: 20
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-12-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-14-12.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-14-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-17-12.gz
len matches: 26
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-17-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-13-08.gz
len 

len matches: 36
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-12-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-18-08.gz
len matches: 27
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-18-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-08-23.gz
len matches: 13
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-08-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-14-01.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-14-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-16-16.gz
len matches: 77
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-16-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-18-10.gz
len matches: 31
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-18-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-12-17.gz
len m

len matches: 55
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-13-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-11-06.gz
len matches: 33
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-11-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-18-13.gz
len matches: 40
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-18-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-13-22.gz
len matches: 24
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-13-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-13-10.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-13-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-17-10.gz
len matches: 37
File written to: /nfs/mine-trects-kba2014-filtered/24/2013-02-17-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/24/2013-02-10-23.gz
len m

HBox(children=(IntProgress(value=0, max=248), HTML(value='')))

Processing file at: /nfs/trects-kba2014/25/2013-02-07-19.gz
len matches: 130
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-07-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-12-19.gz
len matches: 20
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-12-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-05-13.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-05-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-07-10.gz
len matches: 18
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-07-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-11-03.gz
len matches: 24
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-11-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-04-05.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-04-05.gz
saved proc_history
Proce

len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-04-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-03-09.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-03-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-05-09.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-05-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-06-01.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-04-23.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-04-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-09-08.gz
len matches: 32
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-09-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-11-13.gz
len matches: 53
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-11

len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-09-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-03-14.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-03-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-12-23.gz
len matches: 55
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-12-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-10-12.gz
len matches: 17
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-10-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-07-02.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-07-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-06-13.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-08-04.gz
len matches: 38
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-

len matches: 42
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-10-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-04-04.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-04-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-11-11.gz
len matches: 23
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-11-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-03-06.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-03-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-09-03.gz
len matches: 21
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-09-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-04-07.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-04-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-10-00.gz
len mat

len matches: 44
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-10-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-12-15.gz
len matches: 13
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-12-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-11-17.gz
len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-11-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-08-02.gz
len matches: 63
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-08-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-10-09.gz
len matches: 46
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-10-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-04-08.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/25/2013-02-04-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/25/2013-02-07-05.gz
len 

HBox(children=(IntProgress(value=0, max=361), HTML(value='')))

Processing file at: /nfs/trects-kba2014/26/2013-01-26-13.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-26-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-31-06.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-21-14.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-21-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-20-14.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-20-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-26-10.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-26-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-26-12.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-26-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-25-19.gz
len matches: 4
File 

len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-29-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-23-13.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-23-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-21-01.gz
len matches: 38
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-21-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-22-00.gz
len matches: 30
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-22-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-18-02.gz
len matches: 176
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-18-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-16-08.gz
len matches: 22
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-16-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-22-18.gz
len m

len matches: 124
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-18-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-27-06.gz
len matches: 33
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-27-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-27-16.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-27-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-21-04.gz
len matches: 30
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-21-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-24-08.gz
len matches: 23
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-24-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-19-17.gz
len matches: 60
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-19-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-30-15.gz
len 

len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-23-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-26-19.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-26-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-24-15.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-24-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-17-23.gz
len matches: 41
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-17-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-19-09.gz
len matches: 33
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-19-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-18-05.gz
len matches: 56
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-18-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-24-02.gz
len ma

len matches: 26
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-27-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-26-02.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-26-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-25-06.gz
len matches: 21
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-25-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-27-01.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-27-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-21-02.gz
len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-21-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-29-00.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-29-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-30-23.gz
len mat

len matches: 29
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-28-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-29-16.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-29-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-26-21.gz
len matches: 13
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-26-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-21-03.gz
len matches: 40
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-21-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-25-08.gz
len matches: 32
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-25-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-21-07.gz
len matches: 45
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-21-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-20-01.gz
len m

len matches: 108
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-18-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-16-19.gz
len matches: 76
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-16-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-30-01.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-28-17.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-28-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-19-18.gz
len matches: 40
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-19-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-21-09.gz
len matches: 17
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-01-21-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/26/2013-01-17-13.gz
len matches: 72
File written to: /nfs/mine-trects-kba2014-filtered/26/2013-0

HBox(children=(IntProgress(value=0, max=145), HTML(value='')))

Processing file at: /nfs/trects-kba2014/27/2012-10-30-04.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-30-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-28-14.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-28-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-31-18.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-31-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-27-13.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-27-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-29-23.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-29-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-27-10.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-27-10.gz
saved proc_history
Processi

len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-29-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-27-03.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-29-05.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-29-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-11-01-11.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-11-01-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-11-01-05.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-11-01-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-30-22.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-30-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-29-08.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-29-0

len matches: 16
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-31-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-31-07.gz
len matches: 25
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-31-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-29-10.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-29-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-29-03.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-10-29-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-11-01-04.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-11-01-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-11-01-12.gz
len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/27/2012-11-01-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/27/2012-10-30-17.gz
len mat

HBox(children=(IntProgress(value=0, max=168), HTML(value='')))

Processing file at: /nfs/trects-kba2014/28/2013-04-30-13.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-30-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-27-15.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-27-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-28-05.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-28-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-24-18.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-24-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-25-01.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-25-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-29-05.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-29-05.gz
saved proc_history
Processing

len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-28-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-25-16.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-25-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-29-18.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-29-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-28-14.gz
len matches: 13
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-28-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-26-07.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-26-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-24-03.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-24-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-25-04.gz
len match

len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-29-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-25-00.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-25-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-30-05.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-30-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-26-23.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-26-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-27-10.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-27-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-26-05.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-24-17.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-24-1

len matches: 13
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-25-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-30-12.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-30-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-30-08.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-30-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-25-05.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-25-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-26-02.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-26-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-30-23.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/28/2013-04-30-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/28/2013-04-29-10.gz
len match

HBox(children=(IntProgress(value=0, max=241), HTML(value='')))

Processing file at: /nfs/trects-kba2014/29/2013-02-25-17.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-25-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-26-21.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-26-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-26-02.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-24-18.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-02-19.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-02-01.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-27-04.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-25-21.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-25-21.gz
saved proc_history
Processing file at: /nfs/trects-kba

len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-02-12.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-02-23.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-01-22.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-22-20.gz
len matches: 22
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-22-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-25-23.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-25-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-03-05.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-25-06.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-28-06.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-26-04.gz
len matches

len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-23-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-26-07.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-22-08.gz
len matches: 33
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-22-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-26-08.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-24-09.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-24-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-28-16.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-28-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-01-18.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-03-01-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/201

len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-24-00.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-24-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-03-00.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-26-19.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-26-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-23-22.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-23-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-03-02-08.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-28-02.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-24-21.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-25-20.gz
len matches: 0
saved proc_history
P

len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/29/2013-02-22-01.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/29/2013-02-22-01.gz
saved proc_history

Created new directory at /nfs/mine-trects-kba2014-filtered/30
Previously processed 0 of 241 paths for topic 30
Processing topic 30


HBox(children=(IntProgress(value=0, max=241), HTML(value='')))

Processing file at: /nfs/trects-kba2014/30/2012-03-07-10.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-07-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-05-06.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-05-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-06-08.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-06-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-14-05.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-12-23.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-09-19.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-09-20.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-05-15.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/30/

len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-09-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-04-21.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-04-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-08-15.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-08-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-06-15.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-06-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-08-16.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-08-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-05-03.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-05-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-06-03.gz
len matche

len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-06-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-08-14.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-08-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-08-21.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-08-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-08-03.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-08-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-08-20.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-13-14.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-13-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-05-18.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-05-

len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-09-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-10-21.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-10-03.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-11-21.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-11-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-11-06.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-13-01.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-13-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-05-19.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-05-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-12-16.gz
len matches: 0
saved proc_history
Processing file at: /nfs

len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-08-02.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-08-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-07-16.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-07-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-07-07.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-13-23.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-13-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-07-01.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-07-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012-03-09-07.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/30/2012-03-09-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/30/2012

HBox(children=(IntProgress(value=0, max=119), HTML(value='')))

Processing file at: /nfs/trects-kba2014/31/2012-08-03-18.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-08-03-02.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-08-01-22.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/31/2012-08-01-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-07-31-16.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-07-31-11.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/31/2012-07-31-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-08-02-18.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-08-01-13.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/31/2012-08-01-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-08-01-18.gz
len matches: 19
File written to: /nfs/mine-trects-kba2014-filtered/31

len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-08-03-11.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-07-30-02.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-07-31-03.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-08-03-10.gz
len matches: 24
File written to: /nfs/mine-trects-kba2014-filtered/31/2012-08-03-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-07-30-21.gz
len matches: 56
File written to: /nfs/mine-trects-kba2014-filtered/31/2012-07-30-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-07-31-01.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-07-31-13.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/31/2012-07-31-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/31/2012-08-01-06.gz
len matches: 1
File written to: /

HBox(children=(IntProgress(value=0, max=457), HTML(value='')))

Processing file at: /nfs/trects-kba2014/32/2012-09-11-08.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-11-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-28-14.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-21-08.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-11-06.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-11-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-22-16.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-15-07.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-15-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-22-11.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-22-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-23-19.g

len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-12-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-14-21.gz
len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-14-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-14-05.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-14-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-28-09.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-28-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-23-08.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-25-18.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-25-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-13-00.gz
len matches: 24
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-13

len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-15-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-16-01.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-16-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-23-03.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-21-01.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-21-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-29-09.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-29-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-20-18.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-20-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-13-10.gz
len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-13-

len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-25-10.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-21-23.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-21-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-18-02.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-22-00.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-19-06.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-19-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-11-12.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-11-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-28-21.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-28-11.gz
len matches: 0
saved proc_history
P

len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-27-20.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-20-07.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-20-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-24-12.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-23-22.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-23-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-11-11.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-11-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-27-01.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-19-04.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-19-04.gz
saved proc_history
Processing file at: /nfs

len matches: 25
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-12-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-20-00.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-12-11.gz
len matches: 25
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-12-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-12-14.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-12-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-28-00.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-28-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-19-13.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-26-12.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-26-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/20

len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-19-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-12-13.gz
len matches: 30
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-12-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-25-21.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-25-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-28-03.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-28-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-21-22.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-26-11.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-26-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-11-19.gz
len matches: 44
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-11

len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-20-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-22-14.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-21-18.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-21-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-11-07.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-11-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-16-12.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-16-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-28-16.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012-09-22-10.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/32/2012-09-22-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/32/2012

HBox(children=(IntProgress(value=0, max=217), HTML(value='')))

Processing file at: /nfs/trects-kba2014/33/2013-01-10-07.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-15-14.gz
len matches: 24
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-15-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-14-20.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-14-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-15-10.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-15-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-11-14.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-11-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-18-07.gz
len matches: 82
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-18-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-17-00.gz
len matches: 103
Fi

len matches: 84
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-18-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-13-08.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-13-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-16-22.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-16-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-11-05.gz
len matches: 15
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-11-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-11-12.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-11-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-10-11.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-10-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-13-03.gz
len mat

len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-16-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-16-09.gz
len matches: 38
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-16-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-13-20.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-13-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-15-04.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-18-21.gz
len matches: 77
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-18-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-16-12.gz
len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-16-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-12-09.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-1

len matches: 29
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-15-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-17-10.gz
len matches: 54
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-17-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-11-11.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-11-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-14-09.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-14-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-12-06.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-10-05.gz
len matches: 88
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-10-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-10-06.gz
len matches: 93
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-

len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-12-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-15-01.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-11-00.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-10-18.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-10-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-12-18.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-12-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-12-05.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-12-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/2013-01-11-07.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/33/2013-01-11-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/33/20

HBox(children=(IntProgress(value=0, max=97), HTML(value='')))

Processing file at: /nfs/trects-kba2014/34/2013-02-19-13.gz
len matches: 27
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-19-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-17-04.gz
len matches: 47
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-17-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-17-05.gz
len matches: 62
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-17-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-19-10.gz
len matches: 21
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-19-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-17-21.gz
len matches: 29
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-17-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-20-00.gz
len matches: 15
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-20-00.gz
saved proc_history
Proc

len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-18-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-16-06.gz
len matches: 56
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-16-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-17-23.gz
len matches: 37
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-17-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-19-06.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-19-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-17-01.gz
len matches: 51
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-17-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-18-14.gz
len matches: 26
File written to: /nfs/mine-trects-kba2014-filtered/34/2013-02-18-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/34/2013-02-17-08.gz
len m

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

Processing file at: /nfs/trects-kba2014/35/2013-04-17-23.gz
len matches: 58
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-17-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-19-10.gz
len matches: 42
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-19-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-16-01.gz
len matches: 119
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-16-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-18-11.gz
len matches: 34
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-18-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-18-00.gz
len matches: 50
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-18-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-15-13.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-15-13.gz
saved proc_history
Proc

len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-15-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-18-21.gz
len matches: 48
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-18-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-18-18.gz
len matches: 20
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-18-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-18-07.gz
len matches: 62
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-18-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-16-05.gz
len matches: 61
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-16-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-19-13.gz
len matches: 77
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-19-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-15-19.gz
len m

len matches: 73
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-19-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-15-23.gz
len matches: 157
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-15-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-15-01.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-15-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-17-16.gz
len matches: 127
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-17-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-17-13.gz
len matches: 56
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-17-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-17-01.gz
len matches: 67
File written to: /nfs/mine-trects-kba2014-filtered/35/2013-04-17-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/35/2013-04-18-03.gz
len

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

Processing file at: /nfs/trects-kba2014/36/2013-03-19-00.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-19-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-20-05.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-20-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-20-00.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-20-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-22-12.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-22-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-23-20.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-23-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-20-21.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-20-21.gz
saved proc_history
Processing

len matches: 20
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-19-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-20-17.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-20-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-20-18.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-20-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-22-20.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-22-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-23-15.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-23-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-23-11.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-23-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-19-01.gz
len match

len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-21-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-22-02.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-22-11.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-21-04.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-21-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-21-01.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-21-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-21-07.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-21-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013-03-21-05.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/36/2013-03-21-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/36/2013

HBox(children=(IntProgress(value=0, max=169), HTML(value='')))

Processing file at: /nfs/trects-kba2014/37/2011-12-29-12.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/37/2011-12-29-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-03-10.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-03-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2011-12-29-20.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2011-12-29-10.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/37/2011-12-29-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-01-08.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-01-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2011-12-30-22.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/37/2011-12-30-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2011-12-29-18.gz
len matches: 3
File w

len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/37/2011-12-31-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2011-12-30-17.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/37/2011-12-30-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-01-23.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-01-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-04-07.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-04-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-03-11.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-03-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-04-20.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-04-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-04-18.gz
len matche

len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-03-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-03-08.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-03-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-03-18.gz
len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-03-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-02-01.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-02-08.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-02-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2011-12-31-22.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/37/2011-12-31-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-01-03.gz
len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-0

len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-04-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-03-20.gz
len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-03-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-01-07.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-01-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-02-03.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-02-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2011-12-30-09.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/37/2011-12-30-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2012-01-02-16.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/37/2012-01-02-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/37/2011-12-31-08.gz
len matc

HBox(children=(IntProgress(value=0, max=217), HTML(value='')))

Processing file at: /nfs/trects-kba2014/38/2013-04-06-05.gz
len matches: 16
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-06-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-06-03.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-06-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-06-22.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-06-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-11-22.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-11-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-10-18.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-04-16.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-04-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-08-03.gz
len matches: 0
save

len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-07-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-09-14.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-09-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-12-14.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-12-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-09-07.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-04-05.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-11-04.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-11-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-05-22.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-05-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013

len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-07-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-11-09.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-11-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-07-23.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-07-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-09-00.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-07-17.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-07-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-08-14.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-08-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-08-12.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013

len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-05-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-09-19.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-12-18.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-12-09.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-10-11.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-10-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-05-10.gz
len matches: 13
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-05-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-09-05.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/38/2013-04-08-18.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/38/2013-04-08-18.gz
saved proc_history
Processing file at: /nf

HBox(children=(IntProgress(value=0, max=97), HTML(value='')))

Processing file at: /nfs/trects-kba2014/39/2013-02-01-05.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-02-23.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-02-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-04-05.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-04-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-02-16.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-02-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-03-17.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-03-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-01-15.gz
len matches: 17
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-01-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-04-14.gz
len matches: 8
File 

len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-04-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-03-16.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-03-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-02-17.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-02-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-01-16.gz
len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-01-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-02-22.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-02-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-01-00.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/39/2013-02-04-21.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/39/2013-02-04

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

Processing file at: /nfs/trects-kba2014/40/2011-12-22-17.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-22-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-25-12.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-25-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-24-05.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-24-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-22-01.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-22-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-26-04.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-26-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-24-22.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-24-22.gz
saved proc_history
Processin

len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-22-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-22-00.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-22-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-22-20.gz
len matches: 21
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-22-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-22-03.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-22-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-24-19.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-24-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-26-02.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-26-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-24-11.gz
len matc

len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-23-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-21-21.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-21-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-22-13.gz
len matches: 16
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-22-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-26-05.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-24-15.gz
len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-24-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-23-20.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-23-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/40/2011-12-26-11.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/40/2011-12-2

HBox(children=(IntProgress(value=0, max=241), HTML(value='')))

Processing file at: /nfs/trects-kba2014/41/2013-01-15-14.gz
len matches: 24
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-15-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-21-14.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-21-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-20-14.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-20-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-23-07.gz
len matches: 31
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-23-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-19-12.gz
len matches: 60
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-19-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-23-05.gz
len matches: 26
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-23-05.gz
saved proc_history
Proce

len matches: 17
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-24-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-18-10.gz
len matches: 68
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-18-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-17-09.gz
len matches: 58
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-17-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-15-17.gz
len matches: 16
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-15-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-24-00.gz
len matches: 36
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-24-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-22-01.gz
len matches: 28
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-22-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-16-15.gz
len 

len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-24-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-17-23.gz
len matches: 41
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-17-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-19-09.gz
len matches: 33
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-19-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-18-05.gz
len matches: 56
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-18-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-24-02.gz
len matches: 30
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-24-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-22-14.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-22-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-18-03.gz
len 

len matches: 79
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-18-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-19-16.gz
len matches: 88
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-19-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-16-13.gz
len matches: 18
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-16-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-20-08.gz
len matches: 22
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-20-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-18-08.gz
len matches: 97
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-18-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-20-11.gz
len matches: 20
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-20-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-19-21.gz
len 

len matches: 41
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-24-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-15-21.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-15-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-20-07.gz
len matches: 28
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-20-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-16-23.gz
len matches: 27
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-16-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-21-00.gz
len matches: 27
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-21-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-22-17.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/41/2013-01-22-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/41/2013-01-18-12.gz
len m

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

Processing file at: /nfs/trects-kba2014/42/2013-02-12-19.gz
len matches: 20
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-12-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-13-13.gz
len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-13-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-11-03.gz
len matches: 24
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-11-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-12-22.gz
len matches: 66
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-12-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-10-14.gz
len matches: 59
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-10-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-13-16.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-13-16.gz
saved proc_history
Proce

len matches: 17
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-10-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-10-07.gz
len matches: 17
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-10-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-10-04.gz
len matches: 41
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-10-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-10-02.gz
len matches: 18
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-10-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-11-20.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-11-20.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-11-01.gz
len matches: 34
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-11-01.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-14-03.gz
len m

len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-11-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-13-09.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-13-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-10-09.gz
len matches: 46
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-10-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-13-19.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-13-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-13-02.gz
len matches: 31
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-13-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-12-09.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/42/2013-02-12-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/42/2013-02-11-14.gz
len ma

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

Processing file at: /nfs/trects-kba2014/43/2013-01-21-14.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-21-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-20-14.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-20-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-19-12.gz
len matches: 60
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-19-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-19-14.gz
len matches: 89
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-19-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-19-04.gz
len matches: 34
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-19-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-21-13.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-21-13.gz
saved proc_history
Proce

len matches: 33
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-19-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-18-05.gz
len matches: 56
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-18-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-18-03.gz
len matches: 38
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-18-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-18-04.gz
len matches: 42
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-18-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-18-00.gz
len matches: 80
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-18-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-18-23.gz
len matches: 52
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-18-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-19-11.gz
len 

len matches: 55
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-17-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-20-09.gz
len matches: 33
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-20-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-18-06.gz
len matches: 73
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-18-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-20-07.gz
len matches: 28
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-20-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-21-00.gz
len matches: 27
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-21-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-18-12.gz
len matches: 108
File written to: /nfs/mine-trects-kba2014-filtered/43/2013-01-18-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/43/2013-01-19-18.gz
len

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

Processing file at: /nfs/trects-kba2014/44/2012-04-13-00.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-13-00.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-12-14.gz
len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-12-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-14-13.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-14-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-15-18.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-15-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-13-17.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-13-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-15-03.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-15-03.gz
saved proc_history
Processin

len matches: 27
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-11-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-14-17.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-12-04.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-12-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-12-22.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-12-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-13-09.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-13-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-12-06.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-12-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-15-09.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-15-

len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-14-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-11-17.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-11-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-15-04.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-15-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-13-10.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-13-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-15-21.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-15-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-11-10.gz
len matches: 40
File written to: /nfs/mine-trects-kba2014-filtered/44/2012-04-11-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/44/2012-04-14-09.gz
len matc

HBox(children=(IntProgress(value=0, max=241), HTML(value='')))

Processing file at: /nfs/trects-kba2014/45/2012-11-06-21.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-06-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-30-04.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-30-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-28-14.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-28-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-31-18.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-31-18.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-04-18.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-29-23.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-29-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-07-03.gz
len matches: 0
save

len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-30-10.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-31-17.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-31-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-05-15.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-05-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-28-19.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-28-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-04-09.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-03-13.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-03-13.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-30-07.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-30-0

len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-02-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-03-15.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-28-22.gz
len matches: 13
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-28-22.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-28-15.gz
len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-28-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-29-14.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-29-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-04-13.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-02-07.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-28-23.gz
len matches: 6
File written to: /nfs/mine-trects-kba2014

len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-04-03.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-01-12.gz
len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-01-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-30-17.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-30-17.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-29-16.gz
len matches: 41
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-29-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-30-03.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-30-03.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-01-21.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-01-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/20

len matches: 10
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-30-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-02-23.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-02-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-10-30-02.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-10-30-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-03-23.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-03-23.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-01-16.gz
len matches: 27
File written to: /nfs/mine-trects-kba2014-filtered/45/2012-11-01-16.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-06-15.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/2012-11-04-11.gz
len matches: 0
saved proc_history
Processing file at: /nfs/trects-kba2014/45/20

HBox(children=(IntProgress(value=0, max=121), HTML(value='')))

Processing file at: /nfs/trects-kba2014/46/2012-09-11-08.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-11-08.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-11-06.gz
len matches: 1
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-11-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-15-07.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-15-07.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-15-05.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-15-05.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-15-15.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-15-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-12-10.gz
len matches: 9
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-12-10.gz
saved proc_history
Processin

len matches: 11
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-15-21.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-12-09.gz
len matches: 14
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-12-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-14-04.gz
len matches: 3
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-14-04.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-15-06.gz
len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-15-06.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-14-11.gz
len matches: 23
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-14-11.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-11-15.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-11-15.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-13-16.gz
len mat

len matches: 8
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-15-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-14-14.gz
len matches: 4
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-14-14.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-15-09.gz
len matches: 12
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-15-09.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-12-19.gz
len matches: 7
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-12-19.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-15-12.gz
len matches: 2
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-15-12.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-11-02.gz
len matches: 5
File written to: /nfs/mine-trects-kba2014-filtered/46/2012-09-11-02.gz
saved proc_history
Processing file at: /nfs/trects-kba2014/46/2012-09-12-08.gz
len match

In [129]:
# # test1_path = "/nfs/trects-kba2014-filtered-mine/1/2012-02-27-00.gz"
# # test2_path = "/nfs/trects-kba2014-filtered-mine/1/2012-02-26-15.gz"
# # test1_markup = open_markup_file(test1_path)
# # for doc in test1_markup.find_all("doc"):
# #     print(doc.find("zulu"))
# def get_stream_id(line, tag="streamid"):
#     start_tag = "<" + tag + ">"
#     end_tag = "</" + tag + ">"
#     no_tags = line.replace(start_tag,'').replace(end_tag,'')
#     return no_tags

# sidstr = "<streamid>1330304100-51d9dc9b383110fa1f802f80220ed4e1</streamid>"
# print(get_stream_id(sidstr))
# # print(sidstr.find("<streamid>"))
# # print(sidstr.replace('<streamid>','').replace('</streamid>',''))
# # # print(sidstr[10:])

</streamid>
1330304100-51d9dc9b383110fa1f802f80220ed4e1


## Control Generate and Load a Corpus

In [None]:
class CorpusGenerator:
    def __init__(self, proj_dir, corpus_split_step=200):
        self.path_handler = FilePathHandler(proj_dir)
        # ["topics", "corpus", "nuggets", "embed_labels", "updates"]
        self.file_purposes = self.path_handler.file_purposes
        self.corpus_split_step = corpus_split_step
        self.topic_dfs = {}  # dict of topic dfs per corpus_name
        self.force_reload_options = ["topics", "corpus", "nuggets"]
        
    def generate(self, selection=None, corpus_names=None, new_corpuses=None, 
             force_reload=False, save=True, verbose=True):
        """
        Parameters:
            force_reload: force generate new dataframes if files already exist
                options: True, False or list of selection i.e. ["topics", "corpus", "nuggets"]
        """
        # interpret force_reload input
        if type(force_reload) is not list:
            if type(force_reload) is bool:
                if force_reload == True:
                    force_reload = self.force_reload_options
                else:
                    force_reload = []  # empty list, no chosen selection
        else:
            for select in force_reload:
                if select not in self.force_reload_options:
                    raise ValueError(str(select) + " is not a force_reload option")
        
        # add new corpuses to load
        if new_corpuses is not None:
            for new_corpus in new_corpuses:
                self.path_handler.add_corpus_source(new_corpus, overwrite=True)
        # get corpus paths to load from (if corpus_names is None loads all)
        self.corpus_sources = self.path_handler.get_corpus_sources(corpus_names=corpus_names)
        corpus_names = self.corpus_sources.keys()
        
        if selection is None:  # if none do all
            selection = self.file_purposes
        
        for corpus_name in corpus_names:
            print("corpus_name generate loop: " + str(corpus_name))
            if "topics" in selection:
                # create topic_df for corpus_name
                print("Generating topic_df")
                topic_reload = self.check_force_reload(corpus_name, "topics", force_reload, change_paths=True,
                                                      verbose=verbose)
                self.load_topic_df_control(corpus_name, save=save, force_reload=topic_reload, verbose=verbose,
                                          add_path=True)
                
            if "corpus" in selection:
                # create corpus df csvs
                print("Generating corpus_dfs")
                corp_reload = self.check_force_reload(corpus_name, "corpus", force_reload, change_paths=True,
                                                     verbose=verbose)
                self.corpus_splitter(corpus_name, force_reload=corp_reload, verbose=False)
                
            if "nuggets" in selection:
                print("Generating nugget files")
                nug_reload = self.check_force_reload(corpus_name, "nuggets", force_reload, change_paths=True,
                                                    verbose=verbose)
                self.nuggets_generator(corpus_name, force_reload=nug_reload, verbose=verbose)
        print("Finished generating files")
        
    def check_force_reload(self, corpus_name, select, force_reload, change_paths=True, inst_identifier=None,
                          verbose=True):
        if select in force_reload:
            if change_paths:
                self.change_force_reload_paths(corpus_name, select, inst_identifier=inst_identifier)
                if verbose:
                    print("Changed paths for " + str(select) + " in " + str(corpus_name))
            return True
        else:
            return False
        
    def change_force_reload_paths(self, corpus_name, select, inst_identifier=None):
        # change path df paths to not exists if force_reload
        targ = self.path_handler.path_df
        targ = targ[targ['corpus_name'] == corpus_name]
        targ = targ[targ['file_purpose'] == select]
        if inst_identifier is not None:
            targ = targ[targ['instance_identifier'] == inst_identifier]
        targ['exists'] = False  # set value for all items
        self.path_handler.save_path_df()
                    
            
    def nuggets_generator(self, corpus_name, force_reload=False, verbose=True):
        # if not exists load topics
        self.load_topic_df_control(corpus_name, force_reload=force_reload, verbose=verbose)
        
        # this is used for the filename of the resulting saved file
        base_identifier = "nuggets"
        
        topic_df = self.topic_dfs[corpus_name]
        topic_ids = list(topic_df['id'].unique())
        # create nuggets_df per topic
        for topic_id in tqdm_notebook(topic_ids):
            # get paths for corpus files for this topic_id (inelegantly)
            print("nuggets_generator topic_id: " + str(topic_id))
            corp_paths = self.path_handler.path_df
            corp_paths = corp_paths[corp_paths['corpus_name'] == corpus_name]
            corp_paths = corp_paths[corp_paths['file_purpose'] == 'corpus']
            inst_idents = corp_paths['instance_identifier'].unique()
            corp_paths = corp_paths[corp_paths['instance_identifier'] == str(topic_id)]  # paths for this topic
            corp_paths = corp_paths[corp_paths['exists'] == True]
            
            # check that all paths are loaded
            try:
                num_split = int(corp_paths.iloc[0]['num_splits'])
                if num_split < len(corp_paths):
                    raise IndexError()
            except IndexError:
                if verbose:
                    print(display(corp_paths))
                warnings.warn("Corpus files for topic " + str(topic_id) + " have not been fully loaded")
                continue
                
            corp_paths = list(corp_paths['path'])  # transform into actual list of paths
            
            # load corpus for a given topic
            corpus_df = load_corpus(corp_paths, save=False,
                                    force_reload=False, verbose=False, path_handler=self.path_handler)
            
            # nugget save destination
            save_path = self.path_handler.get_path(corpus_name, "nuggets", base_identifier, ".csv.gz",
                                        split_identifier=str(topic_id), num_splits=len(topic_ids), add_path=True)
            
            nuggets_tsv_path = self.corpus_sources[corpus_name]['nuggets_path']
            matches_tsv_path = self.corpus_sources[corpus_name]['matches_path']
            # generate nugget file
            load_nugget_df(save_path, corpus_df, topic_ids=[topic_id], matches_tsv_path=matches_tsv_path, 
                            nuggets_tsv_path=nuggets_tsv_path, save=True, force_reload=force_reload, 
                           verbose=verbose, path_handler=self.path_handler, spacy_if_not_found=True)
            
            
                    
    def corpus_splitter(self, corpus_name, force_reload=False, verbose=True):
        # split by topic and then every 200 html gz files, then parse together in loading
        # add check for what's been done already (i.e. check current topics, if all splits taken place)
        
        # if not exists load topics
        self.load_topic_df_control(corpus_name, force_reload=force_reload, verbose=verbose)
        topic_df = self.topic_dfs[corpus_name]
        
        corpus_dir = self.corpus_sources[corpus_name]["dir_path"]
        if verbose:
            print("corpus_dir:" + str(corpus_dir))
            
        for topic_id in topic_df['id'].unique():
            # confirm dir exists
            t_dir = corpus_dir + '/' + str(topic_id)
            if not file_exists(t_dir):
                warnings.warn("Corpus loading path at " + t_dir + " does not exist. Removing from topic_df")
                topic_df = self.remove_topic_and_save(corpus_name, topic_df, topic_id, verbose=verbose)
                continue
            
            # check if path exists
            p_df = self.path_handler.path_df
            p_df = p_df[p_df['corpus_name'] == corpus_name]
            p_df = p_df[p_df['file_purpose'] == "corpus"]
            t_df_paths = p_df[p_df['instance_identifier'] == str(topic_id)]
            t_df_paths = t_df_paths[t_df_paths['exists'] == True]  # only concerned with created files
            start_split = 0
            num_splits = 0
            if len(t_df_paths) == 0 or force_reload:  # not yet processed
                start_split = 0
            else:
                # check if all splits been processed
                num_splits = list(t_df_paths['num_splits'])[0]  # ensure same num_splits is inputted into path_df
                if len(t_df_paths) < num_splits:  # not counting from zero
                    break
                # get start point if partway through
                start_split = max(list(map(int, list(t_df_paths['split_identifier']))))
            
            num_files = len(search_dir(t_dir))
            if len(num_files) == 0:
                warnings.warn("No files found in directory " + str(t_dir) + ". Removing " + str(topic_id)
                             + " from topic_df")
                topic_df = self.remove_topic_and_save(corpus_name, topic_df, topic_id, verbose=verbose)
            
            # create split indexes to feed to load_corpus
            splits = [start_split]
            add = splits[-1] + self.corpus_split_step
            while add < num_files:
                splits.append(int(add))
                add = splits[-1] + self.corpus_split_step
            
            if start_split == 0:  
                num_splits = int(len(splits))  # for inputting into path_df
            
            if verbose:
                print("creating corpus df for topic " + str(topic_id) + " starting at file no. " 
                      + str(split_start_doc) + " of " + str(num_files) + " splitting every " 
                      + str(split_every) + " files")
            # create corpus_df files
            for split_num in splits:
                # get save path
                save_path = self.path_handler.get_path(corpus_name, "corpus", str(topic_id), ".csv.gz",
                                        split_identifier=str(split_num), num_splits=num_splits, add_path=True)
                
                load_corpus(save_path, corpus_dir=corpus_dir, topic_ids=[topic_id], 
                            split_every=self.corpus_split_step, split_start_doc=split_num, 
                            save=True, force_reload=force_reload, 
                            verbose=verbose, path_handler=self.path_handler)
    
    def remove_topic_and_save(self, corpus_name, topic_df, topic_id, verbose=True):
        # remove topic_id from topic_df and save
        topic_df = topic_df[topic_df['id'] != topic_id]
        path = self.path_handler.path_df
        path = path[(path['corpus_name'] == corpus_name) & (path['file_purpose'] == "topics")]['path']
        path = list(path)[0]
        topic_df.to_csv(path, compression='gzip')
        if verbose:
            print(str(topic_id) + " remoed from topic_df and saved to " + str(path))
        return topic_df
                
    def load_topic_df_control(self, corpus_name, save=True, force_reload=False, verbose=True, add_path=False):
        if self.topic_dfs is None:
            self.topic_dfs = {}
        if corpus_name not in self.topic_dfs:
            self.topic_dfs[corpus_name] = self.load_topic_df(corpus_name, save=save, force_reload=force_reload,
                                                             verbose=verbose, add_path=add_path)
                

    def load_topic_df(self, corpus_name, save=True, force_reload=False, verbose=True, add_path=False):
        load_path = self.corpus_sources[corpus_name]["topics_path"]
        save_path = self.path_handler.get_path(corpus_name, "topics", "topics_df", ".csv.gz", add_path=add_path)
        
        topic_df = load_topics(save_path, load_path=load_path, save=save, force_reload=force_reload, 
                               verbose=verbose, path_handler=self.path_handler)
        return topic_df

In [329]:
proj_dir = '/nfs/proj-repo/AAARG-dissertation'
orig_tr14_filtered_dict = { "corpus_name":"original-trects-kba2014-filtered",
                        "dir_path":"/nfs/original-trects-kba2014-filtered", 
                      "topics_path":"/nfs/original-trects-kba2014-filtered/test-topics.xml", 
                      "nuggets_path":"/nfs/TemporalSummarization/ts13/results/nuggets.tsv",
                        "matches_path":"/nfs/TemporalSummarization/ts13/results/matches.tsv"}

mine_tr14_filtered_dict = {"corpus_name":"mine-trects-kba2014-filtered",
                          "dir_path":"/nfs/mine-trects-kba2014-filtered",
                          "topics_path":["/nfs/TemporalSummarization/ts13/test-topics.xml",
                                        "/nfs/TemporalSummarization/ts14/trec2014-ts-topics-test.xml",
                                        "/nfs/TemporalSummarization/ts15/trec2015-ts-topics-test.xml"],
                          "nuggets_path":["/nfs/TemporalSummarization/ts13/results/nuggets.tsv",
                                         "/nfs/TemporalSummarization/ts14/results/nuggets.tsv",
                                         "/nfs/TemporalSummarization/ts15/results/nuggets.tsv"],
                          "matches_path":["/nfs/TemporalSummarization/ts13/results/matches.tsv",
                                         "/nfs/TemporalSummarization/ts14/results/matches.tsv",
                                         "/nfs/TemporalSummarization/ts15/results/matches.tsv"]}

# tr14_filtered_mine = {"corpus_name":"tr14_filtered_mine",
#                      "dir_path":"/nfs/trects-kba2014-filtered-mine",
#                      "topics_path":}

# nlp = spacy.load("en_core_web_sm")
corp_gen = CorpusGenerator(proj_dir)

force_reload = ["nuggets"]
# force_reload = False
selection = ["nuggets"]
corpus_names = ["mine-trects-kba2014-filtered"]

corp_gen.generate(new_corpuses=[orig_tr14_filtered_dict, mine_tr14_filtered_dict], corpus_names=corpus_names, 
                  force_reload=force_reload, verbose=True, selection=selection)
p_han = FilePathHandler(proj_dir)
print(display(p_han.path_df[0:10]))

corpus_name generate loop: mine-trects-kba2014-filtered
Generating nugget files
Changed paths for nuggets in mine-trects-kba2014-filtered
Loading topics
0 duplicate documents removed from topics df
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/topics/topics_df.csv.gz


Unnamed: 0,id,title,description,start,end,query,type
0,1,2012 Buenos Aires Rail Disaster,http://en.wikipedia.org/wiki/2012_Buenos_Aires...,1329910380,1330774380,buenos aires train crash,accident
1,2,2012 Pakistan garment factory fires,http://en.wikipedia.org/wiki/2012_Pakistan_gar...,1347368400,1348232400,pakistan factory fire,accident
2,3,2012 Aurora shooting,http://en.wikipedia.org/wiki/2012_Aurora_shooting,1342766280,1343630280,colorado shooting,shooting
3,4,Wisconsin Sikh temple shooting,http://en.wikipedia.org/wiki/Wisconsin_Sikh_te...,1344180300,1345044300,sikh temple shooting,shooting


None


HBox(children=(IntProgress(value=0, max=46), HTML(value='')))

nuggets_generator topic_id: 1
Loading nugget_df


  0%|          | 0/1172 [00:00<?, ?it/s]

Nugget entries were generated for 1160 nuggets. There were 12 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 1160 streamids had out of bounds sent_ids
nugget_df entries: 1160
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_1.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,1,35,53,1,2,676+ injuries,"Buenos Aires Train Crash Kills 49, Injuries ov...",1,a361ee100c9b058a0f0f4355cec64047,1330003205-a361ee100c9b058a0f0f4355cec64047,1330003205
1,1,50,74,3,6,"train accident in Buenos Aires, Argentina.","""The train was full and the impact was 49 dead...",3,06251bb5df849f4e5efe4245acb1c342,1329996913-06251bb5df849f4e5efe4245acb1c342,1329996913
2,1,50,101,3,6,"train accident in Buenos Aires, Argentina.",Firemen rescue wounded passengers from a commu...,2,a8bb0847959520d7f32b2b12d486e33e,1329996320-a8bb0847959520d7f32b2b12d486e33e,1329996320
3,1,0,8,1,3,49 confirmed deaths,"49 dead, hundreds injured in Buenos Aires trai...",1,a8bb0847959520d7f32b2b12d486e33e,1329996320-a8bb0847959520d7f32b2b12d486e33e,1329996320


None
nuggets_generator topic_id: 2


  0%|          | 0/836 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/836 [00:00<?, ?it/s]

Nugget entries were generated for 112 nuggets. There were 724 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 112 streamids had out of bounds sent_ids
nugget_df entries: 112
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_2.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,2,30,72,3,7,Pakistan garment factory fires 11 September 2012,The fire in Karachi was one of two deadly blaz...,3,176fb48ae343811f53922a9312d34f55,1347480009-176fb48ae343811f53922a9312d34f55,1347480009
1,2,0,20,3,1,Karachi,The fire in Karachi was one of two deadly blaz...,3,176fb48ae343811f53922a9312d34f55,1347480009-176fb48ae343811f53922a9312d34f55,1347480009
2,2,0,45,3,3,Total killed 315,Deaths in factory fires in Pakistan up to 314 ...,33,0474a30204e831c3dcc81ba93b7d8193,1347489318-0474a30204e831c3dcc81ba93b7d8193,1347489318
3,2,55,109,3,3,Total killed 315,Pakistan factory fires death toll hits 2 The n...,31,d7774dc37afe6c977309ed94977599f3,1347483745-d7774dc37afe6c977309ed94977599f3,1347483745


  0%|          | 0/271 [00:00<?, ?it/s]

None
nuggets_generator topic_id: 3
Loading nugget_df


  0%|          | 0/271 [00:00<?, ?it/s]

Nugget entries were generated for 33 nuggets. There were 238 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 33 streamids had out of bounds sent_ids
nugget_df entries: 33
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_3.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,3,71,190,1,36,"The victims include: Veronica Moser (6), Matt ...","The Aurora Sentinel is updating as they can, b...",10,9a1a165564271233b3acd7e53119dd2c,1342974777-9a1a165564271233b3acd7e53119dd2c,1342974777
1,3,0,43,1,20,"Aurora Chief of Police, Daniel Oates, updated ...","FirstDude • Greenwood , Indiana • 2 mins 51 se...",41,d6d75cc5f684df23d9ba5cbe932f6b81,1342998315-d6d75cc5f684df23d9ba5cbe932f6b81,1342998315
2,3,71,174,1,36,"The victims include: Veronica Moser (6), Matt ...",College students who moved to Colorado to blaz...,4,5dfefea0666b6c9a325c562a62c8eb73,1343009373-5dfefea0666b6c9a325c562a62c8eb73,1343009373
3,3,0,1,1,22,"Among foreigner victims, three Indonesians are...",College students who moved to Colorado to blaz...,4,5dfefea0666b6c9a325c562a62c8eb73,1343009373-5dfefea0666b6c9a325c562a62c8eb73,1343009373


  0%|          | 0/730 [00:00<?, ?it/s]

None
nuggets_generator topic_id: 4
Loading nugget_df


  0%|          | 0/730 [00:00<?, ?it/s]

Nugget entries were generated for 84 nuggets. There were 646 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 84 streamids had out of bounds sent_ids
nugget_df entries: 84
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_4.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,4,154,272,1,2,Suspect's motivation-unclear.,Adam Harrington | CBS 2 Chicago WBBM -TV: Wate...,51,19f9754f77b78dcb183a76e003ac7488,1344276214-19f9754f77b78dcb183a76e003ac7488,1344276214
1,4,0,52,3,5,Seven fatalities including shooter,"At least seven people are dead, including the ...",62,19f9754f77b78dcb183a76e003ac7488,1344276214-19f9754f77b78dcb183a76e003ac7488,1344276214
2,4,35,88,3,4,'Wisconsin Sikh Temple shooting,Scared Monkeys | The Tipping Point Mass Shooti...,76,19f9754f77b78dcb183a76e003ac7488,1344276214-19f9754f77b78dcb183a76e003ac7488,1344276214
3,4,35,88,1,6,"Shooting occurs in Oak Creek, Wisconsin",Scared Monkeys | The Tipping Point Mass Shooti...,76,19f9754f77b78dcb183a76e003ac7488,1344276214-19f9754f77b78dcb183a76e003ac7488,1344276214


  0%|          | 0/109 [00:00<?, ?it/s]

None
nuggets_generator topic_id: 5
Loading nugget_df


  0%|          | 0/109 [00:00<?, ?it/s]

Nugget entries were generated for 29 nuggets. There were 80 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 29 streamids had out of bounds sent_ids
nugget_df entries: 29
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_5.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,5,26,85,3,6,Hurricane Isaac - catagory one hurricane,National Hurricane Center predicted Isaac woul...,7,753845f8ad64dd620e5019ff39e9310c,1346181303-753845f8ad64dd620e5019ff39e9310c,1346181303
1,5,59473,59526,2,13,The president signed an emergency declaration ...,ISAAC ON SAME PATH AS KATRINA. 7th ANNIVERSARY...,142,ecec234e3610a929c59042e225140664,1346174526-ecec234e3610a929c59042e225140664,1346174526
2,5,59644,59716,2,11,President Obama ordered federal aid to Louisia...,ISAAC ON SAME PATH AS KATRINA. 7th ANNIVERSARY...,142,ecec234e3610a929c59042e225140664,1346174526-ecec234e3610a929c59042e225140664,1346174526
3,5,55920,55995,2,20,Approx 78% of the Gulf's crude oil production ...,ISAAC ON SAME PATH AS KATRINA. 7th ANNIVERSARY...,142,ecec234e3610a929c59042e225140664,1346174526-ecec234e3610a929c59042e225140664,1346174526


None
nuggets_generator topic_id: 6
Loading nugget_df


  0%|          | 0/760 [00:00<?, ?it/s]

Nugget entries were generated for 229 nuggets. There were 528 found in matches.tsv but not in corpus
There were 3 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 229 streamids had out of bounds sent_ids
nugget_df entries: 229
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_6.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,6,130,161,2,5,3 killed in Haiti,"An estimated 15,000 lose their lives. Oct. 25,...",11,fbee3f4f14962fbfd4448a4f6bf14f8e,1351805764-fbee3f4f14962fbfd4448a4f6bf14f8e,1351805764
1,6,53,124,2,4,One killed in Jamaica,"An estimated 15,000 lose their lives. Oct. 25,...",11,fbee3f4f14962fbfd4448a4f6bf14f8e,1351805764-fbee3f4f14962fbfd4448a4f6bf14f8e,1351805764
2,6,0,51,2,7,"Haiti, at least 51 people have died",Hurricane Sandy killed at least 52 people in H...,15,bd3ded1f9fe6ae3a18c100b94472cc24,1351800696-bd3ded1f9fe6ae3a18c100b94472cc24,1351800696
3,6,0,78,2,6,"Haiti estimated 200,000 are left homeless",Hurricane Sandy killed at least 52 people in H...,15,bd3ded1f9fe6ae3a18c100b94472cc24,1351800696-bd3ded1f9fe6ae3a18c100b94472cc24,1351800696


None
nuggets_generator topic_id: 7


Unnamed: 0,corpus_name,file_purpose,split_identifier,num_splits,instance_identifier,file_type,path,exists


None
nuggets_generator topic_id: 8
Loading nugget_df


  0%|          | 0/245 [00:00<?, ?it/s]

Nugget entries were generated for 240 nuggets. There were 5 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 240 streamids had out of bounds sent_ids
nugget_df entries: 240
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_8.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,8,0,40,3,19,"On 8 December, forecasters said the storm had ...",Typhoon Bopha returns to the Philippines .,2,22ed6799f7b89147d4711dd3b4626099,1355025957-22ed6799f7b89147d4711dd3b4626099,1355025957
1,8,434,498,3,16,"As of December 10, the storm had caused crop d...",Weakened typhoon set to make second landfall i...,1,188094a5273b5086a65e11f3a6189ff8,1355032068-188094a5273b5086a65e11f3a6189ff8,1355032068
2,8,844,874,3,25,The death toll from the typhoon reached 902 an...,Weakened typhoon set to make second landfall i...,1,188094a5273b5086a65e11f3a6189ff8,1355032068-188094a5273b5086a65e11f3a6189ff8,1355032068
3,8,0,82,3,25,The death toll from the typhoon reached 902 an...,Authorities feared the number of fatalities fr...,2,b18b53383cd7a55a6c61929f2bdca0b0,1355033632-b18b53383cd7a55a6c61929f2bdca0b0,1355033632


None
nuggets_generator topic_id: 9
Loading nugget_df


  0%|          | 0/361 [00:00<?, ?it/s]

Nugget entries were generated for 361 nuggets. There were 0 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 361 streamids had out of bounds sent_ids
nugget_df entries: 361
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_9.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,9,173,224,1,7,"epicenter was located in the Pacific Ocean,",CBS has this report: “The quake shook building...,4,216e533c16cbb355156925963edc3c65,1352355817-216e533c16cbb355156925963edc3c65,1352355817
1,9,88,114,1,5,telephone and internet services interrupted,"PST , November 7, 2012 MEXICO CITY — A deadly ...",5,94ce35166d6a1cac7897a69ad891658c,1352353329-94ce35166d6a1cac7897a69ad891658c,1352353329
2,9,184,201,2,6,many are missing after quake,"PST , November 7, 2012 MEXICO CITY — A deadly ...",5,94ce35166d6a1cac7897a69ad891658c,1352353329-94ce35166d6a1cac7897a69ad891658c,1352353329
3,9,151,177,3,5,39 casualties reported in Guatamala,"PST , November 7, 2012 MEXICO CITY — A deadly ...",5,94ce35166d6a1cac7897a69ad891658c,1352353329-94ce35166d6a1cac7897a69ad891658c,1352353329


None
nuggets_generator topic_id: 10


  0%|          | 0/366 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/366 [00:00<?, ?it/s]

Nugget entries were generated for 350 nuggets. There were 2 found in matches.tsv but not in corpus
There were 14 nugget_ids found in matches.tsv but not in nuggets.tsv
0 out of 350 streamids had out of bounds sent_ids
nugget_df entries: 350
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_10.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,10,953,984,3,4,Tel Aviv bus bombing,Israel arrests suspects in Tel Aviv bus bombin...,1,0c46b448c2b5a925980a36ad05703040,1353628046-0c46b448c2b5a925980a36ad05703040,1353628046
1,10,1395,1467,2,13,The White House called the bombing a terrorist...,Israel arrests suspects in Tel Aviv bus bombin...,1,0c46b448c2b5a925980a36ad05703040,1353628046-0c46b448c2b5a925980a36ad05703040,1353628046
2,10,1009,1072,1,9,21 wounded in terror attack on Tel Aviv bus,Israel arrests suspects in Tel Aviv bus bombin...,1,0c46b448c2b5a925980a36ad05703040,1353628046-0c46b448c2b5a925980a36ad05703040,1353628046
3,10,168,215,3,3,Terror suspects arrested,Israel arrests suspects in Tel Aviv bus bombin...,1,0c46b448c2b5a925980a36ad05703040,1353628046-0c46b448c2b5a925980a36ad05703040,1353628046


None
nuggets_generator topic_id: 11
Loading nugget_df


  0%|          | 0/712 [00:02<?, ?it/s]

Nugget entries were generated for 466 nuggets. There were 221 found in matches.tsv but not in corpus
There were 25 nugget_ids found in matches.tsv but not in nuggets.tsv
5 out of 466 streamids had out of bounds sent_ids
nugget_df entries: 466
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_11.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,11.0,0.0,80.0,3.0,53.0,"3,206 passengers and 1,023 crew members were o...","Most of the 3,200 passengers and 1,023 crew on...",59.0,8b5e243c365d9b0edccaba22a834a9c6,1326502200-8b5e243c365d9b0edccaba22a834a9c6,1326502000.0
1,11.0,71.0,152.0,1.0,32.0,Rescued passengers huddle ashore,"Most of the 3,200 passengers and 1,023 crew on...",59.0,8b5e243c365d9b0edccaba22a834a9c6,1326502200-8b5e243c365d9b0edccaba22a834a9c6,1326502000.0
2,11.0,49.0,105.0,1.0,177.0,shifted the centre of gravity to the starboard...,A statement from the Italian coastguard said t...,30.0,b4eda8d2ece6f98685ef70b841bf30be,1326502560-b4eda8d2ece6f98685ef70b841bf30be,1326503000.0
3,11.0,0.0,133.0,3.0,125.0,"Friday 13 January 2012, the ''Costa Concordia'...",The Costa Concordia was on a trip around the M...,158.0,433f8576b39b614c312c77c77f739ee3,1326506940-433f8576b39b614c312c77c77f739ee3,1326507000.0


None
nuggets_generator topic_id: 12


  0%|          | 0/559 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 557 nuggets. There were 0 found in matches.tsv but not in corpus
There were 2 nugget_ids found in matches.tsv but not in nuggets.tsv
13 out of 557 streamids had out of bounds sent_ids
nugget_df entries: 557
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_12.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,12.0,3.0,9.0,2.0,6.0,France,"In France , freezing weather was also forecast...",88.0,9aacc4b859165b551d575740a9914bb1,1327696800-9aacc4b859165b551d575740a9914bb1,1327697000.0
1,12.0,12.0,28.0,2.0,21.0,freezing temperatures,"In France , freezing weather was also forecast...",88.0,9aacc4b859165b551d575740a9914bb1,1327696800-9aacc4b859165b551d575740a9914bb1,1327697000.0
2,12.0,101.0,113.0,3.0,9.0,cold wave,"In France , freezing weather was also forecast...",88.0,9aacc4b859165b551d575740a9914bb1,1327696800-9aacc4b859165b551d575740a9914bb1,1327697000.0
3,12.0,26.0,97.0,2.0,16.0,many people dead,Forecasters say this week will be the coldest ...,118.0,653a5605d2320814cddc1ce79ad6dcfc,1327872780-653a5605d2320814cddc1ce79ad6dcfc,1327873000.0


None
nuggets_generator topic_id: 13


  0%|          | 0/969 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 969 nuggets. There were 0 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
9 out of 969 streamids had out of bounds sent_ids
nugget_df entries: 969
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_13.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,13.0,56.0,64.0,3.0,8.0,Brisbane,\nFloods paralyze Indonesian capital ; at leas...,,42d73965e0c2a17dab548fc2ebeec124,1358411862-42d73965e0c2a17dab548fc2ebeec124,1358412000.0
1,13.0,15.0,21.0,3.0,6.0,floods,\nFloods paralyze Indonesian capital ; at leas...,,42d73965e0c2a17dab548fc2ebeec124,1358411862-42d73965e0c2a17dab548fc2ebeec124,1358412000.0
2,13.0,30.0,39.0,3.0,9.0,Australia,\nFloods paralyze Indonesian capital ; at leas...,,42d73965e0c2a17dab548fc2ebeec124,1358411862-42d73965e0c2a17dab548fc2ebeec124,1358412000.0
3,13.0,56.0,64.0,3.0,8.0,Brisbane,Fresh video of flood chaos in Australia as wat...,255.0,42d73965e0c2a17dab548fc2ebeec124,1358411862-42d73965e0c2a17dab548fc2ebeec124,1358412000.0


None
nuggets_generator topic_id: 14


  0%|          | 0/541 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/541 [00:05<?, ?it/s]

Nugget entries were generated for 501 nuggets. There were 40 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
14 out of 501 streamids had out of bounds sent_ids
nugget_df entries: 501
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_14.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,14.0,0.0,178.0,3.0,92.0,There were two booms heard from near the finis...,About three hours after the winners crossed th...,118.0,081bf4425236dd9fa4bd311149ab4382,1366055539-081bf4425236dd9fa4bd311149ab4382,1366056000.0
1,14.0,0.0,46.0,3.0,88.0,Authorities are investigating a report of two ...,I do n't know what it was .,40.0,638a0a9884a3da3ba7a62552bb976423,1366055661-638a0a9884a3da3ba7a62552bb976423,1366056000.0
2,14.0,9.0,225.0,3.0,88.0,Authorities are investigating a report of two ...,BOSTON - Two explosions shattered the finish o...,172.0,5d6d46ffaf959610d3a2167f5b8e8320,1366056669-5d6d46ffaf959610d3a2167f5b8e8320,1366057000.0
3,14.0,0.0,225.0,3.0,92.0,There were two booms heard from near the finis...,Two explosions shattered the euphoria of the B...,203.0,5d6d46ffaf959610d3a2167f5b8e8320,1366056669-5d6d46ffaf959610d3a2167f5b8e8320,1366057000.0


None
nuggets_generator topic_id: 15
Loading nugget_df


  0%|          | 0/629 [00:00<?, ?it/s]

Nugget entries were generated for 629 nuggets. There were 0 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
8 out of 629 streamids had out of bounds sent_ids
nugget_df entries: 629
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_15.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,15.0,40.0,64.0,2.0,58.0,deadly riots between fans after a Port Said fo...,More than 20 people have been killed in footba...,47.0,37f4da4a9ae7f016224688b0adb75601,1328123760-37f4da4a9ae7f016224688b0adb75601,1328124000.0
1,15.0,0.0,36.0,3.0,12.0,scores dead,More than 20 people have been killed in footba...,47.0,37f4da4a9ae7f016224688b0adb75601,1328123760-37f4da4a9ae7f016224688b0adb75601,1328124000.0
2,15.0,12.0,36.0,2.0,58.0,deadly riots between fans after a Port Said fo...,35 killed in Egypt football violence,2.0,e671f9481c8a0830b8f24a501331d965,1328124600-e671f9481c8a0830b8f24a501331d965,1328125000.0
3,15.0,0.0,9.0,3.0,12.0,scores dead,35 killed in Egypt football violence,2.0,e671f9481c8a0830b8f24a501331d965,1328124600-e671f9481c8a0830b8f24a501331d965,1328125000.0


None
nuggets_generator topic_id: 16
Loading nugget_df


  0%|          | 0/1392 [00:00<?, ?it/s]

Nugget entries were generated for 1377 nuggets. There were 14 found in matches.tsv but not in corpus
There were 1 nugget_ids found in matches.tsv but not in nuggets.tsv
37 out of 1377 streamids had out of bounds sent_ids
nugget_df entries: 1377
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_16.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,16.0,7.0,38.0,3.0,46.0,Taliban claimed responsibility for the attack,\nTalk of peace stirs up,,485cf8540d88564859e5d5065a5addd7,1329798360-485cf8540d88564859e5d5065a5addd7,1329798000.0
1,16.0,7.0,14.0,3.0,7.0,Taliban,\nTalk of peace stirs up,,485cf8540d88564859e5d5065a5addd7,1329798360-485cf8540d88564859e5d5065a5addd7,1329798000.0
2,16.0,8.0,63.0,3.0,25.0,Afghanistan protests 2012,Similar protests have in the past turned viole...,28.0,fcd88c657ca963ece0e939f51c97e154,1329810600-fcd88c657ca963ece0e939f51c97e154,1329811000.0
3,16.0,8.0,16.0,3.0,11.0,Many deaths,Similar protests have in the past turned viole...,28.0,fcd88c657ca963ece0e939f51c97e154,1329810600-fcd88c657ca963ece0e939f51c97e154,1329811000.0


None
nuggets_generator topic_id: 17


  0%|          | 0/1879 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 1847 nuggets. There were 32 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
89 out of 1847 streamids had out of bounds sent_ids
nugget_df entries: 1847
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_17.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,17.0,34.0,89.0,1.0,39.0,Islamist militants attacked a gas plant,The Algerian news agency APS said Islamist mil...,103.0,ebc6b6151c055ee0d627f6ea6eb36219,1358339847-ebc6b6151c055ee0d627f6ea6eb36219,1358340000.0
1,17.0,108.0,166.0,1.0,86.0,"in far eastern Algeria, in the Sahara Desert a...",The Algerian news agency APS said Islamist mil...,103.0,ebc6b6151c055ee0d627f6ea6eb36219,1358339847-ebc6b6151c055ee0d627f6ea6eb36219,1358340000.0
2,17.0,173.0,249.0,1.0,103.0,BP operates the gas field jointly with Algeria...,The Algerian news agency APS said Islamist mil...,103.0,ebc6b6151c055ee0d627f6ea6eb36219,1358339847-ebc6b6151c055ee0d627f6ea6eb36219,1358340000.0
3,17.0,34.0,45.0,1.0,25.0,Wednesday 16 January 2013,The incident had begun before dawn Wednesday a...,109.0,ebc6b6151c055ee0d627f6ea6eb36219,1358339847-ebc6b6151c055ee0d627f6ea6eb36219,1358340000.0


None
nuggets_generator topic_id: 18
Loading nugget_df


  0%|          | 0/674 [00:00<?, ?it/s]

Nugget entries were generated for 673 nuggets. There were 1 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
17 out of 673 streamids had out of bounds sent_ids
nugget_df entries: 673
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_18.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,18.0,56.0,148.0,1.0,92.0,"Election irregularities include vote fraud, ob...",`` The pressure on Golos and its leaders -LRB-...,68.0,52aa6675ad83cb453171075246fe3c53,1322957340-52aa6675ad83cb453171075246fe3c53,1322957000.0
1,18.0,0.0,99.0,1.0,24.0,Protests began in Moscow,Moscow neighbourhoods were dotted with United ...,70.0,52aa6675ad83cb453171075246fe3c53,1322957340-52aa6675ad83cb453171075246fe3c53,1322957000.0
2,18.0,0.0,61.0,2.0,41.0,"Putin won election, (United Russia Party)",Putin won his third presidential term in Sunda...,45.0,64cdc74f2b792a926480b2721ee963cf,1322963580-64cdc74f2b792a926480b2721ee963cf,1322964000.0
3,18.0,0.0,61.0,2.0,41.0,United Russia lost about 20% of its seats,Putin won his third presidential term in Sunda...,45.0,64cdc74f2b792a926480b2721ee963cf,1322963580-64cdc74f2b792a926480b2721ee963cf,1322964000.0


None
nuggets_generator topic_id: 19
Loading nugget_df


  0%|          | 0/541 [00:04<?, ?it/s]

Nugget entries were generated for 530 nuggets. There were 11 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
24 out of 530 streamids had out of bounds sent_ids
nugget_df entries: 530
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_19.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,19.0,17.0,58.0,2.0,42.0,caused by general discontent with policies,Romania Protests Bring Long - Brewing Anger to...,47.0,7da8766ce9363f85fa1aa2dc1738a28f,1326485220-7da8766ce9363f85fa1aa2dc1738a28f,1326485000.0
1,19.0,108.0,118.0,1.0,9.0,Timișoara,"Over the past three days , protesters held dem...",51.0,ae4418db5825d047d400caa51011b75f,1326593040-ae4418db5825d047d400caa51011b75f,1326593000.0
2,19.0,101.0,106.0,1.0,11.0,Cluj-Napoca,"Over the past three days , protesters held dem...",51.0,ae4418db5825d047d400caa51011b75f,1326593040-ae4418db5825d047d400caa51011b75f,1326593000.0
3,19.0,12.0,52.0,3.0,31.0,protests against health reforms,Big protest against Romania cuts Romanian heal...,36.0,23c03e5146a082aa43661443ba8e07ab,1326594600-23c03e5146a082aa43661443ba8e07ab,1326595000.0


None
nuggets_generator topic_id: 20


  0%|          | 0/682 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 682 nuggets. There were 0 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
30 out of 682 streamids had out of bounds sent_ids
nugget_df entries: 682
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_20.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,20.0,0.0,102.0,1.0,28.0,Muslim Brotherhood pro-Morsi,"The leadership of the Muslim Brotherhood , Mor...",122.0,f73714fbe8e2d23a9959bd6332217c97,1353233193-f73714fbe8e2d23a9959bd6332217c97,1353233000.0
1,20.0,33.0,112.0,1.0,28.0,Muslim Brotherhood pro-Morsi,"Without an economic resurgence , the Muslim Br...",134.0,f73714fbe8e2d23a9959bd6332217c97,1353233193-f73714fbe8e2d23a9959bd6332217c97,1353233000.0
2,20.0,47.0,93.0,1.0,79.0,demonstrations against Egyptian president Moha...,"In Islamist -led Egypt , Coptic Christians ......",93.0,4df2b8fe53bba45c76cabf99e34aa309,1353247080-4df2b8fe53bba45c76cabf99e34aa309,1353247000.0
3,20.0,69.0,106.0,1.0,51.0,anti-Morsi demonstrators clash w pro-Morsi in ...,It took days before an angry phone call from P...,257.0,1b6bd28ad01f26e366b19e9a387c9f6d,1353274998-1b6bd28ad01f26e366b19e9a387c9f6d,1353275000.0


None
nuggets_generator topic_id: 21


  0%|          | 0/1872 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 1872 nuggets. There were 0 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
47 out of 1872 streamids had out of bounds sent_ids
nugget_df entries: 1872
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_21.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,21.0,0.0,27.0,2.0,68.0,coincided with the close approach to earth of ...,asteroid 2012 DA14 asteroid impact asteroid fl...,36.0,87447f68d57ceacc74a8901da19facd3,1360900704-87447f68d57ceacc74a8901da19facd3,1360901000.0
1,21.0,152.0,170.0,2.0,58.0,shattered window glass result of explosion an...,What such an impact would do would create a bl...,45.0,87447f68d57ceacc74a8901da19facd3,1360900704-87447f68d57ceacc74a8901da19facd3,1360901000.0
2,21.0,0.0,51.0,2.0,39.0,blast capacity of - 0.1 to 10 kilotons,It 's blast was estimated in the 3-20 megaton ...,49.0,87447f68d57ceacc74a8901da19facd3,1360900704-87447f68d57ceacc74a8901da19facd3,1360901000.0
3,21.0,0.0,102.0,3.0,102.0,Chelyabinsk meteor is the largest object to hi...,"The asteroid will , however , set a record for...",60.0,87447f68d57ceacc74a8901da19facd3,1360900704-87447f68d57ceacc74a8901da19facd3,1360901000.0


None
nuggets_generator topic_id: 22


  0%|          | 0/362 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 350 nuggets. There were 12 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
8 out of 350 streamids had out of bounds sent_ids
nugget_df entries: 350
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_22.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,22.0,0.0,30.0,3.0,35.0,protests against high utility costs,Bulgarians took to the streets in over 10 citi...,68.0,e4b956c9af8fae3c6e0fbb5c63f519ba,1360503654-e4b956c9af8fae3c6e0fbb5c63f519ba,1360504000.0
1,22.0,121.0,205.0,2.0,44.0,caused by abnormally high electricity prices,Bulgarians took to the streets in over 10 citi...,68.0,e4b956c9af8fae3c6e0fbb5c63f519ba,1360503654-e4b956c9af8fae3c6e0fbb5c63f519ba,1360504000.0
2,22.0,0.0,23.0,1.0,51.0,two EVN utility vehicles were set ablaze in Pl...,"Two EVN cars were burnt overnight , the compan...",71.0,e4b956c9af8fae3c6e0fbb5c63f519ba,1360503654-e4b956c9af8fae3c6e0fbb5c63f519ba,1360504000.0
3,22.0,55.0,119.0,1.0,134.0,Demonstrators in Sofia gathered in front of th...,Suspect Emerges in Bulgaria 's Latest Mobster ...,45.0,86e8d38f2f4e779f0188827fe7a64085,1360567482-86e8d38f2f4e779f0188827fe7a64085,1360567000.0


None
nuggets_generator topic_id: 23


  0%|          | 0/483 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/483 [00:03<?, ?it/s]

Nugget entries were generated for 483 nuggets. There were 0 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
10 out of 483 streamids had out of bounds sent_ids
nugget_df entries: 483
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_23.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,23.0,0.0,62.0,3.0,142.0,The Bangladesh International Crimes Tribunal s...,Bangladesh Islamist sentenced to life in priso...,50.0,9ebba0cbe56d529930e7cab21c6b854c,1360058630-9ebba0cbe56d529930e7cab21c6b854c,1360059000.0
1,23.0,0.0,62.0,3.0,142.0,The Bangladesh International Crimes Tribunal s...,Bangladesh Islamist sentenced to life in priso...,50.0,9ebba0cbe56d529930e7cab21c6b854c,1360059070-9ebba0cbe56d529930e7cab21c6b854c,1360059000.0
2,23.0,0.0,62.0,3.0,142.0,The Bangladesh International Crimes Tribunal s...,Bangladesh Islamist sentenced to life in priso...,50.0,9ebba0cbe56d529930e7cab21c6b854c,1360059329-9ebba0cbe56d529930e7cab21c6b854c,1360059000.0
3,23.0,0.0,152.0,2.0,63.0,Bangladesh Jamaat-e-Islami''' has called for H...,Bangladesh Islamist sentenced to life in priso...,11.0,2aede2b1ed47932217a67cf0916da6ca,1360061839-2aede2b1ed47932217a67cf0916da6ca,1360062000.0


None
nuggets_generator topic_id: 24


  0%|          | 0/1462 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 1456 nuggets. There were 0 found in matches.tsv but not in corpus
There were 6 nugget_ids found in matches.tsv but not in nuggets.tsv
94 out of 1456 streamids had out of bounds sent_ids
nugget_df entries: 1456
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_24.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,24.0,14.0,30.0,1.0,21.0,"'Winter Storm ""Nemo""'",Get ready for Nor'easter Nemo | New York City ...,1.0,840b4329478f021fe2d568300672782a,1360204178-840b4329478f021fe2d568300672782a,1360204000.0
1,24.0,33.0,41.0,3.0,8.0,New York,Get ready for Nor'easter Nemo | New York City ...,1.0,840b4329478f021fe2d568300672782a,1360204178-840b4329478f021fe2d568300672782a,1360204000.0
2,24.0,3.0,16.0,3.0,24.0,Nor'easter February 2013,"The nor'easter , named Nemo , is the 14th name...",14.0,840b4329478f021fe2d568300672782a,1360204178-840b4329478f021fe2d568300672782a,1360204000.0
3,24.0,23.0,27.0,1.0,21.0,"'Winter Storm ""Nemo""'","The nor'easter , named Nemo , is the 14th name...",14.0,840b4329478f021fe2d568300672782a,1360204178-840b4329478f021fe2d568300672782a,1360204000.0


None
nuggets_generator topic_id: 25


  0%|          | 0/1099 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 1093 nuggets. There were 6 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
59 out of 1093 streamids had out of bounds sent_ids
nugget_df entries: 1093
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_25.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,25.0,141.0,177.0,3.0,44.0,Monica Quan daughter of Randy Quan shot dead,City News Service reports Irvine Police Chief ...,91.0,7023c09647141dcb785949bcde4abeb5,1360208979-7023c09647141dcb785949bcde4abeb5,1360209000.0
1,25.0,181.0,209.0,3.0,47.0,"Monica Quan boyfriend, Keith Lawrence shot dead",City News Service reports Irvine Police Chief ...,91.0,7023c09647141dcb785949bcde4abeb5,1360208979-7023c09647141dcb785949bcde4abeb5,1360209000.0
2,25.0,141.0,177.0,3.0,44.0,Monica Quan daughter of Randy Quan shot dead,City News Service reports Irvine Police Chief ...,119.0,e8f58520f507d0711631a886ac16d78c,1360209181-e8f58520f507d0711631a886ac16d78c,1360209000.0
3,25.0,181.0,209.0,3.0,47.0,"Monica Quan boyfriend, Keith Lawrence shot dead",City News Service reports Irvine Police Chief ...,119.0,e8f58520f507d0711631a886ac16d78c,1360209181-e8f58520f507d0711631a886ac16d78c,1360209000.0


None
nuggets_generator topic_id: 26


  0%|          | 0/757 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/757 [00:04<?, ?it/s]

Nugget entries were generated for 743 nuggets. There were 5 found in matches.tsv but not in corpus
There were 9 nugget_ids found in matches.tsv but not in nuggets.tsv
8 out of 743 streamids had out of bounds sent_ids
nugget_df entries: 743
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_26.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,26.0,0.0,106.0,2.0,179.0,The London Fire Bridage said that they had res...,The London Fire Brigade said it had received m...,4.0,b150346d65cbcdc6d9179923a6926883,1358325066-b150346d65cbcdc6d9179923a6926883,1358325000.0
1,26.0,0.0,63.0,2.0,72.0,Two cars might also have been involved accordi...,Transport for London has conformed there has b...,7.0,b150346d65cbcdc6d9179923a6926883,1358325066-b150346d65cbcdc6d9179923a6926883,1358325000.0
2,26.0,0.0,64.0,3.0,87.0,A AW109 helicopter crashed into a construction...,A helicopter has crashed into a crane in Vauxh...,61.0,798f28578d4274f1110e73636819794e,1358325327-798f28578d4274f1110e73636819794e,1358325000.0
3,26.0,66.0,108.0,2.0,179.0,The London Fire Bridage said that they had res...,A helicopter has crashed into a crane in Vauxh...,61.0,798f28578d4274f1110e73636819794e,1358325327-798f28578d4274f1110e73636819794e,1358325000.0


None
nuggets_generator topic_id: 27
Loading nugget_df


  0%|          | 0/726 [00:03<?, ?it/s]

Nugget entries were generated for 713 nuggets. There were 13 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
11 out of 713 streamids had out of bounds sent_ids
nugget_df entries: 713
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_27.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,27.0,0.0,93.0,1.0,181.0,movements: 500 km on south-southeast of Chenna...,The IMD suggests landfall over an area extendi...,17.0,8a70830e9a18936bb642c4ff09266811,1351529296-8a70830e9a18936bb642c4ff09266811,1351529000.0
1,27.0,7.0,156.0,1.0,181.0,movements: 500 km on south-southeast of Chenna...,"But , according to global forecasts , the cycl...",18.0,8a70830e9a18936bb642c4ff09266811,1351529296-8a70830e9a18936bb642c4ff09266811,1351529000.0
2,27.0,0.0,148.0,1.0,306.0,India Meteorological Department's Regional Spe...,An IMD warning said heavy to very heavy rainfa...,21.0,8a70830e9a18936bb642c4ff09266811,1351529296-8a70830e9a18936bb642c4ff09266811,1351529000.0
3,27.0,0.0,44.0,2.0,60.0,strong Cyclonic Storm with peak winds of 45 kn...,Squally winds with speed reaching 45-55 km / h...,22.0,8a70830e9a18936bb642c4ff09266811,1351529296-8a70830e9a18936bb642c4ff09266811,1351529000.0


None
nuggets_generator topic_id: 28
Loading nugget_df


  0%|          | 0/2976 [00:00<?, ?it/s]

Nugget entries were generated for 1785 nuggets. There were 0 found in matches.tsv but not in corpus
There were 1191 nugget_ids found in matches.tsv but not in nuggets.tsv
49 out of 1785 streamids had out of bounds sent_ids
nugget_df entries: 1785
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_28.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,28.0,0.0,24.0,2.0,17.0,Rising death toll,Many people may be dead after building collaps...,1.0,17669feb4060d66fa0aae89ac1d4b917,1366779366-17669feb4060d66fa0aae89ac1d4b917,1366779000.0
1,28.0,140.0,153.0,3.0,27.0,"On Wednesday, 24 April 2013",At least three people were killed and many mor...,32.0,17669feb4060d66fa0aae89ac1d4b917,1366779366-17669feb4060d66fa0aae89ac1d4b917,1366779000.0
2,28.0,86.0,140.0,3.0,30.0,Eight story building collapsed,At least three people were killed and many mor...,32.0,17669feb4060d66fa0aae89ac1d4b917,1366779366-17669feb4060d66fa0aae89ac1d4b917,1366779000.0
3,28.0,16.0,49.0,2.0,17.0,Rising death toll,At least three people were killed and many mor...,32.0,17669feb4060d66fa0aae89ac1d4b917,1366779366-17669feb4060d66fa0aae89ac1d4b917,1366779000.0


None
nuggets_generator topic_id: 29


  0%|          | 0/733 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 715 nuggets. There were 2 found in matches.tsv but not in corpus
There were 16 nugget_ids found in matches.tsv but not in nuggets.tsv
58 out of 715 streamids had out of bounds sent_ids
nugget_df entries: 715
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_29.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,29.0,26.0,90.0,3.0,96.0,"On 21 February 2013, Thursday evening two seri...",Hyderabad : Two bomb blast rocked the city of ...,26.0,4359a302b75669a41155b02620226f3f,1361455447-4359a302b75669a41155b02620226f3f,1361455000.0
1,29.0,169.0,224.0,3.0,50.0,The simultaneous blastsoccurred near the bus s...,Hyderabad : Two bomb blast rocked the city of ...,26.0,4359a302b75669a41155b02620226f3f,1361455447-4359a302b75669a41155b02620226f3f,1361455000.0
2,29.0,0.0,27.0,2.0,31.0,there were two loud explosions.,"Twin blasts rock Hyderabad , six killed",9.0,04dc68ccd3dde2dd4f1aac6f2895dc2c,1361456092-04dc68ccd3dde2dd4f1aac6f2895dc2c,1361456000.0
3,29.0,0.0,67.0,3.0,42.0,The blasts occurred in Dilsukh nagar area,Two explosions took place opposite a movie the...,12.0,137442f08125ec446db2173dd3c96156,1361456379-137442f08125ec446db2173dd3c96156,1361456000.0


None
nuggets_generator topic_id: 30
Loading nugget_df


  0%|          | 0/2171 [00:00<?, ?it/s]

Nugget entries were generated for 1739 nuggets. There were 43 found in matches.tsv but not in corpus
There were 389 nugget_ids found in matches.tsv but not in nuggets.tsv
17 out of 1739 streamids had out of bounds sent_ids
nugget_df entries: 1739
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_30.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,30.0,11.0,19.0,2.0,7.0,March 4,"KINSHASA , March 4 -LRB- Reuters -RRB- - Three...",18.0,232cef2c3298c611d377615271582544,1330847640-232cef2c3298c611d377615271582544,1330848000.0
1,30.0,57.0,68.0,2.0,11.0,Brazzaville,"KINSHASA , March 4 -LRB- Reuters -RRB- - Three...",18.0,232cef2c3298c611d377615271582544,1330847640-232cef2c3298c611d377615271582544,1330848000.0
2,30.0,148.0,163.0,1.0,36.0,Early Sunday around 08:00 local time,"KINSHASA , March 4 -LRB- Reuters -RRB- - Three...",18.0,232cef2c3298c611d377615271582544,1330847640-232cef2c3298c611d377615271582544,1330848000.0
3,30.0,33.0,49.0,2.0,48.0,series of explosions continued for several hours,"KINSHASA , March 4 -LRB- Reuters -RRB- - Three...",18.0,232cef2c3298c611d377615271582544,1330847640-232cef2c3298c611d377615271582544,1330848000.0


  0%|          | 0/860 [00:00<?, ?it/s]

None
nuggets_generator topic_id: 31
Loading nugget_df


  0%|          | 0/860 [00:05<?, ?it/s]

Nugget entries were generated for 856 nuggets. There were 4 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
21 out of 856 streamids had out of bounds sent_ids
nugget_df entries: 856
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_31.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,31.0,21.0,81.0,3.0,48.0,2012 power grid failure causes massive blackouts,-LRB- AP -RRB- a A massive power outage plunge...,52.0,3f20a097132d812466f410c2e554cc4d,1343623022-3f20a097132d812466f410c2e554cc4d,1343623000.0
1,31.0,85.0,128.0,2.0,63.0,"Railways, hospitals, and airports were shut do...",-LRB- AP -RRB- a A massive power outage plunge...,52.0,3f20a097132d812466f410c2e554cc4d,1343623022-3f20a097132d812466f410c2e554cc4d,1343623000.0
2,31.0,86.0,127.0,2.0,104.0,Failure in the northern and eastern power grid...,-LRB- AP -RRB- a A massive power outage plunge...,52.0,3f20a097132d812466f410c2e554cc4d,1343623022-3f20a097132d812466f410c2e554cc4d,1343623000.0
3,31.0,96.0,107.0,3.0,48.0,2012 power grid failure causes massive blackouts,Trains across eight northern Indian states and...,53.0,3f20a097132d812466f410c2e554cc4d,1343623022-3f20a097132d812466f410c2e554cc4d,1343623000.0


None
nuggets_generator topic_id: 32


  0%|          | 0/1101 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 1095 nuggets. There were 6 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
25 out of 1095 streamids had out of bounds sent_ids
nugget_df entries: 1095
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_32.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,32.0,24.0,233.0,3.0,100.0,"Cairo, Egypt, the mob scaled the embassy wall ...",The incident followed a protest in neighboring...,64.0,cd44437a4f55dc3a83014e6b55a7d80e,1347403871-cd44437a4f55dc3a83014e6b55a7d80e,1347404000.0
1,32.0,12.0,55.0,3.0,84.0,fierce clashes between the Libyan army and an ...,He said the clashes were outside the consulate...,68.0,cd44437a4f55dc3a83014e6b55a7d80e,1347403871-cd44437a4f55dc3a83014e6b55a7d80e,1347404000.0
2,32.0,163.0,266.0,3.0,100.0,"Cairo, Egypt, the mob scaled the embassy wall ...",Angered by reports in the Egyptian media that ...,11.0,5ce94baf70aa0d7ce8513dbd44bb03ad,1347407596-5ce94baf70aa0d7ce8513dbd44bb03ad,1347408000.0
3,32.0,26.0,162.0,3.0,341.0,"Nakoula Basseley Nakoula, a Coptic Christian i...",Angered by reports in the Egyptian media that ...,11.0,5ce94baf70aa0d7ce8513dbd44bb03ad,1347407596-5ce94baf70aa0d7ce8513dbd44bb03ad,1347408000.0


None
nuggets_generator topic_id: 33


  0%|          | 0/797 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/797 [00:03<?, ?it/s]

Nugget entries were generated for 765 nuggets. There were 2 found in matches.tsv but not in corpus
There were 30 nugget_ids found in matches.tsv but not in nuggets.tsv
12 out of 765 streamids had out of bounds sent_ids
nugget_df entries: 765
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_33.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,33.0,0.0,42.0,3.0,44.0,fought between Mali and an army of Islamists,Troops and Islamists clash in central Mali,51.0,d3b3ac8ef71071fb190c551ea74591f4,1357778932-d3b3ac8ef71071fb190c551ea74591f4,1357779000.0
1,33.0,1.0,65.0,3.0,44.0,fought between Mali and an army of Islamists,Malian troops exchanged fire Wednesday with ar...,67.0,d3b3ac8ef71071fb190c551ea74591f4,1357778932-d3b3ac8ef71071fb190c551ea74591f4,1357779000.0
2,33.0,103.0,197.0,3.0,37.0,Mali successfully recaptured the town,"The latest clashes , which a resident said inc...",68.0,d3b3ac8ef71071fb190c551ea74591f4,1357778932-d3b3ac8ef71071fb190c551ea74591f4,1357779000.0
3,33.0,4.0,139.0,2.0,63.0,importance of Sévaré military airport and Mal...,The Malian military has a command post near Ko...,76.0,d3b3ac8ef71071fb190c551ea74591f4,1357778932-d3b3ac8ef71071fb190c551ea74591f4,1357779000.0


None
nuggets_generator topic_id: 34


  0%|          | 0/911 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 909 nuggets. There were 2 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
7 out of 909 streamids had out of bounds sent_ids
nugget_df entries: 909
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_34.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,34.0,0.0,20.0,3.0,31.0,"bomb attack in Quetta, Pakistan","Bomb blast in Quetta kills 20 , injures more t...",88.0,85004502202cc98d30fe122923403275,1361023384-85004502202cc98d30fe122923403275,1361023000.0
1,34.0,21.0,30.0,3.0,25.0,At least 84 people killed,"Bomb blast in Quetta kills 20 , injures more t...",88.0,85004502202cc98d30fe122923403275,1361023384-85004502202cc98d30fe122923403275,1361023000.0
2,34.0,32.0,68.0,3.0,11.0,190 injured,"Bomb blast in Quetta kills 20 , injures more t...",88.0,85004502202cc98d30fe122923403275,1361023384-85004502202cc98d30fe122923403275,1361023000.0
3,34.0,20.0,44.0,3.0,31.0,"bomb attack in Quetta, Pakistan","QUETTA , Pakistan : A remote-controlled bomb t...",102.0,4486b1466923bcdfd341f15024c60ba8,1361023820-4486b1466923bcdfd341f15024c60ba8,1361024000.0


None
nuggets_generator topic_id: 35


  0%|          | 0/274 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/274 [00:00<?, ?it/s]

Nugget entries were generated for 260 nuggets. There were 14 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
1 out of 260 streamids had out of bounds sent_ids
nugget_df entries: 260
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_35.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,35.0,33.0,59.0,3.0,75.0,At least 31 people killed and 200 wounded duri...,10 Years Later : Dozens Killed in New Wave of ...,487.0,50b0b40d2e56f0e1201fb3055acf15ec,1365989198-50b0b40d2e56f0e1201fb3055acf15ec,1365989000.0
1,35.0,1.0,75.0,1.0,129.0,"In Fallujah, a suicide car bomber killed 2 pol...","In Falluja , a bomb killed two security office...",20.0,6ea77967dbf26dbf72a1d9907c5cf6ca,1365996343-6ea77967dbf26dbf72a1d9907c5cf6ca,1365996000.0
2,35.0,0.0,48.0,1.0,48.0,Fourteen election candidates have been murdered.,Political Candidates Among 22 Killed Across Iraq,26.0,f8a4b9808eed26023d30a65bdbf046a9,1365997620-f8a4b9808eed26023d30a65bdbf046a9,1365998000.0
3,35.0,0.0,49.0,3.0,75.0,At least 31 people killed and 200 wounded duri...,"Bombs hit cities across Iraq , at least five d...",1.0,f753b0ce10566863739d835c04c72e13,1366006358-f753b0ce10566863739d835c04c72e13,1366006000.0


None
nuggets_generator topic_id: 36
Loading nugget_df


  0%|          | 0/1517 [00:00<?, ?it/s]

Nugget entries were generated for 1479 nuggets. There were 0 found in matches.tsv but not in corpus
There were 38 nugget_ids found in matches.tsv but not in nuggets.tsv
28 out of 1479 streamids had out of bounds sent_ids
nugget_df entries: 1479
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_36.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,36.0,0.0,22.0,3.0,32.0,10th anniversary of the Iraq War,"Iraq , a decade later -",1.0,a8ff58d58c163501336dca5aeac60752,1363654310-a8ff58d58c163501336dca5aeac60752,1363654000.0
1,36.0,29.0,72.0,3.0,32.0,10th anniversary of the Iraq War,In the final weeks before the long-planned Mar...,80.0,a8ff58d58c163501336dca5aeac60752,1363654310-a8ff58d58c163501336dca5aeac60752,1363654000.0
2,36.0,0.0,86.0,1.0,42.0,Increased violence and instability in Iraq,"Iraq continues to be plagued by car bombings ,...",16.0,0bc112ce0d82ea65e4355cf14bfd01f1,1363655628-0bc112ce0d82ea65e4355cf14bfd01f1,1363656000.0
3,36.0,0.0,39.0,3.0,32.0,10th anniversary of the Iraq War,"Ten years after the invasion of Iraq , the BBC...",41.0,e18877ca6b3a4e429cc09eb90558848f,1363663083-e18877ca6b3a4e429cc09eb90558848f,1363663000.0


None
nuggets_generator topic_id: 37


  0%|          | 0/831 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 830 nuggets. There were 1 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
15 out of 830 streamids had out of bounds sent_ids
nugget_df entries: 830
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_37.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,37.0,0.0,51.0,3.0,31.0,Suspect in arson fires arrested,Suspect Arrested In Slew Of Los Angeles Arson ...,36.0,98a85dee6e1384b3d427fa532e977a84,1325165340-98a85dee6e1384b3d427fa532e977a84,1325165000.0
1,37.0,0.0,50.0,3.0,73.0,There is currently one person of interest who ...,LA man booked for investigation of attempted a...,30.0,37543b134bb69905218527210ec9956d,1325169840-37543b134bb69905218527210ec9956d,1325170000.0
2,37.0,27.0,65.0,1.0,94.0,There has been at least 1 arrest made of a Ger...,LOS ANGELES - Los Angeles authorities say a ma...,33.0,37543b134bb69905218527210ec9956d,1325169840-37543b134bb69905218527210ec9956d,1325170000.0
3,37.0,63.0,153.0,2.0,83.0,"Samuel Arrington,22 a Sunland resident, arrest...",Los Angeles police Lt. Marc Reina tells City N...,35.0,37543b134bb69905218527210ec9956d,1325169840-37543b134bb69905218527210ec9956d,1325170000.0


None
nuggets_generator topic_id: 38
Loading nugget_df


  0%|          | 0/1589 [00:00<?, ?it/s]

Nugget entries were generated for 1585 nuggets. There were 1 found in matches.tsv but not in corpus
There were 3 nugget_ids found in matches.tsv but not in nuggets.tsv
20 out of 1585 streamids had out of bounds sent_ids
nugget_df entries: 1585
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_38.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,38.0,0.0,41.0,3.0,76.0,On 4 April 2013 a building collapsed in Thane ...,Seven -storey building collapses in Thane,1.0,df1238c30ee75473badf81a83f618695,1365084558-df1238c30ee75473badf81a83f618695,1365085000.0
1,38.0,6.0,77.0,3.0,76.0,On 4 April 2013 a building collapsed in Thane ...,"Tags : building collapse , Thane building coll...",26.0,df1238c30ee75473badf81a83f618695,1365084558-df1238c30ee75473badf81a83f618695,1365085000.0
2,38.0,37.0,85.0,3.0,76.0,On 4 April 2013 a building collapsed in Thane ...,The building was in the Mumbra area of Thane .,30.0,df1238c30ee75473badf81a83f618695,1365084558-df1238c30ee75473badf81a83f618695,1365085000.0
3,38.0,1.0,24.0,3.0,79.0,the building was still under construction and ...,It was a new building .,31.0,df1238c30ee75473badf81a83f618695,1365084558-df1238c30ee75473badf81a83f618695,1365085000.0


None
nuggets_generator topic_id: 39


  0%|          | 0/817 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/817 [00:02<?, ?it/s]

Nugget entries were generated for 815 nuggets. There were 2 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
6 out of 815 streamids had out of bounds sent_ids
nugget_df entries: 815
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_39.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,39.0,38.0,111.0,3.0,104.0,a security guardat the embassy's gate was kill...,"FALLUJA , Iraq -LRB- Reuters -RRB- - A suicide...",118.0,804a807934bf57efcaab141074fd9d0e,1359718598-804a807934bf57efcaab141074fd9d0e,1359719000.0
1,39.0,40.0,66.0,3.0,68.0,"1 February 2013, a bombing outside the U.S. em...",A suicide bomber has reportedly struck the US ...,51.0,9dfbd470f9afd4749917d1979bfeb122,1359718994-9dfbd470f9afd4749917d1979bfeb122,1359719000.0
2,39.0,2.0,16.0,1.0,26.0,Suicide bombings in Turkey,A suicide bomber has reportedly struck the US ...,51.0,9dfbd470f9afd4749917d1979bfeb122,1359718994-9dfbd470f9afd4749917d1979bfeb122,1359719000.0
3,39.0,0.0,44.0,3.0,68.0,"1 February 2013, a bombing outside the U.S. em...",Suicide bomber strikes US Embassy in Ankara - ...,39.0,cdfe156a8bba198ac442561ccd97f17f,1359718994-cdfe156a8bba198ac442561ccd97f17f,1359719000.0


None
nuggets_generator topic_id: 40
Loading nugget_df


  0%|          | 0/1746 [00:00<?, ?it/s]

Nugget entries were generated for 1487 nuggets. There were 86 found in matches.tsv but not in corpus
There were 173 nugget_ids found in matches.tsv but not in nuggets.tsv
39 out of 1487 streamids had out of bounds sent_ids
nugget_df entries: 1487
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_40.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,40.0,0.0,29.0,3.0,44.0,"Bombings took place throughout Baghdad, Iraq",Car -bomb goes off in Baghdad,117.0,9ab43bf1a5115418d7b20d9c0fcdd5b0,1324507500-9ab43bf1a5115418d7b20d9c0fcdd5b0,1324508000.0
1,40.0,0.0,19.0,2.0,9.0,Car bombs,Car -bomb goes off in Baghdad,117.0,9ab43bf1a5115418d7b20d9c0fcdd5b0,1324507500-9ab43bf1a5115418d7b20d9c0fcdd5b0,1324508000.0
2,40.0,46.0,55.0,3.0,19.0,On 22 December 2011,At least 63 people were killed and 185 wounded...,103.0,1a01776c13e91d8a562b21201e7b0034,1324510800-1a01776c13e91d8a562b21201e7b0034,1324511000.0
3,40.0,82.0,109.0,3.0,44.0,"Bombings took place throughout Baghdad, Iraq",At least 63 people were killed and 185 wounded...,103.0,1a01776c13e91d8a562b21201e7b0034,1324510800-1a01776c13e91d8a562b21201e7b0034,1324511000.0


None
nuggets_generator topic_id: 41


  0%|          | 0/1471 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 1174 nuggets. There were 0 found in matches.tsv but not in corpus
There were 297 nugget_ids found in matches.tsv but not in nuggets.tsv
22 out of 1174 streamids had out of bounds sent_ids
nugget_df entries: 1174
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_41.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,41.0,0.0,39.0,3.0,26.0,Aleppo University bombings,Explosion at Syria 's Aleppo university :,1.0,2e084de7936646bb338a5be69fbf2220,1358253519-2e084de7936646bb338a5be69fbf2220,1358254000.0
1,41.0,0.0,44.0,3.0,26.0,Aleppo University bombings,An explosion rocked the University of Aleppo i...,55.0,2e084de7936646bb338a5be69fbf2220,1358253519-2e084de7936646bb338a5be69fbf2220,1358254000.0
2,41.0,38.0,53.0,2.0,13.0,"Aleppo, Syria",An explosion rocked the University of Aleppo i...,55.0,2e084de7936646bb338a5be69fbf2220,1358253519-2e084de7936646bb338a5be69fbf2220,1358254000.0
3,41.0,23.0,62.0,3.0,26.0,Aleppo University bombings,Other Middle East News Explosion at Syria 's A...,75.0,2e084de7936646bb338a5be69fbf2220,1358253519-2e084de7936646bb338a5be69fbf2220,1358254000.0


None
nuggets_generator topic_id: 42


  0%|          | 0/2047 [00:00<?, ?it/s]

Loading nugget_df
Nugget entries were generated for 1748 nuggets. There were 2 found in matches.tsv but not in corpus
There were 297 nugget_ids found in matches.tsv but not in nuggets.tsv
58 out of 1748 streamids had out of bounds sent_ids
nugget_df entries: 1748
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_42.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,42.0,0.0,80.0,3.0,28.0,Carnival Triumph engine fire,Carnival Cruise officials said The Carnival Tr...,30.0,949e43472d46dfaff277abff027f7011,1360525192-949e43472d46dfaff277abff027f7011,1360525000.0
1,42.0,82.0,108.0,3.0,35.0,Fire was automatically extinguished,Carnival Cruise officials said The Carnival Tr...,30.0,949e43472d46dfaff277abff027f7011,1360525192-949e43472d46dfaff277abff027f7011,1360525000.0
2,42.0,0.0,37.0,3.0,28.0,Carnival Triumph engine fire,"Fire reported on Carnival cruise ship , no inj...",1.0,53400e30d323240496bc3a47770363cf,1360526616-53400e30d323240496bc3a47770363cf,1360527000.0
3,42.0,12.0,37.0,3.0,28.0,Carnival Triumph engine fire,GALVESTON -- A fire on a cruise ship has cause...,33.0,53400e30d323240496bc3a47770363cf,1360526616-53400e30d323240496bc3a47770363cf,1360527000.0


None
nuggets_generator topic_id: 43


  0%|          | 0/338 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/338 [00:01<?, ?it/s]

Nugget entries were generated for 338 nuggets. There were 0 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
5 out of 338 streamids had out of bounds sent_ids
nugget_df entries: 338
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_43.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,43.0,0.0,90.0,3.0,243.0,"USS Guardian (MCM-5), based in Sasebo, souther...",A U.S. Navy minesweeper has run aground on a r...,13.0,f7c5b3b4f70b33da067738787e26f23c,1358397815-f7c5b3b4f70b33da067738787e26f23c,1358398000.0
1,43.0,0.0,36.0,2.0,88.0,Ar first only its bow was on the reef but now ...,The ship remains stuck on the reef .,14.0,f7c5b3b4f70b33da067738787e26f23c,1358397815-f7c5b3b4f70b33da067738787e26f23c,1358398000.0
2,43.0,71.0,102.0,2.0,97.0,"No evidence that fuel oil is leaking. 15,000 g...",No injuries were reported among the ship 's cr...,15.0,f7c5b3b4f70b33da067738787e26f23c,1358397815-f7c5b3b4f70b33da067738787e26f23c,1358398000.0
3,43.0,25.0,114.0,3.0,243.0,"USS Guardian (MCM-5), based in Sasebo, souther...",A Navy statement said the USS Guardian -LRB- M...,16.0,f7c5b3b4f70b33da067738787e26f23c,1358397815-f7c5b3b4f70b33da067738787e26f23c,1358398000.0


None
nuggets_generator topic_id: 44
Loading nugget_df


  0%|          | 0/1954 [00:00<?, ?it/s]

Nugget entries were generated for 1948 nuggets. There were 6 found in matches.tsv but not in corpus
There were 0 nugget_ids found in matches.tsv but not in nuggets.tsv
8 out of 1948 streamids had out of bounds sent_ids
nugget_df entries: 1948
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_44.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,44.0,155.0,171.0,1.0,32.0,magnitude of 8.5/8.6 earthquake,The quake struck about 434 kilometers -LRB- 27...,766.0,2eedf2b19a878a73a065f36911d4d58f,1334113860-2eedf2b19a878a73a065f36911d4d58f,1334114000.0
1,44.0,0.0,127.0,1.0,54.0,2012 earthquake occurred around 500km SW of Ba...,The quake struck about 434 kilometers -LRB- 27...,766.0,2eedf2b19a878a73a065f36911d4d58f,1334113860-2eedf2b19a878a73a065f36911d4d58f,1334114000.0
2,44.0,0.0,131.0,1.0,17.0,no serious damage,Indonesian President Susilo Bambang Yudhoyono ...,772.0,2eedf2b19a878a73a065f36911d4d58f,1334113860-2eedf2b19a878a73a065f36911d4d58f,1334114000.0
3,44.0,106.0,118.0,1.0,27.0,"On Wednesday, April 11 2012","`` Banda Aceh , Indonesia - Two massive earthq...",24.0,a036899e4dc58fb98630316e897fa50d,1334130383-a036899e4dc58fb98630316e897fa50d,1334130000.0


None
nuggets_generator topic_id: 45
Loading nugget_df


  0%|          | 0/896 [00:00<?, ?it/s]

Nugget entries were generated for 580 nuggets. There were 3 found in matches.tsv but not in corpus
There were 313 nugget_ids found in matches.tsv but not in nuggets.tsv
13 out of 580 streamids had out of bounds sent_ids
nugget_df entries: 580
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_45.csv.gz


Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,45.0,0.0,36.0,1.0,49.0,"Earthquake occurred near the Haida Gwaii region,",Earthquake detected in Haida Gwaii -,1.0,01266833802b5425b2dea93714376c52,1351395139-01266833802b5425b2dea93714376c52,1351395000.0
1,45.0,0.0,34.0,1.0,49.0,"Earthquake occurred near the Haida Gwaii region,",Earthquake detected in Haida Gwaii Add to ...,34.0,01266833802b5425b2dea93714376c52,1351395139-01266833802b5425b2dea93714376c52,1351395000.0
2,45.0,29.0,48.0,3.0,97.0,Queen Charlotte Islands and Canada's coastal ...,A magnitude 7.7 earthquake hit off the B.C. co...,42.0,01266833802b5425b2dea93714376c52,1351395139-01266833802b5425b2dea93714376c52,1351395000.0
3,45.0,0.0,28.0,3.0,39.0,The earthquake had a magnitude of 7.7,A magnitude 7.7 earthquake hit off the B.C. co...,42.0,01266833802b5425b2dea93714376c52,1351395139-01266833802b5425b2dea93714376c52,1351395000.0


None
nuggets_generator topic_id: 46


  0%|          | 0/311 [00:00<?, ?it/s]

Loading nugget_df


  0%|          | 0/311 [00:01<?, ?it/s]

Nugget entries were generated for 304 nuggets. There were 2 found in matches.tsv but not in corpus
There were 5 nugget_ids found in matches.tsv but not in nuggets.tsv
5 out of 304 streamids had out of bounds sent_ids
nugget_df entries: 304
df loaded
saved at: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/nuggets/nuggets_46.csv.gz





Unnamed: 0,topic_id,match_start,match_end,importance,nugget_len,nugget_text,sent_in_text,sent_id,docid,streamid,epoch
0,46.0,24.0,69.0,2.0,100.0,The number of people taking part in the demons...,1. Barcelona braces for million-strong march f...,234.0,529c12d25c4bb1b9bf66f0e2e8f0ca26,1347352633-529c12d25c4bb1b9bf66f0e2e8f0ca26,1347353000.0
1,46.0,0.0,66.0,2.0,100.0,The number of people taking part in the demons...,Barcelona braces for million-strong march for ...,814.0,a69d3d2d776ded792f5f6985acaf398b,1347369485-a69d3d2d776ded792f5f6985acaf398b,1347369000.0
2,46.0,1.0,204.0,1.0,92.0,The financial crisis has strained the already ...,The lingering effects of the global financial ...,32.0,be52a6b7690622e7ffb5fc82928ae889,1347372452-be52a6b7690622e7ffb5fc82928ae889,1347372000.0
3,46.0,378.0,506.0,3.0,105.0,The 2010 demonstration supporting Catalan in...,"and now more than ever , '' Mr Mas , of the na...",42.0,c3ec41c3aec064d4214a63fd57ab3e27,1347380912-c3ec41c3aec064d4214a63fd57ab3e27,1347381000.0


None

Finished generating files


Unnamed: 0,corpus_name,file_purpose,split_identifier,num_splits,instance_identifier,file_type,path,exists
0,original-trects-kba2014-filtered,topics,,1,topics_df,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
1,original-trects-kba2014-filtered,corpus,0.0,2,1,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
2,original-trects-kba2014-filtered,corpus,200.0,2,1,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
3,original-trects-kba2014-filtered,corpus,0.0,2,2,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
4,original-trects-kba2014-filtered,corpus,200.0,2,2,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
5,original-trects-kba2014-filtered,corpus,0.0,2,3,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
6,original-trects-kba2014-filtered,corpus,200.0,2,3,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
7,original-trects-kba2014-filtered,corpus,0.0,2,4,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
8,original-trects-kba2014-filtered,corpus,200.0,2,4,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True
9,original-trects-kba2014-filtered,corpus,0.0,2,5,.csv.gz,/nfs/proj-repo/AAARG-dissertation/dataset/orig...,True


None


In [None]:
# class CorpusLoader:
#     def __init__(self):
#         self.path_handler = FilePathHandler()
#         self.file_purposes = self.path_handler.file_purposes
        
        
#     def load(self, selection=None, corpus_names=None, new_corpuses=None, force_reload=False, save=True, 
#              create_only=False, verbose=True):
#         """
#         Parameters:
#             selection: the data to load (e.g. corpus/nuggets), if None then load options
#             corpus_names: list of corpus names to load
#             new_corpuses: list of dicts of paths with keys {"dir_path", "topics_path", "nuggets_path"}
#             force_reload: force rebuild corpus files from original files
#             save: save built corpus files
#             create_only: only generate missing corpus files, do not load into ram
#         """
#         """Put flags in here to control process too
#         Steps:
#         1. Load corpus from gz html files
#         2. Load topics from topics file
#         3. Create nuggets_df from nuggets file
#         4. Create embeddings from nuggets and corpus
#         """
#         # add new corpuses to load
#         if new_corpuses is not None:
#             for new_corpus in new_corpuses:
#                 self.path_handler.add_corpus(new_corpus, overwrite=False)
#         # get corpus paths to load from (if corpus_names is None loads all)
#         corpus_sources = self.path_handler.get_corpus_sources(corpus_names=corpus_names)
#         corpus_names = corpus_sources.keys()
        
#         # get paths for generated files in corpus
#         if selection is None:  # if None selection get all
#             selection = self.file_purposes
#         corpus_names_paths = {}
#         for corpus_name in corpus_names:
#             name_paths = self.path_handler.paths_in_corpus_name(corpus_name, selection=selection)
#             corpus_names_paths[corpus_name] = name_paths
        
#         # go through each selected corpus_name
#         for corpus_name, corpus_paths in corpus_names_paths.items():
#             # go through selected tasks
#             for select in selection:  # maybe add tqdm here?
#                 # for each identifier
#                 # may behave differently when no entries in paths_df
#                 for identifier, ident_paths in corpus_paths[select]:
#                     exists = ident_paths['exists']
#                     not_exists = ident_paths['not_exists']
#                     if exists is None or force_reload:  # no paths loaded
#                         # create all appropriate files
# #                         not_exists = self.path_handler.convert_relative_path(not_exists)
#                         self.select_load_func(select, paths=not_exists, save=save, 
#                                     force_reload=force_reload, verbose=verbose, create_only=create_only)
#                     else:
#                         if not_exists is None:  # only exists has paths
#                             if create_only:
#                                 # change this to something better
#                                 warnings.warn("There are no new files to create")
#                             else:
# #                                 exists = self.path_handler.convert_relative_path(exists)
#                                 self.select_load_func(select, paths=exists, save=save, 
#                                     force_reload=force_reload, verbose=verbose, create_only=create_only)
#                         else:  # both have paths
#                             # need method to load/generate incomplete missing parts
#                             # self.path_handler.convert_relative_paths(...) dont forget
    
#     def fix_partially_missing_paths(self, selection):
#         """Function to organise missing paths from partially-saved/generated dataset"""
#         if selection == "topics":
#             raise ValueError("There can only be one topics file for a corpus")
    
#     def select_load_func(self, selection, paths=None, save=True, force_reload=False, 
#                              verbose=True, create_only=False, **identifiers):
#         # ["topics", "corpus", "nuggets", "embed_labels", "updates"]
#         if paths is None:
#             # create paths appropriately, add to paths_df
#             # also check paths/behaviour is correct in each load_func/after
#             # also need to account for other identifiers
#         if selection == "topics":
#             # should only need to be one meta topics_df
#             topic_dfs = []
#             # need to add save paths into paths_df
#             topic_df = load_topics(paths[0], save=save, force_reload=force_reload, verbose=verbose)
                
#         elif selection == "corpus":
            
#         elif selection == "nuggets":
            
#         elif selection == "embed_labels":
            
#         elif selection == "updates":
        
        
        

In [123]:
# def test_updates_tsv():
#     def parse_update_id(update_id):
#         streamid = "-".join(update_id.split("-")[:-1])
#         return streamid
        
#     updates_path = "/nfs/TemporalSummarization/ts14/results/updates_sampled.tsv"
#     updates_df = pd.read_csv(updates_path, "\t")
#     test_corpus_path = "/nfs/proj-repo/AAARG-dissertation/l_d/corpus_loaded.csv.gz"
#     test_cdf = load_corpus(test_corpus_path, verbose=False)
#     test_cdf = test_cdf.drop_duplicates(subset=['streamid'])
    
#     print(display(updates_df[updates_df['update_id'].str.contains("1329908400-f65b681fc854df85fd15e53cbb988df8")]))
    
    
#     for index, row in tqdm_notebook(updates_df.iterrows()):
#         # find where in corpus streamid is
#         up_streamid = parse_update_id(row['update_id'])
# #         print(up_streamid)
#         up_sentid = row['sentence_id']
#         nug = test_cdf[test_cdf["streamid"] == up_streamid]
#         if len(nug) == 0:
#             # see if docid match
#             up_docid = row['doc_id']
#             nug = test_cdf[test_cdf['docid'] == up_docid]
# #             if len(nug) == 0:
# #                 print("no docid match")
# #             print("no streamid match")
#             continue
#         elif len(nug) > 1:
#             print("multiple streamid matches??")
#             print(display(nug))
# #             outstr = []
# #             for sid in nug['streamid']:
# #                 outstr.append(sid)
# #             print("streamids: " + ",".join(outstr))
#             continue
#         nug_text = str(nug['text'].iloc[0])
#         nug_text = nug_text.splitlines()
#         up_text = row['update_text']
# #         print("nug_text len: " + str(len(nug_text)))
#         for sent in nug_text:
#             if sent == up_text:
#                 print("streamid:" + up_streamid)
#                 print("Match:\n" + "sent: " + sent + "\n" + "up_text: " + up_text + "\n")
# #         nug_text = nug_text[up_sentid + 1]
# #         up_text = row['update_text']
# #         if up_text == nug_text:
# #             print("It's the same")
#     print("end function")
    
# test_updates_tsv()

In [170]:
# !python -m spacy download en_core_web_lg
# import en_core_web_lg

In [186]:
def check_matches_nuggets():
    def parse_update_id(update_id):
        update_id = update_id.split("-")
        sent_id = int(update_id[-1])
        streamid = "-".join(update_id[:-1])
        return streamid, sent_id
    matches_path = "/nfs/TemporalSummarization/ts13/results/matches.tsv"
    nuggets_path = "/nfs/TemporalSummarization/ts13/results/nuggets.tsv"
    test_corpus_path = "/nfs/proj-repo/AAARG-dissertation/l_d/corpus_loaded.csv.gz"
    matches_df = pd.read_csv(matches_path, "\t")
    nugget_df = pd.read_csv(nuggets_path, "\t")
    corpus_df = load_corpus(test_corpus_path, verbose=False)
    corpus_df = corpus_df.drop_duplicates(subset=['streamid'])
    
#     nlp = spacy.load("en_core_web_lg")  # spacy char sent matching
    nlp = en_core_web_lg.load()
    print("loaded spacy model")
    
    for index, row in matches_df.iterrows():
        streamid, sent_id = parse_update_id(row['update_id'])
#         sent_id += 1
        
        c_sid = corpus_df[corpus_df['streamid'] == streamid]
        
        if len(c_sid) == 0:
            continue
        
        c_sid_str = str(c_sid['text'].iloc[0])
        nug_text = str(nugget_df[nugget_df['nugget_id'] == row['nugget_id']].iloc[0]['nugget_text'])
        
        c_sid_text = c_sid_str.splitlines()
        if c_sid_text[0] == "":
            sent_id += 1
        match_start = row['match_start']
        match_end = row['match_end']
        bound_sent = c_sid_str[match_start:match_end + 1]  # char indexing, start:end
#         print("sent_id: " + str(sent_id))  # sent id from update_id
        targ_sent = None
        try:
            targ_sent = c_sid_text[sent_id]  # sent_id from update_id sentence
        except IndexError as e:
            print("index error, sent_id/len: " + str(sent_id) + " / " + str(len(c_sid_text)))
        contains = False
        if c_sid_str.find(nug_text) != -1:
            contains = True
        char_count = 0
        bound_target_sent = None  # char indexing, using only match_start and matching sentence lengths
        for senten in c_sid_text:
            s_len = len(senten)
            if char_count + s_len > match_start:
                bound_target_sent = senten
                break
            char_count += s_len
            
        spacy_text = nlp(c_sid_str)
        spacy_sents = list(spacy_text.sents)
        bound_targ_sent_spacy = None
        char_count = 0
        for s in spacy_sents:
            s = str(s)
            s_len = len(s)
            if char_count + s_len > match_start:
                bound_targ_sent_spacy = s
                break
            char_count += s_len
            
        print("nug_text: " + str(nug_text) + "\n")
        print("sent_id (" + str(sent_id) + "): " + str(targ_sent) + "\n")
        print("bound_sent: " + str(bound_sent) + "\n")
        print("bound_targ_sent: " + str(bound_target_sent) + "\n")
        print("bound_targ_sent_spacy: " + str(bound_targ_sent_spacy) + "\n")
        print("contains: " + str(contains) + "\n")
        print("---------------------------------------------------------------")

check_matches_nuggets()    

loaded spacy model
nug_text: 676+ injuries

sent_id (1): Buenos Aires Train Crash Kills 49, Injuries over 600.

bound_sent:  Injuries over 600.

bound_targ_sent: Buenos Aires Train Crash Kills 49, Injuries over 600.

bound_targ_sent_spacy: 
Buenos Aires Train Crash Kills 49, Injuries over 600.


contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (3): "The train was full and the impact was 49 dead in Buenos Aires train crash: police.

bound_sent: is is a Smart Blog and Wr

bound_targ_sent: Argentina train crash in Buenos Aires kills 49 This is a Smart Blog and Write All About Home Improvement & Health Care By Michael Warren | AP BUENOS AIRES , Argentina ( AP) — A packed train slammed into the end of the line in Buenos Aires ' busy Once station today, killing 49 people and injuring hundreds of morning commuters as passenger cars crumpled behind the engine.

bound_targ_sent_spacy: This is a Smart B

nug_text: 49 confirmed deaths

sent_id (3): "The train was full and the impact was 49 dead in Buenos Aires train crash: police.

bound_sent: kills 49

bound_targ_sent: Argentina train crash in Buenos Aires kills 49 This is a Smart Blog and Write All About Home Improvement & Health Care By Michael Warren | AP BUENOS AIRES , Argentina ( AP) — A packed train slammed into the end of the line in Buenos Aires ' busy Once station today, killing 49 people and injuring hundreds of morning commuters as passenger cars crumpled behind the engine.

bound_targ_sent_spacy: 
Argentina train crash in Buenos Aires kills 49

contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (3): Deborah Lutterbeck AP BUENOS AIRES — A train packed with morning commuters slammed into a barrier at the end of a line Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded.

bound_sent:  in Bu

nug_text: the train crashed at the buffer stop

sent_id (3): Deborah Lutterbeck AP BUENOS AIRES — A train packed with morning commuters slammed into a barrier at the end of a line Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded.

bound_sent: kills 49 The Daily News - Reading Everything Under The World Buenos Aires train c

bound_targ_sent: Argentina train crash in Buenos Aires kills 49 The Daily News - Reading Everything Under The World Buenos Aires train crash leaves at least 40 dead and over 500 more injured.

bound_targ_sent_spacy: 
Argentina train crash in Buenos Aires kills 49

contains: False

---------------------------------------------------------------
nug_text: Hundreds injured

sent_id (3): Deborah Lutterbeck AP BUENOS AIRES — A train packed with morning commuters slammed into a barrier at the end of a line Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded.

bound_sent:  over

nug_text: train accident in Buenos Aires, Argentina.

sent_id (3): ( Xinhua/Telenoticiosa Americana ) At least one Chinese national was injured in a rush hour train crash that had killed 49 people and injured more than 600 in Argentina 's capital Buenos Aires on Wednesday, according to the Chinese Embassy .

bound_sent: ch Home China World Business Life Entertainment Sports Insight Photo Gallery Romney wins GOP caucus i

bound_targ_sent: At least 1 Chinese injured in Argentina train crash_ Top Stories -- China Economic Net Search Home China World Business Life Entertainment Sports Insight Photo Gallery Romney wins GOP caucus in Guam US , ROK , Japan to meet over talks with Pyongyang IMF chief to propose 28-bln - euro loan for Greece Greek debt swap plan participation rate up to 95.7 pct Greece gets 85.8% takeup in swap CDB extends loans to vegetable transportation China posts Feb. trade deficit as imports jump February auto sales rise 24.5% EU to keep carbon tax Lagarde to visit China 

nug_text: the train crashed at the buffer stop

sent_id (6): Transportation Secretary Juan Pablo Schiavi said the train entered the station at a speed of 20km/h ) and failed to stop, crashing into a retaining wall at the end of the track.

bound_sent: ry 23, 2012 | Filed underWorld | Posted by Video bueno

bound_targ_sent: Buenos Aires Train Crash, 49 killed and 600 injured Daily News Buenos Aires Train Crash , 49 killed and 600 injured February 23, 2012 | Filed underWorld | Posted by Video buenos aires train crash The first 2 cars were packed as usual for the morning rush, so tightly that people stood pressed flesh to flesh, sandwiched between bicycles and the few seats, many without so much as a strap to hold onto.

bound_targ_sent_spacy: 
Buenos Aires Train Crash, 49 killed and 600 injured Daily News Buenos Aires Train Crash , 49 killed and 600 injured February 23, 2012

contains: False

---------------------------------------------------------------
nug_text: 676+ injuries

sent_id

nug_text: train accident in Buenos Aires, Argentina.

sent_id (11): This latest accident is Argentina 's worst train crash since February 1970, when a train smashed into another at full speed in suburban Buenos Aires , killing 200 people.

bound_sent: ntina train crash in Buenos Aires - 49 killed Oh d

bound_targ_sent: Argentina train crash in Buenos Aires - 49 killed Oh dear, another crash disaster.

bound_targ_sent_spacy: 
Argentina train crash in Buenos Aires - 49 killed

contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (2): This time it's a commuter train crash in Buenos Aires , Argentina .

bound_sent: Buenos Aires - 49 killed Oh dear, anothe

bound_targ_sent: Argentina train crash in Buenos Aires - 49 killed Oh dear, another crash disaster.

bound_targ_sent_spacy: 
Argentina train crash in Buenos Aires - 49 killed

contains: False

--------------------------------------------------------

nug_text: 49 confirmed deaths

sent_id (3): Argentine commuter train crashes, killing 49 people BUENOS AIRES - A packed commuter train crashed at a Buenos Aires station during Wednesday's morning rush hour, killing 49 people and injuring more than 600 in Argentina 's worst rail disaster in three decades.

bound_sent:  killing 49 people

bound_targ_sent: Argentine commuter train crashes, killing 49 people - The China Post News Opinion Taiwan Living Learn English The China Post Subscribe RSS Feeds World Africa Middle East Europe Americas Updated Thursday, February 23, 2012 2:08 pm TWN , Reuters Sponsors ▪ Buy china wholesale products from reliable chinese wholesalers on DHgate. com! ▪ Save 70% for hotel in Shanghai and 6000 hotels, in Beijing , Guangzhou , Shenzhen , and all China . ▪ Get the best deals for Guangzhou Hotels or choose from more than 10,000 hotels in 499 Chinese cities. ▪ Find great real time deals on China Flights .

bound_targ_sent_spacy: 
Argentine commuter train crashe

nug_text: 550 injured

sent_id (4): At least fifty people were killed and over seven hundred were injured.{{cite news |title=Once train crash: 50 dead, 703 injured | url=http ://www. buenosairesherald. com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured |publisher= Buenos Aires Herald |date =23 February 2012 |accessdate=23 February 2012}} There were ~1,000 passengers on board the train.

bound_sent: eaths = 50+ BuenosAire

bound_targ_sent: Buenos Aires rail disaster Related changes ← Previous revision Revision as of 12:51, 23 February 2012 (One intermediate revision by one user not shown) Line 18: Line 18: | trains = 1 | trains = 1 | pax = | pax = − | deaths = 50+ BuenosAiresherald"/> + | deaths = 50+once train crash"/> − | injuries = 703+ BuenosAiresherald"> {{cite news |title=Once train crash: 50 dead, 703 injured + | injuries = 703+once train crash">{{cite news |title=Once train crash: 50 dead, 703 injured − |url=http :// www. buenosairesherald.com/article/9344

nug_text: train accident in Buenos Aires, Argentina.

sent_id (1): Hot news about: buenos aires train crash Your Dose of Daily News at 11 o'clock You are here: Home / Daily Trends / Hot news about: buenos aires train crash Hot news about: buenos aires train crash February 23, 2012 by admin · Leave a Comment Judge promises probe into Argentina train crash that killed 49 – Los Angeles Times REPORTING FROM BUENOS AIRES — An Argentinian federal judge promised to lead an investigation into the causes of a stunning train crash that killed at least 49 people and injured more than 600 Wednesday morning.

bound_sent:  buenos aires train crash

bound_targ_sent: Hot news about: buenos aires train crash Your Dose of Daily News at 11 o'clock You are here: Home / Daily Trends / Hot news about: buenos aires train crash Hot news about: buenos aires train crash February 23, 2012 by admin · Leave a Comment Judge promises probe into Argentina train crash that killed 49 – Los Angeles Times REPORTING FROM 

nug_text: 49 confirmed deaths

sent_id (7): Many of the train’ s … read full article… A commuter train slammed into a retaining barrier in a central Buenos Aires train station during peak rush hour Wednesday morning, killing at least 49 people and injuring 675, federal police officials said.Tweets about: buenos aires train crash http ://t.co/GXf958oV posted on: 23 February 2012, 7:45 am - View Tweet RIP Train crash in Buenos Aires posted on: 23 February 2012, 7:40 am - View Tweet RT @ GeneKim15 : Argentina train crash in Buenos Aires kills 49 http://t.co/RAoa2F9Z posted on: 23 February 2012, 7:40 am - View Tweet RT @ Susan_Shelton : 49 killed in Buenos Aires train crash, hundreds hurt http://t.co/o4tKGk8S posted on: 23 February 2012, 7:39 am - View Tweet 49 killed in Buenos Aires train crash, hundreds hurt http ://t. co / o4tKGk8S posted on: 23 February 2012, 7:38 am - View Tweet Argentina train crash in Buenos Aires kills 49 http ://t.co/RAoa2F9Z posted on: 23 February 2012, 7:37 am -

nug_text: train accident in Buenos Aires, Argentina.

sent_id (7): Many of the train’ s … read full article… A commuter train slammed into a retaining barrier in a central Buenos Aires train station during peak rush hour Wednesday morning, killing at least 49 people and injuring 675, federal police officials said.Tweets about: buenos aires train crash http ://t.co/GXf958oV posted on: 23 February 2012, 7:45 am - View Tweet RIP Train crash in Buenos Aires posted on: 23 February 2012, 7:40 am - View Tweet RT @ GeneKim15 : Argentina train crash in Buenos Aires kills 49 http://t.co/RAoa2F9Z posted on: 23 February 2012, 7:40 am - View Tweet RT @ Susan_Shelton : 49 killed in Buenos Aires train crash, hundreds hurt http://t.co/o4tKGk8S posted on: 23 February 2012, 7:39 am - View Tweet 49 killed in Buenos Aires train crash, hundreds hurt http ://t. co / o4tKGk8S posted on: 23 February 2012, 7:38 am - View Tweet Argentina train crash in Buenos Aires kills 49 http ://t.co/RAoa2F9Z posted on: 23 F

nug_text: train accident in Buenos Aires, Argentina.

sent_id (3): President Cristina Fernandez canceled her day’s agenda due to the accident, which raised fresh doubts … read full article… 49 killed in Buenos Aires train crash, hundreds hurt – Windsor Star At least 49 people were killed and 600 more injured after a commuter train was derailed at a station during the morning rush-hour in Buenos Aires .

bound_sent: os aires train crash Hot 

bound_targ_sent: Hot news about: buenos aires train crash Your Dose of Daily News at 11 o'clock You are here: Home / Daily Trends / Hot news about: buenos aires train crash Hot news about: buenos aires train crash February 23, 2012 by admin · Leave a Comment Judge promises probe into Argentina train crash that killed 49 – Los Angeles Times REPORTING FROM BUENOS AIRES — An Argentinian federal judge promised to lead an investigation into the causes of a stunning train crash that killed at least 49 people and injured more than 600 Wednesday morning.



nug_text: cause reported as malfunction of railway brakes

sent_id (9): Pictured Above : The commuter train that crashed into the Once train station at rush hour after its brakes failed.

bound_sent: nancial Astrology, earth

bound_targ_sent: Current economic events and news Financial Astrology, earth changes, disasters, volcanoes , floods, news, miscellaneous, earthquakes, fires, reviews, airplane disasters, shipwrecks, train accidents, tornadows, mine cave-ins, hurricanes, pestilence, blizzar Link to Video on Buenos Aires Train Crash from Reuter. February 23, 2012 - Argetine Train Crash Kills 49: A packed commuter train crashed at a Buenos Aires station during Wednesday’s morning rush hour, killing 49 people and injuring more than 600 in Argentina ’s worst rail disaster in three decades.

bound_targ_sent_spacy: Current economic events and news Financial Astrology, earth changes, disasters, volcanoes , floods, news, miscellaneous, earthquakes, fires, reviews, airplane disasters, shipw

nug_text: worst train accident in Argentina since 1970

sent_id (11): Pictured Above : Injured commuters lie on stretchers after their train crashed into the Once station after Argentina 's worst rail accident in more than 30 years, officials said.

bound_sent: , earth changes, disasters, volcanoes , floods, news, m

bound_targ_sent: Current economic events and news Financial Astrology, earth changes, disasters, volcanoes , floods, news, miscellaneous, earthquakes, fires, reviews, airplane disasters, shipwrecks, train accidents, tornadows, mine cave-ins, hurricanes, pestilence, blizzar Link to Video on Buenos Aires Train Crash from Reuter. February 23, 2012 - Argetine Train Crash Kills 49: A packed commuter train crashed at a Buenos Aires station during Wednesday’s morning rush hour, killing 49 people and injuring more than 600 in Argentina ’s worst rail disaster in three decades.

bound_targ_sent_spacy: Current economic events and news Financial Astrology, earth changes, disasters, vo

nug_text: February 22, 2012

sent_id (1): Buenos Aires rail disaster Related changes copy edit ← Previous revision Revision as of 12:59, 23 February 2012 Line 18: Line 18: | trains = 1 | trains = 1 | pax = | pax = − | deaths = 50+once train crash"/> + | deaths = 50+ BuenosAiresherald"/> − | injuries = 703+once train crash">{{cite news |title=Once train crash: 50 dead, 703 injured + | injuries = 703+ BuenosAiresherald"> {{cite news |title=Once train crash: 50 dead, 703 injured − |url=http :// www. buenosairesherald.com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured|newspaper= Buenos Aires Herald |date=23 February 2012 |accessdate=23 February 2012}} + | url=http :// www. buenosairesherald.com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured|newspaper= [[Buenos Aires Herald (newspaper)| Buenos Aires Herald]] |date=23 February 2012 |accessdate=23 February 2012}} | damage = | damage = | map = | map = Line 26: Line 26: | map _state = | map _state =

nug_text: at Once Station

sent_id (1): Buenos Aires rail disaster Related changes ← Previous revision Revision as of 12:47, 23 February 2012 (One intermediate revision by one user not shown) Line 18: Line 18: | trains = 1 | trains = 1 | pax = | pax = − | deaths = 50+once train crash"/> + | deaths = 50+ BuenosAiresherald"/> | injuries = 703+{{cite news |title=Once train crash: 50 dead, 703 injured | injuries = 703+{{cite news |title=Once train crash: 50 dead, 703 injured − |url=http :// www. buenosairesherald.com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured|newspaper= [[Buenos Aires Herald (newspaper)] Buenos Aires Herald]] |date=23 February 2012 |accessdate=23 February 2012}} + | url=http :// www. buenosairesherald.com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured|newspaper= [[Buenos Aires Herald (newspaper)| Buenos Aires Herald]] |date=23 February 2012 |accessdate=23 February 2012}} | damage = | damage = | map = | map =

bound_sent: +o

nug_text: 50 confirmed dead

sent_id (1): Buenos Aires rail disaster Related changes fix reference ← Previous revision Revision as of 13:03, 23 February 2012 Line 20: Line 20: | deaths = 50+ | deaths = 50+ | injuries = 703+{{cite news |title=Once train crash: 50 dead, 703 injured | injuries = 703+{{cite news |title=Once train crash: 50 dead, 703 injured − |url=http :// www. buenosairesherald.com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured|newspaper= [[Buenos Aires Herald (newspaper)| Buenos Aires Herald]] |date=23 February 2012 |accessdate=23 February 2012}} + | url=http :// www. buenosairesherald.com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured|newspaper= Buenos Aires Herald |date=23 February 2012 |accessdate=23 February 2012}} | damage = | damage = | map = | map = Line 26: Line 26: | map _state = | map _state = }} }} − The '''Buenos Aires rail disaster''' occurred on 22 February 2012 when a train crashed at [ [Once Station]] in [[ Bu

nug_text: February 22, 2012

sent_id (1): Buenos Aires rail disaster Related changes fix reference ← Previous revision Revision as of 13:03, 23 February 2012 Line 20: Line 20: | deaths = 50+ | deaths = 50+ | injuries = 703+{{cite news |title=Once train crash: 50 dead, 703 injured | injuries = 703+{{cite news |title=Once train crash: 50 dead, 703 injured − |url=http :// www. buenosairesherald.com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured|newspaper= [[Buenos Aires Herald (newspaper)| Buenos Aires Herald]] |date=23 February 2012 |accessdate=23 February 2012}} + | url=http :// www. buenosairesherald.com/article/93449/once-train-crash-at-least-40-dead-more-than-550-injured|newspaper= Buenos Aires Herald |date=23 February 2012 |accessdate=23 February 2012}} | damage = | damage = | map = | map = Line 26: Line 26: | map _state = | map _state = }} }} − The '''Buenos Aires rail disaster''' occurred on 22 February 2012 when a train crashed at [ [Once Station]] in [[ Bu

nug_text: train accident in Buenos Aires, Argentina.

sent_id (13): Photo : Reuters Injured commuters lie on stretchers after their train crashed into the Once station during rush hour in Buenos Aires .

bound_sent:  | IOL.co. za Sponsored Links: IOL News IOL Sport IOL Business IOL Motor

bound_targ_sent: Gallery: Dozens killed in Argentina train crash - Pretoria News | IOL.co. za Sponsored Links: IOL News IOL Sport IOL Business IOL Motoring IOL Entertainment IOL Travel IOL Property IOL Jobs IOL Classifieds Home Pretoria News Home Gallery : Dozens killed in Argentina train crash Comment on this story Rescue workers and paramedics work at the site of a train derailment, after two suburban commuter trains collided with a bus ( C ), in a Buenos Aires neighbourhood.

bound_targ_sent_spacy: News | IOL.co.

contains: False

---------------------------------------------------------------
nug_text: at Once Station

sent_id (12): Photo : Reuters Rescue workers carry a wounded passenger from a c

nug_text: 550 injured

sent_id (2): 49 killed in Argentina train crash BUENOS AIRES , Argentina ( AP) — A packed train slammed into the end of the line in Buenos Aires ' busy Once station Wednesday, killing 49 people and injuring hundreds of morning commuters in Argentina 's worst train A packed train has crashed into the end of the track in a Buenos Aires railway station, killing at least 50 morning commuters and injuring more than 500 others, with the death toll expected to rise, Argentine officials say.

bound_sent: nesday, killing 49 people and 

bound_targ_sent: 49 killed in Argentina train crash BUENOS AIRES , Argentina ( AP) — A packed train slammed into the end of the line in Buenos Aires ' busy Once station Wednesday, killing 49 people and injuring hundreds of morning commuters in Argentina 's worst train A packed train has crashed into the end of the track in a Buenos Aires railway station, killing at least 50 morning commuters and injuring more than 500 others, with the deat

nug_text: 676+ injuries

sent_id (2): The packed commuter train slammed into a retaining wall at a railway terminus in Buenos Aires during rush hour, leaving at least 49 dead, 675 injured, and dozens trapped in the wreckage.

bound_sent: 9, injures h

bound_targ_sent: Argentina rail crash kills 49, injures hundreds - FRANCE 24 Connect Join the France 24 community here Log in Argentina rail crash kills 49, injures hundreds - FRANCE 24 TOP STORIES FRANCE AFRICA MIDDLE EAST EUROPE AMERICAS ASIA / PACIFIC OBSERVERS WEATHER TOP STORIES Hot tags Russia nuclear Iran Afghanistan Manuel Zelaya rugby Dubai Copenhagen climate summit French are the cleanest in Europe , study finds Audio: The week in French sports Glossary of the Mideast conflict France’s military presence in Afghanistan Changing the constitution to remain in power Close BUSINESS / TECH SPORT CULTURE HEALTH EARTH REPORTAGES TV SHOWS BLOGS MOBILE TRAVEL SPORT football Rugby Tennis Formula 1 Hot tags Ligue 1 Top 14 Champions League E

nug_text: Hundreds injured

sent_id (1): Argentine train slams into station, killing 49 The latest news from GoErie.com BUENOS AIRES , Argentina -- A train packed with morning commuters slammed into a downtown station on Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded around them.

bound_sent:  injuring hundreds

bound_targ_sent: Argentine train slams into station, killing 49 The latest news from GoErie.com BUENOS AIRES , Argentina -- A train packed with morning commuters slammed into a downtown station on Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded around them.

bound_targ_sent_spacy: A train packed with morning commuters slammed into a downtown station on Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded around them.


contains: False

---------------------------------------------------------------
nug_text: train accident in Bueno

nug_text: 50 confirmed dead

sent_id (15): Wednesday's hard stop of a commuter train at... CCTV of deadly Argentina train crash released ITN Buenos Aires killing at least 50 people and leaving over 700 injured.

bound_sent: ng activation form... Load

bound_targ_sent: 49 Dead In Argentina Train crash Loading activation form... Loading login form... Loading sign up form... Loading activation form... Loading password form... Loading new incentive enroll form... Loading invite contacts form... Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!

bound_targ_sent_spacy: Loading activation form...

contains: False

---------------------------------------------------------------
nug_text: over 700 injured

sent_id (15): Wednesday's hard stop of a commuter train at... CCTV of deadly Argentina train crash released ITN Buenos Aires killing at least 50 people and leaving over 700 injured.

bound_sent:  form... L

nug_text: train accident in Buenos Aires, Argentina.

sent_id (11): The Argentina train crash left 49 passengers dead and more than five hundred injured.

bound_sent: Dead In Argentina Trai

bound_targ_sent: 49 Dead In Argentina Train crash Loading activation form... Loading login form... Loading sign up form... Loading activation form... Loading password form... Loading new incentive enroll form... Loading invite contacts form... Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!

bound_targ_sent_spacy: 
49 Dead

contains: False

---------------------------------------------------------------
nug_text: 49 confirmed deaths

sent_id (2): BUENOS AIRES – A packed commuter train plowed into the buffers at a Buenos Aires station during Wednesday's morning rush hour, killing 49 people and injuring more than 600 in Argentina 's worst rail accident in more than 30 years, officials said.

bound_sent:  an inve

nug_text: train accident in Buenos Aires, Argentina.

sent_id (2): The city emergency service confirmed at least 49 people died 49 killed in Argentina train crash 22, 2012 – A packed train slammed into the end of the line in Buenos Aires ' busy Once station Wednesday, injuring at least 340 morning commuters, Argentina 's transportation secretary said.

bound_sent: e by firefighters in Buenos Aires , Argentina , 22 Febr

bound_targ_sent: 49 killed in Argentina train crash By Star-Ledger Wire Services An injured passenger is lifted from a coach wreckage by firefighters in Buenos Aires , Argentina , 22 February 2012, after a train accident in Once train station.

bound_targ_sent_spacy: An injured passenger is lifted from a coach wreckage by firefighters in Buenos Aires , Argentina , 22 February 2012, after a train accident in Once train station.


contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id 

nug_text: 676+ injuries

sent_id (19): Take a look at the scenes of the accident at Once station in Buenos Aires :... VIDEO: CCTV footage shows crash impact BBC Argentina train crash impact 23 February 2012 Last updated at 00:26 ET Some 49 people have been killed and at least 600 injured in Argentina , after a commuter train crashed as it arrived into one of the busiest stations in Buenos Aires.

bound_sent: nvite your friends Pl

bound_targ_sent: 49 Dead In Argentina Train crash Loading activation form... Loading login form... Loading sign up form... Loading activation form... Loading password form... Loading new incentive enroll form... Loading invite contacts form... Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!

bound_targ_sent_spacy: Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!


contains: False

--

nug_text: cause reported as malfunction of railway brakes

sent_id (15): Take a look at the scenes of the accident at Once station in Buenos Aires : Photo: Reuters Buenos Aires Train Crash Buenos Aires Train Crash A commuter train that crashed at Once train station at rush hour when its brakes failed, is seen in Buenos Aires February 22, 2012.

bound_sent: tion Brazil Edition Canada Edition China Edition France Edition Ge

bound_targ_sent: Edition Africa Edition Australia Edition Brazil Edition Canada Edition China Edition France Edition Germany Edition Hong Kong Edition HK English Edition India Edition Italia Edition Korea Edition Japan Edition Mexico Edition Russia Edition U.S.

bound_targ_sent_spacy: Edition Africa Edition Australia Edition Brazil Edition

contains: False

---------------------------------------------------------------
nug_text: at Once Station

sent_id (15): Take a look at the scenes of the accident at Once station in Buenos Aires : Photo: Reuters Buenos Aires Trai

nug_text: train accident in Buenos Aires, Argentina.

sent_id (2): The site of a train crash at Once station in Buenos Aires on February 22.

bound_sent:  crash kills 49, injures hundreds | Bangkok

bound_targ_sent: Argentina rail crash kills 49, injures hundreds | Bangkok Post: news Argentina rail crash kills 49, injures hundreds | Bangkok Post: news Home Help Lite Version Log in Sign up Member benefit E - Paper SMS Print Front Page Newswire Print subscription RSS Advanced search news business opinion travel food lifestyle Arts & Culture feature learning tech property auto multimedia Video Photo Interactive Morning Focus Service Archive Directory Search Reader forum Classifieds Event calendar Hotel booking Member setting Local news Politics Security Crimes Transport Health Sports Asia World Investigative report Election News &gt ; World Argentina rail crash kills 49, injures hundreds Published: 23/02/2012 at 01:33 PM Online news: World Share Tweet A packed commuter train slammed into 

nug_text: February 22, 2012

sent_id (20): Photo : Reuters Buenos Aires Train Crash Buenos Aires Train Crash Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires February 22, 2012.

bound_sent: tion China Edition

bound_targ_sent: Edition Africa Edition Australia Edition Brazil Edition Canada Edition China Edition France Edition Germany Edition Hong Kong Edition HK English Edition India Edition Italia Edition Korea Edition Japan Edition Mexico Edition Russia Edition U.S.

bound_targ_sent_spacy: Canada Edition China Edition France Edition Germany Edition

contains: True

---------------------------------------------------------------
nug_text: 49 confirmed deaths

sent_id (1): Argentina Train Crash: 49 Dead and 460 Injured in Buenos Aires Accident (Pictures) Society &gt ; News and Society &gt ; News and society &gt ; Argentina Train Crash: 49 Dead and 460 Injured in Buenos Aires Accident (Pictures) Phillips Crook 

sent_id (5): A train that reportedly left a workshop yesterday failed to brake when entering a major station in the center of Buenos Aires and crashed against the end-of-the-line barrier, causing wagons to BUENOS AIRES — A packed commuter train slammed into a retaining wall at a railway terminus in Buenos Aires during rush hour Wednesday, leaving at least 49 dead, 600 injured, and dozens trapped in the wreckage.

bound_sent: s one of

bound_targ_sent: Deadly Argentine Rail Crash Raises Outcry He went on to say “ the impact of the crash caused the second carriage to be pushed 6 metres into the first carriage of the train." Once station, located in the heart Plaza Miserere is one of Buenos Aires ' central train stations.

bound_targ_sent_spacy: Once station, located in the heart Plaza Miserere is one of Buenos Aires ' central train stations.


contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (5)

nug_text: Hundreds injured

sent_id (1): Argentina Train Crash: 49 Dead and 460 Injured in Buenos Aires Accident (Pictures) Society &gt ; News and Society &gt ; News and society &gt ; Argentina Train Crash: 49 Dead and 460 Injured in Buenos Aires Accident (Pictures) Phillips Crook Become a Fan Send Private Message Shoutbox Mary Rose 2012/02/23 | 205 views | 0 Argentina Train Crash: 49 Dead and 460 Injured in Buenos Aires Accident (Pictures ) Email Share Favorite RePublish article Tweet A terrible train crash in Argentina has killed at least 49 people and left 460 others injured when a packed train slammed into the end of the line in Buenos Aires ' busy Once station Wednesday's morning rush hour, 22 February 2012.

bound_sent:  460 Injured 

bound_targ_sent: Argentina Train Crash: 49 Dead and 460 Injured in Buenos Aires Accident (Pictures) Society &gt ; News and Society &gt ; News and society &gt ; Argentina Train Crash: 49 Dead and 460 Injured in Buenos Aires Accident (Pictures) Philli

nug_text: train accident in Buenos Aires, Argentina.

sent_id (2): The train hit the end of the platform at Once Station in Buenos Aires during the morning rush hour.

bound_sent: dead, over 600 injured in Argentina train crashIndia4u News Online

bound_targ_sent: 49 dead, over 600 injured in Argentina train crashIndia4u News Online Online Booking Medindia Jobs Forums India4u Store Bollywood Blog Home News Special Reports Press Releases News Photos Videos Medical News World News National News Entertainment News Business News Sports News Education News Travel News News Central General News The Hindu Indian Express The Telegraph Deccan Herald Times of India Hindustan Times IBN Live News DNA Top News Indias News Sports News The Hindu Sports Espn Star Sports Sify Sports Regional News The Navhind Times Milligazette News Business News Business Line Financial Express Economic Times Business Standard IBN Business Sify Finance Education News Entertainment News Kollywood Cinema Sify Movies India

nug_text:  the poor condition of the railroad

sent_id (46): Argentina ’s Auditor- General Leandro Despouy has stated that years of failed safety tests and other problems made this accident foreseeable and preventable.

bound_sent: tina With fifty fatalities, over six hundred injured and at the time of writing, the recent re-em

bound_targ_sent: Disaster on El Sarmiento Observations in Buenos Aires , Argentina With fifty fatalities, over six hundred injured and at the time of writing, the recent re-emergence of two of the three unaccounted for passengers more than 48 hours after the accident, the crashing of the Sarmiento train at Once station in Buenos Aires is the worst train accident in Argentina since the 1979 head on collision of two trains near Benavidez station when over 140 people lost their lives.

bound_targ_sent_spacy: 
Disaster on El Sarmiento Observations in Buenos Aires , Argentina

contains: False

---------------------------------------------------------------
nug_text:

nug_text: train accident in Buenos Aires, Argentina.

sent_id (10): The packed commuter train slammed into buffers at a station on Wednesday morning, trapping dozens of people amongst the wreckage. Â Full story Â "Like a bomb explosion:" Argentine train crash survivor Â BUENOS AIRES , Feb. 23 ( Xinhua ) -- It was a commuter's nightmare come true - a packed train hurtling past platform after platform before finally slamming into a shock-absorbing barrier and sending passengers into each other, to the floor, and worse.

bound_sent: lized English.news.cn 2

bound_targ_sent: Chinese national injured in Argentina train crash remains hospitalized - Xinhua | English.news.cn China Chinese national injured in Argentina train crash remains hospitalized English.news.cn 2012-02-24 14:24:54 BUENOS AIRES , Feb. 23 ( Xinhua ) -- The Chinese national injured in Wednesday's train crash in Argentina remained under medical treatment Thursday with worsened conditions.

bound_targ_sent_spacy: China Chinese

nug_text: Sarmiento Line

sent_id (3): The Sarmiento train is the one that I use most weekends in Buenos Aires to visit my wife’s family who live in Moreno and we have many friends who use the service to commute to Capital for work.

bound_sent: aster on El Sarmi

bound_targ_sent: Disaster on El Sarmiento Observations in Buenos Aires , Argentina With fifty fatalities, over six hundred injured and at the time of writing, the recent re-emergence of two of the three unaccounted for passengers more than 48 hours after the accident, the crashing of the Sarmiento train at Once station in Buenos Aires is the worst train accident in Argentina since the 1979 head on collision of two trains near Benavidez station when over 140 people lost their lives.

bound_targ_sent_spacy: 
Disaster on El Sarmiento Observations in Buenos Aires , Argentina

contains: False

---------------------------------------------------------------
nug_text:  the poor condition of the railroad

sent_id (18): Those who have

nug_text: train accident in Buenos Aires, Argentina.

sent_id (8): Argentina 's government has declared a two-day mourning for the tragedy with flags at half-mast. Â Â Related: Argentina launches probe into deadly train crash Â BUENOS AIRES , Feb. 23 ( Xinhua ) -- An Argentine federal judge Thursday ordered a committee of experts be formed to investigate the deadly commuter train crash that killed at least 50 people and injured 703 others here Wednesday morning.

bound_sent: se national injured in Argentina train crash rema

bound_targ_sent: Chinese national injured in Argentina train crash remains hospitalized - Xinhua | English.news.cn China Chinese national injured in Argentina train crash remains hospitalized English.news.cn 2012-02-24 14:24:54 BUENOS AIRES , Feb. 23 ( Xinhua ) -- The Chinese national injured in Wednesday's train crash in Argentina remained under medical treatment Thursday with worsened conditions.

bound_targ_sent_spacy: China Chinese national injured in Argentina

nug_text: train accident in Buenos Aires, Argentina.

sent_id (3): Passengers wait in the boxcar for the departure of a train in Once station, Buenos Aires , the day after a train crashed there killing 50 people and injuring at least 703.

bound_sent: he France 24 community here Log in Argentines seek 

bound_targ_sent: Argentines seek answers after deadly train crash - FRANCE 24 Connect Join the France 24 community here Log in Argentines seek answers after deadly train crash - FRANCE 24 TOP STORIES FRANCE AFRICA MIDDLE EAST EUROPE AMERICAS ASIA / PACIFIC OBSERVERS WEATHER TOP STORIES Hot tags Russia nuclear Iran Afghanistan Manuel Zelaya rugby Dubai Copenhagen climate summit French are the cleanest in Europe , study finds Audio: The week in French sports Glossary of the Mideast conflict France’s military presence in Afghanistan Changing the constitution to remain in power Close BUSINESS / TECH SPORT CULTURE HEALTH EARTH REPORTAGES TV SHOWS BLOGS MOBILE TRAVEL SPORT football Rugby Tenn

nug_text: The president declared a time of national mourning

sent_id (11): "I saw a passenger who sat by the window was thrown out of the train and fell on the platform," a survivor told Xinhua after Wednesday's deadly morning rush-hour crash in Buenos Aires ' bustling Once station. Â Full story Editor: Mo Hong 'e Related News • Argentina launches probe into deadly train crash • China offers condolences to Argentina over train crash • Commuter train crash kills 49 in Argentina , national mourning declared • Argentina 's railway accidents in recent years Home &gt ;&gt ; China Back to Top

bound_sent:  man, surnamed Sun , was in

bound_targ_sent: The 24-year-old Chinese man, surnamed Sun , was in the first carriage of the crashed commuter train when the tragedy occurred.

bound_targ_sent_spacy: The 24-year-old Chinese man, surnamed Sun , was in the first carriage of the crashed commuter train when the tragedy occurred.


contains: False

-------------------------------------------------

nug_text: train accident in Buenos Aires, Argentina.

sent_id (20): The body of the 51st fatal victim of Wednesday's train crash Lucas Menghini Rey (2) was found in the wrecked cockpit BUENOS AIRES: Riot police charge after agitators set on fire a litter at Once 's train station hall in Buenos Aires on February 24, 2012.

bound_sent: rs to riot-hit jail Loading Stock data... Just in Pakistan can export its entire textile products, cemen

bound_targ_sent: Indonesia returns foreigners to riot-hit jail Loading Stock data... Just in Pakistan can export its entire textile products, cement to India : Baig Pakistan bans Ahle Sunnat Wal Jammat , three other religious organizations AJK imposes life imprisonment on acid attacks Iran, India adopt Rupee in direct trade: official Annan starts talks with Syrian leader Assad: Syrian TV Pakistan decides to grant Vietnam full market status Annan in Syria to press Assad for ceasefire Afghan foreign minister to visit Qatar to discuss Taliban talks Ground

nug_text: train accident in Buenos Aires, Argentina.

sent_id (11): More international headlines Latest breaking news » on PalmBeachPost. com Japan marks 1 year since quake, tsunami disaster Big health bills die at the session's end 2012 Florida legislative session winners and losers Santorum takes Kansas , Romney counters in Wyoming | Comments 5 Santorum wins most delegates in Kansas caucuses | Comments 2 More breaking news From The Web By ROGER DWARIKA The Associated Press Post a Comment E-mail Print Larger Type Small Type BUENOS AIRES , Argentina — Thousands of Argentines spent Thursday trekking from hospital to hospital to check lists of train crash victims, hoping they wouldn't have to go to the morgue.

bound_sent: y forecast SPORTS Latest news, results High Schools Dolphins Heat Marlins Panthers Golf Gators Hurricanes Seminoles Owls Spring Training Out

bound_targ_sent: Siblings dead, injured in Argentine train wreck JOBS AUTOS REAL ESTATE CLASSIFIEDS SHOPPING PUBLIC NOTICES Sub

nug_text: Sarmiento Line

sent_id (3): Home POLITICS After the railway tragedy the Gov 't takes over Sarmiento and Mitre train lines Télam .

bound_sent:  Mitre tra

bound_targ_sent: After the railway tragedy the Gov 't takes over Sarmiento and Mitre train lines Skip to content Skip to main navigation Skip to 1st column Skip to 2nd column Télam in English | English news service Username Password Forgot your password?

bound_targ_sent_spacy: 
After the railway tragedy the Gov 't takes over Sarmiento and Mitre train lines Skip to content Skip to main navigation Skip to 1st column

contains: False

---------------------------------------------------------------
nug_text: at Once Station

sent_id (3): The operator suffered arm burns in the electrical fire which broke out at the Terminal Once in Buenos Aires.

bound_sent: m Reader E-pap

bound_targ_sent: Fire at Argentine rail station days after deadly crash Last updated at 4.12 pm Reader E-paper Untitled Document Jobs Cars Property Shops S

nug_text: February 22, 2012

sent_id (1): At least 49 dead and hundreds injured as train crashes in Argentina | World news | guardian.co. uk Turn autoplay off Turn autoplay on Please activate cookies in order to turn autoplay off Jump to content [ s] Jump to site navigation [0] Jump to search [4] Terms and conditions [8] Edition: US UK Sign in Mobile About us About us Contact us Press office Terms of service Privacy policy Subscribe Guardian iPhone iPad edition Kindle Guardian Weekly Digital edition News US World Sports Comment Culture Business Environment Science Travel Tech Media Life &amp ; style Apps Data News World news Argentina At least 49 dead and hundreds injured as train crashes in Argentina Helicopters and ambulances ferry survivors to hospital after rush hour service ploughs into barrier at end of line in Buenos Aires Tweet this Share reddit this Associated Press in Buenos Aires guardian. co.uk , Wednesday 22 February 2012 09.35 EST Article history About this article Close 

nug_text: worst train accident in Argentina since 1970

sent_id (10): The accident was Argentina 's worst train crash for decades, and has fuelled criticism of the state of the country's railways.

bound_sent: tina train crash company TBA taken over Acc

bound_targ_sent: BBC News - Argentina train crash company TBA taken over Accessibility links Skip to content Skip to local navigation Accessibility Help bbc.co. uk navigation News Sport Weather Travel Future TV Radio More Search term: Latin America &amp ; Caribbean Home US &amp ; Canada Latin America UK Africa Asia Europe Mid-East Business Health Sci / Environment Tech Entertainment Video 28 February 2012 Last updated at 14:28 ET Share this page Delicious Digg Facebook reddit StumbleUpon Twitter Email Print Argentina train crash company TBA taken over The train failed to stop as it came into the Once station in Buenos Aires Continue reading the main story Related Stories Argentine rail brake alert denied Argentina probes train disaster

nug_text:  the poor condition of the railroad

sent_id (6): The incident highlighted the insecure nature of commuter rail, political negligence and the lack of state control over the management of concessionaire TBA , which employs more than 4,500 people.

bound_sent: ver train operator after crash - T

bound_targ_sent: Argentine government takes over train operator after crash - Thaindian News Thaindian News Home About Us Top Stories Tags Argentine government takes over train operator after crash February 29th, 2012 - 11:51 am ICT by IANS Tweet Buenos Aires , Feb 29 (IANS/EFE) The Argentine government announced Tuesday the “temporary and preventive” takeover of commuter rail operator Trenes de Buenos Aires amid harsh criticism by relatives of the victims of the rail accident that left 50 commuters dead here last week.

bound_targ_sent_spacy: 
Argentine government takes over train operator after crash - Thaindian News Thaindian News Home About Us

contains: False

---------------------

nug_text: 51 people confirmed dead

sent_id (3): Buenos Aires , Argentina - The discovery of a 51st victim on Friday two days after Argentina 's deadliest train wreck in decades left the man's family devastated and prompted rock-throwing and other violence by protesters holding vigil at the scene.

bound_sent: News | IOL. c

bound_targ_sent: ‘We are as fragile as cardboard’ - Pretoria News | IOL. co.za Sponsored Links: IOL News IOL Sport IOL Business IOL Motoring IOL Entertainment IOL Travel IOL Property IOL Jobs IOL Classifieds Home Pretoria News Home ‘We are as fragile as cardboard’ February 25 2012 at 09:37am By Debora Rey Comment on this story AP A demonstrator destroys turnstiles during riots at Once railway station in Buenos Aires , Argentina .

bound_targ_sent_spacy: We are as fragile as cardboard’ - Pretoria News | IOL.

contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (3): Buenos Aire

nug_text: train accident in Buenos Aires, Argentina.

sent_id (24): Safety features are designed to prevent trains from crashing through barriers and killing people, but it happened this week in Buenos Aires.

bound_sent: 4 cup coffee pot?
Quote: Originally Posted by wieslaw Hi Bruce , Something is wrong with the design of the e

bound_targ_sent: RE: Will I be able to bring my 4 cup coffee pot?

bound_targ_sent_spacy: Will I be able to bring my 4 cup coffee pot?


contains: False

---------------------------------------------------------------
nug_text: unknown number were killed

sent_id (24): Safety features are designed to prevent trains from crashing through barriers and killing people, but it happened this week in Buenos Aires.

bound_sent: law Hi Bruce , S

bound_targ_sent: Quote: Originally Posted by wieslaw Hi Bruce , Something is wrong with the design of the electrical system on your cruise ship or all of the cruise ships, if they have the same system as you described.

bound_

nug_text: 51 people confirmed dead

sent_id (2): At Estación Once de Septiembre , only about three km west of the central Plaza de Mayo , a runaway commuter train crashed into the end of its platform, killing 51 Argentines and injuring 703, according to the latest statistics.

bound_sent: ld, and not

bound_targ_sent: Aftermath: Behind the Buenos Aires Train Wreck The Best of Southernmost South America This past Wednesday, Buenos Aires made front pages around the world, and not in a good way.

bound_targ_sent_spacy: This past Wednesday, Buenos Aires made front pages around the world, and not in a good way.


contains: False

---------------------------------------------------------------
nug_text: the train crashed at the buffer stop

sent_id (2): At Estación Once de Septiembre , only about three km west of the central Plaza de Mayo , a runaway commuter train crashed into the end of its platform, killing 51 Argentines and injuring 703, according to the latest statistics.

bound_sent: B

nug_text: the conductor seemed to struggle with the brakes

sent_id (10): (AP Photo/Leonardo Zavattaro , Telam ) Passengers said the train's motorman struggled repeatedly with the brakes during the journey, overrunning platforms and missing one station entirely before crashing at the end of the line.

bound_sent: eries Commerce Corner Business Opinion AP Stories AP A

bound_targ_sent: The Alaska Journal of Commerce Local News Oil and Gas Fisheries Commerce Corner Business Opinion AP Stories AP Alaska AP Business Top Stories U.S.

bound_targ_sent_spacy: 
The Alaska Journal of Commerce Local News Oil and Gas Fisheries Commerce Corner

contains: False

---------------------------------------------------------------
nug_text: the train crashed at the buffer stop

sent_id (10): (AP Photo/Leonardo Zavattaro , Telam ) Passengers said the train's motorman struggled repeatedly with the brakes during the journey, overrunning platforms and missing one station entirely before crashing at the end o

nug_text: Hundreds injured

sent_id (2): The packed train slammed into the end of the line in Buenos Aires’ busy Once station, killing dozens and injuring hundreds.

bound_sent: 2012 07:42 PM 56° F 

bound_targ_sent: Train crash kills dozens in Buenos Aires | Business | The Bulletin bendbulletin.com/ business APRIL 15, 2012 07:42 PM 56° F Overcast Central Oregon Forecast News Community Entertainment Real Estate Subscriptions Advertising Commercial Print About Us Classifieds Publications Articles Restaurants Yellow Pages Web Newsprint Archive 1907 — 1994 Archive Bulletin E-Edition : » E-Edition Login » Manage my account Home Business " Train crash kills dozens in Buenos Aires" print Share | e -mail Facebook Tweet StumbleUpon Google Firemen rescue wounded passengers from a commuter train after it crashed in Buenos Aires , Argentina , on Wednesday.

bound_targ_sent_spacy: The Bulletin bendbulletin.com/ business APRIL 15, 2012 07:42 PM

contains: False

------------------------------------

nug_text: February 22, 2012

sent_id (5): Photo : Anibal Greco , Associated Press / AP Wounded passengers wait to be carried away from a commuter train... Rescue workers and policemen help an injured passenger after a train crashed at Once train station in Buenos Aires on February 22, 2012.

bound_sent: ty Opinion Comics 

bound_targ_sent: Businesses Register Sign In Home News Police Reports Sports Community Opinion Comics Jobs Homes Rentals Cars Index ▼ Close [X ] Quick links to other pages on this site | Still can't find it? see Site Index More Community News : Darien News Fairfield Citizen Greenwich Citizen New Canaan News Westport News At least 49 dead in train crash Firemen rescue wounded passengers from a commuter train after a collision in Buenos Aires , Argentina , Wednesday Feb. 22, 2012.

bound_targ_sent_spacy: Sports Community Opinion Comics Jobs Homes Rentals Cars Index ▼ Close [X

contains: True

---------------------------------------------------------------
nug_text: tra

nug_text: 49 confirmed deaths

sent_id (2): At least 49 people died and at least 600 people were injured in the accident when the suburban train failed to break and ran into the buffers at the railway terminus.

bound_sent: 
49 killed as Argentine t

bound_targ_sent: 49 killed as Argentine train slams into station - The China Post News Opinion Taiwan Living Learn English The China Post Subscribe RSS Feeds World Africa Middle East Europe Americas Updated Friday, February 24, 2012 0:13 am TWN , By Michael Warren , AP An injured person, center, is rescued after a train accident in Buenos Aires , Argentina , Wednesday, Feb. 22.

bound_targ_sent_spacy: 
49 killed as Argentine train slams into station

contains: False

---------------------------------------------------------------
nug_text: Hundreds injured

sent_id (2): A packed commuter train slammed into a retaining wall at a railway terminus in Buenos Aires during rush hour Wednesday, leaving at least 49 dead, 675 injured, and dozens tr

nug_text: train accident in Buenos Aires, Argentina.

sent_id (1): Brake failure may be behind Argentina horror train crash NJ TAMIL Radio ..

bound_sent:  Argentina horror train crash

bound_targ_sent: Brake failure may be behind Argentina horror train crash NJ TAMIL Radio ..

bound_targ_sent_spacy: 
Brake failure may be behind Argentina horror train crash NJ TAMIL Radio ..


contains: False

---------------------------------------------------------------
nug_text: February 22, 2012

sent_id (6): Paramedics carry away wounded passengers from a commuter train after a collision in Buenos Aires , Argentina , Wednesday Feb. 22, 2012.

bound_sent: d by YAHOO !
S

bound_targ_sent: Peninsula Clarion | The Newspaper of Alaska 's Kenai Peninsula Login | Join Now ! | Subscribe Site Web Web Search powered by YAHOO !

bound_targ_sent_spacy: Subscribe Site Web Web Search powered by YAHOO !


contains: False

---------------------------------------------------------------
nug_text: train accident i

nug_text: 12 victims remain unidentified

sent_id (6): Ten people are yet to be identified among the deceased.

bound_sent: 
Buenos Aires Train Crash : 50 Dead, 703 Injured Argenti

bound_targ_sent: Buenos Aires Train Crash : 50 Dead, 703 Injured Argentina , Buenos Aires and Latin America news, features, reviews, interviews and travel information Buenos Aires Train Crash: 50 Dead, 703 Injured by Hannah Flint , 23 February 2012.

bound_targ_sent_spacy: 
Buenos Aires Train Crash :

contains: False

---------------------------------------------------------------
nug_text: 703 injured

sent_id (4): 50 people, including one 7-year-old boy, are now confirmed to have died in the accident, while 703 people were injured.

bound_sent: s, features, reviews, in

bound_targ_sent: Buenos Aires Train Crash : 50 Dead, 703 Injured Argentina , Buenos Aires and Latin America news, features, reviews, interviews and travel information Buenos Aires Train Crash: 50 Dead, 703 Injured by Hannah Flint , 23 Febr

nug_text: 49 confirmed deaths

sent_id (34): "Argentine train slams into station, killing 49" .

bound_sent: kipedia , t

bound_targ_sent: 2012 Buenos Aires rail disaster - Wikipedia , the free encyclopedia 2012 Buenos Aires rail disaster From Wikipedia , the free encyclopedia (Redirected from Buenos Aires rail disaster ) Jump to: navigation , search 2012 Buenos Aires rail disaster Details Date 22 February 2012 ( 2012-02-22 ) Time 08:33 ART Location Buenos Aires Country Argentina Rail line Sarmiento Line Operator Trenes de Buenos Aires Type of incident Train wreck Cause Under investigation Statistics Trains 1 Deaths 51 [ 1 ] Injuries 703 [ 2 ] The 2012 Buenos Aires rail disaster occurred on 22 February 2012, when a train crashed at Once Station ( Spanish : Estación Once de Setiembre ; IPA : [ ˈonse] ) in the Balvanera barrio of Buenos Aires in Argentina .

bound_targ_sent_spacy: 
2012 Buenos Aires rail disaster - Wikipedia , the free encyclopedia 2012 Buenos Aires rail disaster From Wi

nug_text: a train crashed in the Balvanera ''barrio'' of Buenos Aires

sent_id (1): 2012 Buenos Aires rail disaster - Wikipedia , the free encyclopedia 2012 Buenos Aires rail disaster From Wikipedia , the free encyclopedia (Redirected from Buenos Aires rail disaster ) Jump to: navigation , search 2012 Buenos Aires rail disaster Details Date 22 February 2012 ( 2012-02-22 ) Time 08:33 ART Location Buenos Aires Country Argentina Rail line Sarmiento Line Operator Trenes de Buenos Aires Type of incident Train wreck Cause Under investigation Statistics Trains 1 Deaths 51 [ 1 ] Injuries 703 [ 2 ] The 2012 Buenos Aires rail disaster occurred on 22 February 2012, when a train crashed at Once Station ( Spanish : Estación Once de Setiembre ; IPA : [ ˈonse] ) in the Balvanera barrio of Buenos Aires in Argentina .

bound_sent:  in the Balvanera barrio of Buenos Aires

bound_targ_sent: 2012 Buenos Aires rail disaster - Wikipedia , the free encyclopedia 2012 Buenos Aires rail disaster From Wikipedia 

nug_text: train accident in Buenos Aires, Argentina.

sent_id (1): 2012 Buenos Aires rail disaster - Wikipedia , the free encyclopedia 2012 Buenos Aires rail disaster From Wikipedia , the free encyclopedia (Redirected from Buenos Aires rail disaster ) Jump to: navigation , search 2012 Buenos Aires rail disaster Details Date 22 February 2012 ( 2012-02-22 ) Time 08:33 ART Location Buenos Aires Country Argentina Rail line Sarmiento Line Operator Trenes de Buenos Aires Type of incident Train wreck Cause Under investigation Statistics Trains 1 Deaths 51 [ 1 ] Injuries 703 [ 2 ] The 2012 Buenos Aires rail disaster occurred on 22 February 2012, when a train crashed at Once Station ( Spanish : Estación Once de Setiembre ; IPA : [ ˈonse] ) in the Balvanera barrio of Buenos Aires in Argentina .

bound_sent:  Buenos Aires rail disaster 

bound_targ_sent: 2012 Buenos Aires rail disaster - Wikipedia , the free encyclopedia 2012 Buenos Aires rail disaster From Wikipedia , the free encyclopedia (Redi

nug_text: Sarmiento Line

sent_id (6): The siblings were inseparable, taking the Sarmiento line trains downtown each day to their jobs as telemarketers at Nextel Communications , their uncle Daniel Peralta said.

bound_sent:  wreck - Times-

bound_targ_sent: Siblings dead, injured in Argentine train wreck - Times-Standard Online Mobile | Digital Subscribe | e-edition | Print Subscribe Search powered by YAHOO!

bound_targ_sent_spacy: Argentine train wreck - Times-Standard Online Mobile | Digital Subscribe | e-edition

contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (3): The tragedy comes after a series of train accidents in Argentina and will likely bring about a prolonged legal battle.

bound_sent: crash in Argentina | Video | Re

bound_targ_sent: Video shows exact moment of train crash in Argentina | Video | Reuters .com Edition : IN Africa Arabic Argentina Brazil Canada China France Germany

nug_text: Dozens killed

sent_id (41): Retrieved 22 February 2012 . ^ "Forty Dead, Up to 550 Injured in Argentine Train Crash" .

bound_sent:  - Wikipedi

bound_targ_sent: 2012 Buenos Aires rail disaster - Wikipedia , the free encyclopedia 2012 Buenos Aires rail disaster From Wikipedia , the free encyclopedia (Redirected from Buenos Aires rail disaster ) Jump to: navigation , search 2012 Buenos Aires rail disaster Details Date 22 February 2012 ( 2012-02-22 ) Time 08:33 ART Location Buenos Aires Country Argentina Rail line Sarmiento Line Operator Trenes de Buenos Aires Type of incident Train wreck Cause Under investigation Statistics Trains 1 Deaths 51 [ 1 ] Injuries 703 [ 2 ] The 2012 Buenos Aires rail disaster occurred on 22 February 2012, when a train crashed at Once Station ( Spanish : Estación Once de Setiembre ; IPA : [ ˈonse] ) in the Balvanera barrio of Buenos Aires in Argentina .

bound_targ_sent_spacy: 
2012 Buenos Aires rail disaster - Wikipedia , the free encyclopedia 2012 B

nug_text: train accident in Buenos Aires, Argentina.

sent_id (2): QUOTE ( MrBox2u @ Feb 22 2012, 04:09 PM ) A PACKED commuter train entering a Buenos Aires station at morning rush hour overnight smashed into a retaining wall, crumpling cars and leaving at least 49 dead, 600 injured and dozens trapped in the twisted wreckage.

bound_sent: 9 PM ) A PACKED commuter train entering a Buenos Aires station at morning rush hour overnight smash

bound_targ_sent: QUOTE ( MrBox2u @ Feb 22 2012, 04:09 PM ) A PACKED commuter train entering a Buenos Aires station at morning rush hour overnight smashed into a retaining wall, crumpling cars and leaving at least 49 dead, 600 injured and dozens trapped in the twisted wreckage.

bound_targ_sent_spacy: ( MrBox2u @ Feb 22 2012, 04:09 PM )

contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (10): The victims of Buenos Aires Train Crash which has been evacuated, was 

nug_text: following the 2011 Flores rail crash

sent_id (1): 2011 Flores rail crash Track the most recent changes to the wiki in this feed.

bound_sent: 
2011 Flores rail crash

bound_targ_sent: 2011 Flores rail crash Track the most recent changes to the wiki in this feed.

bound_targ_sent_spacy: 
2011

contains: False

---------------------------------------------------------------
nug_text: 703 injured

sent_id (1): AGI.it - Train disaster in Argentina leaves 50 dead and 703 wounded Home Italy People Business World Sport RSS Contacts Home World Train disaster in Argentina leaves 50 dead and 703 wounded Share: Share Tweet 19:42 23 FEB 2012 ( AGI)Buenos Aires - The tally of victims in Wednesday's train crash in Buenos Aires has seemingly ended at 50 dead and 703 wounded.

bound_sent:  703 wounded

bound_targ_sent: AGI.it - Train disaster in Argentina leaves 50 dead and 703 wounded Home Italy People Business World Sport RSS Contacts Home World Train disaster in Argentina leaves 50 dead 

nug_text: Hundreds injured

sent_id (1): Buenos Aires - Argentine Train Crash Toll At 50, Hundreds Injured -- VosIzNeias .com Welcome , Guest! - Sign-In or Create an Account Easy to remember! » VinNews .com Home Archives About Us Contact Us Search : Buenos Aires - Argentine Train Crash Toll At 50, Hundreds Injured Published on: February 23, 2012 01:14 PM News Source: AP Text Size Email Post Print Post Comments (3) Save Article Buenos Aires - Argentines desperately searched hospitals Thursday in hopes that loved ones survived a train crash that killed 50 people and sent hundreds to emergency rooms.

bound_sent:  Hundreds Injured

bound_targ_sent: Buenos Aires - Argentine Train Crash Toll At 50, Hundreds Injured -- VosIzNeias .com Welcome , Guest! - Sign-In or Create an Account Easy to remember! » VinNews .com Home Archives About Us Contact Us Search : Buenos Aires - Argentine Train Crash Toll At 50, Hundreds Injured Published on: February 23, 2012 01:14 PM News Source: AP Text Size Emai

nug_text: train accident in Buenos Aires, Argentina.

sent_id (2): Photo : AFP Desperate families were searching for loved ones yesterday after a massive train crash in Buenos Aires killed 50 people, injured nearly 700 and left dozens trapped for hours in the wreckage.

bound_sent: æ¨ | æ éç©æ¨ | å¥½åº·å ±å ± | 

bound_targ_sent: Deadly train crash shocks Argentina - Taipei Times è ªç±é»å­å ± | å½±é³å¨æ¨ | æ éç©æ¨ | å¥½åº·å ±å ± | èªç±é¨è½ â  § World News Home Front Page Taiwan News Business Editorials Sports World News Features Bilingual Pages Home / World News Fri , Feb 24, 2012 - Page 7ã   News List Print Mail Facebook Twitter plurk funp Deadly train crash shocks Argentina SLOW , BUT DEADLY : The train was going 20kph when it entered Once station in Buenos Aires and failed to stop, buckling under the pressure and crushing cars AFP, BUENOS AIRES Police and rescue workers surround a train that crashed at Once train station in Buenos Aires , Argentina , on Wed

nug_text: train accident in Buenos Aires, Argentina.

sent_id (5): Civil defence officials said at least 550 people were injured in the crash, which witnesses said occurred after the train's breaks failed as it was arriving at a station on the western outskirts of Buenos Aires.

bound_sent: crash Classifieds | Archives | Jobs | About TGT | Contact | Subscribe | Last updated 14 minutes ago | TGT @ Twitter | RSS Feed | HOME LOCAL MIDEAST 

bound_targ_sent: gulftoday. ae | 49 dead, hundreds injured in Buenos Aires train crash Classifieds | Archives | Jobs | About TGT | Contact | Subscribe | Last updated 14 minutes ago | TGT @ Twitter | RSS Feed | HOME LOCAL MIDEAST ASIA WORLD BUSINESS SPORT OPINION WRITERS Twinge SHJ Pakistan | India Americas | United Kingdom Local | Regional | International | Viewpoint Local | The Aconcagua Diaries Editorial | Gallery | Letters | Send Letters 49 dead, hundreds injured in Buenos Aires train crash February 22, 2012 Print Send to Friend BUENOS AIRES : A pac

nug_text: Hundreds injured

sent_id (8): Injured passengers wait for medical attention at Once train in Buenos Aires , Feb. 22, 2012.

bound_sent: 
Deadly Train Crash

bound_targ_sent: Deadly Train Crash in Argentina A blog about the world, its people and its politics 1 / 10backnext Latin America | By Nicholas Hegel McClelland | February 22, 2012 | + Deadly Train Crash in Argentina Enrique Marcarian / Reuters Juan Mabromata / AFP / Getty Images More Photo Galleries View Again Photos from the Europe 's Deep Freeze Fat Tuesday Photos: Inside the Mardi Gras Mayhem Rio de Janeiro Celebrates Carnival Photos : Explosion by New Delhi's Israeli Embassy The Solemn Scene Outside Houston 's Funeral Black History Month: African Americans and the Great Depression Related Topics : Latin America Tweet emailprint share FacebookTwitterTumblrLinkedInStumbleUponRedditDiggMixxDel. i.cious Google Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour 

nug_text: train crashes into platform

sent_id (2): Election • Breaking Caste March 10: Letters to the editor In photos: Dozens killed, hundreds injured in Buenos Aires train crash Globe and Mail Update Published Wednesday, Feb. 22, 2012 1: 20PM EST Last updated Wednesday, Feb. 22, 2012 1:27PM EST 1 of 3 ( Julio Sanders / Reuters ) Hide caption Trapped passengers from a commuter train that crashed into the Once train station at rush hour are seen in a coach in Buenos Aires February 22, 2012.

bound_sent:  People at canada411.ca People by Can

bound_targ_sent: In photos: Dozens killed, hundreds injured in Buenos Aires train crash - The Globe and Mail Visit our mobile site The Globe and Mail Go to the Globe and Mail homepage Jump to main navigation Jump to main content Search: News Quote Web Businesses People Jobs News Search News Search Stock Quotes Quote Search The Web Search by Google Search People at canada411.ca People by Canada411. ca Search Businesses at yellowpages.ca Business by

nug_text: 49 confirmed deaths

sent_id (2): BUENOS AIRES - A packed commuter train plowed into the buffers at a Buenos Aires station during Wednesday's morning rush hour, killing 49 people and injuring more than 600 in Argentina 's worst rail accident in more than 30 years, officials said.

bound_sent: cs Defense Nationa

bound_targ_sent: Argentine train crashes killing 49 people - Set as Homepage Sat , Mar 10, 2012 16 Adar , 5772 Breaking News Diplomacy & Politics Defense National News Middle East International Iranian Threat Business Sports Health &amp ; Science Opinion Columnists Editorials Op- Eds Letters Jewish World Jewish News Jewish Features Judaism Lifestyle Arts &amp ; Culture Food &amp ; Wine Travel Real Estate Features 2012: The US Presidential race Inside Israel Insights & Features Week in review Blogs In the news Judaism From the Middle East Lifestyle Aliya Science and Technology Premium Zone The Experts The Jerusalem Report Dash 20 Questions e-paper Ivrit Magazine Metro 

nug_text: the train crashed at the buffer stop

sent_id (2): REUTERS/Enrique Marcarian Argentine train crash kills 49 people, hurts 600 Posted: Wednesday, 22 February 2012 11:25AM BUENOS AIRES ( Reuters ) - A packed commuter train plowed into the buffers at a Buenos Aires station during morning rush hour on Wednesday, killing at least 49 people and injuring more than 600 others, officials said.

bound_sent: mployment Opportunities MoDot R

bound_targ_sent: Wires Feed Reuters - World News - Moberly Portal Big K Sports Sports Schedule Smith Heating &amp ; Cooling Area Scores Sports Links Local Events Community Photos Employment Opportunities MoDot Road Conditions Ameren Outage Map KWIXland Cookbook Wedding Announcements Religious Services Country Showdown Trading Post Half Price Hump Day Business Directory Bids for Bargains KWIX AM1230 The Captain 99.9 KRES 104.7 KTCM Glory 97.3 Contest Rules Contact Us EEO File Commuters lie on stretchers after being injured when their train crashed int

nug_text: 676+ injuries

sent_id (7): Credit: Reuters/Enrique Marcarian By Hilary Burke and Magdalena Morales BUENOS AIRES | Thu Feb 23, 2012 3:05am IST BUENOS AIRES ( Reuters ) - A packed commuter train plowed into the buffers at a Buenos Aires station during Wednesday's morning rush hour, killing at least 49 people and injuring more than 600 in Argentina 's worst rail crash in three decades.

bound_sent:  Business Video Company

bound_targ_sent: Argentine commuter train crashes, killing 49 people | Reuters Edition: IN Africa Arabic Argentina Brazil Canada China France Germany Italy Japan Latin America Mexico Russia Spain United Kingdom United States Home Business Business Home Economy Technology Summits Summit Notebook Deals Business Video Company Results & Outlooks Markets Markets Home India Markets US Markets Indices Stocks Stock Screener Currencies Commodities Funds India Top News India Insight World World Home South Asia FaithWorld World Video Tech Technology Home MediaFile Scien

nug_text: 676+ injuries

sent_id (2): BUENOS AIRES - A packed commuter train plowed into the buffers at a Buenos Aires station during Wednesday's morning rush hour, killing 49 people and injuring more than 600 in Argentina 's worst rail accident in more than 30 years, officials said.

bound_sent: ws Middle East Internat

bound_targ_sent: Argentine train crashes killing 49 people - Set as Homepage Sat , Mar 10, 2012 16 Adar , 5772 Breaking News Diplomacy & Politics Defense National News Middle East International Iranian Threat Business Sports Health &amp ; Science Opinion Columnists Editorials Op- Eds Letters Jewish World Jewish News Jewish Features Judaism Lifestyle Arts &amp ; Culture Food &amp ; Wine Travel Real Estate Features 2012: The US Presidential race Inside Israel Insights & Features Week in review Blogs In the news Judaism From the Middle East Lifestyle Aliya Science and Technology Premium Zone The Experts The Jerusalem Report Dash 20 Questions e-paper Ivrit Magazine Metro I

nug_text: train accident in Buenos Aires, Argentina.

sent_id (4): At least 550 people were injured in the crash, which witnesses said occurred after the train's breaks failed as it was arriving at a station on the western outskirts of Buenos Aires.

bound_sent: y Balkan war B

bound_targ_sent: At least 49 dead in Buenos Aires train crash: police - FOCUS Information Agency Focus editions : Focus News Radio Focus Focus Sport Focus Livescore Focus press Focus Army Balkan war Bulgarian Fight flags AIDS - news 16:15 | 10.03.2012 Saturday Home Bulgaria Politics Business Finance European Union Police Southeast Europe and Balkans World Sports World Font size : Picture : AFP At least 49 dead in Buenos Aires train crash: police 22 February 2012 | 19:17 | FOCUS News Agency Home / World Buenos Aires .

bound_targ_sent_spacy: Focus News Radio Focus Focus Sport Focus Livescore Focus press Focus Army Balkan war

contains: False

---------------------------------------------------------------
nug_tex

nug_text: train accident in Buenos Aires, Argentina.

sent_id (7): Credit: Reuters/Enrique Marcarian By Hilary Burke and Magdalena Morales BUENOS AIRES | Thu Feb 23, 2012 3:05am IST BUENOS AIRES ( Reuters ) - A packed commuter train plowed into the buffers at a Buenos Aires station during Wednesday's morning rush hour, killing at least 49 people and injuring more than 600 in Argentina 's worst rail crash in three decades.

bound_sent: Mexico Russia Spain United Kingdom United States Home Bus

bound_targ_sent: Argentine commuter train crashes, killing 49 people | Reuters Edition: IN Africa Arabic Argentina Brazil Canada China France Germany Italy Japan Latin America Mexico Russia Spain United Kingdom United States Home Business Business Home Economy Technology Summits Summit Notebook Deals Business Video Company Results & Outlooks Markets Markets Home India Markets US Markets Indices Stocks Stock Screener Currencies Commodities Funds India Top News India Insight World World Home South A

nug_text: train accident in Buenos Aires, Argentina.

sent_id (2): 49 killed in Buenos Aires train crash – Ninemsn

bound_sent:  Buenos Aires train crash

bound_targ_sent: 49 killed in Buenos Aires train crash – Ninemsn Rent Apartment In Buenos Aires 49 killed in Buenos Aires train crash Ninemsn A packed commuter train slammed into a retaining wall at a railway terminus in Buenos Aires during rush hour Wednesday, leaving at least 49 dead, 550 injured, and dozens trapped in the wreckage.

bound_targ_sent_spacy: 
49 killed in Buenos Aires train crash –

contains: False

---------------------------------------------------------------
nug_text: the train was travelling too fast

sent_id (9): More from GlobalPost : 90 wounded in another crash in Buenos Aires The train came in too fast and hit the barrier at the end of the platform at about 12 mph, reported the AP.

bound_sent: e Suite Spot Search thi

bound_targ_sent: Deadly train crash in Buenos Aires ( VIDEO ) | GlobalPost Powerland The S

nug_text: 49 confirmed deaths

sent_id (4): Related The Lede Blog : Train Crash in Buenos Aires Kills At Least 49 (February 22, 2012) Connect With Us on Twitter Follow @nytimesworld for international breaking news and headlines.

bound_sent: - NYTimes .com Log

bound_targ_sent: Argentina Train Crash Kills Dozens and Injures 600 - NYTimes .com Log In Register Now Help Home Page Today's Paper Video Most Popular Times Topics Search All NYTimes .com Americas World Africa Americas Asia Pacific Europe Middle East U.S.

bound_targ_sent_spacy: 
Argentina Train Crash Kills Dozens and Injures 600 - NYTimes .com

contains: False

---------------------------------------------------------------
nug_text: February 22, 2012

sent_id (4): Related The Lede Blog : Train Crash in Buenos Aires Kills At Least 49 (February 22, 2012) Connect With Us on Twitter Follow @nytimesworld for international breaking news and headlines.

bound_sent: In Register Now He

bound_targ_sent: Argentina Train Crash Kills Doze

nug_text: train accident in Buenos Aires, Argentina.

sent_id (3): Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires February 22, 2012./ REUTERS Officials said faulty brakes were suspected of causing the accident and witnesses said the train hurtled into the buffers.

bound_sent: e | ABS - CBN News TV PATROL LIVE REPLAY BANDILA BMPM CURRENT AFFAIRS ANC DZM

bound_targ_sent: Argentine commuter train crashes, killing 49 people | ABS - CBN News TV PATROL LIVE REPLAY BANDILA BMPM CURRENT AFFAIRS ANC DZMM LIVE AUDIO LOCAL TV PATROL BICOL CAGAYAN VALLEY CENTRAL VISAYAS CHAVACANO ILOCOS NEGROS NORTH CENTRAL LUZON NORTHERN LUZON NORTHERN MINDANAO PANAY PAMPANGA PALAWAN SOCKSARGEN SOUTHERN MINDANAO SOUTHERN TAGALOG TACLOBAN ABS - CBN.COM ABS-CBN News | Latest Philippine Headlines, Breaking News, Video, Analysis, Features Search Search this site: Sign up Login Username : * Password : * Create new account Request new pas

nug_text: 49 confirmed deaths

sent_id (4): The World Crash at Buenos Aires rail terminus leaves 49 dead, 550 injured From: AFP February 23, 2012 8:25AM Increase Text Size Decrease Text Size Print Email Share Add to Digg Add to del. icio.us Add to Facebook Add to Kwoff Add to Myspace Add to Newsvine What are these?

bound_sent: 550 inju

bound_targ_sent: Crash at Buenos Aires rail terminus leaves 49 dead, 550 injured | The Australian Skip to: Main Content Site Navigation Site Footer Site Search Site Map Network Navigation (other sites) news. com. au Fox Sports CareerOne Carsguide RealEstate News Network The Australian News Opinion National Affairs Business Aus IT Higher Ed Media Sport Arts JOBS Latest Jobs careerone.com.au Job Search Employment News Salary Calculator Advertise your Job Magazines Careers SEARCH 5 Minutes 10 Minutes List Standard List View Video National The Australian news. com.au The Punch FOXSPORTS SportingPulse State and Territory The Telegraph Courier Mail Herald Su

nug_text: the train crashed at the buffer stop

sent_id (2): Wed Feb 22, 2012 2:31PM Share | Email | Print At least 49 people have been killed and over 600 others injured after a packed train slammed into the end of the line in the capital Buenos Aires ' busy Once station, officials said.

bound_sent:  Programs Documentaries Ù Ø§Ø±Ø³Û Iran

bound_targ_sent: ï»¿ PressTV - Argentina train crash leaves 49 dead, 600 injured Schedule Audio Stream Watch Live text only version Home News Programs Documentaries Ù Ø§Ø±Ø³Û Iran Middle East Britain United States Asia-Pacific Africa Europe Americas Arts Business Health Sci -Tech Society Sports Wednesday Feb 22, 2012 09:29 PM GMT Home &gt ; Americas &gt ; More From Americas Â » Back to Story Argentina train crash leaves 49 dead, 600 injured February 22, 2012 picture shows the commuter train that crashed into in the Argentinean capital Buenos Airesâ  Once train station at the rush hour after its brakes failed.

bound_targ_sent_spacy: 600 injure

nug_text: train accident in Buenos Aires, Argentina.

sent_id (7): (AP Photo / Argentina 's Press Office ) Argentine President Cristina Fernandez urged investigators Monday to determine responsibility for last week's deadly train crash soon, and hinted she may move toward renationalizing railways.

bound_sent: ng trains Argentine leader hints at renationalizing trains Login or Signup Email: Password : Forgot Password Facebook user?
You c

bound_targ_sent: Argentine leader hints at renationalizing trains Argentine leader hints at renationalizing trains Login or Signup Email: Password : Forgot Password Facebook user?

bound_targ_sent_spacy: 
Argentine leader hints at renationalizing trains Argentine leader hints at renationalizing trains Login or Signup Email:

contains: False

---------------------------------------------------------------
nug_text: 49 confirmed deaths

sent_id (4): Oldies 106.7 105.9 The Brew Z100 Wild 107.5 620 KPOJ K103 ADVERTISE National News Wednesday, February 22,

nug_text: 49 confirmed deaths

sent_id (1): 48 killed, hundreds injured in Argentinian train crash - Indiatalkies.com News at your Tips 48 killed, hundreds injured in Argentinian train crash February 22, 2012 | Accident / Crime / Disaster | Written by Kayanush Tweet Tweet Buenos Aires , Feb 22: At least 48 people were killed and hundreds wounded Wednesday in a commuter train crash at one of the three busiest stations in Argentine capital Buenos Aires, BBC reported, quoting the police.

bound_sent:  48 killed,

bound_targ_sent: 48 killed, hundreds injured in Argentinian train crash - Indiatalkies.com News at your Tips 48 killed, hundreds injured in Argentinian train crash February 22, 2012 | Accident / Crime / Disaster | Written by Kayanush Tweet Tweet Buenos Aires , Feb 22: At least 48 people were killed and hundreds wounded Wednesday in a commuter train crash at one of the three busiest stations in Argentine capital Buenos Aires, BBC reported, quoting the police.

bound_targ_sent_spac

nug_text: 676+ injuries

sent_id (2): At least 49 people were killed and more than 600 people were injured when a train plowed into a platform at a Buenos Aires station Wednesday, state media said.

bound_sent: 00WLW | THE BIG ONE Follow @700wlw

bound_targ_sent: 700WLW - THE BIG ONE LISTEN LIVE 700WLW | THE BIG ONE Follow @700wlw Free Live and Custom Radio advertisement | your ad here Sign Up | Edit Account | News Local News National News Entertainment News Music News News Video News &amp ; Traffic Staff Reporters' Notebook School Closings Showbiz Stuff Health News Weather Traffic Sports Sports Ohio Sports Headquarters Cincinnati Reds Reds Spring Training Cincinnati Bengals Kentucky Speedway University of Cincinnati Xavier University Ohio State Buckeyes UK Basketball Schedule Blogs Jim Scott Scott Sloan Bill Cunningham Eddie &amp ; Tracy Lance McAlister Lance 's Chat Marc Amazon Reds Spring Training Steve Sommers - ATN Darryl Parks Seg Dennison Mo Egger Showbiz Stuff Big One Restauran

nug_text: 49 confirmed deaths

sent_id (1): Over 600 injured and at least 49 killed in Argentina train crash | National News - News Radio 1190 & 102.3 KEX - Depend On Us.

bound_sent:  at least 49 killed

bound_targ_sent: Over 600 injured and at least 49 killed in Argentina train crash | National News - News Radio 1190 & 102.3 KEX - Depend On Us.

bound_targ_sent_spacy: 
Over 600 injured and at least 49 killed in Argentina train crash

contains: False

---------------------------------------------------------------
nug_text: 49 confirmed deaths

sent_id (1): 49 dead, hundreds injured in Buenos Aires train crash - Yahoo!

bound_sent: 
49 dead

bound_targ_sent: 49 dead, hundreds injured in Buenos Aires train crash - Yahoo!

bound_targ_sent_spacy: 
49 dead, hundreds injured in Buenos Aires train crash - Yahoo!


contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (1): Hundreds injured in Buenos Aire

nug_text: Hundreds injured

sent_id (8): Picture: AP Source: AP At least 49 dead, hundreds injured in Argentina crash Train smashed into end of station platform Windows exploded, cars separated, people thrown A PACKED commuter train entering a Buenos Aires station at morning rush hour overnight smashed into a retaining wall, crumpling cars and leaving at least 49 dead, 600 injured and dozens trapped in the twisted wreckage.

bound_sent:  | Herald Sun Ski

bound_targ_sent: Argentinian train derailment injures 600 | Herald Sun Skip to: Main Content Site Navigation Site Footer Site Search Site Map Network Navigation (other sites) news. com. au Fox Sports CareerOne Carsguide RealEstate News Network Herald Sun News Sport AFL NRL Grand Prix Racing Olympics Soccer Cricket Rugby Gold Golf Motor Racing Tennis Netball More sports Sport Confidential OddsScanner Entertainment Confidential Arts Fashion Movies Music TV & Radio Events Business Breaking News Markets Dollar Worklife Your Business Terry

nug_text: Benavidez rail disaster in 1970

sent_id (31): Worst Crash Argentina ’s worst railway crash occurred in 1970, when two trains collided near Benavidez , 48 kilometers from Buenos Aires , killing more than 200 people.

bound_sent: 60 - Businessweek Bloomberg Businessweek Go To Bus

bound_targ_sent: Buenos Aires Commuter Train Crash Kills 49, Injures 460 - Businessweek Bloomberg Businessweek Go To Businessweek. com Bloomberg Businessweek Businessweek Global Economics Pakistan 's Textile Industry Is Dangerously Fragile Textile mills in Faisalabad are closing while workers riot Global Economics Mexico's Dangerous News China 's Slowing Growth Points to More Easing Ahead Return to Chernobyl Spanish Soccer's Economic Crisis Recent Mystery and Rumor Dominate China in the Time of Bo Rediscovering the Philippines Wouldn't You Really Rather Have a Bezos The Three Wedges That Separate Workers From Their Pay Why Lower Natural Gas Prices Help the U.S.

bound_targ_sent_spacy: 460 - Businessw

nug_text: unknown number were killed

sent_id (10): Dozens dead in rush hour train crash Argentina train crash kills more than 40 Passengers told reporters the crash sounded like a bomb blast.

bound_sent: 
Argentina d

bound_targ_sent: Argentina declares two-day mourning period after train crash kills 50 - CNN. com SET EDITION : U.S.

bound_targ_sent_spacy: 
Argentina declares two-day mourning period after train crash kills 50 - CNN.

contains: False

---------------------------------------------------------------
nug_text: 49 confirmed deaths

sent_id (8): Picture: AP Source: AP At least 49 dead, hundreds injured in Argentina crash Train smashed into end of station platform Windows exploded, cars separated, people thrown A PACKED commuter train entering a Buenos Aires station at morning rush hour overnight smashed into a retaining wall, crumpling cars and leaving at least 49 dead, 600 injured and dozens trapped in the twisted wreckage.

bound_sent: lf Motor Racing T

bound_targ_sent:

nug_text: 49 confirmed deaths

sent_id (1): 49 dead in Argentina train crash Accessibility options Buenos Aires train crash Argentina 49 dead in Argentina train crash World UK Sport Showbiz Business Technology Odd Paramedics carry away wounded passengers from a commuter train after a crash in Argentina ( AP ) Injured passengers lie on the platform in Buenos Aires ( AP ) Published: 2:07pm, 22nd February 2012 Updated: 6:46am, 23rd February 2012 A packed train has slammed into a barrier at a Buenos Aires station, killing 49 people and injuring hundreds of morning commuters.

bound_sent: 
49 dead 

bound_targ_sent: 49 dead in Argentina train crash Accessibility options Buenos Aires train crash Argentina 49 dead in Argentina train crash World UK Sport Showbiz Business Technology Odd Paramedics carry away wounded passengers from a commuter train after a crash in Argentina ( AP ) Injured passengers lie on the platform in Buenos Aires ( AP ) Published: 2:07pm, 22nd February 2012 Updated: 6:46a

nug_text: 550 injured

sent_id (3): The city emergency service confirmed so far 550 injured and at least 40 fatal casualties when a suburban train failed to break and ran into the buffers at the railway terminus.

bound_sent: s train crash

bound_targ_sent: PhotoBlog - Hundreds injured in Buenos Aires train crash MSN Hotmail More Autos My MSN Video Careers &amp ; Jobs Personals Weather Delish Quotes White Pages Games Real Estate Wonderwall Horoscopes Shopping Yellow Pages Local Edition Traffic Feedback Maps & Directions Travel Full MSN Index Bing msnbc. com sites & shows: TODAY Rock Center Nightly News Meet the Press Dateline Morning Joe Hardball Ed Maddow Last Word msnbc tv Home US World Politics Business Sports Entertainment Health Tech &amp ; science Travel Local Weather Advertise | AdChoices Recommended: Fukushima : Before, during and after Recommended: Sky lights go wild, north and south Recommended: Tsunami survivors: For a rice farmer, obstacles still ahead Recommended: Song , d

nug_text: February 22, 2012

sent_id (3): Rescue workers carry a passenger who was injured when a commuter train crashed into the Once train station during rush hour in Buenos Aires , February 22, 2012.

bound_sent:  our mobile site 

bound_targ_sent: Passengers say conductor struggled with train’s brakes before crash killed 49 - The Globe and Mail Visit our mobile site The Globe and Mail Go to the Globe and Mail homepage Jump to main navigation Jump to main content Search: News Quote Web Businesses People Jobs News Search News Search Stock Quotes Quote Search The Web Search by Google Search People at canada411.ca People by Canada411. ca Search Businesses at yellowpages.ca Business by Yellowpages. ca Search Jobs at eluta. ca Jobs by eluta. ca Login Register Select City Select City Calgary Halifax Montreal Toronto Vancouver More cities Home News Commentary Business Investing Sports Life Arts Technology Drive Site map National Politics World News Video Worldview Africa - Mideast Americas

nug_text: the train crashed at the buffer stop

sent_id (19): Earlier Wednesday, Schiavi said authorities believed there were problems with the train's brakes that caused it to smash into a barrier at the station .

bound_sent: t daylight saving time

bound_targ_sent: Argentina declares two-day mourning period after train crash kills 50 | News - Home Back To Mobile Site Learn about daylight saving time This Sunday our clocks will be rolling forwa… Bugs in kitchen?

bound_targ_sent_spacy: Learn about daylight saving time

contains: False

---------------------------------------------------------------
nug_text: cause reported as malfunction of railway brakes

sent_id (19): Earlier Wednesday, Schiavi said authorities believed there were problems with the train's brakes that caused it to smash into a barrier at the station .

bound_sent: ning period after train crash kills 50 | News - Home Back To Mobile Site Learn about dayl

bound_targ_sent: Argentina declares two-day mourning period af

nug_text: crashed at speed of 26 kilometers per hour

sent_id (6): The commuter train came in too fast and hit the barrier at the end of the platform at about 16 mph (26 kph ), smashing the front of the engine and crunching the leading cars behind it; one car penetrated nearly 20 feet (six meters) into the next, Argentina 's transportation secretary, J.P.

bound_sent:  Movies Cricket Good Times 

bound_targ_sent: 49 killed, 550 injured as train slams into station in Argentina NDTV Profit Khabar Movies Cricket Good Times Recipes Updated: March 10, 2012 23:55 IST Home Live TV Video India Elections Cities World Sports Tech Photos Trends Social Weather Apps Schedule Music Environment Polls Forums News Alerts You are here: Home » World » Services : Astrology | Shopping | B2B | Property | e -Learning | Classifieds | Loans | Gifts 49 killed, 550 injured as train slams into station in Argentina Associated Press , Updated : February 23, 2012 10:41 IST Tweet Buenos Aires : A packed train slammed

nug_text: third worst train accident in Argentina since the Benavidez rail disaster in 1970 and the "Estrella del Norte" in 1978.

sent_id (25): The worst train accidents in Argentine history include a 1970 crash that killed more than 230 people and another in 1978, in which about 55 died, local media said.

bound_sent: 
Argentine commuter train crashes, killing 49 people | World News | Comcast XFINITY Home TV Connect Account Shop Help Secu

bound_targ_sent: Argentine commuter train crashes, killing 49 people | World News | Comcast XFINITY Home TV Connect Account Shop Help Security Register Sign In Profile Set-up Loading Percentage View More Options Hi Sign Out Welcome Complete the XFINITY set up process so you can browse, watch and record your TV shows & movies anytime, anywhere.

bound_targ_sent_spacy: 
Argentine commuter train crashes, killing 49 people

contains: False

---------------------------------------------------------------
nug_text: Hundreds injured

sent_id (1): Hundreds

nug_text: train accident in Buenos Aires, Argentina.

sent_id (35): Transportation Secretary Juan Pablo Schiavi called the crash "very serious" and said there may be deaths, according to Telam ...Witnesses told local media... Train Crashes into Station Platform KSAZ At least 340 people were injured when a train crashed into a busy railway station platform in Buenos Aires on Wednesday, the La Nacion newspaper reported...However, Argentina 's transportation secretary, Juan Pablo Schiavi , said people could be dead... Several dead, 550 injured in Argentina train crash GMA News Several people were killed and 550 injured when a crowded passenger train slammed into the buffers at a railway station on Wednesday in the Argentine capital Buenos Aires, officials said. Many of those hurt in the rush-hour incident suffered... Hundreds injured in Argentina rail accident Al Jazeera 15 A packed train has crashed into the end of the track in a Buenos Aires railway station, injuring at least 340 mornin

nug_text: The locomotive and the first three cars were crushed.

sent_id (12): "Then I saw the engine destroyed and the train driver trapped amongst the steel.

bound_sent: ntina train crash 

bound_targ_sent: BBC News - Argentina train crash in Buenos Aires kills 49 BBC Accessibility links Skip to content Skip to local navigation Skip to bbc.co. uk navigation Skip to bbc.co. uk search Help Accessibility Help Latin America &amp ; Caribbean Home World UK England N.

bound_targ_sent_spacy: 
BBC News - Argentina train crash in Buenos Aires kills 49 BBC Accessibility links Skip to content Skip to local navigation Skip to

contains: False

---------------------------------------------------------------
nug_text: 49 confirmed deaths

sent_id (1): BBC News - Argentina train crash in Buenos Aires kills 49 BBC Accessibility links Skip to content Skip to local navigation Skip to bbc.co. uk navigation Skip to bbc.co. uk search Help Accessibility Help Latin America &amp ; Caribbean Home World UK E

nug_text: 49 confirmed deaths

sent_id (4): Study says it works 9 of 9 February 22, 2012 11:22 AM Print Text Federal police spokesman confirms 49 dead in Argentine train accident Add Comment Have Your Say Email Story Send to a Friend Share This Tell Your Friends Tweet This Tweet This More Share It .

bound_sent:  Evening

bound_targ_sent: Federal police spokesman confirms 49 dead in Argentine train accident - CBS News CBSNews.com | CBS Evening News | CBS This Morning | 48 Hours | 60 Minutes | Sunday Morning | Face the Nation | Up to the Minute Log In | Register Your Profile | Log Out CBS News.com World Video U.S.

bound_targ_sent_spacy: 
Federal police spokesman confirms 49 dead in Argentine train accident - CBS News CBSNews.com | CBS Evening News | CBS

contains: False

---------------------------------------------------------------
nug_text: February 22, 2012

sent_id (4): Study says it works 9 of 9 February 22, 2012 11:22 AM Print Text Federal police spokesman confirms 49 dead in Ar

nug_text: 3 children killed

sent_id (7): "Unfortunately, we must report that there are 49 dead in the accident," including a child, police spokesman Nestor Rodriguez told a news conference.

bound_sent: ews 49 dead, hundr

bound_targ_sent: 49 dead, hundreds injured in Buenos Aires train crash | Bangkok Post: news 49 dead, hundreds injured in Buenos Aires train crash | Bangkok Post: news Home Help Lite Version Log in Sign up Member benefit E - Paper SMS Print Front Page Newswire Print subscription RSS Advanced search news business opinion travel food lifestyle Arts & Culture feature learning tech property auto multimedia Video Photo Interactive Morning Focus Service Archive Directory Search Reader forum Classifieds Event calendar Hotel booking Member setting Local news Politics Security Crimes Transport Health Sports Asia World Investigative report Election News > World 49 dead, hundreds injured in Buenos Aires train crash Published: 23/02/2012 at 05:32 AM Online news: World Share Twee

nug_text: train crashes into platform

sent_id (4): By MICHAEL WARREN | The Associated Press First Published Feb 22 2012 08:07 am • Last Updated Feb 22 2012 07:15 pm Buenos Aires , Argentina • A train packed with morning commuters slammed into a downtown station on Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded around them.

bound_sent:  Updated: 04:27 pm Salt Lake City

bound_targ_sent: Argentine train slams into station, killing 49 By MICHAEL WARREN The Associated Press Argentine train slams into station, killing 49 | The Salt Lake Tribune Nation + World | Last Updated: 04:27 pm Salt Lake City 58° Fair | Traffic Utah Nation + World Neighborhood Politics Justice Polygamy LDS Church Education UtahsRight.com McEntee Rolly Online Today News Utah Nation + World Neighborhood Politics Justice Polygamy LDS Church Education Weather UtahsRight.com McEntee Rolly Online Today Sports Preps College BYU Cougars Utah Utes PAC -12 USU Aggies Jazz RS

nug_text: train accident in Buenos Aires, Argentina.

sent_id (4): By MICHAEL WARREN | The Associated Press First Published Feb 22 2012 08:07 am • Last Updated Feb 22 2012 07:15 pm Buenos Aires , Argentina • A train packed with morning commuters slammed into a downtown station on Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded around them.

bound_sent: station, killing 49 | The Salt Lake Tribune Nation + World | Last Updated: 04:27 pm Salt Lake City

bound_targ_sent: Argentine train slams into station, killing 49 By MICHAEL WARREN The Associated Press Argentine train slams into station, killing 49 | The Salt Lake Tribune Nation + World | Last Updated: 04:27 pm Salt Lake City 58° Fair | Traffic Utah Nation + World Neighborhood Politics Justice Polygamy LDS Church Education UtahsRight.com McEntee Rolly Online Today News Utah Nation + World Neighborhood Politics Justice Polygamy LDS Church Education Weather UtahsRight.com McEntee Rolly On

nug_text: Hundreds injured

sent_id (8): ( AP Photo / Leonardo Zavattaro , Telam) storyidforme : 26165304 tmspicid: 9504941 fileheaderid: 4359327 Article Extras Updated: February 22, 2012 3: 39PM BUENOS AIRES , Argentina ( AP) — A train packed with morning commuters slammed into a downtown station on Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded around them.

bound_sent: aperville Sun Post

bound_targ_sent: Argentine train slams into station, killing 49 - Chicago Sun-Times Metering is ON Contact Subscribe E-paper TV Weekly Reader Services Advertise with Us Select a Publication or Site Daily Publications Chicago Sun-Times The Beacon News The Courier News The Herald News Lake County News-Sun The Naperville Sun Post-Tribune The SouthtownStar Pioneer Press - Pioneer Local PioneerLocal.com Barrington Courier Review Buffalo Grove Countryside The Doings Claredon Hills Edition The Doings Hinsdale Edition The Doings La Grange Edition The Doin

nug_text: train accident in Buenos Aires, Argentina.

sent_id (2): Some 550 others were injured The Breaking News Dashboard Officials fear up to 40 dead following commuter train crash in Buenos Aires , # Argentina .

bound_sent:  others were injured The Breaking News Dashbo

bound_targ_sent: Some 550 others were injured The Breaking News Dashboard Officials fear up to 40 dead following commuter train crash in Buenos Aires , # Argentina .

bound_targ_sent_spacy: Some 550 others were injured The Breaking News Dashboard Officials fear up to 40 dead following commuter train crash in Buenos Aires , # Argentina .


contains: False

---------------------------------------------------------------
nug_text: 550 injured

sent_id (1): AGI.it - Train crash in Argentina caused many dead and 550 injured Home Italy People Business World Sport RSS Contacts Home World Train crash in Argentina caused many dead and 550 injured Share: Share Tweet 16:33 22 FEB 2012 ( AGI)Buenos Aires -The causualty toll of

nug_text: The locomotive and the first three cars were crushed.

sent_id (8): ( AP Photo / Leonardo Zavattaro , Telam) storyidforme : 26165304 tmspicid: 9504941 fileheaderid: 4359327 Article Extras Updated: February 22, 2012 3: 39PM BUENOS AIRES , Argentina ( AP) — A train packed with morning commuters slammed into a downtown station on Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded around them.

bound_sent: SouthtownStar 

bound_targ_sent: Argentine train slams into station, killing 49 - Chicago Sun-Times Metering is ON Contact Subscribe E-paper TV Weekly Reader Services Advertise with Us Select a Publication or Site Daily Publications Chicago Sun-Times The Beacon News The Courier News The Herald News Lake County News-Sun The Naperville Sun Post-Tribune The SouthtownStar Pioneer Press - Pioneer Local PioneerLocal.com Barrington Courier Review Buffalo Grove Countryside The Doings Claredon Hills Edition The Doings Hinsdale Edition The 

nug_text: 49 confirmed deaths

sent_id (1): Argentina train crash leaves 49 dead - Yahoo!

bound_sent:  49 dead

bound_targ_sent: Argentina train crash leaves 49 dead - Yahoo!

bound_targ_sent_spacy: 
Argentina train crash leaves 49 dead - Yahoo!


contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (1): Argentina train crash leaves 49 dead - Yahoo!

bound_sent: 
Argentina train crash

bound_targ_sent: Argentina train crash leaves 49 dead - Yahoo!

bound_targ_sent_spacy: 
Argentina train crash leaves 49 dead - Yahoo!


contains: False

---------------------------------------------------------------
nug_text: 50 confirmed dead

sent_id (1): 50 dead, over 500 wounded in Buenos Aires !

bound_sent: 
50 dead

bound_targ_sent: 50 dead, over 500 wounded in Buenos Aires !

bound_targ_sent_spacy: 
50 dead, over 500 wounded in Buenos Aires !


contains: False

---------------------------------------------

nug_text: train crashes into platform

sent_id (5): Argentina train crashes into station, several killed south of Buenos Aires.

bound_sent: res Train Crash Hot Topics 

bound_targ_sent: Buenos Aires Train Crash Hot Topics Tracking, Popular Topics 49 dead, hundreds injured in Buenos Aires train crash "The train was full and the impact was tremendous," a passenger identified only as Ezequiel told local television, adding that medics at the scene appeared overwhelmed by the scale of the disaster.

bound_targ_sent_spacy: 
Buenos Aires Train Crash Hot Topics Tracking, Popular Topics 49 dead, hundreds injured in Buenos Aires train crash

contains: False

---------------------------------------------------------------
nug_text: unknown number were killed

sent_id (5): Argentina train crashes into station, several killed south of Buenos Aires.

bound_sent: racking, Popula

bound_targ_sent: Buenos Aires Train Crash Hot Topics Tracking, Popular Topics 49 dead, hundreds injured in Buenos Aires t

nug_text: 49 confirmed deaths

sent_id (1): Buenos Aires Train Crash Hot Topics Tracking, Popular Topics 49 dead, hundreds injured in Buenos Aires train crash "The train was full and the impact was tremendous," a passenger identified only as Ezequiel told local television, adding that medics at the scene appeared overwhelmed by the scale of the disaster.

bound_sent:  49 dead

bound_targ_sent: Buenos Aires Train Crash Hot Topics Tracking, Popular Topics 49 dead, hundreds injured in Buenos Aires train crash "The train was full and the impact was tremendous," a passenger identified only as Ezequiel told local television, adding that medics at the scene appeared overwhelmed by the scale of the disaster.

bound_targ_sent_spacy: 
Buenos Aires Train Crash Hot Topics Tracking, Popular Topics 49 dead, hundreds injured in Buenos Aires train crash

contains: False

---------------------------------------------------------------
nug_text: Hundreds injured

sent_id (1): Buenos Aires Train Crash Ho

nug_text: Hundreds injured

sent_id (20): The commuter train came in too fast and hit the barrier at the end... At least 49 killed, hundreds injured in Argentina train crash CNN Blog ET] At least 49 people were killed and hundreds of people were injured Wednesday morning when a train packed with rush-hour commuters plowed head-on into a barrier at a station in Buenos Aires , Argentina , officials said.

bound_sent: form... Loading new incentive en

bound_targ_sent: ( Tragic Pictures ) Train Accident Killed Dozens in Argentina Loading activation form... Loading login form... Loading sign up form... Loading activation form... Loading password form... Loading new incentive enroll form... Loading invite contacts form... Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!

bound_targ_sent_spacy: Loading password form...

contains: True

---------------------------------------------------------------
nug_te

nug_text: February 22, 2012

sent_id (18): At least 49 people have been reported dead and more than 600 have sustained injuries, according to Telam , Argentina 's... Argentine train slams into station, killing 49 San Diego Union-Tribune Firemen rescue wounded passengers from a commuter train after a collision in Buenos Aires , Argentina , Wednesday Feb. 22, 2012.

bound_sent: e below, check t

bound_targ_sent: ( Tragic Pictures ) Train Accident Killed Dozens in Argentina Loading activation form... Loading login form... Loading sign up form... Loading activation form... Loading password form... Loading new incentive enroll form... Loading invite contacts form... Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!

bound_targ_sent_spacy: Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!


contains: True

------------

nug_text: 550 injured

sent_id (25): At least forty-nine people are dead and approximately six hundred are injured after a train derailed during the morning rush hour in Argentina 's Buenos Aires province: ... Several dead, 550 injured in Argentina train crash - InterAksyon .com www.interaksyon .com Several people were killed and 550 injured when a crowded passenger train slammed into the buffers at a railway station on Wednesday in the Argentine capital Buenos Aires, officials said. ... with serious accidents in recent years.

bound_sent:  Loading new

bound_targ_sent: ( Tragic Pictures ) Train Accident Killed Dozens in Argentina Loading activation form... Loading login form... Loading sign up form... Loading activation form... Loading password form... Loading new incentive enroll form... Loading invite contacts form... Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!

bound_targ_sent_spacy: Loadi

nug_text: the train crashed at the buffer stop

sent_id (22): The train, which was overcrowded with more than a thousand passengers,... Argentine train crash kills 49 people, hurts 600 Zee News They said the train was unable to stop, presumably due to faulty brakes, and it slammed into the buffers inside the centrally located "Once" station.

bound_sent:  enroll form... Loading invi

bound_targ_sent: ( Tragic Pictures ) Train Accident Killed Dozens in Argentina Loading activation form... Loading login form... Loading sign up form... Loading activation form... Loading password form... Loading new incentive enroll form... Loading invite contacts form... Invite your friends Please enter an optional message below, check the friends you want to invite, hit "Send Invitation," and you're done!

bound_targ_sent_spacy: Loading new incentive enroll form...

contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_

nug_text: train accident in Buenos Aires, Argentina.

sent_id (1): Argentina train crash in Buenos Aires 'kills dozens' Daily Hot News updated BUENOS AIRES – A packed commuter train plowed into the buffers at a Buenos Aires station during Wednesday's morning rush hour, killing 49 people and injuring more than 600 in Argentina 's worst rail accident in more than 30 years, officials said.

bound_sent: 
Argentina train crash in Buenos Aires 

bound_targ_sent: Argentina train crash in Buenos Aires 'kills dozens' Daily Hot News updated BUENOS AIRES – A packed commuter train plowed into the buffers at a Buenos Aires station during Wednesday's morning rush hour, killing 49 people and injuring more than 600 in Argentina 's worst rail accident in more than 30 years, officials said.

bound_targ_sent_spacy: 
Argentina train crash in Buenos Aires 'kills dozens' Daily Hot News updated BUENOS

contains: False

---------------------------------------------------------------
nug_text: 49 confirmed dea

nug_text: missing his stopping marks at each station

sent_id (22): Some passengers reported signs the conductor was struggling with the brakes before the crash, saying he kept overshooting platforms and missed one entirely.

bound_sent: .. Subscribe | Today's Classifieds | Place Ad | Jobs 

bound_targ_sent: Argentine train slams into station; 49 die Close Sun Subscriber Login Username: Password : Please wait.... Subscribe | Today's Classifieds | Place Ad | Jobs | Real Estate | More Classifieds | Bartow Weather BARTOW LAKE WALES LAKE PLACID FORT MEADE FROSTPROOF HAINES CITY News Story Back Print Email Updated: 02/22/2012 10:24:03PM Argentine train slams into station; 49 die Share this story: Share Firemen rescue wounded passengers from a commuter train after a collision in Buenos Aires , Argentina , Wednesday Feb. 22, 2012.

bound_targ_sent_spacy: Subscribe |

contains: False

---------------------------------------------------------------
nug_text: cause reported as malfunction of rai

nug_text: train accident in Buenos Aires, Argentina.

sent_id (11): (AP Photo / Leonardo Zavattaro , Telam ) A wounded passenger waits to be rescued from a commuter train after a collision in Buenos Aires , Argentina , Wednesday Feb. 22, 2012.

bound_sent: un Subscriber Login Username: Password : Please wait.... Subscribe | Today's Classifieds | Place Ad

bound_targ_sent: Argentine train slams into station; 49 die Close Sun Subscriber Login Username: Password : Please wait.... Subscribe | Today's Classifieds | Place Ad | Jobs | Real Estate | More Classifieds | Bartow Weather BARTOW LAKE WALES LAKE PLACID FORT MEADE FROSTPROOF HAINES CITY News Story Back Print Email Updated: 02/22/2012 10:24:03PM Argentine train slams into station; 49 die Share this story: Share Firemen rescue wounded passengers from a commuter train after a collision in Buenos Aires , Argentina , Wednesday Feb. 22, 2012.

bound_targ_sent_spacy: Sun Subscriber Login Username:

contains: False

--------------------------

nug_text: train accident in Buenos Aires, Argentina.

sent_id (6): Feb. 22 – Hundreds injured as rescue workers continue to pull trapped people from the wreckage of a Buenos Aires train crash.

bound_sent: 2012 at 9:44 AM EST OFF TH

bound_targ_sent: Argentina train crash in Buenos Aires 'kills dozens' Latest... By Agence France-PresseWed , Feb 22 2012 at 9:44 AM EST OFF THE RAILS: Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires .

bound_targ_sent_spacy: By Agence France-PresseWed , Feb 22 2012 at 9:44 AM EST OFF THE RAILS: Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires .


contains: False

---------------------------------------------------------------
nug_text: 550 injured

sent_id (5): Transport Secretary Juan Pablo Schiavi confirmed that 340 people were injured and Buenos Aires train crash leaves at least 40 dead and over 500

nug_text: 49 confirmed deaths

sent_id (1): Argentina train crash ‘kills 49 ′ Morning Tea - News Headlines A commuter train crash at a station in the Argentine capital, Buenos Aires , kills 49 people and leaves at least 600 injured, officials say.

bound_sent:  kills 49 people 

bound_targ_sent: Argentina train crash ‘kills 49 ′ Morning Tea - News Headlines A commuter train crash at a station in the Argentine capital, Buenos Aires , kills 49 people and leaves at least 600 injured, officials say.

bound_targ_sent_spacy: A commuter train crash at a station in the Argentine capital, Buenos Aires , kills 49 people and leaves at least 600 injured, officials say.


contains: False

---------------------------------------------------------------
nug_text: 676+ injuries

sent_id (2): It was Argentina 's worst train accident in BUENOS AIRES — A packed commuter train slammed into a retaining wall at a railway terminus in Buenos Aires during rush hour Wednesday, leaving at least 49 dead, 600 inju

nug_text: Hundreds injured

sent_id (15): From the Web By Michael Warren ASSOCIATED PRESS E-mail Print Larger Type Small Type BUENOS AIRES , Argentina — A train packed with morning commuters slammed into a downtown station Wednesday, killing 49 people and injuring hundreds as passenger cars crumpled and windows exploded around them.

bound_sent: S HOOKEM.COM ENTER

bound_targ_sent: SUBSCRIBE TODAY PRINT OR E-EDITION JOBS HOMES CARS CLASSIFIEDS HOOKEM.COM ENTERTAINMENT NEWS Home NEWS Local News VIRTUAL CAPITOL Perry Files PolitiFact Texas STATE Commuter Rail Got a news tip?

bound_targ_sent_spacy: HOMES CARS CLASSIFIEDS HOOKEM.COM

contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (15): From the Web By Michael Warren ASSOCIATED PRESS E-mail Print Larger Type Small Type BUENOS AIRES , Argentina — A train packed with morning commuters slammed into a downtown station Wednesday, killing 49 people an

nug_text: 49 confirmed deaths

sent_id (7): World A: larger smaller reset Email Print Argentine commuter train crashes, killing 49 people UPDATED @ 05:43:53 PM 23-02-2012 February 23, 2012 Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires Feb 22, 2012. — Reuters pic BUENOS AIRES , Feb 23 — A packed commuter train ploughed into the buffers at a Buenos Aires station during yesterday's morning rush hour, killing at least 49 people and injuring more than 600 in Argentina 's worst rail crash in three decades.

bound_sent: o form coalition Prove PMO 

bound_targ_sent: Main - World - Argentine commuter train crashes, killing 49 people @ Thu Feb 23 2012 7-day Archive: Mon Tues Wed Thurs Fri Sat Sun Saturday, 10 March 2012 Last Update : 04:53pm Weather | Kuala Lumpur 29 °C News Malaysia Business World Showbiz Sports Features Opinion Bahasa Food Books Tech Drive Travel Gallery More Most ETP targets exceeded, says Idris 

nug_text: train accident in Buenos Aires, Argentina.

sent_id (7): World A: larger smaller reset Email Print Argentine commuter train crashes, killing 49 people UPDATED @ 05:43:53 PM 23-02-2012 February 23, 2012 Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires Feb 22, 2012. — Reuters pic BUENOS AIRES , Feb 23 — A packed commuter train ploughed into the buffers at a Buenos Aires station during yesterday's morning rush hour, killing at least 49 people and injuring more than 600 in Argentina 's worst rail crash in three decades.

bound_sent: , says Idris Jala Bersih says deportation shows Taib fears

bound_targ_sent: Main - World - Argentine commuter train crashes, killing 49 people @ Thu Feb 23 2012 7-day Archive: Mon Tues Wed Thurs Fri Sat Sun Saturday, 10 March 2012 Last Update : 04:53pm Weather | Kuala Lumpur 29 °C News Malaysia Business World Showbiz Sports Features Opinion Bahasa Food Books Tech Drive Trav

nug_text: 49 confirmed deaths

sent_id (4): A PACKED commuter train entering a Buenos Aires station at morning rush hour overnight smashed into a retaining wall, crumpling cars and leaving at least 49 dead, 600 injured and dozens trapped in the twisted wreckage.

bound_sent: rk Navigation (ot

bound_targ_sent: Argentina train crash kills 49, injures 600 | thetelegraph. com. au Skip to: Main Content Site Navigation Site Footer Site Search Site Map Network Navigation (other sites) news. com. au Fox Sports CareerOne Carsguide RealEstate News Network thetelegraph. com.au News Sport NRL SuperCoach Cricket Rugby Gold Soccer AFL Racing Olympics F1 Motor Golf Tennis Boxing/MMA Galleries More OddsScanner Entertainment Sydney Confidential Insider Galleries Music Movies Television Fashion What's On Sydney Festival 2012 Deluxe Sydney Business Business Breaking News Markets Dollar Worklife Entrepreneur Archive Money Banking Property Money Matters Superannuation Investing Interest Rates Guides & Too

nug_text: 49 confirmed deaths

sent_id (8): Picture: AP Source: AP At least 48 dead, hundreds injured in Argentina crash Train smashed into end of station platform Windows exploded, cars separated, people thrown A PACKED train has slammed into the end of the line in Buenos Aires ' busy Once station, killing 49 people and injuring hundreds of morning commuters in Argentina 's worst train accident in decades.

bound_sent: Sports CareerOne Carsguide RealEstate News Network Herald Sun News Sport AFL NRL 

bound_targ_sent: Argentinian train derailment injures 550 | Herald Sun Skip to: Main Content Site Navigation Site Footer Site Search Site Map Network Navigation (other sites) news. com. au Fox Sports CareerOne Carsguide RealEstate News Network Herald Sun News Sport AFL NRL Grand Prix Racing Olympics Soccer Cricket Rugby Gold Golf Motor Racing Tennis Netball More sports Sport Confidential OddsScanner Entertainment Confidential Arts Fashion Movies Music TV & Radio Events Business Breaking N

nug_text: the train crashed at the buffer stop

sent_id (8): Picture: AP Source: AP At least 48 dead, hundreds injured in Argentina crash Train smashed into end of station platform Windows exploded, cars separated, people thrown A PACKED train has slammed into the end of the line in Buenos Aires ' busy Once station, killing 49 people and injuring hundreds of morning commuters in Argentina 's worst train accident in decades.

bound_sent: erOne Carsguide RealEstate News N

bound_targ_sent: Argentinian train derailment injures 550 | Herald Sun Skip to: Main Content Site Navigation Site Footer Site Search Site Map Network Navigation (other sites) news. com. au Fox Sports CareerOne Carsguide RealEstate News Network Herald Sun News Sport AFL NRL Grand Prix Racing Olympics Soccer Cricket Rugby Gold Golf Motor Racing Tennis Netball More sports Sport Confidential OddsScanner Entertainment Confidential Arts Fashion Movies Music TV & Radio Events Business Breaking News Markets Dollar Worklife You

nug_text: at Once Station

sent_id (2): Trapped passengers from a commuter train that crashed into the Once train station at rush hour are seen in a coach in Buenos Aires February 22, 2012.

bound_sent: gentina ( CNN) — A 

bound_targ_sent: 49 killed in Argentina train crash newspaper Buenos Aires , Argentina ( CNN) — A commuter train plowed into a barrier at a Buenos Aires station Wednesday, killing nearly 49 people and injuring hundreds, officials said.

bound_targ_sent_spacy: 
49 killed in Argentina train crash newspaper Buenos Aires , Argentina ( CNN) —

contains: False

---------------------------------------------------------------
nug_text: train crashes into platform

sent_id (2): Trapped passengers from a commuter train that crashed into the Once train station at rush hour are seen in a coach in Buenos Aires February 22, 2012.

bound_sent:  newspaper Buenos Aires , Argentina ( CNN) — A c

bound_targ_sent: 49 killed in Argentina train crash newspaper Buenos Aires , Argentina ( 

nug_text: 676+ injuries

sent_id (21): A commuter train plowed into the buffers at a Buenos Aires station during Wednesday's morning rush hour, killing 49 people and injuring at least 600 officials said.

bound_sent:  Newscom FocalPoint Ca

bound_targ_sent: Deadly Train Crash in Argentina | Newscom FocalPoint Newscom FocalPoint A view from both sides of the photo market Pages About Newscom FocalPoint Categories Bet You Didn't Know Newscom Had This on Their Site… Celebrity and Entertainment Content Partners Creative Photo Sets Around the World Cute Animal Pictures Stock Photography Guest Blog Holiday and Anniversary In the News Nature and the Environment Newscom News Most Downloaded Tips 'n Tricks On the 2012 Election Trail Pictures of the Week Sports 2012 Olympics RSS Deadly Train Crash in Argentina Rescuers take wounded persons out of a carriage at the site of train derailling in Buenos Aires , capital of Argentina , Feb. 22, 2012.

bound_targ_sent_spacy: About Newscom FocalPoint Cate

nug_text: cause reported as malfunction of railway brakes

sent_id (3): Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires February 22, 2012./ REUTERS Officials said faulty brakes were suspected of causing the accident and witnesses said the train hurtled into the buffers.

bound_sent:  VALLEY CENTRAL VISAYAS CHAVACANO ILOCOS NEGROS NORTH 

bound_targ_sent: Argentine commuter train crashes, killing 49 people | ABS - CBN News TV PATROL LIVE REPLAY BANDILA BMPM CURRENT AFFAIRS ANC DZMM LIVE AUDIO LOCAL TV PATROL BICOL CAGAYAN VALLEY CENTRAL VISAYAS CHAVACANO ILOCOS NEGROS NORTH CENTRAL LUZON NORTHERN LUZON NORTHERN MINDANAO PANAY PAMPANGA PALAWAN SOCKSARGEN SOUTHERN MINDANAO SOUTHERN TAGALOG TACLOBAN ABS - CBN.COM ABS-CBN News | Latest Philippine Headlines, Breaking News, Video, Analysis, Features Search Search this site: Sign up Login Username : * Password : * Create new account Request new password Home Nation 

nug_text: February 22, 2012

sent_id (40): Find it on Newscom : rtrlfive083012 Feb. 22, 2012 - Buenos Aires , Argentina - Rescuers take wounded persons out of a carriage at the site of train derailing.

bound_sent: ewscom FocalPo

bound_targ_sent: Deadly Train Crash in Argentina | Newscom FocalPoint Newscom FocalPoint A view from both sides of the photo market Pages About Newscom FocalPoint Categories Bet You Didn't Know Newscom Had This on Their Site… Celebrity and Entertainment Content Partners Creative Photo Sets Around the World Cute Animal Pictures Stock Photography Guest Blog Holiday and Anniversary In the News Nature and the Environment Newscom News Most Downloaded Tips 'n Tricks On the 2012 Election Trail Pictures of the Week Sports 2012 Olympics RSS Deadly Train Crash in Argentina Rescuers take wounded persons out of a carriage at the site of train derailling in Buenos Aires , capital of Argentina , Feb. 22, 2012.

bound_targ_sent_spacy: Newscom FocalPoint Newscom FocalPoint



nug_text: train accident in Buenos Aires, Argentina.

sent_id (35): Find it on Newscom : rtrlfive082472 Rescue members search for wounded people at the site of train derailling in Buenos Aires , capital of Argentina , Feb. 22, 2012.

bound_sent: des of the photo market Pages About Newscom FocalPoint C

bound_targ_sent: Deadly Train Crash in Argentina | Newscom FocalPoint Newscom FocalPoint A view from both sides of the photo market Pages About Newscom FocalPoint Categories Bet You Didn't Know Newscom Had This on Their Site… Celebrity and Entertainment Content Partners Creative Photo Sets Around the World Cute Animal Pictures Stock Photography Guest Blog Holiday and Anniversary In the News Nature and the Environment Newscom News Most Downloaded Tips 'n Tricks On the 2012 Election Trail Pictures of the Week Sports 2012 Olympics RSS Deadly Train Crash in Argentina Rescuers take wounded persons out of a carriage at the site of train derailling in Buenos Aires , capital of Argentina , Feb. 

nug_text: 550 injured

sent_id (3): [ Read More ] 49 Die in Argentine Train Crash – Train hits station in 6th incident in … 5 hours ago … A train full of people crashed into a station in Buenos Aires today, injuring at least 550 people and killing 48 adults and one child in Argentina 's … [Read More ] Raw Video : Dozens dead in Argentina train crash – Yahoo !

bound_sent:  crash at a station in the Ar

bound_targ_sent: argentina train crash Reporting on news impacting Indians across the globe BBC News – Argentina train crash in Buenos Aires 'kills dozens' 10 hours ago … A commuter train crash at a station in the Argentine capital, Buenos Aires , kills 49 people and leaves at least 600 injured, officials say.

bound_targ_sent_spacy: A commuter train crash at a station in the Argentine capital, Buenos Aires , kills 49 people and leaves at least 600 injured, officials say.


contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos

nug_text: 49 confirmed deaths

sent_id (2): Classifieds Jobs Cars Property Shops Microsites Parliament Motoring Home Breaking News World Story 49 dead, hundreds injured in Buenos Aires train crash Published on Feb 23, 2012 Purchase this article for republication Buy SPH photos Firemen rescue wounded passengers from a commuter train after a collision in Buenos Aires , Argentina , Wednesday Feb 22, 2012.

bound_sent: pore wea

bound_targ_sent: 49 dead, hundreds injured in Buenos Aires train crash Last updated at 7.40 pm Reader E-paper Singapore weather 25° C - 30° C Razor TV Top Stories News Current Affairs Entertainment Lifestyle Food Fashion Popular Stomp What's Hot Club Stomp Talkback I Say, You Say Youthphoria Court Room Ask Libby Getai A -go-go Love Stories DIY Video Reallife. sg Hey Goondus !

bound_targ_sent_spacy: Reader E-paper Singapore

contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id

nug_text: at Once Station

sent_id (1): Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires | Deadly Train Crash in Argentina | Global Spin | TIME. com TIME Magazine Subscribe Photos Videos Lists Apps Life. com Style Follow TIME Facebook Twitter Google + Tumblr NewsFeed U.S.

bound_sent:  Once train station

bound_targ_sent: Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires | Deadly Train Crash in Argentina | Global Spin | TIME. com TIME Magazine Subscribe Photos Videos Lists Apps Life. com Style Follow TIME Facebook Twitter Google + Tumblr NewsFeed U.S.

bound_targ_sent_spacy: 
Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires | Deadly Train Crash in Argentina

contains: False

---------------------------------------------------------------
nug_text: train cra

nug_text: at Once Station

sent_id (20): REUTERS Rescue workers extract a passenger from a commuter train that crashed into the Once train station at rush hour in Buenos Aires Wednesday.

bound_sent: ter train 

bound_targ_sent: At least 49 dead, 550 injured after Argentina train crash BUENOS AIRES — A packed commuter train entering a station at morning rush hour Wednesday suddenly smashed into a retaining wall, crumpling cars and leaving at least 49 dead, 550 injured and dozens trapped in the twisted wreckage.

bound_targ_sent_spacy: A packed commuter train entering a station at morning rush hour Wednesday suddenly smashed into a retaining wall, crumpling cars and leaving at least 49 dead, 550 injured and dozens trapped in the twisted wreckage.


contains: False

---------------------------------------------------------------
nug_text: train accident in Buenos Aires, Argentina.

sent_id (20): REUTERS Rescue workers extract a passenger from a commuter train that crashed into the Once t

nug_text: worst train accident in Argentina since 1970

sent_id (4): That makes it Argentina ’s worst train accident since February 1, 1970, when a train smashed into another at full speed in suburban Buenos Aires , killing 200 people.

bound_sent: n crash kills 49 in Buenos Aires JavaScript is turned off i

bound_targ_sent: Commuter train crash kills 49 in Buenos Aires JavaScript is turned off in your browser To view afr.com website correctly and with full functionality requires your browser be JavaScript enabled.

bound_targ_sent_spacy: 
Commuter train crash kills 49 in Buenos Aires JavaScript is turned off in your browser

contains: False

---------------------------------------------------------------
nug_text: at Once Station

sent_id (2): Click here for help » advanced search Subscribe Sign In ASIC Search Mobile TV Today's Paper My Account My Portfolio My Alerts Home National World Business Technology Markets Personal Finance Opinion Lifestyle World America Decides advertising Co

nug_text: third worst train accident in Argentina since the Benavidez rail disaster in 1970 and the "Estrella del Norte" in 1978.

sent_id (17): The worst accidents in Argentine history include a 1970 crash that killed more than 230 people and another in 1978, in which about 55 died, local media said.

bound_sent: 
BusinessDay - Dozens dead, hundreds injured in Buenos Aires train crash Other BDFM titles Quick Links Log in | Reg

bound_targ_sent: BusinessDay - Dozens dead, hundreds injured in Buenos Aires train crash Other BDFM titles Quick Links Log in | Register | Subscribe to E-Edition My Portfolio | | Logout | Edit | | My Portfolio Company Admin View more cities | View 7 day forecast Home Opinion & Analysis Companies Markets Economy &amp ; Business Mining Sport Tech Business Life Tools DISASTER: A view of the front of a commuter train that crashed into the Once train station in Buenos Aires during rush hour.

bound_targ_sent_spacy: 
BusinessDay - Dozens dead, hundreds injured in Bue

nug_text: Tel Aviv bus bombing

sent_id (1): Israel arrests suspects in Tel Aviv bus bombing | UnFox News Login | Create a free account Home World US Science Technology Green Politics Atheism Religion News Search Israel arrests suspects in Tel Aviv bus bombing JERUSALEM ( Reuters ) - Israeli authorities arrested an Israeli Arab on suspicion of planting a bomb in a Tel Aviv bus that wounded 15 people hours before Israel agreed a ceasefire with Hamas in Gaza , police and security officials said on Thursday.... read more... Share | Published By: Chicago Tribune - Today Older News Tel Aviv bus bombing shatters any illusions of safety LA Times World (Yesterday) - Attack leaves 21 injured in the Israeli city, which has enjoyed a relative sense of security in recent years. TEL AVIV — A bus bombing in downtown Tel Aviv , the first... Israel arrests suspects in Tel Aviv bus bombing Black Elk Order To Improve Safety After Rig Explosion Regulator warns Black Elk Energy on safety after rig fire Bo

nug_text: Terror suspects arrested

sent_id (1): Israel arrests suspects in Tel Aviv bus bombing | UnFox News Login | Create a free account Home World US Science Technology Green Politics Atheism Religion News Search Israel arrests suspects in Tel Aviv bus bombing JERUSALEM ( Reuters ) - Israeli authorities arrested an Israeli Arab on suspicion of planting a bomb in a Tel Aviv bus that wounded 15 people hours before Israel agreed a ceasefire with Hamas in Gaza , police and security officials said on Thursday.... read more... Share | Published By: Chicago Tribune - Today Older News Tel Aviv bus bombing shatters any illusions of safety LA Times World (Yesterday) - Attack leaves 21 injured in the Israeli city, which has enjoyed a relative sense of security in recent years. TEL AVIV — A bus bombing in downtown Tel Aviv , the first... Israel arrests suspects in Tel Aviv bus bombing Black Elk Order To Improve Safety After Rig Explosion Regulator warns Black Elk Energy on safety after rig fir

nug_text: Terror suspects arrested

sent_id (2): Secretary of State Hillary Clinton to pursue an elusive truce between... Israel - Hamas talks leave future of Gaza blockade cloudy Israel arrests suspects in Tel Aviv bus bombing Gaza ceasefire holds but mistrust runs deep Tel Aviv bus hit by bomb, at least 10 wounded Chicago Tribune (Yesterday) - TEL AVIV ( Reuters ) - A bomb exploded on a bus in central Tel Aviv on Wednesday, wounding at least 10 people in what Israeli officials said was a terrorist attack... Israel arrests suspects in Tel Aviv bus bombing Tel Aviv bus bombing shatters any illusions of safety Bomb blast hits bus in Tel Aviv Oil up to above $87 after Tel Aviv bus explosion Seattle times Tech News (Yesterday) - The price of oil rebounded slightly to above $87 a barrel on Wednesday after an explosion injured 10 people on a bus in Tel Aviv , a development that has the... Israel arrests suspects in Tel Aviv bus bombing Tel Aviv bus bombing shatters any illusions of safety B

nug_text: occured in the heart of Tel Aviv near military hdqtrs

sent_id (115): The bus was reportedly passing the military headquarters in the city at the time of the blast.

bound_sent: 
Israel says it arrests Tel Aviv bus bomber - Worldnews.com Explore WN Photos Travel Movies Phot

bound_targ_sent: Israel says it arrests Tel Aviv bus bomber - Worldnews.com Explore WN Photos Travel Movies Photographers Health Science Technology Cities Live TV World News Login Edit Israel says it arrests Tel Aviv bus bomber Tweet sms this page email this page News Videos Video Details Location Images Related Links Twitter BBC Results Shopping Travel Booking The News &amp ; Observer 2012-11-22 : Dan Balilty - AP Photo Israeli police and security personnel stand next to a destroyed bus at the site of a bombing in Tel Aviv , Israel , Wednesday, Nov. 21, 2012 .

bound_targ_sent_spacy: 
Israel says it arrests Tel Aviv bus bomber - Worldnews.com

contains: False

--------------------------------------------

IndexError: single positional indexer is out-of-bounds

In [99]:
def test_non_standard_topic_id():
    def generate_update_paths():
        up_dir = "/nfs/TemporalSummarization"
        ts_dirs = ["ts13", "ts14", "ts15"]
        target_files = ['updates_sampled.extended.tsv', 'updates_sampled.tsv',
                                 'matches.tsv', 'nuggets.tsv']
        
        up_paths = []
        for ts_dir in ts_dirs:
            for target_file in target_files:
                full_path = up_dir + '/' + ts_dir + '/results/' + target_file
                if os.path.exists(full_path):
                    up_paths.append(full_path)
                else:
                    print("doesn't exist: " + str(full_path))
        return up_paths
    
    def get_num_topic_id(topic_id):
        split = topic_id.split(".", 2)  # e.g. 'TS14.18'
        try:
            if split[0].upper() == "TS14":
                topic_id = int(split[1])  # extract int '18'
                return topic_id
            else:
                raise ValueError()
        except ValueError:
            print("can't convert " + str(topic_id))
            return None  # no discernable topic_id
    
    tsv_paths = generate_update_paths()
    topic_ids = set()
    
    
    
    for tsv_path in tsv_paths:
        try:
            tsv_df = pd.read_csv(tsv_path, "\t")
#             print(str(tsv_path) + "\ntopic_ids: " + str(tsv_df['query_id'].unique()))
            if 'query_id' in tsv_df.columns:
                query_ids = set(list(tsv_df['query_id']))
                topic_ids.update(query_ids)
        except FileNotFoundError:
            print("not found tsv_path: " + str(tsv_path))
            
    non_standard = []
    for topic_id in topic_ids:
        try:
            conv = int(topic_id)
        except ValueError:
            non_standard.append(topic_id)
    print("non_standard")
    print(non_standard)
    
    converted = []
    for non in non_standard:
        tid = get_num_topic_id(non)
        if tid is not None:
            converted.append(tid)
    print("converted")
    print(converted)
    
    print("finished")
    
#     print(topic_ids)
    
test_non_standard_topic_id()

doesn't exist: /nfs/TemporalSummarization/ts13/results/updates_sampled.extended.tsv
doesn't exist: /nfs/TemporalSummarization/ts15/results/updates_sampled.extended.tsv
non_standard
['TS14.14', 'TS14.25', 'TS14.20', 'TS14.22', 'TS14.13', 'TS14.15', 'TS14.24', 'TS14.21', 'TS14.16', 'TS13.07', 'TS14.19', 'TS13.TEST01', 'TS14.11', 'TS14.12', 'TS14.18', 'TS14.23', 'TS14.17', 'TS13.18', 'TS13.13']
can't convert TS13.07
can't convert TS13.TEST01
can't convert TS13.18
can't convert TS13.13
converted
[14, 25, 20, 22, 13, 15, 24, 21, 16, 19, 11, 12, 18, 23, 17]
finished
