In [1]:
import os
import requests
import json

import pycountry

from datetime import datetime, timedelta

import pandas as pd

from bs4 import BeautifulSoup

In [2]:
from api_query_setup import QuerySetup

In [3]:
## get TR AuthToken

def get_authtoken(username, password):
    '''Tries to load cached authentification token and renews it if necessary.'''
    if os.path.isfile('authtoken.cache'):
        # read cached authtoken
        with open('authtoken.cache', 'r') as f:
            authToken = f.read()
    else: 
        authToken = None
    
    if authToken is not None:
        # check validity of authToken
        url = f'http://rmb.reuters.com/rmd/rest/json/channels?token={authToken}&format=json'
        response = requests.get(url) # 
        json.loads(response.text)
        if response == '<Response [403]>':
            authToken = None
    
    if authToken is None:
        print('Renewing AuthToken...')
        auth_url = f"https://commerce.reuters.com/rmd/rest/xml/login?username={username}&password={password}&format=json"
        response = requests.get(auth_url)
        authToken = json.loads(response.text).get('authToken').get('authToken')
        with open('authtoken.cache', 'w') as f:
            f.write(authToken)

    return authToken
    

username = 'HackAPI3'
password = 'xfzzzEOBEJrmWpi'

authToken = get_authtoken(username, password)

print(authToken)

3SDnXO+c5NcIo45KY4ZrT5Lw89ntAckI81kIX5wuiTI=


## Retrieve country codes for relevant countries
### (according to https://www.dol.gov/agencies/ilab/reports/child-labor/list-of-goods)

In [4]:
# load TR country codes dataframe

tr_country_file = '../Data/TR_API_files/tr_country_codes_normalized.tsv'

#cc_df = pd.read_csv(tr_country_file, sep='\t', dtype='str')
cc_df = pd.read_csv(tr_country_file, sep='\t', dtype=object, na_filter=False)

# retrieve country code mapping as dictionary

cc_mapping = {k:v for k,v in zip(cc_df.country_name, cc_df.country_code)}
cc_mapping_long = {k:v for k,v in zip(cc_df.country_name_long, cc_df.country_code) if str(k) != 'nan'}

cc_mapping.update(cc_mapping_long)

num_mappings = len(cc_mapping)

print(f'{len(cc_mapping)} mapping instances found.')


# retrieve country code area mappings

area_mappings = {k:v for k,v in zip(cc_df.country_code, cc_df.area)}

492 mapping instances found.


In [5]:
# extract list of relevant country codes from GSI dataset

country_file = '../Data/ListofGoodsExcel.tsv'

country_goods_df = pd.read_csv(country_file)

countries = country_goods_df.country

countries_df = pd.DataFrame(countries)

countries_df['country_code'] = countries_df.country.apply(lambda x: cc_mapping.get(x))

# check that all countries have been mapped
assert countries_df[countries_df.country_code.isna()].shape[0] == 0

relevant_country_codes = countries_df.country_code.unique().tolist()

In [6]:
# reverse mapping from country codes to GSI country names

cc_mapping_reverse = {v:k for k,v in cc_mapping.items() if k in countries_df.country.unique().tolist()}

In [7]:
# mapping of country code to country for newer (normalized) country names (e.g. Myanmar instead of Burma)

country_file_normalized = '../Data/ListofGoodsExcel_normalized.tsv'

countries_normalized = pd.read_csv(country_file_normalized).country.unique()

cc_mapping_reverse_normalized = {v:k for k,v in cc_mapping.items() if k in countries_normalized}

### Configure the Query

- **keywords**: human trafficking, slavery, child labour, child labor, forced labor, force labour, debt boundage
- **countries**: all countries specified by the GSI as being involved in human trafficking in good production (according to the ListofGoods as defined above)
- **language**: English is default (can be expanded to more languagues but for simplicity, we currently only consider English)
- **daterange**: The TR API sadly only allows for one month
- **media type**: Text

In [8]:
query_terms = ['human trafficking',
            'slavery',
            'child labour',
            'child labor',
            'forced labor',
            'force labour',
            'debt boundage']

In [9]:
# initiate query setup module

q_setup = QuerySetup(authToken, query_terms=query_terms, geography=relevant_country_codes)

In [10]:
# get the query url from the query setup module

url = q_setup.query_url()
url

'http://rmb.reuters.com/rmd/rest/json/search?q=main:("human trafficking" OR "slavery" OR "child labour" OR "child labor" OR "forced labor" OR "force labour" OR "debt boundage")&mediaType=T&dateRange=2018.11.03-2019.11.03&language=en&geography=AF&geography=AO&geography=AR&geography=AZ&geography=BD&geography=BZ&geography=BJ&geography=BO&geography=BR&geography=BF&geography=MM&geography=KH&geography=CM&geography=CF&geography=TD&geography=CN&geography=CO&geography=CR&geography=CI&geography=CD&geography=DO&geography=EC&geography=EG&geography=SV&geography=SZ&geography=ET&geography=GH&geography=GT&geography=GN&geography=HN&geography=IN&geography=ID&geography=IR&geography=KZ&geography=KE&geography=KG&geography=LB&geography=LS&geography=LR&geography=MG&geography=MW&geography=MY&geography=ML&geography=MR&geography=MX&geography=MN&geography=MZ&geography=NA&geography=NP&geography=NI&geography=NE&geography=NG&geography=KP&geography=PK&geography=PA&geography=PY&geography=PE&geography=PH&geography=RU&

In [11]:
# get results as json

response = requests.get(url)
print(response)
response_text = response.text
t_results = json.loads(response_text)

<Response [200]>


In [12]:
t_results

{'results': {'result': [{'id': 'tag:reuters.com,2019:newsml_L3N27I00W:391219080',
    'guid': 'tag:reuters.com,2019:newsml_L3N27I00W',
    'version': 391219080,
    'dateCreated': 1572659257000,
    'slug': 'BRITAIN-BODIES/VIETNAM',
    'source': 'Thomson Reuters',
    'language': 'en',
    'headline': 'Vietnam says human traffickers must be strictly dealt with',
    'mediaType': 'T',
    'priority': 3,
    'geography': ['VN', 'BE', 'GB', 'CN', 'BG'],
    'channels': ['STK567'],
    'indexTimestamp': 1572659267216,
    'contributorId': 'RTRS',
    'contributorName': 'Reuters',
    'destination': ['ZZ',
     'C',
     'MTL',
     'D',
     'E',
     'GRO',
     'G',
     'DNP',
     'M',
     'GNS',
     'O',
     'CSA',
     'RNP',
     'U',
     'SI',
     'Z',
     'REULB',
     'RBN',
     'PSC',
     'LBY',
     'ZD',
     'SOF',
     'PGE',
     'RWS',
     'UCDPTEST',
     'UKI',
     'OIL',
     'RWSA',
     'RAST',
     'AFA'],
    'signal': ['prodId:TXT', 'pmt:text', 'source:i

In [13]:
# convert json to pandas dataframe

results_info = t_results['results']['result']
t_results_df = pd.DataFrame(results_info)
print(t_results_df.shape)
t_results_df.head()

(171, 20)


Unnamed: 0,id,guid,version,dateCreated,slug,source,language,headline,mediaType,priority,geography,channels,indexTimestamp,contributorId,contributorName,destination,signal,internalReceivedDate,channelIds,author
0,"tag:reuters.com,2019:newsml_L3N27I00W:391219080","tag:reuters.com,2019:newsml_L3N27I00W",391219080,1572659257000,BRITAIN-BODIES/VIETNAM,Thomson Reuters,en,Vietnam says human traffickers must be strictl...,T,3,"[VN, BE, GB, CN, BG]",[STK567],1572659267216,RTRS,Reuters,"[ZZ, C, MTL, D, E, GRO, G, DNP, M, GNS, O, CSA...","[prodId:TXT, pmt:text, source:ids, edStat:N]",1572659267216,"[76637, 63683, 37917, 26786, 144993, 626, 6893...",
1,"tag:reuters.com,2019:newsml_L8N27H5Q0:447177668","tag:reuters.com,2019:newsml_L8N27H5Q0",447177668,1572646886000,BRAZIL-HUMAN TRAFFICKING/ (TV),Thomson Reuters,en,Brazil police arrest man said to be one of wor...,T,3,"[BR, BD, MX, AF, IN, US]",[STK567],1572646890383,RTRS,Reuters,"[PSC, LBY, G, DNP, GNS, PGE, RWS, CSA, UCDPTES...","[prodId:TXT, pmt:text, source:ids, edStat:N]",1572646890383,"[63683, 37917, 144993, 626, 159003, 176417, 31...",
2,"tag:reuters.com,2019:newsml_KBN1XB3XS:6","tag:reuters.com,2019:newsml_KBN1XB3XS",6,1572640591000,UK-BRITAIN-BODIES,Thomson Reuters,en,"Second man charged over UK truck deaths, victi...",T,4,"[VN, GB, CN, IE, BG, IND]",[TRn222],1572640594477,RTRS,Reuters,"[NOINSKM, OUKDNM, NOINWDM, ORTAK, OUKWDM, OUKT...","[var:itemMod, prodId:TXT, pmt:text, source:ids...",1572640594477,"[23276, 83733, 22849, 25437, 176625, 23274, 14...",By Amanda Ferguson
3,"tag:reuters.com,2019:newsml_L8N27H2WN:1498332260","tag:reuters.com,2019:newsml_L8N27H2WN",1498332260,1572640534000,"BRITAIN-BODIES/ (UPDATE 3, PIX, TV)",Thomson Reuters,en,UPDATE 3-Second man charged over UK truck deat...,T,3,"[VN, GB, CN, IE, BG]",[STK567],1572640540811,RTRS,Reuters,"[PSC, LBY, G, DNP, GNS, PGE, RWS, CSA, UCDPTES...","[prodId:TXT, pmt:text, source:ids, edStat:U, s...",1572640540811,"[63683, 37917, 144993, 626, 159003, 662, 17641...",By Amanda Ferguson
4,"tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2:1...","tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2",1099731627,1572615643000,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,Thomson Reuters,en,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,T,1,"[VN, GB, CN, IE, BG]",[STK567],1572615647562,RTRS,Reuters,"[ZZ, C, MTL, D, E, GRO, G, M, ABN, GNS, O, CSA...","[var:itemMod, prodId:TXT, pmt:text, source:ids...",1572615647562,"[37917, 626, 176417, 34325, 63398, 63411, 5760...",


In [14]:
# get articles by id

def query_by_id(t_id):
    url = f'http://rmb.reuters.com/rmd/rest/json/item?id={t_id}&token={authToken}&format=json'
    response = requests.get(url)
    return json.loads(response.text)

by_id = t_results_df.id.apply(lambda x: query_by_id(x))

# convert to pandas dataframe
text_df = pd.DataFrame.from_dict(by_id.tolist())
text_df.columns

Index(['destination', 'productlabel', 'representationtype', 'copyrightholder',
       'copyrightnotice', 'credit', 'profile', 'signal', 'firstcreated',
       'headline', 'language', 'mimetype', 'slug', 'caption', 'subject',
       'subjectlocation', 'altid', 'type', 'urgency', 'uri', 'usn', 'version',
       'versioncreated', 'versionedguid', 'wordcount', 'body_xhtml',
       'renditions', 'byline', 'dateline', 'derivedfrom', 'located', 'source'],
      dtype='object')

In [15]:
# parse fulltext

def parse_html(string):
    soup = BeautifulSoup(string, 'html.parser')    

    texts = []
    for i in soup.find_all('p'):
        texts.append(i.get_text())
        
    return '\n'.join(texts)

text_df['parsed_text'] = text_df.body_xhtml.apply(lambda x: parse_html(x))

In [16]:
print(text_df.columns)

text_df.head()

Index(['destination', 'productlabel', 'representationtype', 'copyrightholder',
       'copyrightnotice', 'credit', 'profile', 'signal', 'firstcreated',
       'headline', 'language', 'mimetype', 'slug', 'caption', 'subject',
       'subjectlocation', 'altid', 'type', 'urgency', 'uri', 'usn', 'version',
       'versioncreated', 'versionedguid', 'wordcount', 'body_xhtml',
       'renditions', 'byline', 'dateline', 'derivedfrom', 'located', 'source',
       'parsed_text'],
      dtype='object')


Unnamed: 0,destination,productlabel,representationtype,copyrightholder,copyrightnotice,credit,profile,signal,firstcreated,headline,...,versionedguid,wordcount,body_xhtml,renditions,byline,dateline,derivedfrom,located,source,parsed_text
0,"[CSA, SI, ZZ, ZD, LBY, REULB, G, RAST, SOF, M,...",text,complete,Thomson Reuters,(c) Copyright Thomson Reuters 2019. Click For ...,Reuters,SNI-Text,"[edStat:N, prodId:TXT, pmt:text]",2019-11-02T01:47:37.000Z,Vietnam says human traffickers must be strictl...,...,"tag:reuters.com,2019:newsml_L3N27I00W:391219080",226,<p>Nov 2 (Reuters) - Vietnam sai...,[{'mimetype': 'application/vnd.iptc.g2.newsite...,,,,,,Nov 2 (Reuters) - Vietnam said on Saturday tha...
1,"[CSA, LBY, REULB, G, RAST, PGE, RNP, AFA, PSC,...",text,complete,Thomson Reuters,(c) Copyright Thomson Reuters 2019. Click For ...,Reuters,SNI-Text,"[edStat:N, prodId:TXT, pmt:text]",2019-11-01T22:21:26.000Z,Brazil police arrest man said to be one of wor...,...,"tag:reuters.com,2019:newsml_L8N27H5Q0:447177668",346,"<p>SAO PAULO, Nov 1 (Reuters) - ...",[{'mimetype': 'application/vnd.iptc.g2.newsite...,,,,,,"SAO PAULO, Nov 1 (Reuters) - Brazilian federal..."
2,"[OUKTPM, ORTOA, OUKDNM, OUKWDM, NOINWDM, NOINS...",text,complete,Thomson Reuters,(c) Copyright Thomson Reuters 2019. Click For ...,Reuters,SNI-Text,"[edStat:U, prodId:TXT, pmt:text, var:itemMod]",2019-11-01T11:05:29.000Z,"Second man charged over UK truck deaths, victi...",...,"tag:reuters.com,2019:newsml_KBN1XB3XS:6",450,<p>By Amanda Ferguson</p>\n ...,[{'mimetype': 'application/vnd.iptc.g2.newsite...,By Amanda Ferguson,2019-11-01 20:36:31 GMT+00:00,"[tag:reuters.com,2019:newsml_L8N27H2WN]","Bangalore, India","{'code': 'NS:RTRS', 'literal': 'Reuters'}",By Amanda Ferguson\nBELFAST (Reuters) - A seco...
3,"[CSA, SI, LBY, REULB, G, RAST, PGE, RNP, UKI, ...",text,complete,Thomson Reuters,(c) Copyright Thomson Reuters 2019. Click For ...,Reuters,SNI-Text,"[edStat:U, prodId:TXT, pmt:text, sic:XBSUM]",2019-11-01T11:45:27.000Z,UPDATE 3-Second man charged over UK truck deat...,...,"tag:reuters.com,2019:newsml_L8N27H2WN:1498332260",499,<p>* Thirty-nine bodies were fou...,[{'mimetype': 'application/vnd.iptc.g2.newsite...,By Amanda Ferguson,,,,,* Thirty-nine bodies were found in a truck on ...
4,"[CSA, ZZ, ZD, LBY, REULB, G, RAST, SOF, M, Z, ...",text,complete,Thomson Reuters,(c) Copyright Thomson Reuters 2019. Click For ...,Reuters,SNI-Text,"[edStat:N, prodId:TXT, pmt:text, var:itemMod]",2019-11-01T13:40:43.000Z,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,...,"tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2:1...",21,<p>NORTHERN IRISH MAN IS CHARGED...,[{'mimetype': 'application/vnd.iptc.g2.newsite...,,,"[tag:reuters.com,2019:newsml_L9N26H01T]",,,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...


In [17]:
# only keep relevant columns

relevant_columns = ['firstcreated', 'headline', 'language',  'slug', 'caption',
    'urgency', 'uri', 'usn', 'version', 'versioncreated',
       'versionedguid', 'wordcount', 'body_xhtml', 'dateline', 'parsed_text']
text_df = text_df[relevant_columns]


t_results_df_relcols = ['id', 'guid', 'dateCreated', 'geography']
t_results_df_rel = t_results_df[t_results_df_relcols]

In [18]:
# add information from t_results_df (table returned by customized query string)
# merge tables into one

text_df['guid'] = text_df['uri']
all_api_df = t_results_df_rel.merge(text_df, on='guid')

all_api_df.head()

Unnamed: 0,id,guid,dateCreated,geography,firstcreated,headline,language,slug,caption,urgency,uri,usn,version,versioncreated,versionedguid,wordcount,body_xhtml,dateline,parsed_text
0,"tag:reuters.com,2019:newsml_L3N27I00W:391219080","tag:reuters.com,2019:newsml_L3N27I00W",1572659257000,"[VN, BE, GB, CN, BG]",2019-11-02T01:47:37.000Z,Vietnam says human traffickers must be strictl...,en,BRITAIN-BODIES/VIETNAM,BRITAIN-BODIES/VIETNAM:Vietnam says human traf...,3,"tag:reuters.com,2019:newsml_L3N27I00W",L3N27I00W,391219080,2019-11-02T01:47:37.000Z,"tag:reuters.com,2019:newsml_L3N27I00W:391219080",226,<p>Nov 2 (Reuters) - Vietnam sai...,,Nov 2 (Reuters) - Vietnam said on Saturday tha...
1,"tag:reuters.com,2019:newsml_L8N27H5Q0:447177668","tag:reuters.com,2019:newsml_L8N27H5Q0",1572646886000,"[BR, BD, MX, AF, IN, US]",2019-11-01T22:21:26.000Z,Brazil police arrest man said to be one of wor...,en,BRAZIL-HUMAN TRAFFICKING/ (TV),BRAZIL-HUMAN TRAFFICKING/ (TV):Brazil police a...,3,"tag:reuters.com,2019:newsml_L8N27H5Q0",L8N27H5Q0,447177668,2019-11-01T22:21:26.000Z,"tag:reuters.com,2019:newsml_L8N27H5Q0:447177668",346,"<p>SAO PAULO, Nov 1 (Reuters) - ...",,"SAO PAULO, Nov 1 (Reuters) - Brazilian federal..."
2,"tag:reuters.com,2019:newsml_KBN1XB3XS:6","tag:reuters.com,2019:newsml_KBN1XB3XS",1572640591000,"[VN, GB, CN, IE, BG, IND]",2019-11-01T11:05:29.000Z,"Second man charged over UK truck deaths, victi...",en-GB,UK-BRITAIN-BODIES,UK-BRITAIN-BODIES:Second man charged over UK t...,4,"tag:reuters.com,2019:newsml_KBN1XB3XS",KBN1XB3XS,6,2019-11-01T20:36:31.000Z,"tag:reuters.com,2019:newsml_KBN1XB3XS:6",450,<p>By Amanda Ferguson</p>\n ...,2019-11-01 20:36:31 GMT+00:00,By Amanda Ferguson\nBELFAST (Reuters) - A seco...
3,"tag:reuters.com,2019:newsml_L8N27H2WN:1498332260","tag:reuters.com,2019:newsml_L8N27H2WN",1572640534000,"[VN, GB, CN, IE, BG]",2019-11-01T11:45:27.000Z,UPDATE 3-Second man charged over UK truck deat...,en,"BRITAIN-BODIES/ (UPDATE 3, PIX, TV)","BRITAIN-BODIES/ (UPDATE 3, PIX, TV):UPDATE 3-S...",3,"tag:reuters.com,2019:newsml_L8N27H2WN",L8N27H2WN,1498332260,2019-11-01T20:35:34.000Z,"tag:reuters.com,2019:newsml_L8N27H2WN:1498332260",499,<p>* Thirty-nine bodies were fou...,,* Thirty-nine bodies were found in a truck on ...
4,"tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2:1...","tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2",1572615643000,"[VN, GB, CN, IE, BG]",2019-11-01T13:40:43.000Z,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,en,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,1,"tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2",MT1ALTL9N26H01T2,1099731627,2019-11-01T13:40:43.000Z,"tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2:1...",21,<p>NORTHERN IRISH MAN IS CHARGED...,,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...


In [19]:
## add long version of countries
all_api_df['countries_long'] = all_api_df.geography.fillna('').apply(lambda x: ','.join([cc_mapping_reverse.get(i) for i in x if i in cc_mapping_reverse]))
all_api_df['countries_long_newversion'] = all_api_df.geography.fillna('').apply(lambda x: ','.join([cc_mapping_reverse_normalized.get(i) for i in x if i in cc_mapping_reverse_normalized]))

all_api_df.head()

Unnamed: 0,id,guid,dateCreated,geography,firstcreated,headline,language,slug,caption,urgency,...,usn,version,versioncreated,versionedguid,wordcount,body_xhtml,dateline,parsed_text,countries_long,countries_long_newversion
0,"tag:reuters.com,2019:newsml_L3N27I00W:391219080","tag:reuters.com,2019:newsml_L3N27I00W",1572659257000,"[VN, BE, GB, CN, BG]",2019-11-02T01:47:37.000Z,Vietnam says human traffickers must be strictl...,en,BRITAIN-BODIES/VIETNAM,BRITAIN-BODIES/VIETNAM:Vietnam says human traf...,3,...,L3N27I00W,391219080,2019-11-02T01:47:37.000Z,"tag:reuters.com,2019:newsml_L3N27I00W:391219080",226,<p>Nov 2 (Reuters) - Vietnam sai...,,Nov 2 (Reuters) - Vietnam said on Saturday tha...,"Vietnam,China","Vietnam,China"
1,"tag:reuters.com,2019:newsml_L8N27H5Q0:447177668","tag:reuters.com,2019:newsml_L8N27H5Q0",1572646886000,"[BR, BD, MX, AF, IN, US]",2019-11-01T22:21:26.000Z,Brazil police arrest man said to be one of wor...,en,BRAZIL-HUMAN TRAFFICKING/ (TV),BRAZIL-HUMAN TRAFFICKING/ (TV):Brazil police a...,3,...,L8N27H5Q0,447177668,2019-11-01T22:21:26.000Z,"tag:reuters.com,2019:newsml_L8N27H5Q0:447177668",346,"<p>SAO PAULO, Nov 1 (Reuters) - ...",,"SAO PAULO, Nov 1 (Reuters) - Brazilian federal...","Brazil,Bangladesh,Mexico,Afghanistan,India","Brazil,Bangladesh,Mexico,Afghanistan,India"
2,"tag:reuters.com,2019:newsml_KBN1XB3XS:6","tag:reuters.com,2019:newsml_KBN1XB3XS",1572640591000,"[VN, GB, CN, IE, BG, IND]",2019-11-01T11:05:29.000Z,"Second man charged over UK truck deaths, victi...",en-GB,UK-BRITAIN-BODIES,UK-BRITAIN-BODIES:Second man charged over UK t...,4,...,KBN1XB3XS,6,2019-11-01T20:36:31.000Z,"tag:reuters.com,2019:newsml_KBN1XB3XS:6",450,<p>By Amanda Ferguson</p>\n ...,2019-11-01 20:36:31 GMT+00:00,By Amanda Ferguson\nBELFAST (Reuters) - A seco...,"Vietnam,China","Vietnam,China"
3,"tag:reuters.com,2019:newsml_L8N27H2WN:1498332260","tag:reuters.com,2019:newsml_L8N27H2WN",1572640534000,"[VN, GB, CN, IE, BG]",2019-11-01T11:45:27.000Z,UPDATE 3-Second man charged over UK truck deat...,en,"BRITAIN-BODIES/ (UPDATE 3, PIX, TV)","BRITAIN-BODIES/ (UPDATE 3, PIX, TV):UPDATE 3-S...",3,...,L8N27H2WN,1498332260,2019-11-01T20:35:34.000Z,"tag:reuters.com,2019:newsml_L8N27H2WN:1498332260",499,<p>* Thirty-nine bodies were fou...,,* Thirty-nine bodies were found in a truck on ...,"Vietnam,China","Vietnam,China"
4,"tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2:1...","tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2",1572615643000,"[VN, GB, CN, IE, BG]",2019-11-01T13:40:43.000Z,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,en,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,1,...,MT1ALTL9N26H01T2,1099731627,2019-11-01T13:40:43.000Z,"tag:reuters.com,2019:newsml_MT1ALTL9N26H01T2:1...",21,<p>NORTHERN IRISH MAN IS CHARGED...,,NORTHERN IRISH MAN IS CHARGED WITH MANSLAUGHTE...,"Vietnam,China","Vietnam,China"


In [20]:
# save table with API results 

all_api_df.to_csv('../Data/TR_API_files/TR_API_results.tsv', sep='\t', header=True, index=None)

In [21]:
# Extra: Overview of country code distribution in results

countries_lists = t_results_df_rel.geography.fillna('')
country_dist = pd.Series([i for sublist in countries_lists for i in sublist]).value_counts()

country_dist_df = pd.DataFrame(country_dist).reset_index()

country_dist_df.columns = ['country_code', 'observed_count']

country_dist_df.to_csv('../Data/TR_API_files/country_code_dist_articles.tsv', sep='\t', header=True, index=None)