### Read Credentials 

In [None]:
import requests, json, time, getopt, sys

# User Variables
# The credentials file holds one value per line, in this order:
#   --- RDP MACHINE ID ---
#   --- LONG PASSWORD ---
#   --- GENERATED CLIENT ID ---
# Forward slashes are used so the path contains no backslash escape sequences
# and resolves the same way on Windows and POSIX systems.
# The `with` block closes the file even if one of the reads fails.
with open("../creds/credFileHuman.txt", "r") as credFile:
    USERNAME = credFile.readline().rstrip('\n')
    PASSWORD = credFile.readline().rstrip('\n')
    CLIENT_ID = credFile.readline().rstrip('\n')

# Make sure that creds are read in
#print("USERNAME="+str(USERNAME))
#print("PASSWORD="+str(PASSWORD))
#print("CLIENT_ID="+str(CLIENT_ID))

### Define Token Endpoint 

In [None]:
# Application Constants
# URL pieces are kept as separate names because the news cells further down
# reuse base_URL and RDP_version to build their own endpoints.
base_URL = "https://api.refinitiv.com"
category_URL = "/auth/oauth2"
RDP_version = "/v1"
endpoint_URL = "/token"

# Settings used when requesting and caching the token.
CLIENT_SECRET = ""          # sent as the password half of the HTTP basic auth pair
SCOPE = "trapi"
TOKEN_FILE = "token.txt"    # file the token object is written to and read from

# base + category + version + endpoint
TOKEN_ENDPOINT = "".join([base_URL, category_URL, RDP_version, endpoint_URL])

In [None]:
def _requestNewToken(refreshToken):
    if refreshToken is None:
        tData = {
            "username": USERNAME,
            "password": PASSWORD,
            "grant_type": "password",
            "scope": SCOPE,
            "takeExclusiveSignOnControl": "true"
        };
    else:
        tData = {
            "refresh_token": refreshToken,
            "grant_type": "refresh_token",
        };

    # Make a REST call to get latest access token
    response = requests.post(
        TOKEN_ENDPOINT,
        headers = {
            "Accept": "application/json"
        },
        data = tData,
        auth = (
            CLIENT_ID,
            CLIENT_SECRET
        )
    )
    
    if response.status_code != 200:
        raise Exception("Failed to get access token {0} - {1}".format(response.status_code, response.text));

    # Return the new token
    return json.loads(response.text);

In [None]:
def saveToken(tknObject):
    """Stamp the token object with an expiry time and write it to TOKEN_FILE.

    Args:
        tknObject: dict holding at least "expires_in" (seconds, int or
            numeric string). It is modified in place: an "expiry_tm" key is
            added with the absolute expiry time.

    Raises:
        KeyError / ValueError: if "expires_in" is missing or not numeric.
            The expiry is computed before the file is opened, so a bad token
            object does not truncate an existing token file.
    """
    print("Saving the new token")
    # Append the expiry time to the token; 10 seconds are subtracted so the
    # token is treated as expired slightly before the server would reject it.
    tknObject["expiry_tm"] = time.time() + int(tknObject["expires_in"]) - 10
    # Store it in the file; the context manager guarantees flush and close.
    with open(TOKEN_FILE, "w") as tf:
        json.dump(tknObject, tf, indent=4)


In [None]:
def getToken():
    """Return an access token, reusing the one cached in TOKEN_FILE if possible.

    Order of attempts:
      1. Read TOKEN_FILE; if its "expiry_tm" is still in the future, return
         the stored access token without any network call.
      2. Otherwise exchange the stored refresh token for a new token.
      3. If anything in steps 1-2 raises (missing or corrupt file, refresh
         rejected, ...), fall back to a password-grant login.
    A newly obtained token is persisted with saveToken() before returning.

    Returns:
        The access token string.

    Raises:
        Whatever the password-grant request raises if the fallback fails too.
    """
    try:
        print("Reading the token from: " + TOKEN_FILE)
        # Read-only access is enough here; the handle is closed on every
        # path, including the early return below.
        with open(TOKEN_FILE, "r") as tf:
            tknObject = json.load(tf)

        # Is the access token still valid?
        if tknObject["expiry_tm"] > time.time():
            return tknObject["access_token"]

        print("Token expired, refreshing a new one...")
        # Get a new token from the refresh token
        tknObject = _requestNewToken(tknObject["refresh_token"])

    except Exception as exp:
        # Deliberately broad: any failure above falls back to a fresh login.
        print("Caught exception: " + str(exp))
        print("Getting a new token using Password Grant...")
        tknObject = _requestNewToken(None)

    # Persist this token for future queries
    saveToken(tknObject)
    # The token itself is a credential, so it is not echoed into the output.
    print("New token obtained and saved")
    # Return access token
    return tknObject["access_token"]

### Obtain Valid Token 

In [None]:
# Get a usable access token: getToken() returns the cached one when it is
# still valid and otherwise requests a new one.
accessToken = getToken()
print("Have token now")

### Request All News Metadata (Paginate)

In [None]:
news_category_URL = "/data/news"
newsmeta_endpoint_URL = "/metadata"
news_param1 = "?limit="
meta_step_size = 100  # 100 is max allowed at this time
news_param2 = "?cursor="
NEWS_ENDPOINT = base_URL + news_category_URL + RDP_version + newsmeta_endpoint_URL
NEWS_META_FILE = "newsMetadata.txt"

REQUEST_URL = NEWS_ENDPOINT + news_param1 + str(meta_step_size)
moreExists = True
count = 0
# Set once a 401 has triggered a token refresh for the current URL, so a
# token that keeps being rejected cannot make this loop spin forever.
tokenRefreshed = False

while moreExists:
    print("Requesting: ",REQUEST_URL)
    dResp = requests.get(REQUEST_URL, headers={"Authorization": "Bearer " + accessToken})
    if dResp.status_code != 200:
        print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
        # Give up on any error other than "token expired", or when the
        # refreshed token was rejected as well.
        if dResp.status_code != 401 or tokenRefreshed:
            break
        accessToken = getToken()     # token refresh on token expired
        tokenRefreshed = True
        continue                     # retry the same URL with the new token

    tokenRefreshed = False
    print("Resource access successful")
    # Display data
    jResp = json.loads(dResp.text)
    print(json.dumps(jResp, indent=2))

    # Count what was actually returned instead of assuming a full page.
    count += len(jResp.get("data", []))

    # A missing or empty "next" cursor both mean this was the last page.
    nextCursor = jResp.get("meta", {}).get("next")
    if nextCursor:
        REQUEST_URL = NEWS_ENDPOINT + news_param2 + nextCursor
    else:
        print("<<Reached the end of paged Metadata >>")
        moreExists = False
print("Completed with: " + str(count))
    


### Request News Metadata 

In [None]:
news_category_URL = "/data/news"
newsmeta_endpoint_URL = "/metadata"
news_param1 = "?limit=100"
NEWS_ENDPOINT = base_URL + news_category_URL + RDP_version + newsmeta_endpoint_URL
NEWS_META_FILE = "newsMetadata.txt"

# Accumulators filled by processWithChildren() in the cells below.
nodesWithParents = []
nodesWithoutParents = []

#print("NEWS_ENDPOINT=" + NEWS_ENDPOINT)

dResp = requests.get(NEWS_ENDPOINT + news_param1, headers={"Authorization": "Bearer " + accessToken})

if dResp.status_code == 401:   # error token expired
    print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
    accessToken = getToken()     # token refresh on token expired
    dResp = requests.get(NEWS_ENDPOINT + news_param1, headers={"Authorization": "Bearer " + accessToken})

# Evaluate the final response (first attempt or the retry) so that a
# successful retry is parsed too and jResp matches dResp for later cells.
if dResp.status_code != 200:
    print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
else:
    print("Resource access successful")
    # Display data
    jResp = json.loads(dResp.text)
    print(json.dumps(jResp, indent=2))


### Request Children and Re-Categorize With Parent Information

In [None]:
def processWithChildren(dResp, jResp, parentId):
    """Record the nodes of one metadata page and recurse into their children.

    Each node in jResp['data'] is appended to the module-level list
    nodesWithParents (when parentId is given; the node also gets a
    'parentId' key) or nodesWithoutParents (when parentId is ''). For a node
    seen for the first time with a non-zero 'childrenCount', its children
    are fetched page by page from NEWS_ENDPOINT and processed recursively
    with the node's id as parentId.

    Args:
        dResp: response object for the page; only its status_code is read.
            Nothing is done unless it is 200.
        jResp: decoded JSON body of that response; must contain 'data'.
        parentId: id of the parent node, or '' for top-level pages.

    Side effects:
        Mutates the node dicts and the two global lists, prints progress,
        and replaces the global accessToken when a 401 forces a refresh.
    """
    global accessToken
    children_path = "/children?offset="
    step_size = 100  # 100 is max allowed at the time of this writing
    limit_param = "&limit=" + str(step_size)

    if dResp.status_code != 200:
        return

    for node in jResp['data']:
        nodeIsFirstSeen = True
        if parentId != '':
            node['parentId'] = parentId
            if node not in nodesWithParents:
                nodesWithParents.append(node)
            else:
                nodeIsFirstSeen = False
        else:
            if not any(nd.get('id') == node.get('id') for nd in nodesWithParents) and node not in nodesWithoutParents:
                nodesWithoutParents.append(node)
            else:
                nodeIsFirstSeen = False

        # Already-known nodes are neither counted nor expanded again.
        if not nodeIsFirstSeen:
            continue

        # keep track of the processing progress
        inserted = len(nodesWithParents) + len(nodesWithoutParents)
        if inserted % 200 == 0:
            print("***************Inserted " + str(inserted))

        # A node without a 'childrenCount' is treated as having no children
        # (comparing the offset against None would raise a TypeError).
        childrenCount = node.get('childrenCount') or 0
        if childrenCount == 0:
            continue

        start = 0
        while start <= childrenCount:
            print("*in node %s with childrenCount %s at offset %s " % (node.get('id'), node.get('childrenCount'), str(start)))
            childrenUrl = NEWS_ENDPOINT + "/" + str(node.get('id')) + children_path + str(start) + limit_param
            dChildrenResp = requests.get(childrenUrl, headers={"Authorization": "Bearer " + accessToken})

            if dChildrenResp.status_code == 401:
                print("Unable to get children data. Code %s, Message: %s, in node %s with childrenCount %s at offset %s" % (
                    dChildrenResp.status_code, dChildrenResp.text,
                    node.get('id'), node.get('childrenCount'), str(start)))
                accessToken = getToken()     # token refresh on token expired
                dChildrenResp = requests.get(childrenUrl, headers={"Authorization": "Bearer " + accessToken})

            # Covers both a non-401 error and a retry that failed again:
            # stop paging this node instead of parsing an error body.
            if dChildrenResp.status_code != 200:
                print("Unable to get children data. Code %s, Message: %s, in node %s with childrenCount %s at offset %s" % (
                    dChildrenResp.status_code, dChildrenResp.text,
                    node.get('id'), node.get('childrenCount'), str(start)))
                break

            jCResp = json.loads(dChildrenResp.text)
            processWithChildren(dChildrenResp, jCResp, node.get('id'))

            # No "next" entry in the page metadata means this was the last page.
            if "next" not in jCResp.get("meta", {}):
                break
            print("*in node %s next is not False " % (node.get('id')))
            start = start + step_size


In [None]:
# Top-level page: an empty parentId marks its nodes as having no parent.
processWithChildren(dResp, jResp, parentId='')

### Request Next on News Metadata and (optionally) Save to File

In [None]:
news_param2 = "?cursor="

# The raw pages are written back to back into NEWS_META_FILE as a debugging
# aid (#DBG); the `with` block closes the file even if a request fails.
with open(NEWS_META_FILE, "w") as nf:
    nf.write(json.dumps(jResp, indent=2))  #DBG

    # A missing "next" entry is treated like an empty one: no further pages.
    nextCursor = jResp.get("meta", {}).get("next")
    while nextCursor:   # not empty
        print("Next= " + nextCursor)
        nextUrl = NEWS_ENDPOINT + news_param2 + nextCursor
        dResp = requests.get(nextUrl, headers={"Authorization": "Bearer " + accessToken})

        if dResp.status_code == 401:
            print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
            accessToken = getToken()     # token refresh on token expired
            dResp = requests.get(nextUrl, headers={"Authorization": "Bearer " + accessToken})

        # Any error other than an expired token, or a retry that failed
        # again, ends the paging instead of processing an error body.
        if dResp.status_code != 200:
            print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
            break

        print("Resource access successful")
        jResp = json.loads(dResp.text)
        processWithChildren(dResp, jResp, '')

        nf.write(json.dumps(jResp, indent=2))  #DBG
        nextCursor = jResp.get("meta", {}).get("next")

print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<DONE child processing >>>>>>>>>>>>>>>>>>>>>>>>>>")

### Process into Tree Form

In [None]:
from anytree import Node, RenderTree

# keeping track of the progress prior to removing a few duplicates
print("nodesWithoutParents length=" + str(len(nodesWithoutParents)) + ", nodesWithParents length=" + str(len(nodesWithParents)))

# Give every collected node its own tree node.
for node in nodesWithoutParents:
    node['treenode'] = Node(node.get('id'))

for node in nodesWithParents:
    node['treenode'] = Node(node.get('id'))

# id -> tree node lookup. setdefault keeps the first entry per id and
# nodesWithParents is scanned first, so a parent is resolved among the
# nodes with parents before the top-level ones.
# NOTE(review): assumes node ids are hashable (they are concatenated to a
# string in the ORPHAN message below) - confirm.
treenodeById = {}
for candidate in nodesWithParents + nodesWithoutParents:
    treenodeById.setdefault(candidate.get('id'), candidate['treenode'])

for node in nodesWithParents:
    parentTreenode = treenodeById.get(node.get('parentId'))
    if parentTreenode is not None:
        node['treenode'].parent = parentTreenode
    else:
        # Keep the existing tree node: children may already be attached to it.
        print("ORPHAN ? " + node.get('id'))

# check for top-levels that are not really top level, just happened to be first
# The list is rebuilt in place rather than calling remove() while iterating,
# which would skip the element following each removed one.
idsWithParents = {nd.get('id') for nd in nodesWithParents}
nodesWithoutParents[:] = [nd for nd in nodesWithoutParents if nd.get('id') not in idsWithParents]

for node in nodesWithoutParents:
    print(RenderTree(node.get('treenode')))