# Exploring RDP News Metadata with JSON Viewer
Get started by loading valid RDP credentials.

In [None]:
%run ./credentials.ipynb

##  Setup for RDP API Access

In [None]:
import requests, json, time, getopt, sys, pandas

# Application constants.
# The token URL is assembled as host + API category + API version + resource.
base_URL = "https://api.refinitiv.com"
category_URL = "/auth/oauth2"
RDP_version = "/v1"
endpoint_URL = "/token"
TOKEN_ENDPOINT = "".join((base_URL, category_URL, RDP_version, endpoint_URL))

# Values used when requesting a token.
CLIENT_SECRET = ""
SCOPE = "trapi"

# Local file in which the most recently obtained token is stored.
TOKEN_FILE = "token.txt"

## Obtain a Valid RDP Authentication Token 

In [None]:
def _requestNewToken(refreshToken):
    """Request a token from the RDP token endpoint.

    Args:
        refreshToken: Refresh token from an earlier grant, or None to
            authenticate with RDP_LOGIN / RDP_PASSWORD (password grant).

    Returns:
        dict: The decoded JSON body of the token response.

    Raises:
        Exception: If the endpoint answers with any status other than 200.
    """
    if refreshToken is not None:
        # Refresh grant: exchange the refresh token for a new access token.
        payload = {
            "refresh_token": refreshToken,
            "grant_type": "refresh_token",
        }
    else:
        # Password grant: authenticate with the user's credentials.
        payload = {
            "username": RDP_LOGIN,
            "password": RDP_PASSWORD,
            "grant_type": "password",
            "scope": SCOPE,
            "takeExclusiveSignOnControl": "true"
        }

    # REST call to the token endpoint; the application key is sent as the
    # basic-auth user name together with the client secret.
    reply = requests.post(
        TOKEN_ENDPOINT,
        headers={"Accept": "application/json"},
        data=payload,
        auth=(APP_KEY, CLIENT_SECRET),
    )

    if reply.status_code == 200:
        return json.loads(reply.text)

    raise Exception("Failed to get access token {0} - {1}".format(reply.status_code, reply.text))

def saveToken(tknObject):
    """Stamp a token with its expiry time and write it to TOKEN_FILE as JSON.

    Args:
        tknObject: Token dict containing "expires_in" (a number of seconds,
            convertible with int()). It is modified in place: an
            "expiry_tm" key holding the absolute expiry time is added.
    """
    print("Saving the new token")
    # Absolute expiry time, brought forward by 10 seconds as a safety margin.
    # Computed before the file is opened so that a malformed token cannot
    # leave behind a truncated, empty token file.
    tknObject["expiry_tm"] = time.time() + int(tknObject["expires_in"]) - 10
    # The context manager guarantees the data is flushed and the handle closed.
    with open(TOKEN_FILE, "w") as tf:
        json.dump(tknObject, tf, indent=4)

def getToken():
    """Return an access token, reusing the one stored in TOKEN_FILE when valid.

    The stored token is returned if its "expiry_tm" is still in the future.
    Otherwise a new token is requested with the stored refresh token. If
    anything in that process raises (missing or unreadable file, failed
    refresh, ...), a new token is requested with the password grant instead.
    Newly obtained tokens are persisted with saveToken().

    Returns:
        str: The access token.
    """
    try:
        print("Reading the token from: " + TOKEN_FILE)
        # Read-only access is sufficient; the context manager closes the
        # handle on every path, including the early return below.
        with open(TOKEN_FILE, "r") as tf:
            tknObject = json.load(tf)

        # Is the stored access token still valid?
        if tknObject["expiry_tm"] > time.time():
            return tknObject["access_token"]

        print("Token expired, refreshing a new one...")
        # Get a new token from the refresh token
        tknObject = _requestNewToken(tknObject["refresh_token"])

    except Exception as exp:
        print("Caught exception: " + str(exp))
        print("Getting a new token using Password Grant...")
        tknObject = _requestNewToken(None)

    # Persist this token for future queries
    saveToken(tknObject)
    # NOTE(review): this writes a live access token into the notebook output;
    # clear the output before sharing the notebook.
    print("Token is: " + tknObject["access_token"])
    return tknObject["access_token"]
    
# Obtain a token once up front; the bare `token` expression below displays it
# as the cell output.
# NOTE(review): this echoes a live access token into the notebook output.
token = getToken()
token

## Request N News Headlines

In [None]:
def getHeadlines(query, numLines, date_from='', date_to='', return_as_text=False):
    """Fetch news headlines matching a query from the RDP headlines endpoint.

    Args:
        query: Headline search expression. It is URL-encoded before being
            sent, so characters such as '&', '#', '+' or spaces are safe.
        numLines: Value sent as the `limit` request parameter.
        date_from: Value for the `dateFrom` parameter; omitted when ''.
        date_to: Value for the `dateTo` parameter; omitted when ''.
        return_as_text: If True, return the raw response body instead of a
            DataFrame.

    Returns:
        pandas.DataFrame built from the records under the response's "data"
        key; or the raw response text when return_as_text is True; or the
        string "Error <status code>" when the request does not succeed.
    """
    # Local import keeps the function usable without touching the import cell.
    from urllib.parse import quote, urlencode

    news_category_URL = "/data/news"
    headlines_endpoint_URL = "/headlines?"

    params = {"query": query, "limit": numLines}
    if date_from != '':
        params["dateFrom"] = date_from
    if date_to != '':
        params["dateTo"] = date_to

    # quote_via=quote encodes spaces as %20 rather than '+'.
    REQUEST_URL = (base_URL + news_category_URL + RDP_version
                   + headlines_endpoint_URL + urlencode(params, quote_via=quote))

    accessToken = getToken()
    print("Requesting: ", REQUEST_URL)

    acceptValue = "*/*"
    dResp = requests.get(REQUEST_URL, headers={"Authorization": "Bearer " + accessToken, "Accept": acceptValue})

    if dResp.status_code == 401:
        # Token rejected: ask for a token again and retry once.
        print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
        accessToken = getToken()
        dResp = requests.get(REQUEST_URL, headers={"Authorization": "Bearer " + accessToken, "Accept": acceptValue})

    if dResp.status_code != 200:
        # Covers both a first-attempt failure other than 401 and a failed
        # retry, so an error body is never parsed as headline data.
        print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
        return "Error " + str(dResp.status_code)

    print("Resource access successful")
    if return_as_text:
        return dResp.text
    jResp = json.loads(dResp.text)
    dfH = pandas.json_normalize(jResp, record_path=['data'])
    return dfH
# Number of headlines to request.
N = 50
# Alternative query, kept for reference:
#query = 'R:CLc1 AND Language:LEN';
query = 'AAPL.O, GOOG.O';
# Fetch the headlines; the bare `dfHeadlines` expression shows the result as
# the cell output.
dfHeadlines = getHeadlines(query,N)
dfHeadlines

## Request a Story - Define a Function  

In [None]:
def getStory(storyId, jsonOrHtml):
    """Retrieve a single news story from the RDP stories endpoint.

    Args:
        storyId: Story identifier, appended to the stories URL.
        jsonOrHtml: When equal to True the story is requested as
            "application/json"; any other value requests "text/html".

    Returns:
        str: The response body on success, or "" when the request fails
        (including when the retry after a 401 also fails).
    """
    news_category_URL = "/data/news"
    story_endpoint_URL = "/stories"

    REQUEST_URL = base_URL + news_category_URL + RDP_version + story_endpoint_URL + "/" + storyId

    accessToken = getToken()
    print("Requesting: ", REQUEST_URL)

    acceptValue = "application/json"
    if jsonOrHtml != True:
        acceptValue = "text/html"

    dResp = requests.get(REQUEST_URL, headers={"Authorization": "Bearer " + accessToken, "Accept": acceptValue})

    if dResp.status_code == 401:
        # Token rejected: ask for a token again and retry once, mirroring
        # getHeadlines, instead of giving up and implicitly returning None.
        print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
        accessToken = getToken()
        dResp = requests.get(REQUEST_URL, headers={"Authorization": "Bearer " + accessToken, "Accept": acceptValue})

    if dResp.status_code != 200:
        print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
        return ""

    print("Resource access successful")
    return dResp.text

## Request Story in Json

In [None]:
txt = getStory('urn:newsml:reuters.com:20210722:nDJR2R6XjM:1', True)
# Other story ids to try:
#txt = getStory('urn:newsml:reuters.com:20210811:nL1N2PI1YN:9', True)
#txt = getStory('urn:newsml:reuters.com:20210811:nTNF29sf:1', True)
if txt:
    jResp = json.loads(txt)
    print(json.dumps(jResp, indent=2))
else:
    # getStory returns no body when the request fails; parsing it would
    # only raise an opaque JSON decoding error.
    print("Story could not be retrieved")

## Request Story for Display

In [None]:
# Use the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Request the story as HTML, show the raw markup, then render it.
txt = getStory('urn:newsml:reuters.com:20210722:nDJR2R6XjM:1', False)
print('********* story text')
print(txt)
print('********* story HTML')
display(HTML(txt))

## Request Stories Corresponding to Headlines by StoryID
First we just display the stories

In [None]:
# The bare `dfHeadlines` expression is not the last statement of the cell, so
# it produces no output here.
dfHeadlines
# Request each headline's story as HTML and render it.
for index, row in dfHeadlines.iterrows():
    txt = getStory(row['storyId'], False)
    display(HTML(txt))

Request stories by story ID and, if content metadata is included with a story, derive and store the story's news sentiment ratings.

In [None]:
# One single-row DataFrame per retrieved story, combined into dfStories below.
listDfStories = []
for index, row in dfHeadlines.iterrows():
    fullJsonOut = getStory(row['storyId'], True)
    # getStory yields no body when a request fails: stop fetching.
    if not fullJsonOut:
        break
    jResp = json.loads(fullJsonOut)
    print(json.dumps(jResp, indent=4))

    # store storyJson with the headline
    dfHeadlines.loc[index, 'storyJson'] = fullJsonOut

    # if ratings are available in metadata, parse them and store them with the headline
    try:
        content_meta = jResp['newsItem'].get('contentMeta')
        if content_meta:  # metadata exists
            for ex_property in content_meta.get('contentMetaExtProperty') or []:
                if 'hasSentiment' in ex_property['_rel']:  # sentiment exists
                    dfHeadlines.loc[index, ex_property['_rel']] = ex_property['_value']
                else:
                    dfHeadlines.loc[index, ex_property['_rel']] = 'No sentiment'
    except Exception as exc:
        # Best effort: a story with unexpected metadata is reported and the
        # loop carries on. Catching Exception (not a bare except) keeps the
        # cell interruptible with Ctrl-C / kernel interrupt.
        print('&&&EXCEPTED&&& on item:')
        print(repr(exc))
        print(json.dumps(jResp, indent=4))

    # put json into dataframe
    dfS = pandas.json_normalize(jResp)
    # collect a list of story dataframes
    listDfStories.append(dfS)

# turn into a dataframe; pandas.concat raises on an empty list, so fall back
# to an empty frame when no story could be retrieved
dfStories = pandas.concat(listDfStories) if listDfStories else pandas.DataFrame()

## See which of the Stories Were Rated

In [None]:
# Show every row instead of pandas' truncated view.
pandas.set_option('display.max_rows', None)

# The sentiment columns only exist if at least one fetched story carried a
# rating. reindex() shows a missing column as NaN instead of raising KeyError.
rating_columns = [
    'storyId',
    'extCptRel:hasSentimentPositive',
    'extCptRel:hasSentimentNegative',
    'extCptRel:hasSentimentNeutral',
]
dfHeadlines.reindex(columns=rating_columns)

## Request per StoryId and Display Story in HTML

In [None]:
# Request the story as HTML; the bare expression shows the raw markup string
# as the cell output.
html_story = getStory('urn:newsml:reuters.com:20210722:nDJR2R6XjM:1',False)
html_story

In [None]:
# Render the markup retrieved into html_story as rich HTML output.
HTML(html_story)

##  Get Headlines with Parameters

In [None]:
# Same headline request, this time restricted to a date window via the
# optional date_from / date_to arguments.
dfHeadlinesParam = getHeadlines('Google', 50, date_from='2019-10-01T00:00:00Z', date_to='2020-11-15T00:00:00Z')
dfHeadlinesParam

## Convenient Retrieval with RDP Library

In [None]:
import refinitiv.dataplatform as rdp
from IPython.display import HTML

%run ./credentials.ipynb

In [None]:
# Open an RDP platform session with a password grant.
# APP_KEY, RDP_LOGIN and RDP_PASSWORD are presumably defined by
# credentials.ipynb — verify.
rdp.open_platform_session(
    APP_KEY, 
    rdp.GrantPassword(
        username = RDP_LOGIN, 
        password = RDP_PASSWORD
    )
)

In [None]:
# Library equivalent of the getHeadlines('Google', ...) request above, asking
# for up to 1000 headlines in the same date window, sorted oldest first.
rdp.get_news_headlines(query = 'Google', count = 1000, date_from='2019-10-01', date_to='2020-11-15', sort_order='oldToNew' )

##  Inspect Signature of a method (get_news_headlines)

In [None]:
# Print the parameter list accepted by rdp.get_news_headlines.
from inspect import signature
t = signature(rdp.get_news_headlines)
print(t)

## Examine Single Story with Json Viewer
We are going to use
https://github.com/AtsushiSakai/PyJSONViewer

It is perhaps more interesting to examine one of the stories that were rated:

In [None]:
import pyjsonviewer
# Fetch one story as JSON, print the raw text, then open the parsed document
# in the JSON viewer.
txt = getStory('urn:newsml:reuters.com:20211020:nL1N2RG24B:5',True)
print(txt)
jResp = json.loads(txt);
pyjsonviewer.view_data(json_data=jResp)

... we close the viewer before moving to the next steps

## Examine Headlines with Relevant Stories Data with Json Viewer
1. Convert headlines dataframe to json
2. Load json
3. Submit it to Json Viewer

In [None]:
import pyjsonviewer

# Serialise the headlines, one JSON object per row.
jsn = dfHeadlines.to_json(orient="records")
# The escaped quotes inside the embedded storyJson strings must stay escaped:
# un-escaping them would make the document invalid JSON.
print(jsn)
jResp = json.loads(jsn)
pyjsonviewer.view_data(json_data=jResp)

## Examine Stories Metadata with Json Viewer

In [None]:
# Serialise the combined story frame (one JSON object per story), parse it
# back into Python objects and open it in the JSON viewer.
jsn = dfStories.to_json(orient="records")
#print(jsn)
jResp = json.loads(jsn);
import pyjsonviewer
pyjsonviewer.view_data(json_data=jResp)