In [26]:
#====================================================================================
#On Demand request demo code, using the following steps:
# - Authentication token request
# - On Demand extraction request
# - Extraction status polling request
#   Extraction notes retrieval
# - Data retrieval and save to disk (the data file is gzipped)
#   Includes AWS download capability
#====================================================================================

#Set these parameters before running the code:
#filePath is the download directory; this cell creates it further down if it does not exist.
#Replace the placeholder user name in the path with a real location on your machine.
filePath = "/Users/xxxxxxxx/Documents/content/rth"  #Location to save downloaded files
fileNameRoot = "RTH_Python"     #Root of the name for the downloaded files (step 4 appends ".csv.gz")
#INI file read in step 1; it must contain an [RTH] section with "username" and "password" keys.
CREDENTIALS_FILE = "credential.ini"
#Download source selector used in step 4. Set it to:
# False to download from RTH servers
# True to download from Amazon Web Services cloud (recommended, it is faster)
useAws = True

#Imports:
import requests
import json
import shutil
import time
import urllib3
import gzip
import configparser
import os

# Make sure the download directory is usable before anything is written to it.
# isdir() (rather than exists()) rejects the case where filePath points at a
# regular file, which would otherwise only fail much later when step 4 opens
# the output file.
isExist = os.path.isdir(filePath)
if not isExist:
    # exist_ok=True tolerates the directory appearing between the check and
    # the creation; it still raises FileExistsError if filePath is a file.
    os.makedirs(filePath, exist_ok=True)
    print("The new directory is created!")
#====================================================================================
#Step 1: token request

# Read credentials from file.
# On success this defines myUsername and myPassword for the token request below.
# On failure the guidance is printed and the error is re-raised, so the cell
# stops here instead of failing later with a NameError on myUsername.
try:
    print("Read credentials from file")

    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed: an empty list means nothing was read.
    if not config.read(CREDENTIALS_FILE):
        raise FileNotFoundError(f"Credentials file not found or unreadable: {CREDENTIALS_FILE}")

    # A missing section or key raises KeyError, handled below.
    myUsername = config["RTH"]["username"]
    myPassword = config["RTH"]["password"]

except Exception as e:
    print(
        f"""Error message : {e}
        Cannot get credentials from a file, please create the credentials file as 'credential.ini' with the format below, 
            [RTH]
            username = YOUR_USERNAME
            password = YOUR_PASSWORD"""
    )
    # Without credentials nothing below can work: propagate the original error.
    raise

# Request an authentication token with the credentials read above.
# On HTTP 200 the token is stored in `token` for use by the following steps.
requestUrl = "https://selectapi.datascope.refinitiv.com/RestApi/v1/Authentication/RequestToken"

requestHeaders={
    "Prefer":"respond-async",
    "Content-Type":"application/json"
    }

requestBody={
    "Credentials": {
    "Username": myUsername,
    "Password": myPassword
  }
}

r1 = requests.post(requestUrl, json=requestBody,headers=requestHeaders)

if r1.status_code == 200 :
    jsonResponse = json.loads(r1.text.encode('ascii', 'ignore'))
    token = jsonResponse["value"]
    print ('Authentication token (valid 24 hours):')
    # NOTE: the token is a secret. Clear this cell's output before sharing
    # or committing the notebook.
    print (token)
else:
    # status_code is an int: it must be converted before concatenation,
    # otherwise this error path itself raises TypeError.
    print ('An error occurred, error status code: ' + str(r1.status_code))

Read credentials from file
Authentication token (valid 24 hours):
_Js2UPj_TkfWwiOt8TBWRbAifz3FLudoYAGcoCDTrGUd99yI6xvAV7LPWm-HNuYPbtUjS0Na4e0-HzCGsMsDdMYbBbWwsxFYHtjZcfK_zXDh6O8ORBC1ERIrHA6MUOWnmJigNzwvRp8FkP08wi7xeO-vSQRDAgBR5MUhsZyoQXmaNGrWvXelMpPieFvFZ0MgqQE_z-_8ImB36NDaiaW74_pFqpn-tmIEyClM1lYdlPN8OUfhHNhe-QuPyiL3YPUVOg2Bjijp8pPLooMSDrtLIv84cYQIPbUqlRwi9pVs0lgY


In [27]:
#Step 2: send an on demand extraction request using the received token

#Endpoint that accepts raw On Demand extraction requests
requestUrl = 'https://selectapi.datascope.refinitiv.com/RestApi/v1/Extractions/ExtractRaw'

#Same headers as the token request, plus the authorization obtained in step 1
requestHeaders = {}
requestHeaders["Prefer"] = "respond-async"
requestHeaders["Content-Type"] = "application/json"
requestHeaders["Authorization"] = "token " + token

# Payload of the Time and Sales extraction request.
# The requested fields are listed in two groups (price-related first, then
# volume-related) and concatenated in that order.
priceFieldNames = [
    "Auction - Ask Price",
    "Auction - Bid Price",
    "Auction - Price",
    "Correction - Ask Price",
    "Correction - Bid Price",
    "Correction - Original Price",
    "Correction - Price",
    "Quote - Ask Price",
    "Quote - Bench Price",
    "Quote - Bid Price",
    "Quote - Fair Price",
    "Quote - Far Clearing Price",
    "Quote - Freight Price",
    "Quote - Invoice Price",
    "Quote - Lower Limit Price",
    "Quote - Mid Price",
    "Quote - Near Clearing Price",
    "Quote - Price",
    "Quote - Reference Price",
    "Quote - Theoretical Price",
    "Quote - Theoretical Price Ask",
    "Quote - Theoretical Price Bid",
    "Quote - Theoretical Price Mid",
    "Quote - Upper Limit Price",
    "Settlement Price - Date",
    "Settlement Price - Price",
    "Trade - Ask Price",
    "Trade - Average Price",
    "Trade - Bench Price",
    "Trade - Bid Price",
    "Trade - Freight Price",
    "Trade - Indicative Auction Price",
    "Trade - Lower Limit Price",
    "Trade - Mid Price",
    "Trade - Odd-Lot Trade Price",
    "Trade - Original Price",
    "Trade - Price",
    "Trade - Trade Price Currency",
    "Trade - Upper Limit Price",
]

volumeFieldNames = [
    "Auction - Volume",
    "Correction - Accumulated Volume",
    "Correction - Original Accumulated Volume",
    "Correction - Original Volume",
    "Correction - Volume",
    "Quote - Volume",
    "Trade - Accumulated Volume",
    "Trade - Advancing Volume",
    "Trade - Declining Volume",
    "Trade - Exchange For Physical Volume",
    "Trade - Exchange For Swaps Volume",
    "Trade - Fair Value Volume",
    "Trade - Indicative Auction Volume",
    "Trade - Odd-Lot Trade Volume",
    "Trade - Original Volume",
    "Trade - Total Buy Volume",
    "Trade - Total Sell Volume",
    "Trade - Total Volume",
    "Trade - Unchanged Volume",
    "Trade - Volume",
]

# Instruments to extract: a single RIC in this demo.
instrumentIdentifierList = {
    "@odata.type": "#DataScope.Select.Api.Extractions.ExtractionRequests.InstrumentIdentifierList",
    "InstrumentIdentifiers": [
        {"Identifier": "0005.HK", "IdentifierType": "Ric"},
    ],
    "ValidationOptions": None,
    "UseUserPreferencesForValidationOptions": False,
}

# Time window and reporting options of the extraction.
extractionCondition = {
    "MessageTimeStampIn": "GmtUtc",
    "ApplyCorrectionsAndCancellations": False,
    "ReportDateRangeType": "Range",
    "QueryStartDate": "2023-01-03T05:00:00.000-05:00",
    "QueryEndDate": "2023-01-03T06:00:00.000-05:00",
    "DisplaySourceRIC": False,
}

requestBody = {
    "ExtractionRequest": {
        "@odata.type": "#DataScope.Select.Api.Extractions.ExtractionRequests.TickHistoryTimeAndSalesExtractionRequest",
        "ContentFieldNames": priceFieldNames + volumeFieldNames,
        "IdentifierList": instrumentIdentifierList,
        "Condition": extractionCondition,
    }
}

#Submit the extraction request built above
r2 = requests.post(
    requestUrl,
    headers=requestHeaders,
    json=requestBody,
)

#Keep the HTTP status for the polling step and display it.
#The first response (after approximately 30 seconds wait) is usually 202.
status_code = r2.status_code
print(f"HTTP status of the response: {status_code}")

HTTP status of the response: 202


In [28]:
#Step 3: if required, poll the status of the request using the received location URL.
#Once the request has completed, retrieve the jobId and extraction notes.

#Always start from the step 2 response:
# - if the extraction completed immediately (HTTP 200 on the first reply), r3 must
#   already refer to a response, otherwise reading it below raises a NameError;
# - re-deriving status_code from r2 makes this cell safe to re-run, e.g. after a
#   failed poll, as the error message at the bottom advises.
r3 = r2
status_code = r2.status_code

#If status is 202, display the location url we received, and will use to poll the status of the extraction request:
if status_code == 202 :
    requestUrl = r2.headers["location"]
    print ('Extraction is not complete, we shall poll the location URL:')
    print (str(requestUrl))

    requestHeaders={
        "Prefer":"respond-async",
        "Content-Type":"application/json",
        "Authorization":"token " + token
    }

    #As long as the status of the request is 202, the extraction is not finished;
    #we must wait, and poll the status until it is no longer 202:
    while (status_code == 202):
        print ('As we received a 202, we wait 30 seconds, then poll again (until we receive a 200)')
        time.sleep(30)
        r3 = requests.get(requestUrl,headers=requestHeaders)
        status_code = r3.status_code
        print ('HTTP status of the response: ' + str(status_code))

#When the status of the request is 200 the extraction is complete;
#we retrieve and display the jobId and the extraction notes (it is recommended to analyse their content):
if status_code == 200 :
    r3Json = json.loads(r3.text.encode('ascii', 'ignore'))
    jobId = r3Json["JobId"]
    print ('\njobId: ' + jobId + '\n')
    notes = r3Json["Notes"]
    print ('Extraction notes:\n' + notes[0])
else:
    #Any status other than 200 at this point means there was an error:
    print ('An error occured. Try to run this cell again. If it fails, re-run the previous cell.\n')

Extraction is not complete, we shall poll the location URL:
https://selectapi.datascope.refinitiv.com/RestApi/v1/Extractions/ExtractRawResult(ExtractionId='0x08734a37c688bb8e')
As we received a 202, we wait 30 seconds, then poll again (until we receive a 200)
HTTP status of the response: 200

jobId: 0x08734a37c688bb8e

Extraction notes:
Extraction Services Version 17.1.45473 (85dddf1679ec), Built Apr 13 2023 14:29:24
User ID: 9029477
Extraction ID: 2000000548529710
Correlation ID: CiD/9029477/0x0000000000000000/RESTAPI-I/EXT.2000000548529710
Schedule: 0x08734a37c688bb8e (ID = 0x0000000000000000)
Input List (1 items):  (ID = 0x08734a37c688bb8e) Created: 28/04/2023 12:57:09 Last Modified: 28/04/2023 12:57:09
Report Template (59 fields): _OnD_0x08734a37c688bb8e (ID = 0x08734a37c698bb8e) Created: 28/04/2023 12:56:35 Last Modified: 28/04/2023 12:56:35
Schedule dispatched via message queue (0x08734a37c688bb8e), Data source identifier (6682B8EA440843CFAE3953D3FE3CFCDE)
Schedule Time: 28/04/20

In [29]:
#Step 4: get the extraction results, using the received jobId.
#The compressed data is saved to disk as a GZIP file, and only a few lines of it are displayed.

#IMPORTANT NOTE:
#This code should not fail even with large data sets.
#If you need to manipulate the data, read and decompress the saved file, instead of
#decompressing data from the server on the fly.
#This is the recommended way to proceed, to avoid data loss issues.

requestUrl = "https://selectapi.datascope.refinitiv.com/RestApi/v1/Extractions/RawExtractionResults('" + jobId + "')/$value"

#Headers common to both download sources
requestHeaders = {
    "Prefer": "respond-async",
    "Content-Type": "text/plain",
    "Accept-Encoding": "gzip",
}
#AWS requires an additional header: X-Direct-Download
if useAws:
    requestHeaders["X-Direct-Download"] = "true"
requestHeaders["Authorization"] = "token " + token

r4 = requests.get(requestUrl, stream=True, headers=requestHeaders)
#Keep the payload compressed: no automatic decompression on the fly
r4.raw.decode_content = False
if not useAws:
    print ('Content response headers (TRTH server): type: ' + r4.headers["Content-Type"] + ' - encoding: ' + r4.headers["Content-Encoding"] + '\n')
else:
    #AWS does not set header Content-Encoding="gzip", so only the type is shown.
    print ('Content response headers (AWS server): type: ' + r4.headers["Content-Type"] + '\n')

#Next 2 lines display some of the compressed data, but if you uncomment them save to file fails
#print ('20 bytes of compressed data:')
#print (r4.raw.read(20))

#The target file name must be built BEFORE it is displayed: on a fresh kernel
#the name does not exist yet, and on a re-used kernel a stale value would be shown.
fileName = filePath + '/' + fileNameRoot + ".csv.gz"
print ('Saving compressed data to file:' + fileName + ' ... please be patient')

#Stream the raw (still gzipped) response body to disk in small chunks,
#so that the whole payload is never held in memory.
chunk_size = 1024
rr = r4.raw
with open(fileName, 'wb') as fd:
    shutil.copyfileobj(rr, fd, chunk_size)
#No explicit close is needed: the with statement closes the file.

print ('Finished saving compressed data to file:' + fileName + '\n')

#Now let us read and decompress the file we just created.
#For the demo we limit the treatment to a few lines:
maxLines = 10
print ('Read data from file, and decompress at most ' + str(maxLines) + ' lines of it:')

#Decoded lines are collected in a list and joined once at the end,
#instead of growing a string on every iteration.
dataLines = []
count = 0
with gzip.open(fileName, 'rb') as fd:
    for line in fd:
        dataLine = line.decode("utf-8")
        #Do something with the data:
        print (dataLine)
        dataLines.append(dataLine)
        count += 1
        if count >= maxLines:
            break
#No explicit close is needed: the with statement closes the file.
uncompressedData = "".join(dataLines)

#Note: variable uncompressedData stores the lines read above (at most maxLines of them).
#Accumulating data in memory like this is not a good practice with large data sets.
#We only use it here as a convenience for the next step of the demo, to keep the code very simple.
#In production one would handle the data line by line (as we do with the screen display)

Content response headers (AWS server): type: application/gzip

Saving compressed data to file:/Users/U6086063/Documents/content/rth/RTH_Python.step5.csv.gz ... please be patient
Finished saving compressed data to file:/Users/U6086063/Documents/content/rth/RTH_Python.csv.gz

Read data from file, and decompress at most 10 lines of it:
#RIC,Domain,Date-Time,GMT Offset,Type,Price,Volume,Bid Price,Ask Price,UpLim Price,LoLim Price,Theo. Price,Freight Pr.,Date,Bench Price,Acc. Volume,Mid Price,Advancing Volume,Declining Volume,Unchanged Volume,Total Volume,Original Price,Original Volume,Reference Price,Far Clearing Price,Near Clearing Price,Fair Price,Average Price,Exch For Physical Vol,Exch For Swaps Vol,Odd-Lot Trade Price,Odd-Lot Trade Volume,Original Acc. Volume,Theoretical Price Ask,Theoretical Price Bid,Theoretical Price Mid,Total Buy Volume,Total Sell Volume,Trade Price Currency,Fair Value Volume,Indicative Auction Price,Indicative Auction Volume,Invoice Price

0005.HK,Market Price,20

In [30]:
#Step 5 (cosmetic): formatting the data read in step 4 as a pandas DataFrame

from io import StringIO
import pandas as pd

#Wrap the CSV text in a file-like object so that pandas can parse it
csvBuffer = StringIO(uncompressedData)
TimeAndSales = pd.read_csv(csvBuffer)
TimeAndSales

Unnamed: 0,#RIC,Domain,Date-Time,GMT Offset,Type,Price,Volume,Bid Price,Ask Price,UpLim Price,...,Theoretical Price Ask,Theoretical Price Bid,Theoretical Price Mid,Total Buy Volume,Total Sell Volume,Trade Price Currency,Fair Value Volume,Indicative Auction Price,Indicative Auction Volume,Invoice Price
0,0005.HK,Market Price,2023-01-03T03:00:03.327070204Z,8,Trade,48.3,4000.0,48.2,48.3,,...,,,,,,,,,,
1,0005.HK,Market Price,2023-01-03T03:00:03.327823431Z,8,Quote,,,48.2,48.3,,...,,,,,,,,,,
2,0005.HK,Market Price,2023-01-03T03:00:04.258999935Z,8,Quote,,,48.2,48.3,,...,,,,,,,,,,
3,0005.HK,Market Price,2023-01-03T03:00:06.559008049Z,8,Quote,,,48.2,48.3,,...,,,,,,,,,,
4,0005.HK,Market Price,2023-01-03T03:00:07.559246384Z,8,Quote,,,48.2,48.3,,...,,,,,,,,,,
5,0005.HK,Market Price,2023-01-03T03:00:07.667019126Z,8,Quote,,,48.2,48.3,,...,,,,,,,,,,
6,0005.HK,Market Price,2023-01-03T03:00:08.155050101Z,8,Trade,48.2,800.0,48.2,48.3,,...,,,,,,,,,,
7,0005.HK,Market Price,2023-01-03T03:00:08.155635248Z,8,Quote,,,48.2,48.3,,...,,,,,,,,,,
8,0005.HK,Market Price,2023-01-03T03:00:08.159014361Z,8,Quote,,,48.2,48.3,,...,,,,,,,,,,
