# Requesting ESG Bulk PIT Content Set - Python

ESG stands for Environmental, Social and (Corporate) Governance data.

Refinitiv Data Platform (RDP) provides simple web based API access to a broad range of content, including ESG content and ESG content in bulk.

PIT (Point-in-Time) content is newly available on RDP. This notebook discusses the recommended approach to working with this content set.

In [64]:
import requests, json, time, getopt, sys
import pandas as pd

### Set Valid Credentials 

Valid RDP credentials are required to proceed:
* USERNAME
* PASSWORD
* CLIENT_ID

To read one's valid credentials from a file (that can be shared by many code examples), leave the code below as is.

To provide credentials in place:
* replace the placeholder credentials with one's valid assigned credentials
* comment out the call to readCredsFromFile that reads them from a file

In [65]:
# Placeholder credentials. readCredsFromFile() assigns these same three
# module-level names, so its call below replaces the placeholder values.
USERNAME = "VALIDUSER"
PASSWORD = "VALIDPASSWORD"
CLIENT_ID = "SELFGENERATEDCLIENTID"

def readCredsFromFile(filePathName):
    """Load USERNAME, PASSWORD and CLIENT_ID from a text file.

    The file holds one value per line, in this order:
        1. RDP machine ID
        2. long password
        3. generated client ID

    Only the trailing newline of each line is stripped. The values are
    stored in the module-level globals USERNAME, PASSWORD and CLIENT_ID.

    Raises OSError if the file cannot be opened.
    """
    global USERNAME, PASSWORD, CLIENT_ID
    # The context manager closes the file even if one of the reads fails.
    with open(filePathName, "r") as credFile:
        USERNAME = credFile.readline().rstrip('\n')
        PASSWORD = credFile.readline().rstrip('\n')
        CLIENT_ID = credFile.readline().rstrip('\n')

# Raw string: "\c" is not a valid escape sequence, so spell the Windows-style
# relative path with literal backslashes (the resulting value is unchanged).
readCredsFromFile(r"..\creds\credFileHuman.txt")

# Uncomment - to make sure that creds are either set in code or read in correctly
#print("USERNAME="+str(USERNAME))
#print("PASSWORD="+str(PASSWORD))
#print("CLIENT_ID="+str(CLIENT_ID))

### Set Application Constants

In [66]:
# Set Application Constants
RDP_AUTH_VERSION = "/v1"                # version segment of the token endpoint URL
RDP_ESG_BULK_VERSION = "/v1"            # version segment of the file-store endpoint URLs
RDP_BASE_URL = "https://api.refinitiv.com"
RDP_ESG_PIT_BUCKET = "rft-bulk-esg"     # value of the "bucket" query parameter for file-sets
CATEGORY_URL = "/auth/oauth2"           # token endpoint path, part 1
ENDPOINT_URL = "/token"                 # token endpoint path, part 2
CLIENT_SECRET = ""                      # sent with CLIENT_ID as the auth pair of the token request
TOKEN_FILE = "token.txt"                # file in which the token object is cached between calls
SCOPE = "trapi"                         # "scope" field of the password-grant request
FILESET_ID = ''                         # assigned later, once a file set has been selected
PACKAGE_ID = ''                         # NOTE(review): not referenced in the code shown here

### Define Token Handling and Obtain a Valid Token

Having a valid token is a prerequisite for requesting any RDP content; the token will be passed into the next steps.

In [67]:
# Full token URL: base URL, then category, auth version and endpoint path.
TOKEN_ENDPOINT = "".join((RDP_BASE_URL, CATEGORY_URL, RDP_AUTH_VERSION, ENDPOINT_URL))

def _requestNewToken(refreshToken):
    """Request a token object from TOKEN_ENDPOINT.

    When refreshToken is None a password grant is performed with USERNAME,
    PASSWORD and SCOPE; otherwise a refresh_token grant is performed with
    the supplied refresh token.

    Returns the decoded JSON body of the response.
    Raises Exception when the response status is not 200.
    """
    if refreshToken is not None:
        grantPayload = {
            "refresh_token": refreshToken,
            "grant_type": "refresh_token",
        }
    else:
        grantPayload = {
            "username": USERNAME,
            "password": PASSWORD,
            "grant_type": "password",
            "scope": SCOPE,
            "takeExclusiveSignOnControl": "true",
        }

    # REST call for the latest access token; CLIENT_ID / CLIENT_SECRET are
    # supplied as the request's auth pair.
    tokenResponse = requests.post(
        TOKEN_ENDPOINT,
        headers={"Accept": "application/json"},
        data=grantPayload,
        auth=(CLIENT_ID, CLIENT_SECRET),
    )

    if tokenResponse.status_code == 200:
        # Hand back the new token object
        return json.loads(tokenResponse.text)

    raise Exception(
        "Failed to get access token {0} - {1}".format(
            tokenResponse.status_code, tokenResponse.text
        )
    )

def saveToken(tknObject):
    """Persist a token object to TOKEN_FILE as indented JSON.

    An "expiry_tm" entry is added to tknObject itself (the caller's dict is
    mutated): the current time plus "expires_in" seconds, less a 10 second
    safety margin.

    Raises KeyError/ValueError if "expires_in" is missing or not an integer,
    and OSError if TOKEN_FILE cannot be written.
    """
    print("Saving the new token")
    # Compute the expiry before opening the file, so that a malformed token
    # object does not truncate a previously cached token.
    tknObject["expiry_tm"] = time.time() + int(tknObject["expires_in"]) - 10
    # The context manager flushes and closes the file.
    with open(TOKEN_FILE, "w") as tf:
        json.dump(tknObject, tf, indent=4)
    
def getToken():
    """Return a valid access token, reusing the cached one when possible.

    Steps:
      1. Read the token object cached in TOKEN_FILE; if its "expiry_tm" is
         still in the future, return its "access_token".
      2. Otherwise request a new token with the cached "refresh_token".
      3. If anything above fails (missing or corrupt file, refresh
         rejected, ...), fall back to a password grant.
    A newly obtained token object is saved with saveToken() before its
    "access_token" is returned.

    Raises whatever the password-grant request raises if that fails too.
    """
    try:
        print("Reading the token from: " + TOKEN_FILE)
        # Read-only access is enough here; the context manager closes the
        # file on every path, including the early return below.
        with open(TOKEN_FILE, "r") as tf:
            tknObject = json.load(tf)

        # Is the cached access token still valid?
        if tknObject["expiry_tm"] > time.time():
            return tknObject["access_token"]

        print("Token expired, refreshing a new one...")
        # Get a new token from the refresh token
        tknObject = _requestNewToken(tknObject["refresh_token"])

    except Exception as exp:
        # Deliberate best effort: any failure falls back to a password grant.
        print("Caught exception: " + str(exp))
        print("Getting a new token using Password Grant...")
        tknObject = _requestNewToken(None)

    # Persist this token for future queries
    saveToken(tknObject)
    return tknObject["access_token"]

accessToken = getToken()
print("Have token now")
# Show only a short prefix: cell output is stored with the notebook, and a
# complete bearer token printed here would be shared along with the file.
print("Token is: " + accessToken[:8] + "... (truncated)")

Reading the token from: token.txt
Token expired, refreshing a new one...
Caught exception: Failed to get access token 400 - {"error":"invalid_grant"   } 
Getting a new token using Password Grant...
Saving the new token
Have token now
Token is: eyJ0eXAiOiJhdCtqd3QiLCJhbGciOiJSUzI1NiIsImtpZCI6ImRMdFd2Q0tCSC1NclVyWm9YMXFod2pZQ2t1eDV0V2ZSS2o4ME9vcjdUY28ifQ.eyJkYXRhIjoie1wiY2lwaGVydGV4dFwiOlwiSGl0RVBBTTFWYVlTeElUY2ZhVzl1ejZXeTRVTHZ1b2lnclV1bXZPeWF3ZW1NSDQ0Z3J3RDI3OGpJcHJ1MVl6MFI3NDRVc1pDOGdsUGY5a0JfRjFrbV9oUUJuLUVBTHExWDdqODlpdktYQ1ktUVRGdHhCWEtlSUtWOFhpYzU4OERzRVF5azUwVjhPbmNibFZYVTROQjNhZUFldGd3SWZ0RTJiYmczUmx2b2stZW5GTmdQOW40WGhCUVEtemJlUUxlZEFqc3o2TVVUZlBKQnlvdEZQSHZiZEttWTRjaGdNZFJWTTdBaHJyZTBZYTlWVEdVV0pMaU9Qd3oxMHdQRS1vSzJvRFlaMGpqQUxzTndySzNtUEVIazNJbjZ1YnNFMlIzSkx1R2Fpa1VnYUJaRlJYS0QzZkN6XzFqdFFwWXhmeElydG5MZkFLUFJZdnNUdXlkUHdyRXVyV0RyS3I3dzk0Z3l5RFE2cE1fS3F1TEtrR2lKTnpnNzBXYzR0cTlJVVlXMFlIX2IyU1pOeVZUWHdjTkZDWWxBa203SUxobmx6TWUxcDR2YlZ0N3ByTkJab2FDWVpRYzNTZ05nal8yTVh4blRDV2lNYmRtd

### Show all output

In [68]:
from IPython.core.interactiveshell import InteractiveShell
# "all": display the value of every top-level expression in a cell,
# not just the last one.
InteractiveShell.ast_node_interactivity = "all"

## Request Available ESG Bulk PIT File Sets per Package ID
The PackageID assigned to the PIT content set should be known beforehand; at this time it is '4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad'.

In [69]:
# Package ID of the PIT content set; passed below as the packageId query parameter.
packageIdPIT = '4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad'
# File-sets URL limited to the ESG bucket (requestFileSets rebuilds this value on every call).
FILESET_ENDPOINT = RDP_BASE_URL+'/file-store'+RDP_ESG_BULK_VERSION + '/file-sets?bucket='+ RDP_ESG_PIT_BUCKET

def requestFileSets(token, withNext, skipToken, attributes):
    """Request the file sets available in the ESG bucket.

    token       bearer access token for the Authorization header
    withNext    when true, '&skipToken=' + skipToken is added to the URL
    skipToken   skip token of the page to fetch (used only with withNext)
    attributes  extra query-string text, e.g. '&packageId=...'; appended
                to the URL when non-empty

    The URL that was requested is left in the global FILESET_ENDPOINT.
    A 401 response triggers one retry with a token from getToken().

    Returns the parsed JSON response on HTTP 200, otherwise ''.
    """
    global FILESET_ENDPOINT

    print("Obtaining FileSets in ESG Bucket...")

    # Start again from the plain bucket URL on every call.
    FILESET_ENDPOINT = RDP_BASE_URL+'/file-store'+RDP_ESG_BULK_VERSION + '/file-sets?bucket='+ RDP_ESG_PIT_BUCKET
    if attributes:
        FILESET_ENDPOINT += attributes
    if withNext:
        FILESET_ENDPOINT += '&skipToken=' + skipToken

    authHeaders = {
        'Content-Type': "application/json",
        'Authorization': "Bearer " + token,
        'cache-control': "no-cache",
    }
    requestArgs = dict(data="", headers=authHeaders, params={})

    print('GET '+FILESET_ENDPOINT )
    reply = requests.request("GET", FILESET_ENDPOINT, **requestArgs)

    if reply.status_code == 401:
        # Token expired: fetch a current one and repeat the request once.
        authHeaders['Authorization'] = "Bearer " + getToken()
        reply = requests.request("GET", FILESET_ENDPOINT, **requestArgs)

    print('Raw response=')
    print(reply)

    if reply.status_code != 200:
        return ''
    return json.loads(reply.text)

# Request the file sets that belong to the PIT package.
# requestFileSets returns '' on failure, in which case the ['value'] lookup below raises.
jsonFullResp = requestFileSets(accessToken, False, '','&packageId='+packageIdPIT);

print('Parsed json response=');
print(json.dumps(jsonFullResp, indent=2));
print('Same response, tabular view');
# One row per file set listed under the response's "value" key.
dfPIT = pd.json_normalize(jsonFullResp['value'])
dfPIT

Obtaining FileSets in ESG Bucket...
GET https://api.refinitiv.com/file-store/v1/file-sets?bucket=rft-bulk-esg&packageId=4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad
Raw response=
<Response [200]>
Parsed json response=
{
  "value": [
    {
      "id": "40df-b7c9-759513c8-997b-ff1c66266b66",
      "name": "RFT-ESG-PIT-SDI-2021-11-30",
      "bucketName": "rft-bulk-esg",
      "packageId": "4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad",
      "attributes": [
        {
          "name": "ContentType",
          "value": "ESG PIT SDI Full"
        }
      ],
      "files": [
        "403e-8df7-fdbbbb41-b4c7-441a15d81ea1",
        "4048-8b15-178e99b0-9843-fbd388db4360",
        "4097-e6cb-a8f7d194-996e-f3c788fb0f93",
        "40f5-fe1c-b8109f48-a197-daa885b2b101",
        "412d-9b3b-0596cf5b-b583-ff6821dd30e2",
        "4142-a6e3-a869569b-a8d4-ce89c85a6598",
        "41a5-c001-e151826e-8fca-1c9ce88c2d8b",
        "4228-20b9-90b2c520-a5d5-bdc3f077d685",
        "4233-c194-c3ca8dbd-8e9b-175952a8b9da",
       

Unnamed: 0,id,name,bucketName,packageId,attributes,files,numFiles,availableFrom,status,created,modified
0,40df-b7c9-759513c8-997b-ff1c66266b66,RFT-ESG-PIT-SDI-2021-11-30,rft-bulk-esg,4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad,"[{'name': 'ContentType', 'value': 'ESG PIT SDI...","[403e-8df7-fdbbbb41-b4c7-441a15d81ea1, 4048-8b...",105,2021-11-30T02:11:38Z,READY,2021-11-30T02:11:38Z,2021-11-30T08:31:34Z
1,422f-f5d3-f7b681d8-9aa4-296cc347e2ff,RFT-ESG-PIT-SDI-2021-11-03,rft-bulk-esg,4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad,"[{'name': 'ContentType', 'value': 'ESG PIT SDI...","[4016-b825-3abcaf78-94d0-1bef0d6f5663, 4035-d7...",105,2021-11-03T09:21:48Z,READY,2021-11-03T09:21:48Z,2021-11-03T10:38:42Z
2,4276-c9a7-9813f3f2-bc16-7e633551dfe0,RFT-ESG-PIT-SDI-2021-11-09,rft-bulk-esg,4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad,"[{'name': 'ContentType', 'value': 'ESG PIT SDI...","[4020-f98a-734f4320-a9a7-f8ca2519e66c, 402f-e4...",105,2021-11-09T07:27:02Z,READY,2021-11-09T07:27:02Z,2021-11-09T11:02:28Z
3,45ac-5634-dd4b09c1-b72f-a420da790b16,RFT-ESG-PIT-SDI-2021-11-18,rft-bulk-esg,4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad,"[{'name': 'ContentType', 'value': 'ESG PIT SDI...","[4007-b245-b889e820-aa91-e794d3de05fb, 404c-e7...",105,2021-11-18T01:17:15Z,READY,2021-11-18T01:17:15Z,2021-11-18T05:43:02Z
4,4613-f597-9709e181-98ab-5323e09dfcbf,RFT-ESG-PIT-SDI-2021-11-22,rft-bulk-esg,4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad,"[{'name': 'ContentType', 'value': 'ESG PIT SDI...","[4008-2cf2-eb77b3ef-811b-71409b3595cf, 4043-44...",105,2021-11-29T01:12:49Z,READY,2021-11-29T01:12:49Z,2021-11-29T03:28:29Z
5,47eb-256a-f2f718fd-9148-9ea390b6b71d,RFT-ESG-PIT-SDI-2021-10-28,rft-bulk-esg,4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad,"[{'name': 'ContentType', 'value': 'ESG PIT SDI...","[4016-2a69-c8332278-b3ba-f14d3a3e4e66, 4016-b4...",100,2021-10-28T05:50:05Z,READY,2021-10-28T05:50:05Z,2021-10-28T07:37:24Z
6,4e39-72cf-b247b8b6-83f9-99b9cc32178f,RFT-ESG-PIT-SDI-2021-10-18,rft-bulk-esg,4173-aec7-8a0b0ac9-96f9-48e83ddbd2ad,"[{'name': 'ContentType', 'value': 'ESG PIT SDI...","[4006-2f51-619d1c7c-bdb3-519e12e1b4ab, 400f-33...",100,2021-10-18T06:21:17Z,READY,2021-10-18T06:21:17Z,2021-10-18T10:27:26Z


### Select ESG Fileset ID
We are going to select the FileSetID of the most recent Fileset - the one with the maximum "created" timestamp.

In [70]:
# Keep the row(s) whose "created" value is the maximum of the column.
# NOTE(review): "created" appears to hold ISO-8601 text (see the table above), so max() compares strings — confirm.
dfPITlast = dfPIT[dfPIT.created == dfPIT.created.max()]
# Take the id of the first matching row.
FILESET_ID = dfPITlast["id"].iloc[0]
print('FILESET_ID selected is: ' + FILESET_ID)

FILESET_ID selected is: 40df-b7c9-759513c8-997b-ff1c66266b66


### Request File IDs per selected Fileset ID

In [71]:
# Files URL up to and including "filesetId="; requestFileDetails appends the file set id.
FILES_ENDPOINT_START = RDP_BASE_URL+'/file-store'+RDP_ESG_BULK_VERSION + '/files?filesetId='
 
def requestFileDetails(token, fileSetId, attributes, withNext, skipToken):
    """Request one page of file details for a file set.

    token       bearer access token for the Authorization header
    fileSetId   id of the file set whose files are listed
    attributes  extra query-string text (e.g. '&pageSize=100') appended to
                the first-page URL; ignored when withNext is true, because
                the next link already carries the query of the original
                request and appending again would duplicate parameters
    withNext    when true, fetch the page that skipToken points to
    skipToken   despite the name, the relative '@nextLink' value of the
                previous page; it is appended to RDP_BASE_URL

    A 401 response triggers one retry with a token from getToken().

    Returns the parsed JSON response on HTTP 200, otherwise ''.
    """
    print("Obtaining File details for FileSet= "+ fileSetId + " ...")
    print("(If result is Response=400, make sure that fileSetId is set with a valid value...)")

    if withNext:
        # The next link is a complete relative URL, query string included.
        FILES_ENDPOINT = RDP_BASE_URL + skipToken
    else:
        FILES_ENDPOINT = FILES_ENDPOINT_START + fileSetId
        if attributes:
            FILES_ENDPOINT = FILES_ENDPOINT + attributes

    headers = {
            'Content-Type': "application/json",
            'Authorization': "Bearer " + token,
            'cache-control': "no-cache"
    }

    response = requests.request("GET", FILES_ENDPOINT, headers=headers)

    if response.status_code == 401:   # error when token expired
        accessToken = getToken()      # token refresh on token expired
        headers['Authorization'] = "Bearer " + accessToken
        response = requests.request("GET", FILES_ENDPOINT, headers=headers)

    print('Raw response=')
    print(response)

    if response.status_code == 200:
        return json.loads(response.text)
    return ''

# First page of file details for the selected file set.
jsonFullResp = requestFileDetails(accessToken, FILESET_ID, '&pageSize=100', False, '')

print('Parsed json response=')
print(json.dumps(jsonFullResp, indent=2))
dfPart1 = pd.json_normalize(jsonFullResp['value'])
dfPart1

# '@nextLink' holds the relative URL of the next page of results.
skipToken = jsonFullResp['@nextLink']
skipToken

# Second page, requested through the next link.
jsonFullRespRemainder = requestFileDetails(accessToken, FILESET_ID, '&pageSize=100', True, skipToken)

print('Parsed json response=')
print(json.dumps(jsonFullRespRemainder, indent=2))
dfPart2 = pd.json_normalize(jsonFullRespRemainder['value'])
dfPart2

# Put the two results together. DataFrame.append was removed in pandas 2.0;
# pd.concat yields the same frame (each part keeps its own row labels).
dfAll = pd.concat([dfPart1, dfPart2])
dfAll

Obtaining File details for FileSet= 40df-b7c9-759513c8-997b-ff1c66266b66 ...
(If result is Response=400, make sure that fileSetId is set with a valid value...)
Raw response=
<Response [200]>
Parsed json response=
{
  "value": [
    {
      "id": "403e-8df7-fdbbbb41-b4c7-441a15d81ea1",
      "filename": "EsgPITAnalyticValue.2003.F.2021-11-29-0751.zip",
      "filesetId": "40df-b7c9-759513c8-997b-ff1c66266b66",
      "storageLocation": {
        "url": "https://a206464-prod-esg.s3.amazonaws.com/ESGPIT/2021/11/29/EsgPITAnalyticValue.2003.F.2021-11-29-0751.zip",
        "@type": "s3"
      },
      "created": "2021-11-30T02:19:58Z",
      "modified": "2021-11-30T02:19:58Z",
      "href": "https://api.refinitiv.com/file-store/v1/files/403e-8df7-fdbbbb41-b4c7-441a15d81ea1/stream",
      "fileSizeInBytes": 1872447
    },
    {
      "id": "4048-8b15-178e99b0-9843-fbd388db4360",
      "filename": "EsgPITValueScores.2018.F.2021-11-29-0805.zip",
      "filesetId": "40df-b7c9-759513c8-997b-ff1c66

Unnamed: 0,id,filename,filesetId,created,modified,href,fileSizeInBytes,storageLocation.url,storageLocation.@type
0,403e-8df7-fdbbbb41-b4c7-441a15d81ea1,EsgPITAnalyticValue.2003.F.2021-11-29-0751.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:19:58Z,2021-11-30T02:19:58Z,https://api.refinitiv.com/file-store/v1/files/...,1872447,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
1,4048-8b15-178e99b0-9843-fbd388db4360,EsgPITValueScores.2018.F.2021-11-29-0805.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:25:31Z,2021-11-30T02:25:31Z,https://api.refinitiv.com/file-store/v1/files/...,33173187,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
2,4097-e6cb-a8f7d194-996e-f3c788fb0f93,EsgPITAnalyticValue.2007.F.2021-11-29-0752.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:25:09Z,2021-11-30T02:25:09Z,https://api.refinitiv.com/file-store/v1/files/...,4953633,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
3,40f5-fe1c-b8109f48-a197-daa885b2b101,EsgPITScores.2014.F.2021-11-29-0758.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:40:39Z,2021-11-30T02:40:39Z,https://api.refinitiv.com/file-store/v1/files/...,23958787,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
4,412d-9b3b-0596cf5b-b583-ff6821dd30e2,EsgPITScores.2017.F.2021-11-29-0759.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:43:31Z,2021-11-30T02:43:31Z,https://api.refinitiv.com/file-store/v1/files/...,66833428,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
...,...,...,...,...,...,...,...,...,...
95,4f2f-1fa8-b492fe12-9a9b-863d54fda9b5,EsgPITValues.2009.F.2021-11-29-0801.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:34:48Z,2021-11-30T02:34:48Z,https://api.refinitiv.com/file-store/v1/files/...,7026435,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
96,4f37-0327-e8003fbd-97bf-49b15b70e320,EsgPITAnalyticValueScore.2010.F.2021-11-29-075...,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:44:23Z,2021-11-30T02:44:23Z,https://api.refinitiv.com/file-store/v1/files/...,8947991,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
97,4f3f-910c-460e8c78-b0f9-6ac2744b80a8,EsgPITValueScores.2019.F.2021-11-29-0805.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:27:28Z,2021-11-30T02:27:28Z,https://api.refinitiv.com/file-store/v1/files/...,45257553,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
98,4f4d-421c-3d3cdc28-af1d-f3b6adfcee78,EsgPITScores.2008.F.2021-11-29-0757.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:34:40Z,2021-11-30T02:34:40Z,https://api.refinitiv.com/file-store/v1/files/...,8833520,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3


'/file-store/v1/files?pageSize=100&filesetId=40df-b7c9-759513c8-997b-ff1c66266b66&skipToken=U2Vla0lkPTRmODQtODkwZi00ZmIyNjQ4MS1hOThlLWNiYTFjNzI4OGFjMQ'

Obtaining File details for FileSet= 40df-b7c9-759513c8-997b-ff1c66266b66 ...
(If result is Response=400, make sure that fileSetId is set with a valid value...)
Raw response=
<Response [200]>
Parsed json response=
{
  "value": [
    {
      "id": "4f84-890f-4fb26481-a98e-cba1c7288ac1",
      "filename": "EsgPITValues.2005.F.2021-11-29-0801.zip",
      "filesetId": "40df-b7c9-759513c8-997b-ff1c66266b66",
      "storageLocation": {
        "url": "https://a206464-prod-esg.s3.amazonaws.com/ESGPIT/2021/11/29/EsgPITValues.2005.F.2021-11-29-0801.zip",
        "@type": "s3"
      },
      "created": "2021-11-30T02:32:09Z",
      "modified": "2021-11-30T02:32:09Z",
      "href": "https://api.refinitiv.com/file-store/v1/files/4f84-890f-4fb26481-a98e-cba1c7288ac1/stream",
      "fileSizeInBytes": 6406226
    },
    {
      "id": "4f8d-75f4-71217742-8d27-12b2ddfce262",
      "filename": "EsgPITAnalyticValueScore.2013.F.2021-11-29-0754.zip",
      "filesetId": "40df-b7c9-759513c8-997b-ff1c66266b66"

Unnamed: 0,id,filename,filesetId,created,modified,href,fileSizeInBytes,storageLocation.url,storageLocation.@type
0,4f84-890f-4fb26481-a98e-cba1c7288ac1,EsgPITValues.2005.F.2021-11-29-0801.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:32:09Z,2021-11-30T02:32:09Z,https://api.refinitiv.com/file-store/v1/files/...,6406226,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
1,4f8d-75f4-71217742-8d27-12b2ddfce262,EsgPITAnalyticValueScore.2013.F.2021-11-29-075...,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:48:11Z,2021-11-30T02:48:11Z,https://api.refinitiv.com/file-store/v1/files/...,10908118,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
2,4fd6-cf9e-0a017ad5-b0d9-bfaa8ca084d7,EsgPITValueScores.2004.F.2021-11-29-0803.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:50:41Z,2021-11-30T02:50:41Z,https://api.refinitiv.com/file-store/v1/files/...,2348092,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
3,4ff1-987b-c84b64ba-8ccb-8ee4713bb64a,EsgPITScores.2021.F.2021-11-29-0801.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:46:36Z,2021-11-30T02:46:36Z,https://api.refinitiv.com/file-store/v1/files/...,2080868,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
4,4ffb-dc46-69e60ffe-924f-3a396286bc55,EsgPITValueScores.2010.F.2021-11-29-0803.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:19:16Z,2021-11-30T02:19:16Z,https://api.refinitiv.com/file-store/v1/files/...,6336145,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3


Unnamed: 0,id,filename,filesetId,created,modified,href,fileSizeInBytes,storageLocation.url,storageLocation.@type
0,403e-8df7-fdbbbb41-b4c7-441a15d81ea1,EsgPITAnalyticValue.2003.F.2021-11-29-0751.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:19:58Z,2021-11-30T02:19:58Z,https://api.refinitiv.com/file-store/v1/files/...,1872447,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
1,4048-8b15-178e99b0-9843-fbd388db4360,EsgPITValueScores.2018.F.2021-11-29-0805.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:25:31Z,2021-11-30T02:25:31Z,https://api.refinitiv.com/file-store/v1/files/...,33173187,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
2,4097-e6cb-a8f7d194-996e-f3c788fb0f93,EsgPITAnalyticValue.2007.F.2021-11-29-0752.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:25:09Z,2021-11-30T02:25:09Z,https://api.refinitiv.com/file-store/v1/files/...,4953633,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
3,40f5-fe1c-b8109f48-a197-daa885b2b101,EsgPITScores.2014.F.2021-11-29-0758.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:40:39Z,2021-11-30T02:40:39Z,https://api.refinitiv.com/file-store/v1/files/...,23958787,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
4,412d-9b3b-0596cf5b-b583-ff6821dd30e2,EsgPITScores.2017.F.2021-11-29-0759.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:43:31Z,2021-11-30T02:43:31Z,https://api.refinitiv.com/file-store/v1/files/...,66833428,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
...,...,...,...,...,...,...,...,...,...
0,4f84-890f-4fb26481-a98e-cba1c7288ac1,EsgPITValues.2005.F.2021-11-29-0801.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:32:09Z,2021-11-30T02:32:09Z,https://api.refinitiv.com/file-store/v1/files/...,6406226,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
1,4f8d-75f4-71217742-8d27-12b2ddfce262,EsgPITAnalyticValueScore.2013.F.2021-11-29-075...,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:48:11Z,2021-11-30T02:48:11Z,https://api.refinitiv.com/file-store/v1/files/...,10908118,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
2,4fd6-cf9e-0a017ad5-b0d9-bfaa8ca084d7,EsgPITValueScores.2004.F.2021-11-29-0803.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:50:41Z,2021-11-30T02:50:41Z,https://api.refinitiv.com/file-store/v1/files/...,2348092,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
3,4ff1-987b-c84b64ba-8ccb-8ee4713bb64a,EsgPITScores.2021.F.2021-11-29-0801.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:46:36Z,2021-11-30T02:46:36Z,https://api.refinitiv.com/file-store/v1/files/...,2080868,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3


### Identify All the Latest Initialization files
Select .F files

In [81]:
# Full (initialization) files carry ".F." in their name. The pattern is a
# regular expression, so it is written as a raw string to keep "\." a regex
# escape instead of an invalid string escape.
dfFull = dfAll.loc[dfAll['filename'].str.contains(r'\.F\.')]
dfFull

Unnamed: 0,id,filename,filesetId,created,modified,href,fileSizeInBytes,storageLocation.url,storageLocation.@type
0,403e-8df7-fdbbbb41-b4c7-441a15d81ea1,EsgPITAnalyticValue.2003.F.2021-11-29-0751.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:19:58Z,2021-11-30T02:19:58Z,https://api.refinitiv.com/file-store/v1/files/...,1872447,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
1,4048-8b15-178e99b0-9843-fbd388db4360,EsgPITValueScores.2018.F.2021-11-29-0805.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:25:31Z,2021-11-30T02:25:31Z,https://api.refinitiv.com/file-store/v1/files/...,33173187,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
2,4097-e6cb-a8f7d194-996e-f3c788fb0f93,EsgPITAnalyticValue.2007.F.2021-11-29-0752.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:25:09Z,2021-11-30T02:25:09Z,https://api.refinitiv.com/file-store/v1/files/...,4953633,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
3,40f5-fe1c-b8109f48-a197-daa885b2b101,EsgPITScores.2014.F.2021-11-29-0758.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:40:39Z,2021-11-30T02:40:39Z,https://api.refinitiv.com/file-store/v1/files/...,23958787,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
4,412d-9b3b-0596cf5b-b583-ff6821dd30e2,EsgPITScores.2017.F.2021-11-29-0759.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:43:31Z,2021-11-30T02:43:31Z,https://api.refinitiv.com/file-store/v1/files/...,66833428,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
...,...,...,...,...,...,...,...,...,...
0,4f84-890f-4fb26481-a98e-cba1c7288ac1,EsgPITValues.2005.F.2021-11-29-0801.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:32:09Z,2021-11-30T02:32:09Z,https://api.refinitiv.com/file-store/v1/files/...,6406226,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
1,4f8d-75f4-71217742-8d27-12b2ddfce262,EsgPITAnalyticValueScore.2013.F.2021-11-29-075...,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:48:11Z,2021-11-30T02:48:11Z,https://api.refinitiv.com/file-store/v1/files/...,10908118,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
2,4fd6-cf9e-0a017ad5-b0d9-bfaa8ca084d7,EsgPITValueScores.2004.F.2021-11-29-0803.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:50:41Z,2021-11-30T02:50:41Z,https://api.refinitiv.com/file-store/v1/files/...,2348092,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
3,4ff1-987b-c84b64ba-8ccb-8ee4713bb64a,EsgPITScores.2021.F.2021-11-29-0801.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:46:36Z,2021-11-30T02:46:36Z,https://api.refinitiv.com/file-store/v1/files/...,2080868,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3


### Identify The Latest Delta Files
Select .I files

In [82]:
# Delta files carry ".I." in their name. The pattern is a regular
# expression, so it is written as a raw string to keep "\." a regex escape
# instead of an invalid string escape.
dfDelta = dfAll.loc[dfAll['filename'].str.contains(r'\.I\.')]
dfDelta

Unnamed: 0,id,filename,filesetId,created,modified,href,fileSizeInBytes,storageLocation.url,storageLocation.@type
34,4650-7a69-d5786b37-9595-e5a7374819aa,EsgPITValueScores.I.2021-11-29-0636.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:29:30Z,2021-11-30T02:29:30Z,https://api.refinitiv.com/file-store/v1/files/...,11421432,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
35,468d-836b-387c8c3a-8f8b-bbe61766009c,EsgPITValues.I.2021-11-29-0636.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:43:42Z,2021-11-30T02:43:42Z,https://api.refinitiv.com/file-store/v1/files/...,525560,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
49,4903-5060-0615b2ab-8e51-4c93a2f62f88,EsgPITAnalyticValueScore.I.2021-11-29-0636.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:28:58Z,2021-11-30T02:28:58Z,https://api.refinitiv.com/file-store/v1/files/...,21761923,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
69,4b9b-dbde-a277a2d9-b068-49404a00b0db,EsgPITScores.I.2021-11-29-0636.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:47:50Z,2021-11-30T02:47:50Z,https://api.refinitiv.com/file-store/v1/files/...,6990154,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3
78,4d1b-94d1-55d1cf6d-9ffd-b1b1a39cb66e,EsgPITAnalyticValue.I.2021-11-29-0636.zip,40df-b7c9-759513c8-997b-ff1c66266b66,2021-11-30T02:38:07Z,2021-11-30T02:38:07Z,https://api.refinitiv.com/file-store/v1/files/...,201698,https://a206464-prod-esg.s3.amazonaws.com/ESGP...,s3


###  Download File via File Id with Redirect

In [83]:
import shutil

# Files URL prefix; requestFileDownload appends "<fileId>/stream" to it.
FILES_STREAM_ENDPOINT_START = RDP_BASE_URL+'/file-store'+RDP_ESG_BULK_VERSION + '/files/'

# use valid values, obtained from the previous step
# (these two are referenced only by the commented-out example call further down)
exampleFileId = '4edd-99af-da829f42-8ddd-07fabfcddca9'  
exampleFileName = 'RFT-ESG-Sources-Full-Init-2021-01-17-part07.jsonl.gz'

def requestFileDownload(token, fileId, fileName):
    """Download one file through the file-store stream endpoint.

    token     bearer access token for the Authorization header
    fileId    id of the file to download
    fileName  path the response body is written to (binary mode)

    Redirects are followed. A 401 response triggers one retry with a token
    from getToken(). The body is written only when the final status is 200;
    the status code is printed in every case.

    Returns None.
    """
    FILES_STREAM_ENDPOINT = FILES_STREAM_ENDPOINT_START + fileId+ '/stream'
    print("Obtaining File ... " + FILES_STREAM_ENDPOINT)

    headers = {
            'Authorization': 'Bearer ' + token,
            'cache-control': "no-cache",
            'Accept': '*/*'
    }

    response = requests.request("GET", FILES_STREAM_ENDPOINT, headers=headers, stream=True, allow_redirects=True)
    try:
        if response.status_code == 401:   # error when token expired
            # Release the rejected streamed response before retrying.
            response.close()
            accessToken = getToken()      # token refresh on token expired
            headers['Authorization'] = "Bearer " + accessToken
            response = requests.request("GET", FILES_STREAM_ENDPOINT, headers=headers, stream=True, allow_redirects=True)

        print('Response code=' + str(response.status_code))

        if response.status_code == 200:
            print('Processing...')
            # Copy the raw stream to disk without loading it into memory.
            with open(fileName, 'wb') as fd:
                shutil.copyfileobj(response.raw, fd)
            print('Look for gzipped file named: '+ fileName + ' in current directory')
    finally:
        # A streamed response holds its connection until closed; release it
        # on every path, including non-200 results and write errors.
        response.close()

    return

# consider below an example only
#requestFileDownload(accessToken, exampleFileId, exampleFileName);
#requestFileDownload(accessToken, FILE_ID, FILE_NAME);

### Iterate over the Latest PIT Delta Files and Request Download

In [88]:
# Download each delta file listed in dfDelta into the PITfiles folder.
for index, row in dfDelta.iterrows():
    fileId, deltaName = row["id"], row["filename"]
    print (index, fileId, deltaName)
    targetPath = '.\\PITfiles\\' + deltaName
    requestFileDownload(accessToken, fileId, targetPath)

34 4650-7a69-d5786b37-9595-e5a7374819aa EsgPITValueScores.I.2021-11-29-0636.zip
Obtaining File ... https://api.refinitiv.com/file-store/v1/files/4650-7a69-d5786b37-9595-e5a7374819aa/stream
Reading the token from: token.txt
Response code=200
Processing...
Look for gzipped file named: .\PITfiles\EsgPITValueScores.I.2021-11-29-0636.zip in current directory
35 468d-836b-387c8c3a-8f8b-bbe61766009c EsgPITValues.I.2021-11-29-0636.zip
Obtaining File ... https://api.refinitiv.com/file-store/v1/files/468d-836b-387c8c3a-8f8b-bbe61766009c/stream
Reading the token from: token.txt
Response code=200
Processing...
Look for gzipped file named: .\PITfiles\EsgPITValues.I.2021-11-29-0636.zip in current directory
49 4903-5060-0615b2ab-8e51-4c93a2f62f88 EsgPITAnalyticValueScore.I.2021-11-29-0636.zip
Obtaining File ... https://api.refinitiv.com/file-store/v1/files/4903-5060-0615b2ab-8e51-4c93a2f62f88/stream
Reading the token from: token.txt
Response code=200
Processing...
Look for gzipped file named: .\PITfi

### Iterate over the Latest PIT Full Files and Request Download

In [89]:
# Download each full file listed in dfFull into the PITfiles folder.
for index, row in dfFull.iterrows():
    fileId, fullName = row["id"], row["filename"]
    print (index, fileId, fullName)
    targetPath = '.\\PITfiles\\' + fullName
    requestFileDownload(accessToken, fileId, targetPath)

0 403e-8df7-fdbbbb41-b4c7-441a15d81ea1 EsgPITAnalyticValue.2003.F.2021-11-29-0751.zip
Obtaining File ... https://api.refinitiv.com/file-store/v1/files/403e-8df7-fdbbbb41-b4c7-441a15d81ea1/stream
Reading the token from: token.txt
Response code=200
Processing...
Look for gzipped file named: .\PITfiles\EsgPITAnalyticValue.2003.F.2021-11-29-0751.zip in current directory
1 4048-8b15-178e99b0-9843-fbd388db4360 EsgPITValueScores.2018.F.2021-11-29-0805.zip
Obtaining File ... https://api.refinitiv.com/file-store/v1/files/4048-8b15-178e99b0-9843-fbd388db4360/stream
Reading the token from: token.txt
Response code=200
Processing...
Look for gzipped file named: .\PITfiles\EsgPITValueScores.2018.F.2021-11-29-0805.zip in current directory
2 4097-e6cb-a8f7d194-996e-f3c788fb0f93 EsgPITAnalyticValue.2007.F.2021-11-29-0752.zip
Obtaining File ... https://api.refinitiv.com/file-store/v1/files/4097-e6cb-a8f7d194-996e-f3c788fb0f93/stream
Reading the token from: token.txt
Response code=200
Processing...
Look 

### Get File Location (Step 1 of 2)

In [None]:
import shutil

# Files URL prefix; requestFileLocation appends "<fileId>/stream?doNotRedirect=true".
FILES_STREAM_ENDPOINT_START = RDP_BASE_URL+'/file-store'+RDP_ESG_BULK_VERSION + '/files/'
# Receives the value returned by requestFileLocation() further down.
DIRECT_URL = ''
 
def requestFileLocation(token, fileId):
    """Ask the file-store for the direct location of a file.

    The stream endpoint is called with doNotRedirect=true and redirects are
    not followed, so the response is JSON rather than the file content.

    token   bearer access token for the Authorization header
    fileId  id of the file to locate

    A 401 response triggers one retry with a token from getToken().

    Returns the 'url' value of the JSON response on HTTP 200; otherwise the
    string 'Error response: ' followed by the response body.
    """
    FILES_STREAM_ENDPOINT = FILES_STREAM_ENDPOINT_START + fileId+ '/stream?doNotRedirect=true'
    print("Obtaining File ... " + FILES_STREAM_ENDPOINT)

    headers = {
            'Authorization': 'Bearer ' + token,
            'cache-control': "no-cache",
            'Accept': '*/*'
    }

    response = requests.request("GET", FILES_STREAM_ENDPOINT, headers=headers, stream=False, allow_redirects=False)

    if response.status_code == 401:   # error when token expired
        accessToken = getToken()      # token refresh on token expired
        headers['Authorization'] = "Bearer " + accessToken
        response = requests.request("GET",FILES_STREAM_ENDPOINT, headers=headers, stream=False, allow_redirects=False)

    print('Response code=' + str(response.status_code))

    if response.status_code != 200:
        return 'Error response: '+ response.text

    jsonFullResp = json.loads(response.text)
    print('Parsed json response=')
    print(json.dumps(jsonFullResp, indent=2))
    # The URL is returned to the caller; no global is written here.
    directUrl = jsonFullResp['url']
    print('File Direct URL is: '  +str(directUrl)+ '|||')
    return directUrl


# FILE_ID must already hold the id of the file to locate; it is not assigned in the code shown here.
# On failure requestFileLocation returns an 'Error response: ...' string rather than a URL.
DIRECT_URL = requestFileLocation(accessToken, FILE_ID);

### Download File From File Location (Step 2 of 2)

In [None]:
from urllib.parse import urlparse, parse_qs
def requestDirectFileDownload(token, directUrl, fileName):
    """Download a file from a direct URL into 'another_' + fileName.

    token      not used for the request itself; kept for interface
               compatibility
    directUrl  complete URL of the file, query string included
    fileName   base name of the output file; 'another_' is prepended

    The URL is split into its query-free part and its query parameters,
    which are handed to requests separately. The first request carries no
    Authorization header. On a 401 response the request is repeated once
    with a bearer token from getToken(). The body is written only when the
    final status is 200; the status code is printed in every case.

    Returns None.
    """
    print("Obtaining File from URL... " + directUrl)

    # Parse out URL parameters for submission into requests. Blank values
    # are kept so that the query is passed on unchanged.
    url_obj = urlparse(directUrl)
    parsed_params = parse_qs(url_obj.query, keep_blank_values=True)
    # extract the URL without query parameters
    parsed_url = url_obj._replace(query='').geturl()

    response = requests.get(parsed_url, params=parsed_params, stream=True)
    try:
        if response.status_code == 401:   # error when token expired
            # Release the rejected streamed response before retrying.
            response.close()
            headers = {'Authorization': "Bearer " + getToken()}
            response = requests.get(parsed_url, params=parsed_params, headers=headers, stream=True)

        print('Response code=' + str(response.status_code))

        filename = 'another_'+fileName

        if response.status_code == 200:
            print('Processing...')
            # Copy the raw stream to disk without loading it into memory.
            with open(filename, 'wb') as fd:
                shutil.copyfileobj(response.raw, fd)

            print('Look for gzipped file named: '+ filename + ' in current directory')
    finally:
        # A streamed response holds its connection until closed.
        response.close()

    return


# FILE_NAME must already hold the output file name; it is not assigned in the code shown here.
requestDirectFileDownload(accessToken, DIRECT_URL, FILE_NAME);