---
## Assessment of greenhouse gas emissions in the portfolio
---

In [1]:
# load imports
import json
import math

import numpy as np
import pandas as pd
# graphing modules
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# set dataframe options
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [2]:
# execute the helper functions defined for accessing RDP REST API calls 
%run RDPDefines.ipynb

### Load the previously downloaded dataset for climate

In [3]:
# dtypes enforced on the cached climate dataset right after it is loaded
climateDtypes = {
    # identifiers
    'OrganizationId': np.int64,
    'FinancialPeriodFiscalYear': int,
    # emission totals per scope
    'Scope1EstimatedTotal': float,
    'Scope2EstimatedTotal': float,
    'Scope3EstimationUpstreamTotal': float,
    'Scope3EstimationDownstreamTotal': float,
    # estimation method labels are kept as generic Python objects
    'Scope1EstimatedMethod': object,
    'Scope2EstimatedMethod': object,
    'Scope3EstimationUpstreamMethod': object,
    'Scope3EstimationDownstreamMethod': object,
    # financials
    'SalesinUSD': float,
    'ExchangeRatetoUSD': float,
    'EnterpriseValueincludingCashandShortTermInvestmentsinUSD': float,
}
# NOTE: only unpickle files you created yourself (see the one-time download step
# at the end of this notebook, which writes 'ClimateData.pkl')
allClimate = pd.read_pickle('ClimateData.pkl').astype(climateDtypes)
allClimate

Unnamed: 0,Scope1EstimatedTotal,Scope1EstimatedMethod,ExchangeRatetoUSD,SalesinUSD,Scope2EstimatedMethod,Scope2EstimatedTotal,OrganizationId,FinancialPeriodFiscalYear,Scope3EstimationDownstreamMethod,Scope3EstimationUpstreamMethod,Scope3EstimationUpstreamTotal,EnterpriseValueincludingCashandShortTermInvestmentsinUSD,Scope3EstimationDownstreamTotal
0,2733.630,Aggregated_model,1.232,279759393.900,Aggregated_model,2175.873,4295859418,2018,,,,,
1,,,0.011,30434609200.000,,,4295880726,2010,,,,,
2,,,1.000,367090000.000,,,4295901070,2004,,,,,
3,460.993,Aggregated_model,1.000,48416000.000,Aggregated_model,1349.206,5043466967,2021,Aggregated_model,Aggregated_model,2663.730,1198832044.860,1727.140
4,39307.000,Reported_value,0.000,9719520000.000,Reported_value,298000.000,4295874502,2020,Aggregated_model,Aggregated_model,230977.030,29324781833.731,170081.970
...,...,...,...,...,...,...,...,...,...,...,...,...,...
114846,,,1.038,1128240000.000,,,5000757240,2012,,,,,
114847,26.120,Reported_value,0.158,302201000.000,Reported_value,1282.510,4295865252,2022,Aggregated_model,Aggregated_model,24011.800,412585820.646,18545.790
114848,,,1.000,0.000,,,4295903677,2021,,,,486731732.500,
114849,732.260,Aggregated_model,1.000,119686000.000,Aggregated_model,1848.219,4295907267,2018,Aggregated_model,Aggregated_model,358.730,984657359.440,1386.730


### Load the portfolio to be analyzed
Portfolio is a CSV file with the constituent *RIC* and corresponding *Weight* in percent

*Weight* must add up to 100%

|RIC|WEIGHT %|
|--|--|
|AAPL.O|2.5|
|MSFT.O|1.2|
|..|..|

In [6]:
# read the portfolio constituents to be analyzed from the CSV file
portfolioFile = 'Portfolio.csv'
portfolio = pd.read_csv(portfolioFile)
portfolio

Unnamed: 0,RIC,Weight
0,SMFT3.SA,20
1,MGLU3.SA,10
2,KBC.BR,10
3,ICT.PS,10
4,000880.KS,10
5,TRELb.ST,10
6,3549.T,10
7,028300.KQ,10
8,6841.T,5
9,H.TO,5


### Convert Symbology

Lookup *Organization PermID* and *Reporting currency* for all instruments

RDP Symbology API: `/data/symbology/beta1/convert`

In [7]:
bucketSize = 90  # number of instruments sent in a single symbology request
RIClist = portfolio['RIC'].tolist()
allResponses = []

# cut the RIC list into consecutive slices of at most bucketSize instruments
buckets = []
for start in range(0, len(RIClist), bucketSize):
    buckets.append(RIClist[start: start + bucketSize])

# convert one slice at a time and collect the per-instrument entries of every response
for bucket in buckets:
    reqData = dict(universe=bucket, to=["OrganizationId"])
    hResp = postRequest('/data/symbology/beta1/convert', reqData)
    print(hResp)
    allResponses.extend(hResp['universe'])

Getting access token...
...token received
{'links': {'count': 10}, 'variability': '', 'universe': [{'Instrument': 'SMFT3.SA', 'Company Common Name': 'Smartfit Escola de Ginastica e Danca SA', 'Organization PermID': '5046047670', 'Reporting Currency': 'BRL'}, {'Instrument': 'MGLU3.SA', 'Company Common Name': 'Magazine Luiza SA', 'Organization PermID': '4296141085', 'Reporting Currency': 'BRL'}, {'Instrument': 'KBC.BR', 'Company Common Name': 'Kbc Groep NV', 'Organization PermID': '8589934262', 'Reporting Currency': 'EUR'}, {'Instrument': 'ICT.PS', 'Company Common Name': 'International Container Terminal Services Inc', 'Organization PermID': '4295886348', 'Reporting Currency': 'USD'}, {'Instrument': '000880.KS', 'Company Common Name': 'Hanwha Corp', 'Organization PermID': '4295881078', 'Reporting Currency': 'KRW'}, {'Instrument': 'TRELb.ST', 'Company Common Name': 'Trelleborg AB', 'Organization PermID': '4295890333', 'Reporting Currency': 'SEK'}, {'Instrument': '3549.T', 'Company Common 

In [8]:
# merge Symbology and Currency data into the portfolio
orgsData = pd.DataFrame(allResponses)

# Columns contributed by the symbology lookup. They are dropped from the portfolio
# first so that re-running this cell does not produce duplicated *_x / *_y columns
# (which would make the 'Organization PermID' lookup below fail).
lookupColumns = [c for c in orgsData.columns if c not in ('Instrument', 'RIC', 'Weight')]
requestedRICs = set(portfolio['RIC'])

portfolio = (
    portfolio
    .drop(columns=lookupColumns, errors='ignore')
    .merge(orgsData, how='inner', left_on='RIC', right_on='Instrument')
    .drop(['Instrument'], axis=1)
)

# the inner join removes instruments without a symbology match; report them because
# the remaining weights then no longer add up to the original total
unresolvedRICs = sorted(requestedRICs - set(portfolio['RIC']))
if unresolvedRICs:
    print(f'WARNING: {len(unresolvedRICs)} instrument(s) had no symbology match and were dropped: {unresolvedRICs}')

# PermIDs arrive as strings; convert them to the integer type used by the climate data
portfolio['Organization PermID'] = portfolio['Organization PermID'].astype(np.int64)
portfolio

Unnamed: 0,RIC,Weight,Company Common Name,Organization PermID,Reporting Currency
0,SMFT3.SA,20,Smartfit Escola de Ginastica e Danca SA,5046047670,BRL
1,MGLU3.SA,10,Magazine Luiza SA,4296141085,BRL
2,KBC.BR,10,Kbc Groep NV,8589934262,EUR
3,ICT.PS,10,International Container Terminal Services Inc,4295886348,USD
4,000880.KS,10,Hanwha Corp,4295881078,KRW
5,TRELb.ST,10,Trelleborg AB,4295890333,SEK
6,3549.T,10,Kusuri No Aoki Holdings Co Ltd,5051613459,JPY
7,028300.KQ,10,HLB Inc,4295881801,KRW
8,6841.T,5,Yokogawa Electric Corp,4295880372,JPY
9,H.TO,5,Hydro One Ltd,5046709166,CAD


---
# Temp Code

In [4]:
# TEMP: replaces the portfolio built in the cells above with a previously saved one
cachedPortfolioFile = 'PORT.pkl'
portfolio = pd.read_pickle(cachedPortfolioFile)
portfolio

Unnamed: 0,RIC,Weight,Company Common Name,Organization PermID,Reporting Currency
0,SMFT3.SA,0.001,Smartfit Escola de Ginastica e Danca SA,5046047670,BRL
1,MGLU3.SA,0.003,Magazine Luiza SA,4296141085,BRL
2,KBC.BR,0.028,Kbc Groep NV,8589934262,EUR
3,ICT.PS,0.006,International Container Terminal Services Inc,4295886348,USD
4,000880.KS,0.002,Hanwha Corp,4295881078,KRW
...,...,...,...,...,...
4158,WEGE3.SA,0.017,WEG SA,4295859789,BRL
4159,8954.T,0.005,Orix JREIT Inc,4295878495,JPY
4160,SO.N,0.115,Southern Co,4295903239,USD
4161,YUM.N,0.057,Yum! Brands Inc,4295905127,USD


In [5]:
# TEMP: blank out the emission totals of one offending organization
# (temporary measure so the output can be compared with excel)
offendingRows = allClimate['OrganizationId'] == 4295856665
emissionTotalColumns = [
    'Scope1EstimatedTotal',
    'Scope2EstimatedTotal',
    'Scope3EstimationUpstreamTotal',
    'Scope3EstimationDownstreamTotal',
]
# one assignment covers all four columns of the matching rows
allClimate.loc[offendingRows, emissionTotalColumns] = 0

# /Temp Code

In [38]:
# --- calculation parameters ---------------------------------------------
# amount invested in the portfolio, used to scale financed emissions
PortfolioAmountInvested = 1_000_000
# fiscal years included in the analysis
years = list(range(2019, 2023))
# emission total columns that are analysed, one per scope
measures = [
    'Scope1EstimatedTotal',
    'Scope2EstimatedTotal',
    'Scope3EstimationUpstreamTotal',
    'Scope3EstimationDownstreamTotal',
]

# --- presentation -------------------------------------------------------
# colour palette shared by all charts in the notebook
lsegColors = [
    '#FF5000',
    '#FFC800',
    '#00D0D4',
    '#9064CD',
    '#00C389',
    '#FFFF00',
]

### Calculate: Data coverage % of this portfolio 
---

In [7]:
# columns whose coverage is reported for each year of this portfolio:
# every scope total with its estimation method, plus the two financial inputs
cMmeasures = [
    'Scope1EstimatedTotal',
    'Scope1EstimatedMethod',
    'Scope2EstimatedTotal',
    'Scope2EstimatedMethod',
    'Scope3EstimationUpstreamTotal',
    'Scope3EstimationUpstreamMethod',
    'Scope3EstimationDownstreamTotal',
    'Scope3EstimationDownstreamMethod',
    'SalesinUSD',
    'EnterpriseValueincludingCashandShortTermInvestmentsinUSD',
]

def computeCoverageHoldings(year):
    """Return, for each column in cMmeasures, the % of holdings with a value in `year`.

    The portfolio is left-joined with the climate rows of the given fiscal year,
    so holdings without climate data stay in the denominator. A value of 0 counts
    as populated here; only missing values are treated as uncovered.
    """
    climateForYear = allClimate[allClimate['FinancialPeriodFiscalYear'] == year]
    yearPortfolio = portfolio.merge(climateForYear, how='left', left_on='Organization PermID', right_on='OrganizationId')
    holdingCount = len(yearPortfolio.index)

    coverage = []
    for measure in cMmeasures:
        populated = holdingCount - yearPortfolio[measure].isna().sum()
        coverage.append(populated / holdingCount * 100)
    return coverage


def computeCoverageWeight(year):
    """Return, for each column in cMmeasures, the summed Weight of covered holdings in `year`.

    A holding counts as covered when the column is neither missing nor equal to 0.
    The result is a percentage only if the portfolio weights add up to 100.
    """
    climateForYear = allClimate[allClimate['FinancialPeriodFiscalYear'] == year]
    yearPortfolio = portfolio.merge(climateForYear, how='left', left_on='Organization PermID', right_on='OrganizationId')

    coveredWeights = []
    for measure in cMmeasures:
        values = yearPortfolio[measure]
        usable = values.notna() & (values != 0)
        coveredWeights.append(yearPortfolio.loc[usable, 'Weight'].sum())
    return coveredWeights


In [8]:
# collect both coverage views for every year: by number of holdings and by weight
hoCoData, wtCoData = [], []
for year in years:
    print(f'Getting coverage data for {year}')
    holdingsCoverage = computeCoverageHoldings(year)
    weightCoverage = computeCoverageWeight(year)
    hoCoData.append(holdingsCoverage)
    wtCoData.append(weightCoverage)


Getting coverage data for 2019
Getting coverage data for 2020
Getting coverage data for 2021
Getting coverage data for 2022


In [39]:
# plot the coverage results: one grouped bar chart per coverage view,
# with a group per measure and one bar per year inside each group
coverageCharts = [
    (hoCoData, "Data coverage (% of portfolio holdings)"),
    (wtCoData, "Data coverage (% of portfolio weight)"),
]

for coverageByYear, chartTitle in coverageCharts:
    plotData = [
        go.Bar(
            x=cMmeasures,
            name=str(yr),
            y=coverageByYear[idx],
            # bar labels show the value truncated to an integer
            text=[int(x) for x in coverageByYear[idx]],
            marker_color=lsegColors[idx],
        )
        for idx, yr in enumerate(years)
    ]
    layout = go.Layout(barmode='group')
    fig = go.Figure(data=plotData, layout=layout)
    fig.update_layout(title_text=chartTitle, bargap=0.3)
    fig.show()

In [60]:
# plot the holdings coverage again as horizontal grouped bars
# (for the weight-based view, use wtCoData instead of hoCoData and the title
#  "Data coverage (% of portfolio weight)")
plotData = [
    go.Bar(y=cMmeasures, name=str(yr), x=hoCoData[idx], marker_color=lsegColors[idx], orientation='h')
    for idx, yr in enumerate(years)
]

layout = go.Layout(barmode='group')
fig = go.Figure(data=plotData, layout=layout)
fig.update_layout(title_text="Data coverage (% of portfolio holdings)", height=1000, width=1000)
fig.show()

'\nplotData = []\nidx = 0\nfor yr in years:\n    plotData.append(go.Bar(y = cMmeasures, name = str(yr),  x = wtCoData[idx], marker_color=lsegColors[idx], orientation=\'h\' ))\n    idx = idx + 1\n\nlayout = go.Layout(barmode = \'group\')\nfig = go.Figure(data = plotData, layout = layout)\nfig.update_layout(title_text="Data coverage (% of portfolio weight)")\nfig.show()\n'

### Calculate: Weighted Average Carbon Intensity
---

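**Weighted Average Carbon Intensity (WACI)**
$$\mathrm{WACI} = \frac{\sum_{i} w_i \, \dfrac{E_i}{S_i / 10^{6}}}{\sum_{i} w_i}$$

where $w_i$ is the portfolio weight of holding $i$, $E_i$ is its estimated emissions for the scope and $S_i$ is its sales in USD. Both sums run over the holdings that have an emissions estimate and non-zero sales.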

In [18]:
# define the formula for weighted average carbon intensity
#   Sum(weight * estimate / revenue in million USD) / Sum(weight)
#   over the holdings that have an estimate and a non-zero revenue
def getWACI(tPort, measure):
    """Weighted average carbon intensity of one emissions measure.

    Parameters
    ----------
    tPort : DataFrame with the columns 'Weight', 'SalesinUSD' and `measure`.
    measure : name of the emissions column to evaluate.

    Returns
    -------
    The weight-averaged value of estimate / (sales / 1,000,000) over the usable
    holdings, or 0.0 when no holding is usable (or their weights sum to zero),
    so that callers never receive NaN or hit a division by zero.

    NOTE(review): holdings whose estimate is exactly 0 are kept in the average,
    while computeCoverageWeight treats 0 as "no data" - confirm which is intended.
    """
    usable = tPort[measure].notna() & tPort['SalesinUSD'].notna() & (tPort['SalesinUSD'] != 0)
    sDF = tPort[usable]

    deno = sDF['Weight'].sum()
    if deno == 0:
        # nothing to average: avoid 0/0 (NaN) which breaks the int() labels in the chart
        return 0.0

    neu = (sDF['Weight'] * sDF[measure] / (sDF['SalesinUSD'] / 1000000)).sum()
    return neu / deno


def getCarbonIntensityForYear(year):
    """Return the WACI of every column in `measures` for the given fiscal year."""
    # attach that year's climate rows to each holding; holdings without data are kept
    sameYear = allClimate['FinancialPeriodFiscalYear'] == year
    yearPortfolio = portfolio.merge(allClimate[sameYear], how='left', left_on='Organization PermID', right_on='OrganizationId')

    intensities = []
    for m in measures:
        intensities.append(getWACI(yearPortfolio, m))
    return intensities

In [19]:
# build one row of WACI values (one value per measure) for each year
wciData = []
for year in years:
    print(f'Getting data for {year}')
    yearIntensities = getCarbonIntensityForYear(year)
    wciData.append(yearIntensities)

Getting data for 2019
Getting data for 2020
Getting data for 2021
Getting data for 2022


In [40]:
# grouped bar chart: a group per measure, one bar per year
plotData = [
    go.Bar(
        x=measures,
        name=str(yr),
        y=yearValues,
        # bar labels show the value truncated to an integer
        text=[int(x) for x in yearValues],
        marker_color=barColor,
    )
    for yr, yearValues, barColor in zip(years, wciData, lsegColors)
]

layout = go.Layout(barmode='group')
fig = go.Figure(data=plotData, layout=layout)
fig.update_layout(
    title_text="Portfolio's Weighted Average Carbon Intensity (Weight by Scope)",
    yaxis=dict(title='Ton CO₂/MUSD sales'),
)
fig.show()

### Calculate: Financed Emissions
---

In [22]:
# define the formula for financed emissions
#   Sum(estimate * amount invested * weight / enterprise value) / Sum(weight)
#   over the holdings that have an estimate and a non-zero enterprise value
def getFE(tPort, measure, amountInvested=None):
    """Financed emissions of one emissions measure.

    Parameters
    ----------
    tPort : DataFrame with the columns 'Weight',
        'EnterpriseValueincludingCashandShortTermInvestmentsinUSD' and `measure`.
    measure : name of the emissions column to evaluate.
    amountInvested : amount invested in the portfolio; defaults to the notebook-wide
        PortfolioAmountInvested when omitted.

    Returns
    -------
    The weight-normalised sum of estimate * amountInvested * weight / enterprise value
    over the usable holdings, or 0.0 when no holding is usable (or their weights sum
    to zero), so that callers never receive NaN or hit a division by zero.
    """
    if amountInvested is None:
        amountInvested = PortfolioAmountInvested

    evColumn = 'EnterpriseValueincludingCashandShortTermInvestmentsinUSD'
    usable = tPort[measure].notna() & tPort[evColumn].notna() & (tPort[evColumn] != 0)
    sDF = tPort[usable]

    deno = sDF['Weight'].sum()
    if deno == 0:
        # nothing to aggregate: avoid 0/0 (NaN) which breaks the int() labels in the chart
        return 0.0

    neu = (sDF[measure] * amountInvested * sDF['Weight'] / sDF[evColumn]).sum()
    return neu / deno


def getFinancedEmissionsForYear(year):
    """Return the financed emissions of every column in `measures` for the given fiscal year."""
    # attach that year's climate rows to each holding; holdings without data are kept
    sameYear = allClimate['FinancialPeriodFiscalYear'] == year
    yearPortfolio = portfolio.merge(allClimate[sameYear], how='left', left_on='Organization PermID', right_on='OrganizationId')

    financed = []
    for m in measures:
        financed.append(getFE(yearPortfolio, m))
    return financed


In [23]:
# build one row of financed-emission values (one value per measure) for each year
feData = []
for year in years:
    print(f'Getting data for {year}')
    yearFinancedEmissions = getFinancedEmissionsForYear(year)
    feData.append(yearFinancedEmissions)


Getting data for 2019
Getting data for 2020
Getting data for 2021
Getting data for 2022


In [41]:
# chart the financed emissions in a grouped bar graph: a group per measure, one bar per year
plotData = []
for idx, yr in enumerate(years):
    plotData.append(go.Bar(x = measures, name = str(yr),  y = feData[idx],  text = [int(x) for x in feData[idx]], marker_color=lsegColors[idx]))

layout = go.Layout(barmode = 'group')
fig = go.Figure(data = plotData, layout = layout)
# Financed emissions scale the emissions by invested amount / enterprise value, so the
# unit is an absolute emissions amount, not an intensity per sales as in the WACI chart.
fig.update_layout(title_text=f"Financed Emissions for a portfolio valued at {PortfolioAmountInvested:,}", yaxis=dict(title='Ton CO₂'))
fig.show()

### Calculate: PCAF Data Quality Score
---

In [28]:
# assign a score to each type of estimation method
# labels that carry the same score for every scope
_commonMethodScores = {
    'Reported': 2,
    'Reported_value': 2,
    'Winsorized': 4,
    'Extrapolated': 4,
    'Aggregated_model': 5,
}

# per method column: the common labels plus the labels specific to that scope
scoreMapping = {
    'Scope1EstimatedMethod': {
        **_commonMethodScores,
        'Energy_model': 3,
        'Energy_extrapolated': 4,
        'Aggregated Estimate': 5,
    },
    'Scope2EstimatedMethod': {
        **_commonMethodScores,
        'Aggregated Estimate': 5,
    },
    'Scope3EstimationDownstreamMethod': {
        **_commonMethodScores,
        'Fossil_fuel_production_model': 3,
        'Fossil fuel production model': 3,
        'Aggregated model': 5,
    },
    'Scope3EstimationUpstreamMethod': {
        **_commonMethodScores,
        'Aggregated model': 5,
    },
}

In [29]:
# parameters for the PCAF calculation
# fiscal year that is scored
pcafYear = 2021
# number of score buckets, i.e. grades 0 .. numberScoreGrades - 1
numberScoreGrades = 6
# estimation-method column -> emissions-total column of the same scope
pcafMeasures = dict(
    Scope1EstimatedMethod='Scope1EstimatedTotal',
    Scope2EstimatedMethod='Scope2EstimatedTotal',
    Scope3EstimationUpstreamMethod='Scope3EstimationUpstreamTotal',
    Scope3EstimationDownstreamMethod='Scope3EstimationDownstreamTotal',
)

In [30]:
# PCAF score distribution by portfolio weight
def getPCAFByWeight(tPort, scope):
    """Return the summed Weight per score grade for the method column `scope`.

    The result has numberScoreGrades entries, indexed by grade. Holdings without a
    usable enterprise value (missing or 0) are all added to entry 0, whatever their
    grade is.
    """
    evColumn = 'EnterpriseValueincludingCashandShortTermInvestmentsinUSD'
    hasEnterpriseValue = tPort[evColumn].notna() & (tPort[evColumn] != 0)
    withEV = tPort[hasEnterpriseValue]
    withoutEV = tPort[~hasEnterpriseValue]

    # weight per grade among the holdings that do have an enterprise value
    scoresByWeight = [
        withEV.loc[withEV[scope] == grade, 'Weight'].sum()
        for grade in range(numberScoreGrades)
    ]
    # holdings without an enterprise value go into the first (n/a) bucket
    scoresByWeight[0] += withoutEV['Weight'].sum()

    return scoresByWeight

In [31]:
# define the formula for PCAF Score grade, weighted by financed emissions
def getPCAFByFE(tPort, scope, amountInvested=None, measureByMethod=None, gradeCount=None):
    """Return the % of financed emissions that falls into each score grade.

    Parameters
    ----------
    tPort : DataFrame with 'Weight', the enterprise value column, the method column
        `scope` (already converted to grade numbers) and the matching emissions column.
    scope : name of the method column, e.g. 'Scope1EstimatedMethod'.
    amountInvested : amount invested; defaults to PortfolioAmountInvested.
    measureByMethod : mapping of method column -> emissions column; defaults to pcafMeasures.
    gradeCount : number of grades to report; defaults to numberScoreGrades.

    Returns
    -------
    A list with `gradeCount` percentages, indexed by grade. All entries are 0.0 when
    the portfolio has no financed emissions at all.

    Side effects
    ------------
    Writes the columns f'fe_{scope}' and 'rebasedFE' into `tPort`, as before.

    Changes compared with the previous version
    ------------------------------------------
    * Holdings without data receive weight * (covered financed emissions / covered
      weight). The former extra factor amountInvested / 1,000,000 applied the
      invested amount twice, so the percentages changed with the invested amount
      unless it was exactly 1,000,000.
    * An enterprise value of 0 is treated as missing (as getFE and getPCAFByWeight
      do) instead of producing inf/NaN through a division by zero.
    """
    if amountInvested is None:
        amountInvested = PortfolioAmountInvested
    if measureByMethod is None:
        measureByMethod = pcafMeasures
    if gradeCount is None:
        gradeCount = numberScoreGrades

    evColumn = 'EnterpriseValueincludingCashandShortTermInvestmentsinUSD'
    feColumn = f'fe_{scope}'
    emissions = tPort[measureByMethod[scope]]
    # turn zero enterprise values into NaN so they count as "no data"
    enterpriseValue = tPort[evColumn].where(tPort[evColumn] != 0)

    # financed emissions of the holdings that have both inputs (NaN otherwise)
    tPort[feColumn] = emissions * amountInvested * tPort['Weight'] / enterpriseValue

    # weight of the holdings for which financed emissions could be computed
    covered = emissions.notna() & enterpriseValue.notna()
    coverage = tPort.loc[covered, 'Weight'].sum()

    # average financed emissions per unit of covered weight
    fePerUnitWeight = tPort[feColumn].sum() / coverage if coverage else 0.0

    # holdings without data are assumed to emit like the covered average
    tPort['rebasedFE'] = tPort['Weight'] * fePerUnitWeight
    tPort[feColumn] = tPort[feColumn].fillna(tPort['rebasedFE'])

    allFE = tPort[feColumn].sum()
    if not allFE:
        return [0.0] * gradeCount

    # generate score grades by FE
    return [
        (tPort.loc[tPort[scope] == grade, feColumn].sum() / allFE) * 100
        for grade in range(gradeCount)
    ]

In [32]:
# climate rows of the PCAF year only
climateForPcafYear = allClimate[allClimate['FinancialPeriodFiscalYear'] == pcafYear]

# attach them to the portfolio, then turn the method labels into score grade numbers
pcafPort = (
    portfolio
    .merge(climateForPcafYear, how='left', left_on='Organization PermID', right_on='OrganizationId')
    .replace(scoreMapping)
)
# holdings without an estimation method get grade 0
pcafPort = pcafPort.fillna(dict.fromkeys(pcafMeasures, 0))

# score distribution per method column: first by weight, then by financed emissions
pcafWeights = []
for measure in pcafMeasures:
    pcafWeights.append(getPCAFByWeight(pcafPort, measure))

pcafFE = []
for measure in pcafMeasures:
    pcafFE.append(getPCAFByFE(pcafPort, measure))

In [46]:
# labels in grade order: index 0 is the "no data" bucket
scoreLabels = ['No Data', '1', '2', '3', '4', '5']
measureNames = pcafMeasures.keys()

# one row of donut charts (one per method column) for each view of the score
pieCharts = [
    (pcafWeights, f"PCAF quality score for {pcafYear} (Index Weight)"),
    (pcafFE, f"PCAF quality score for {pcafYear} (Financed Emissions)"),
]

for gradeShares, chartTitle in pieCharts:
    fig = make_subplots(rows=1, cols=len(measureNames), specs=[[{'type':'domain'}] * len(measureNames)] )
    for idx, m in enumerate(measureNames):
        # subplot columns are 1-based
        fig.add_trace(go.Pie(labels=scoreLabels, values=gradeShares[idx], name=m), 1, 1 + idx)

    fig.update_traces(hole=.5, hoverinfo="label+percent+name", marker=dict(colors=lsegColors))
    fig.update_layout(title_text = chartTitle)
    fig.show()

In [43]:
# labels in display order: the "no data" bucket is shown last
scoreLabels = ['1', '2', '3', '4', '5', 'No Data']
measureNames = list(pcafMeasures.keys())

# one stacked bar chart per view of the score
barCharts = [
    (pcafWeights, f"PCAF quality score for {pcafYear} (Index Weight)"),
    (pcafFE, f"PCAF quality score for {pcafYear} (Financed Emissions)"),
]

for gradesByMeasure, chartTitle in barCharts:
    # transpose from [measure][grade] to [grade][measure]: each bar trace is one grade
    tWeights = [list(perGrade) for perGrade in zip(*gradesByMeasure)]
    # move no data entry to the end
    tWeights.append(tWeights.pop(0))

    plotData = [
        go.Bar(x = measureNames, name = grade,  y = tWeights[idx], marker_color=lsegColors[idx] )
        for idx, grade in enumerate(scoreLabels)
    ]

    fig = go.Figure(plotData)
    # stack the grades on top of each other
    fig.update_layout(barmode='stack', bargap=0.8, title_text=chartTitle, yaxis=dict(title='%'))
    fig.show()

### One time step: Download and save the Bulk files
---

In [17]:
# For clients with subscription to full climate data
# download the climate standard file:
# NOTE(review): argument meanings are inferred from the values and the comments
# below (bucket, attribute filter, file name parts) - confirm against RDPDefines.ipynb
climateBucket = 'bulk-Climate'
climateAttributes = 'ContentType:Climate'
climateFileNameParts = ['Measures-Full-v1-DataItems', 'Json', 'Init']
bFiles = downloadJSONBulkFile(climateBucket, climateAttributes, climateFileNameParts)

# For clients with subscription to ESG data Climate add-on package
# Download the climate addon file 
#   Bucket: bulk-Climate
#   ContentType: ClimateAddOn
#   FileSetName: Bulk-Climate-Global-Measures-AddOn-v1-DataItems-Jsonl-Init

# Also download the ESG Measures file to get Company Revenue and Enterprise Value
# Finally combine and save the information in a unified DataFrame

Getting access token...
...token received
{'value': [{'id': '4005-f8cb-27593d0d-82a1-65e06fa6c8e0', 'name': 'Bulk-Climate-Global-Measures-WealthPro-Limited-v1-DataItems-Csv-Delta-2023-11-12T17:05:01.948Z', 'bucketName': 'bulk-climate', 'packageId': '442f-6fa3-592ad4f5-95a1-108bde772301', 'attributes': [{'name': 'ContentType', 'value': 'Climate'}, {'name': 'ResultCount', 'value': '106132'}], 'files': ['4bd8-80cd-94e56d06-b04d-810303c44a9b'], 'numFiles': 1, 'contentFrom': '2023-11-05T16:55:00Z', 'contentTo': '2023-11-12T16:55:00Z', 'availableFrom': '2023-11-12T17:17:20Z', 'availableTo': '2023-11-26T17:17:19Z', 'status': 'READY', 'created': '2023-11-12T17:17:20Z', 'modified': '2023-11-12T17:17:22Z'}, {'id': '4042-2d7b-3e0f894e-bdbf-d55d624648c1', 'name': 'Bulk-Climate-Global-Measures-WealthPro-CDP-Limited-v1-Csv-Init-2023-11-19T17:04:41.632Z', 'bucketName': 'bulk-climate', 'packageId': '436d-2891-52a25ccf-8d36-bc39f94b66ce', 'attributes': [{'name': 'ResultCount', 'value': '4368'}, {'name'

In [19]:
# field -> key of the datapoint that holds its value
# (looked up from the field glossary document)
_numericValue = 'DatapointValue'
_textValue = 'DatapointValueText'

fields = dict([
    ('OrganizationId', _numericValue),
    ('FinancialPeriodFiscalYear', _numericValue),
    ('Scope1EstimatedTotal', _numericValue),
    ('Scope2EstimatedTotal', _numericValue),
    ('Scope3EstimationUpstreamTotal', _numericValue),
    ('Scope3EstimationDownstreamTotal', _numericValue),
    ('Scope1EstimatedMethod', _textValue),
    ('Scope2EstimatedMethod', _textValue),
    ('Scope3EstimationUpstreamMethod', _textValue),
    ('Scope3EstimationDownstreamMethod', _textValue),
    ('SalesinUSD', _numericValue),
    ('ExchangeRatetoUSD', _numericValue),
    ('EnterpriseValueincludingCashandShortTermInvestmentsinUSD', _numericValue),
])

In [20]:
# parse out the entries in the downloaded files: one JSON document per line,
# one output record per document
climateData = []
matchFields = list(fields.keys())

for cMeasuresFile in bFiles:
    for line in cMeasuresFile.splitlines():
        # tolerate blank lines instead of failing in json.loads
        if not line.strip():
            continue

        jObj = json.loads(line)
        dt = {}
        for measure in jObj['ESGMeasureValue']['EsgMeasureValues']:
            measureName = measure['EsgDataMeasure']
            # dict membership is a constant-time lookup (matchFields is a list)
            if measureName in fields:
                # the glossary mapping tells which key of the datapoint holds the value
                dt[measureName] = measure['EsgDatapointValue'][fields[measureName]]

        # organization and fiscal year always come from the statement details
        statementDetails = jObj['ESGStatementDetails']
        dt['OrganizationId'] = statementDetails['OrganizationId']
        dt['FinancialPeriodFiscalYear'] = statementDetails['FinancialPeriodFiscalYear']

        climateData.append(dt)

print(f'Loaded climate data for {len(climateData)} organizations')

Loaded climate data for 114851 organizations


In [21]:
# turn the parsed records into a DataFrame (one row per record)
df1 = pd.DataFrame(data=climateData)
# persist it as the pickle that the top of this notebook loads
pd.to_pickle(df1, 'ClimateData.pkl')