In [1]:
import pandas as pd
import json

### 1. Reading the ESG documents

In [3]:
# Load the pipe-delimited ESG documents CSV; the first CSV column becomes the index.
df = pd.read_csv(
    'esg_documents_for_dax_companies.csv',
    sep='|',
    index_col=0,
)

In [4]:
# Column labels of the ESG documents frame.
df.columns

Index(['company', 'content', 'datatype', 'date', 'domain', 'esg_topics',
       'internal', 'symbol', 'title', 'url'],
      dtype='object')

In [5]:
# (rows, columns) of the ESG documents frame.
df.shape

(11188, 10)

### 2. Reading the SDG descriptions

Source: https://sdgs.un.org/goals

#### With `targets` (plain text) and `targets_json_array`

In [6]:
# SDG descriptions including the plain-text `targets` column.
# index_col=False tells pandas not to use the first CSV column as the index.
sdg_all = pd.read_csv(
    'sdg_descriptions_with_targetsText.csv',
    index_col=False,
)

In [7]:
# Display the SDG table that includes the plain-text `targets` column.
sdg_all

Unnamed: 0,id,name,description,targets,targets_json_array,progress
0,1,No Poverty,End poverty in all its forms everywhere,"['1.1', 'By 2030, eradicate extreme poverty fo...","[{""target"":""1.1"",""description"":""By 2030, eradi...",['The impact of the COVID-19 pandemic reversed...
1,2,Zero Hunger,"End hunger, achieve food security and improved...","['2.1', 'By 2030, end hunger and ensure access...","[{""target"":""2.1"",""description"":""By 2030, end h...","['Between 2014 and the onset of the pandemic, ..."
2,3,Good Health and Well-being,Ensure healthy lives and promote well-being fo...,"['3.1', 'By 2030, reduce the global maternal m...","[{""target"":""3.1"",""description"":""By 2030, reduc...","['By April 2022, the coronavirus causing COVID..."
3,4,Quality Education,Ensure inclusive and equitable quality educati...,"['4.1', 'By 2030, ensure that all girls and bo...","[{""target"":""4.1"",""description"":""By 2030, ensur...",['The COVID-19 outbreak has caused a global ed...
4,5,Gender Equality,Achieve gender equality and empower all women ...,"['5.1', 'End all forms of discrimination again...","[{""target"":""5.1"",""description"":""End all forms ...","[""The world is not on track to achieve gender ..."
5,6,Clean Water and Sanitation,Ensure availability and sustainable management...,"['6.1', 'By 2030, achieve universal and equita...","[{""target"":""6.1"",""description"":""By 2030, achie...","['Access to safe water, sanitation and hygiene..."
6,7,Affordable and Clean Energy,"Ensure access to affordable, reliable, sustain...","['7.1', 'By 2030, ensure universal access to a...","[{""target"":""7.1"",""description"":""By 2030, ensur...","['Despite progress, there are still over 700 m..."
7,8,Decent Work and Economic Growth,"Promote sustained, inclusive and sustainable e...","['8.1', 'Sustain per capita economic growth in...","[{""target"":""8.1"",""description"":""Sustain per ca...","['In 2020, the COVID-19 pandemic unleashed the..."
8,9,"Industry, Innovation and Infrastructure","Build resilient infrastructure, promote inclus...","['9.1', 'Develop quality, reliable, sustainabl...","[{""target"":""9.1"",""description"":""Develop qualit...",['The manufacturing industry exhibited a recov...
9,10,Reduced Inequalities,Reduce inequality within and among countries,"['10.1', 'By 2030, progressively achieve and s...","[{""target"":""10.1"",""description"":""By 2030, prog...",['The COVID-19 crisis has exacerbated global i...


#### With only `targets_json_array`

In [8]:
# SDG descriptions where targets are available only as `targets_json_array`.
# index_col=False tells pandas not to use the first CSV column as the index.
sdg_json = pd.read_csv(
    'sdg_descriptions_without_targetsText.csv',
    index_col=False,
)

In [9]:
# Display the SDG table that carries targets only as `targets_json_array`.
sdg_json

Unnamed: 0,id,name,description,targets_json_array,progress
0,1,No Poverty,End poverty in all its forms everywhere,"[{""target"":""1.1"",""description"":""By 2030, eradi...",['The impact of the COVID-19 pandemic reversed...
1,2,Zero Hunger,"End hunger, achieve food security and improved...","[{""target"":""2.1"",""description"":""By 2030, end h...","['Between 2014 and the onset of the pandemic, ..."
2,3,Good Health and Well-being,Ensure healthy lives and promote well-being fo...,"[{""target"":""3.1"",""description"":""By 2030, reduc...","['By April 2022, the coronavirus causing COVID..."
3,4,Quality Education,Ensure inclusive and equitable quality educati...,"[{""target"":""4.1"",""description"":""By 2030, ensur...",['The COVID-19 outbreak has caused a global ed...
4,5,Gender Equality,Achieve gender equality and empower all women ...,"[{""target"":""5.1"",""description"":""End all forms ...","[""The world is not on track to achieve gender ..."
5,6,Clean Water and Sanitation,Ensure availability and sustainable management...,"[{""target"":""6.1"",""description"":""By 2030, achie...","['Access to safe water, sanitation and hygiene..."
6,7,Affordable and Clean Energy,"Ensure access to affordable, reliable, sustain...","[{""target"":""7.1"",""description"":""By 2030, ensur...","['Despite progress, there are still over 700 m..."
7,8,Decent Work and Economic Growth,"Promote sustained, inclusive and sustainable e...","[{""target"":""8.1"",""description"":""Sustain per ca...","['In 2020, the COVID-19 pandemic unleashed the..."
8,9,"Industry, Innovation and Infrastructure","Build resilient infrastructure, promote inclus...","[{""target"":""9.1"",""description"":""Develop qualit...",['The manufacturing industry exhibited a recov...
9,10,Reduced Inequalities,Reduce inequality within and among countries,"[{""target"":""10.1"",""description"":""By 2030, prog...",['The COVID-19 crisis has exacerbated global i...


##### How to access the JSON field `targets_json_array`

In [10]:
# Raw JSON string stored in `targets_json_array` for the first row of `sdg_json`.
# `.iloc[0]` reads that single cell by position instead of copying the whole
# column into a Python list just to take its first element.
sdg_json['targets_json_array'].iloc[0]

'[{"target":"1.1","description":"By 2030, eradicate extreme poverty for all people everywhere, currently measured as people living on less than $1.25 a day","indicators":[{"target":"1.1.1","description":"Proportion of the population living below the international poverty line by sex, age, employment status and geographical location (urban/rural)"}]},{"target":"1.2","description":"By 2030, reduce at least by half the proportion of men, women and children of all ages living in poverty in all its dimensions according to national definitions","indicators":[{"target":"1.2.1","description":"Proportion of population living below the national poverty line, by sex and age"},{"target":"1.2.2","description":"Proportion of men, women and children of all ages living in poverty in all its dimensions according to national definitions"}]},{"target":"1.3","description":"Implement nationally appropriate social protection systems and measures for all, including floors, and by 2030 achieve substantial cov

In [11]:
# Parse the first row's JSON string into Python objects (a list of target dicts).
# `.iloc[0]` picks the cell by position without building a list of the whole column.
json_data = json.loads(sdg_json['targets_json_array'].iloc[0])

In [12]:
# Take the last target in the parsed list, then its first indicator, and show
# that indicator's id and description (indicator 1.b.1 in this case).
last_target_first_indicator = json_data[-1]['indicators'][0]
last_target_first_indicator['target'], last_target_first_indicator['description']

('1.b.1', 'Pro-poor public social spending')