In [None]:
# List of modules that are not installed in the course
!pip install OpenPermID
!pip install Levenshtein
!pip install geocoder

In [1]:
import pandas as pd
import numpy as np
import re

# Downloading all bonds ever owned in CSPP

In [None]:
import requests, datetime

In [None]:
def downloadDataToDictionary(url,dictionary):
    """Download one holdings CSV and add its not-yet-known bonds to ``dictionary``.

    Parameters
    ----------
    url : str
        Address of the CSV file; fetched with ``requests.get``.
    dictionary : dict
        Updated in place. The key is the second CSV field (ISIN) and the value
        is ``[NCB, ISSUER, MATURITY DATE, COUPON RATE]``: the first field, the
        issuer name and the last two fields of the row. ISINs that are already
        present are left untouched.

    Returns
    -------
    None
        Nothing is added when the HTTP status code is not 200.
    """
    r = requests.get(url) # create HTTP response object
    if r.status_code != 200:
        return # website wasn't accessed in the right way

    # Loop through all lines of the retrieved csv-file, except for the heading.
    # Both '\r\n' and '\n' line endings are accepted.
    for line in re.split(r'\r?\n', r.text)[1:]:
        # Skip lines without any lower-case letter (e.g. empty lines).
        # NOTE(review): this also skips a row whose text is entirely upper-case - confirm intended.
        if not re.search(r'[a-z]',line):
            continue
        line = re.sub(r',+$',r'',line) # remove commas at end of line
        splitLine = line.split(',')
        # We expect at least 5 items as we want 5 columns
        # and the name could lead to additional columns
        if len(splitLine) < 5:
            continue

        # A comma between double quotes belongs to the issuer name and must not
        # be treated as a column separator.
        quotedName = re.search(r'(?:\".*,.*\")',line)

        # Remove the double quotes from every field. The original loop called
        # re.sub without storing the result, so the quotes were never removed.
        splitLine = [field.replace('"','') for field in splitLine]

        if quotedName:
            nameCompany = quotedName.group(0).replace('"','')
        else:
            nameCompany = splitLine[2]

        if splitLine[1] not in dictionary: # only add new ISINs to the dictionary
            dictionary[splitLine[1]] = [splitLine[0], nameCompany, splitLine[-2], splitLine[-1]]

In [None]:
# One holdings file is requested per week. Files dated up to and including
# change_url_date are requested under the "CSPPholdings_" name, later ones under
# the "CSPP_PEPP_corporate_bond_holdings_" name.
dateToDownload = datetime.date(2017, 6, 23)
change_url_date = datetime.date(2020, 3, 27)
end_date = datetime.date(2021,4,23)
delta = datetime.timedelta(days=7)
dictionaryBondsECB = {}

# NOTE(review): the original had a bare `dateToDownload+delta` statement between its
# two loops. It had no effect and is dropped here; if a week was meant to be skipped
# at the switch of file name, that has to be added explicitly.
while dateToDownload <= end_date:
    date = dateToDownload.strftime("%Y%m%d")
    if dateToDownload <= change_url_date:
        url = "https://www.ecb.europa.eu/mopo/pdf/CSPPholdings_"+date+".csv"
    else:
        url = "https://www.ecb.europa.eu/mopo/pdf/CSPP_PEPP_corporate_bond_holdings_"+date+".csv"
    downloadDataToDictionary(url,dictionaryBondsECB)
    dateToDownload += delta

In [None]:
# One row per bond: the ISIN followed by the values stored for it in the dictionary
matrixData = [[isin, *bond_fields] for isin, bond_fields in dictionaryBondsECB.items()]
holdingsECB = pd.DataFrame(
    matrixData,
    columns=["ISIN","NCB","ISSUER","MATURITY DATE","COUPON RATE"],
)

In [None]:
# Export data to csv for easy retrieval on computers with Eikon
# (semicolon-separated, DataFrame index not written)
holdingsECB.to_csv('data/holdingsECB.csv',index=False,sep=";")

# ECB green bonds that are listed on Euronext stock exchange 

In [None]:
# Reading data of green bonds listed on Euronext (first spreadsheet row is the header)
euronext_greenbonds = pd.read_excel("data/Euronext-Green-Bond-List.xlsx", header=0)
euronext_greenbonds

In [None]:
# Getting the ISIN column of the Euronext green bonds
euronext_greenbond_isin = euronext_greenbonds["ISIN"]
euronext_greenbond_isin

In [None]:
# Keep only the ECB holdings whose ISIN also appears in the Euronext green bond list
ecbgreenbonds = holdingsECB[(holdingsECB["ISIN"].isin(euronext_greenbond_isin))]
ecbgreenbonds

# Find Sectors and Locations of Companies

TODO: Make this function work for all the ECB bonds instead of just the ones from April 2nd.

In [2]:
# Requirements
from OpenPermID import OpenPermID
import Levenshtein
import geocoder

In [3]:
# Gain access to the permid database.
# The access token is a personal credential and must not be stored in the notebook:
# it is read from the PERMID_ACCESS_TOKEN environment variable, or asked for
# interactively when that variable is not set.
# NOTE(review): the token that used to be hardcoded here should be revoked.
import os
from getpass import getpass

opid = OpenPermID()
opid.set_access_token(os.environ.get("PERMID_ACCESS_TOKEN") or getpass("PermID access token: "))

In [6]:
# Map every distinct issuer name to a PermID record by searching the PermID database.
holdingsECB = pd.read_csv("data/holdingsECB.csv", header=0, delimiter=';')
companies = holdingsECB.ISSUER.astype('string').unique()
unmapped_companies = []  # issuers for which the search kept returning an error
mapping_records = []     # one dict per issuer; turned into a DataFrame once, after the loop

for company in companies:

    # In case of connection error, allow it to try at most 5 times.
    # (The counter has to go up: counting down never reaches the limit and
    # retries forever when the error persists.)
    err, count = 0, 0
    while (err is not None and count < 5):
        output, err = opid.search(company)
        count = count + 1
    if err is not None:
        unmapped_companies.append(company)
        continue

    df = output['organizations']
    if len(df) == 0:
        # no organisation found for this issuer name
        permid = np.nan
        name = np.nan
    elif len(df) == 1:
        permid = df.iloc[0,0].split('/')[-1]  # last path component of the first column
        name = df.iloc[0,1]
    else:
        # If multiple records are returned, choose the record whose company name is the most similar to the keyword
        similarityScores = df.organizationName.apply(lambda x: Levenshtein.ratio(company, x))
        # NOTE(review): idxmax returns an index label that is then used as a position;
        # this presumes `df` has a default 0..n-1 index - confirm.
        max_index = similarityScores.idxmax()
        permid = df.iloc[max_index, 0].split('/')[-1]
        name = df.iloc[max_index, 1]
    mapping_records.append({'keyword': company, 'companyName': name, 'PermID': permid})

# Built in one go instead of DataFrame.append per row (removed in pandas 2.0 and
# quadratic in the number of rows). The result has a 0..n-1 index.
permid_mappings = pd.DataFrame(mapping_records, columns=['keyword', 'companyName', 'PermID'])

permid_mappings

Unnamed: 0,keyword,companyName,PermID
0,2i Rete Gas S.p.A.,2I Rete Gas SpA,1-5000936840
0,A2A S.p.A.,A2A SpA,1-5000005309
0,ABB Finance B.V.,ABB Finance BV,1-5000066931
0,Abertis Infraestructuras SA,Abertis Infraestructuras SA,1-4295889666
0,ACEA S.p.A.,Acea SpA,1-4295875677
...,...,...,...
0,Vonovia SE,Vonovia SE,1-5063761614
0,Vienna Insurance Group AG,Donau Versicherung Vienna Insurance Group AG,1-5000004244
0,Vantage Towers AG,Vantage Towers AG,1-5076167430
0,LSEG Netherlands BV,LSEG Netherlands BV,1-5079710330


In [7]:
# Look up sector and location identifiers for every PermID found above.
# Each distinct PermID is looked up once: a repeated PermID would only repeat the
# request and produce duplicate rows when this table is joined on PermID.
permids = permid_mappings.PermID.dropna().astype('string').unique()
unsuccessful_lookups = []  # PermIDs for which the lookup kept returning an error
lookup_rows = []           # one single-row DataFrame per PermID, concatenated after the loop

for permid in permids:

    # In case of connection error, allow it to try at most 5 times.
    # (The counter has to go up, otherwise a persistent error loops forever.)
    err, count = 0, 0
    while (err is not None and count < 5):
        output, err = opid.lookup(permid)
        count = count + 1
    if err is not None:
        unsuccessful_lookups.append(permid)
        continue

    # Reset per iteration so that a record without these columns neither reuses the
    # previous record's values nor fails on an undefined name.
    sector_info, loc_info = None, None
    if "hasPrimaryBusinessSector" in output.columns:
        sector_info = output.loc[:, 'hasPrimaryBusinessSector': 'hasPrimaryIndustryGroup']
        sector_info = sector_info.applymap(lambda x: x.split('/')[-1])  # keep last path component
    if "isIncorporatedIn" in output.columns:
        loc_info = output.loc[:, 'isIncorporatedIn': 'isDomiciledIn']
        loc_info = loc_info.applymap(lambda x: x.split('/')[-2])  # keep second-to-last path component

    row = pd.DataFrame({'PermID': [permid]})
    if sector_info is not None:
        row = pd.concat([row, sector_info], axis = 1)
    if loc_info is not None:
        row = pd.concat([row, loc_info], axis = 1)
    lookup_rows.append(row)

# pd.concat replaces DataFrame.append (removed in pandas 2.0); as before, each row keeps index 0.
sector_lookups = pd.concat(lookup_rows) if lookup_rows else pd.DataFrame({})

sector_lookups

Unnamed: 0,PermID,hasPrimaryBusinessSector,hasPrimaryEconomicSector,hasPrimaryIndustryGroup,isIncorporatedIn,isDomiciledIn
0,1-5000936840,1-4294952820,1-4294952821,1-4294952817,3175395,3175395
0,1-5000005309,1-4294952820,1-4294952821,1-4294952819,3175395,3175395
0,1-5000066931,1-4294952766,1-4294952767,1-4294952765,2750405,2750405
0,1-4295889666,1-4294952945,1-4294952767,1-4294952750,2510769,2510769
0,1-4295875677,1-4294952820,1-4294952821,1-4294952813,3175395,3175395
...,...,...,...,...,...,...
0,1-5063761614,1-4294952860,1-1004365438,1-4294952858,2921044,2921044
0,1-5000004244,1-4294952866,1-4294952740,1-4294952865,2782113,2782113
0,1-5076167430,1-4294952762,1-4294952767,1-4294952761,2921044,2921044
0,1-5079710330,,,,953987,2750405


In [8]:
# Replace the sector identifiers and the location identifiers in sector_lookups by readable names.
import os

sector_lookups_converted = sector_lookups.copy()

# Columns are selected by name rather than by position, so the result does not
# depend on the order in which the columns happened to be collected.
known_sector_columns = ('hasPrimaryBusinessSector', 'hasPrimaryEconomicSector', 'hasPrimaryIndustryGroup')
sector_types = [column for column in known_sector_columns if column in sector_lookups.columns]
unsuccessful_sector_lookups = []  # sector ids whose lookup returned an error
for sector_type in sector_types:
    sector_dict = {}
    sectors = sector_lookups.loc[:, sector_type].dropna().astype('string').unique()
    for sector in sectors:
        output, err = opid.lookup(sector)
        if err is not None:
            # keep going: the name of this sector is reported as missing
            unsuccessful_sector_lookups.append(sector)
            sector_dict[sector] = np.nan
            continue
        sector_dict[sector] = output.iloc[0, -1]  # last column of the first returned row
    sector_lookups_converted[sector_type] = sector_lookups[sector_type].fillna('missing').astype('string').apply(lambda x: np.nan if x == 'missing' else sector_dict[x])

# The GeoNames account name is read from the environment instead of being stored in the notebook.
geonames_user = os.environ.get("GEONAMES_USERNAME") or input("GeoNames username: ")
loc_types = [column for column in sector_lookups.columns if column != 'PermID' and column not in sector_types]
for loc_type in loc_types:
    loc_dict = {}
    locs = sector_lookups.loc[:, loc_type].dropna().astype('string').unique()
    for loc in locs:
        g = geocoder.geonames(loc, method='details', key=geonames_user)
        loc_dict[loc] = g.address
    sector_lookups_converted[loc_type] = sector_lookups[loc_type].fillna('missing').astype('string').apply(lambda x: np.nan if x == 'missing' else loc_dict[x])

sector_lookups_converted

Unnamed: 0,PermID,hasPrimaryBusinessSector,hasPrimaryEconomicSector,hasPrimaryIndustryGroup,isIncorporatedIn,isDomiciledIn
0,1-5000936840,Utilities,Utilities,Natural Gas Utilities,Italy,Italy
0,1-5000005309,Utilities,Utilities,Electric Utilities & IPPs,Italy,Italy
0,1-5000066931,Industrial Goods,Industrials,"Machinery, Tools, Heavy Vehicles, Trains & Ships",Netherlands,Netherlands
0,1-4295889666,Transportation,Industrials,Transport Infrastructure,Spain,Spain
0,1-4295875677,Utilities,Utilities,Multiline Utilities,Italy,Italy
...,...,...,...,...,...,...
0,1-5063761614,Real Estate,Real Estate,Real Estate Operations,Germany,Germany
0,1-5000004244,Insurance,Financials,Insurance,Austria,Austria
0,1-5076167430,Industrial & Commercial Services,Industrials,Construction & Engineering,Germany,Germany
0,1-5079710330,,,,South Africa,Netherlands


In [9]:
# Attach the sector and location names to every issuer.
# Left join on PermID: issuers without a match in sector_lookups_converted are kept.
sector_mappings = pd.merge(permid_mappings, sector_lookups_converted, how = 'left', on = 'PermID')
sector_mappings

Unnamed: 0,keyword,companyName,PermID,hasPrimaryBusinessSector,hasPrimaryEconomicSector,hasPrimaryIndustryGroup,isIncorporatedIn,isDomiciledIn
0,2i Rete Gas S.p.A.,2I Rete Gas SpA,1-5000936840,Utilities,Utilities,Natural Gas Utilities,Italy,Italy
1,A2A S.p.A.,A2A SpA,1-5000005309,Utilities,Utilities,Electric Utilities & IPPs,Italy,Italy
2,ABB Finance B.V.,ABB Finance BV,1-5000066931,Industrial Goods,Industrials,"Machinery, Tools, Heavy Vehicles, Trains & Ships",Netherlands,Netherlands
3,Abertis Infraestructuras SA,Abertis Infraestructuras SA,1-4295889666,Transportation,Industrials,Transport Infrastructure,Spain,Spain
4,ACEA S.p.A.,Acea SpA,1-4295875677,Utilities,Utilities,Multiline Utilities,Italy,Italy
...,...,...,...,...,...,...,...,...
379,Vonovia SE,Vonovia SE,1-5063761614,Real Estate,Real Estate,Real Estate Operations,Germany,Germany
380,Vienna Insurance Group AG,Donau Versicherung Vienna Insurance Group AG,1-5000004244,Insurance,Financials,Insurance,Austria,Austria
381,Vantage Towers AG,Vantage Towers AG,1-5076167430,Industrial & Commercial Services,Industrials,Construction & Engineering,Germany,Germany
382,LSEG Netherlands BV,LSEG Netherlands BV,1-5079710330,,,,South Africa,Netherlands


In [10]:
# Number of non-missing values per column
sector_mappings.count()

keyword                     384
companyName                 339
PermID                      339
hasPrimaryBusinessSector    313
hasPrimaryEconomicSector    313
hasPrimaryIndustryGroup     313
isIncorporatedIn            330
isDomiciledIn               330
dtype: int64

# Read all the Eikon data

In [11]:
# Tab-separated Eikon exports, all read from the same folder
eikon_data_folder = "data/"
# TODO: remove right, empty columns from data frame
eikon_data_environment, eikon_data_general, eikon_data_industry = (
    pd.read_csv(eikon_data_folder + file_name, sep="\t")
    for file_name in (
        "holdingsECBEnvironment.txt",
        "holdingsECBGeneralInfo.txt",
        "holdingsECBIndustryAndSector.txt",
    )
)

In [12]:
# Left-join the environment data and then the industry data onto the general info, matching on ISIN
eikon_data_merged = eikon_data_general.merge(eikon_data_environment, how="left", on="ISIN")
eikon_data_complete = (
    eikon_data_merged
    .merge(eikon_data_industry, how="left", on="ISIN")
    .rename(columns={'CO2.1': 'CO2_1'})  # changed column name to prevent syntax errors
)

# Compare Eikon and PermID databases

In [83]:
# One row per issuer: of all bonds of an issuer, keep the row with the most filled-in fields.
NaN_message = "Unable to collect data for the field(.*)"
sector_data = eikon_data_complete[['ISIN', 'NCB', 'ISSUER','ICB Industry name','ICB Sector name','ICB Supersector name']]
# Eikon's "unable to collect" messages count as missing values
sector_data = sector_data.replace(to_replace = NaN_message, value = np.nan, regex = True)

best_rows = []
for i in sector_data.ISSUER.unique():
    # All rows of this issuer. The original restricted this to the first row
    # (.iloc[[0]]), which left the selection below with nothing to choose from.
    df = sector_data[sector_data.ISSUER == i].reset_index()
    # position of the row with the most non-null fields (first one on ties)
    index = df.notnull().sum(axis = 1).idxmax()
    best_rows.append(df.iloc[[index]])
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
sector_data_company = pd.concat(best_rows) if best_rows else pd.DataFrame({})
sector_data_company

Unnamed: 0,index,ISIN,NCB,ISSUER,ICB Industry name,ICB Sector name,ICB Supersector name
0,0,XS1088274169,IT,2i Rete Gas S.p.A.,,,
0,4,XS0859920406,IT,A2A S.p.A.,Utilities,Electricity,Utilities
0,9,XS0763122578,BE,ABB Finance B.V.,,,
0,12,ES0211845203,ES,Abertis Infraestructuras SA,,,
0,18,XS0495012428,IT,ACEA S.p.A.,Utilities,Electricity,Utilities
...,...,...,...,...,...,...,...
0,1794,DE000A3E5FR9,DE,Vonovia SE,Real Estate,Real Estate Investment and Services Development,Real Estate
0,1795,AT0000A2QL75,FI,Vienna Insurance Group AG,Financials,Non-life Insurance,Insurance
0,1800,DE000A3H3J14,DE,Vantage Towers AG,,,
0,1807,XS2327298217,BE,LSEG Netherlands BV,,,


In [86]:
# Number of non-missing values per column (Eikon data, one row per issuer)
sector_data_company.count()

index                   370
ISIN                    370
NCB                     370
ISSUER                  370
ICB Industry name       158
ICB Sector name         158
ICB Supersector name    158
dtype: int64

In [89]:
# Number of non-missing values per column (PermID data), for comparison with the Eikon counts
sector_mappings.count()

keyword                     384
companyName                 339
PermID                      339
hasPrimaryBusinessSector    313
hasPrimaryEconomicSector    313
hasPrimaryIndustryGroup     313
isIncorporatedIn            330
isDomiciledIn               330
dtype: int64

__More companies mapped in Eikon database (370 vs 339), but more industries mapped in PermID database (313 vs 158).__

# Industry and sector analysis

## Get all industries and sectors in which ECB invested

In [14]:
sector_data = eikon_data_complete[['ISIN','ICB Industry name','ICB Sector name','ICB Supersector name']]
# Drop the rows for which Eikon returned an "Unable to collect data ..." message instead of
# an industry. Matching on the start of the message also catches other wordings of it.
has_no_industry = sector_data['ICB Industry name'].astype(str).str.match(r"Unable to collect data for the field")
# .copy() makes the result an independent frame, so columns can be added to it later without warnings
sector_data = sector_data[~has_no_industry].copy()
# sector_data.to_excel('output/test_sector_data.xlsx') # Export the resulting data to an excel file, create output folder if you want to use it!
percentage_known_sectors = len(sector_data.index)/len(eikon_data_complete.index) # 50% of the rows removed

## Represent industries and sectors

Create a pie chart to represent the number of times an industry, sector or supersector is present in the data (multiple bonds for the same company are counted separately)

In [None]:
import matplotlib.pyplot as plt # TODO: add to requirements if used

def make_autopct(values):
    """Return an ``autopct`` formatter that labels only sufficiently large pie wedges.

    Parameters
    ----------
    values
        Not used by the formatter; kept so existing calls keep working.

    Returns
    -------
    callable
        Function of the wedge percentage ``pct`` that returns the percentage
        with two decimals followed by '%' when ``pct`` is above 2.4, and an
        empty string otherwise.
    """
    def my_autopct(pct):
        # small wedges get no label
        return '{p:.2f}%'.format(p=pct) if pct > 2.4 else ''
    return my_autopct

def get_all_sectors(sector_type):
    """Count the rows of the global ``sector_data`` per value of one column.

    Parameters
    ----------
    sector_type : str
        Name of the column of ``sector_data`` to group by.

    Returns
    -------
    pandas.DataFrame
        Indexed by the distinct values of ``sector_type`` (missing values are
        not counted), with a single column ``'number'`` holding the row count,
        sorted from the largest to the smallest count.
    """
    # size() counts the rows directly, so no helper column has to be written
    # into the shared sector_data frame.
    counts = sector_data.groupby(sector_type).size().to_frame('number')
    return counts.sort_values('number', ascending=False)
    
def make_pie_chart(column_name):
    """Draw a pie chart of the number of bonds per value of ``column_name``.

    The counts come from ``get_all_sectors(column_name)``; the column name is
    used as the title of the chart.
    """
    fig, ax = plt.subplots(figsize=(9,9))

    sectors = get_all_sectors(column_name)

    sectors.plot.pie(
        y='number',
        ax=ax,
        autopct=make_autopct(sectors['number']),
        fontsize=12,
        legend=False,
        rotatelabels=True,
        pctdistance=0.8,
    )
    ax.axis("off")
    ax.set_title(column_name+"\n\n\n")

for column_name in ('ICB Supersector name', 'ICB Sector name', 'ICB Industry name'):
    make_pie_chart(column_name)

## Which industries and sectors are green?

TODO: automate this process

A sector is considered green if it is in the list of "Green economy sectors" by Igor Mishevski.
https://medium.com/@mishevski/green-economy-sectors-ceecabeec7f0

In [None]:
# Number of bonds per ICB industry, supersector and sector
industries = get_all_sectors('ICB Industry name')
super_sectors = get_all_sectors('ICB Supersector name')
sectors = get_all_sectors('ICB Sector name')

In [None]:
# Industry / supersector / sector names that are treated as green
list_of_green_sectors = ["Real Estate", "Energy", "Oil, Gas and Coal"] # Buildings, Energy supply, ... TODO: update list

In [None]:
# Flag every industry, supersector and sector whose name is in list_of_green_sectors
# (1 = green, 0 = not green).
# Each table is flagged over its own index. The original used one loop bounded by the
# number of industries for all three tables, which skips entries of a longer table and
# raises IndexError on a shorter one.
for groups in (industries, super_sectors, sectors):
    groups["green"] = groups.index.isin(list_of_green_sectors).astype(int)
print(industries)
print(super_sectors)
print(sectors)

In [None]:
 def show_table(column_names, row_names, content, title='Percentage of industries or sectors that is green'):
    """Render a table as a matplotlib figure and show it.

    Parameters
    ----------
    column_names : list
        Column headers.
    row_names : list
        Row headers.
    content : list of lists
        Cell values, one inner list per row.
    title : str, optional
        Title drawn above the table; defaults to the title that used to be fixed.
    """
    fig, ax = plt.subplots()
    ax.set_axis_off()  # only the table is drawn, no axes
    ax.table(
        cellText = content,
        rowLabels = row_names,
        colLabels = column_names,
        rowColours =["c"] * len(row_names),
        colColours =["c"] * len(column_names),
        cellLoc ='center',
        loc ='upper left')

    ax.set_title(title, fontweight ="bold")

    plt.show()

In [None]:
def _green_shares(groups):
    """Return (fraction of groups flagged green, fraction of bonds that are in green groups)."""
    share_of_groups = groups["green"].sum()/len(groups.index)
    share_of_bonds = (groups["green"]*groups["number"]).sum()/groups["number"].sum()
    return share_of_groups, share_of_bonds

# First value: share of the represented industries (or sectors) considered green.
# Second value: share of the represented bonds that are in green industries (or sectors).
percentage_industries_green, percentage_bonds_green_industries = _green_shares(industries)
percentage_super_sectors_green, percentage_bonds_green_super_sectors = _green_shares(super_sectors)
percentage_sectors_green, percentage_bonds_green_sectors = _green_shares(sectors)

show_table(
    ["Compared to number of sectors", "Compared to number of bonds"],
    ["Industries", "Super sectors", "Sectors"],
    [
        [percentage_industries_green.round(3), percentage_bonds_green_industries.round(3)],
        [percentage_super_sectors_green.round(3), percentage_bonds_green_super_sectors.round(3)],
        [percentage_sectors_green.round(3), percentage_bonds_green_sectors.round(3)],
    ],
)

# Environmental Analysis

## CO2 Data/Graphs

### Spaghetti plot CO$_2$

In [None]:
co2_columns = ["CO2", "CO2_1"]
co2_data = eikon_data_complete[["ISSUER", "CO2", "CO2_1"]] # company name and CO2 subset
company_co2_data = co2_data.drop_duplicates(subset= ["ISSUER"]) # one row per company
# drop companies for which either CO2 value is the string '0'
company_co2_data = company_co2_data[(company_co2_data.CO2 != '0') & (company_co2_data.CO2_1 != '0')]
company_co2_data = company_co2_data.reset_index(drop=True)
# Decimal commas -> decimal points, then convert to floats. Only the CO2 columns are
# touched, so commas inside company names are left alone.
company_co2_data[co2_columns] = (
    company_co2_data[co2_columns]
    .replace(to_replace = '[,]', value ='.', regex=True)
    .astype(float)
)

# overall increase or decrease: second value minus first value per company
slopes = company_co2_data['CO2_1'] - company_co2_data['CO2']
# Could cluster by sector. Hard to do anything else with 2 data points.
print("Sum of the changes in CO2 over all companies:", slopes.sum())


# making data easier to graph: one column per company, one row per measurement
co2graph_data = company_co2_data[["CO2", "CO2_1"]]
co2graph_data = co2graph_data.transpose()
co2graph_data.insert(0, "x", [0, 1], True)


# The seaborn style sheets were renamed in newer matplotlib versions; use whichever exists.
style_name = next(
    (name for name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid') if name in plt.style.available),
    'default',
)
plt.style.use(style_name)

# spaghetti plot of CO2 emissions: one line per company
plt.figure(figsize=(20,20))
for column in co2graph_data.drop(columns=["x"]):
    plt.plot(co2graph_data["x"], co2graph_data[column], marker='', linewidth=1, alpha=0.9)
plt.xticks([0, 1], co2_columns)
plt.title("CO$_2$ per company")

plt.show()



### Histogram change in CO$_2$

In [None]:
# Distribution of the per-company change in CO2 (second value minus first value)
plt.hist(x=slopes, bins=7,alpha=0.7)
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title(r'Histogram of the change in normalized CO$_2$ emission during the CSPP')  # typo "nomalized" fixed
plt.show()

### emission of 2021 as a function of the emission in 2015

In [None]:
# One point per company: first CO2 value on the x-axis, second CO2 value on the y-axis
fig, ax = plt.subplots()
ax.plot(company_co2_data["CO2"], company_co2_data["CO2_1"], 'o')
ax.set_title("emission of 2021 as a function of the emission in 2015", size=15)
ax.set_xlabel("emission in 2015", size=15)
ax.set_ylabel("emission in 2021", size=15)
plt.show()

Make a linear fit through the points of this graph (see the course GitHub: data analytics > gradient_descent).

In [None]:
from sklearn.linear_model import LinearRegression
# We will use the module Linear Regression of sklearn to perform the analysis
# Initialize the model
ols = LinearRegression()
# Fit the model to the data:
# x = the "CO2" column reshaped to a single-feature 2-D array, y = the "CO2_1" column
ols.fit(company_co2_data["CO2"].values.reshape(-1, 1),company_co2_data["CO2_1"])

# Report the fitted line as "intercept + slope x", rounded to 3 decimals
print('Fit is of the form:',np.round(ols.intercept_,3),'+',np.round(ols.coef_[0],3),'x')

As the slope is slightly below one, we can conclude that in general, the normalized CO$_2$ emission decreases.

In [None]:
fig, ax = plt.subplots(figsize=(10,5))

# the data points
ax.plot(company_co2_data["CO2"], company_co2_data["CO2_1"], 'o', label="data")

# the fitted line, evaluated from 0 up to 100 beyond the largest x value
x = np.arange(0,max((company_co2_data["CO2"])+100))
y = ols.intercept_ + ols.coef_[0]*x
ax.plot(x, y, 'r-', label="fit")

# title, axis labels and legend
ax.set_title("emission of 2021 as a function of the emission in 2015", size=15)
ax.set_xlabel("emission in 2015", size=15)
ax.set_ylabel("emission in 2021", size=15)
ax.legend(fontsize="15")

plt.show()

## ESG data/plots

In [None]:
column_names_esg_company_data = ["ISSUER", "ESG Score 2015", "ESG Score 2016", "ESG Score 2017",
                                "ESG Score 2018", "ESG Score 2019", "ESG Score 2020",
                                "ESG Score 2021"]
score_columns = column_names_esg_company_data[1:8]  # the seven yearly ESG score columns

# one row per company; .copy() gives an independent frame that can be modified safely
esg_data = eikon_data_complete[column_names_esg_company_data]
esg_company_data = esg_data.drop_duplicates(subset= ["ISSUER"]).copy()

# Decimal commas -> decimal points, and zeros -> NaN (these are easier to fill in).
# Only the score columns are touched, so commas inside company names are left alone.
esg_company_data[score_columns] = (
    esg_company_data[score_columns]
    .replace(to_replace = '[,]', value ='.', regex=True)
    .replace(to_replace = '0', value = np.nan)
)

# remove rows with no data for ESG score
esg_company_data = esg_company_data.dropna(axis=0, how='all', subset=score_columns)

# converting all numbers to floats
esg_company_data[score_columns] = esg_company_data[score_columns].astype(float)

# interpolate data that is missing, along each company's row and in both directions
esg_company_data[score_columns] = esg_company_data[score_columns].interpolate(
    method='linear', axis=1, limit_direction='both')
#TODO: If we want to keep this apart, we can make a new variable holding the filled in dataframe

# reset the indexes
esg_company_data = esg_company_data.reset_index(drop=True)

### Spaghetti plot ESG Scores

In [None]:
# One line per company: ESG score for each of the years 2015 up to and including 2021
fig, ax = plt.subplots(figsize=(10,10))
years = range(2015,2022)
for _, company_scores in esg_company_data.iterrows():
    ax.plot(years, company_scores[1:8])
plt.show()

### Some Descriptive Statistics

In [None]:
# Summary statistics of the ESG data
esg_company_data.describe()

In [None]:
plt.figure(figsize=(10,5))

years = range(2015,2022)

# Statistics are taken over the score columns only. Including the ISSUER column (text)
# makes mean()/std() fail on recent pandas versions and would not match the 7 years.
esg_scores = esg_company_data.iloc[:,1:]

mean = esg_scores.mean().values
error = esg_scores.std().values

max_values = esg_scores.max().values
min_values = esg_scores.min().values

# mean score per year with the standard deviation as error bar
plt.errorbar(years, mean, yerr=error, ecolor='r', capsize=10)

plt.show()

In this figure, we can clearly see that the average ESG-score of the companies increases.

In [None]:
# One box per column, skipping the first column (ISSUER)
esg_company_data.iloc[:,1:].boxplot(figsize=(10,10))

### ESG evolution of each company

Fit a straight line to the ESG score evolution of each company.

In [None]:
from sklearn.linear_model import LinearRegression
# We will use the module Linear Regression of sklearn to perform the analysis

years = np.arange(0,7)             # 0..6, one value per ESG score column
time_axis = years.reshape(-1, 1)   # single feature, as a 2-D array
ols = LinearRegression()           # one model object, refitted for every company

list_of_coef, list_of_intercepts = [], []
for _, company_scores in esg_company_data.iterrows():
    fitted = ols.fit(time_axis, company_scores[1:8])
    list_of_coef.append(fitted.coef_[0])            # slope of this company's line
    list_of_intercepts.append(fitted.intercept_)    # fitted value at years == 0

Make a boxplot of the coefficients of these fits. This should give an indication about the general evolution (increase vs decrease).

In [None]:
# Distribution of the fitted slopes over all companies
fig, ax = plt.subplots(figsize=(2,5))
ax.boxplot(list_of_coef)
plt.show()

This figure shows us that only roughly 25% of companies have a negative slope, while the other 75% have an increasing ESG score.

Let us now try to find a relation between the initial ESG score (the intercept of each fit) and the rate at which the score changes (the slope of each fit). This can be done by plotting one against the other and calculating the correlation matrix.

In [None]:
# One point per company: fitted intercept on the x-axis, fitted slope on the y-axis
fig, ax = plt.subplots()
ax.plot(list_of_intercepts, list_of_coef, 'b.')
plt.show()

In [None]:
# Correlation coefficient matrix of the fitted slopes and intercepts
np.corrcoef(list_of_coef, list_of_intercepts)

There seems to be some negative correlation ==> lower initial value, faster increase in ESG score.