# Data collection (returns)

In [4]:
import pandas as pd
import numpy as np
import os.path
import importlib
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from dateutil.relativedelta import relativedelta

import utilities.variables as variables
import utilities.api_ticker_service as data_ticker_service
importlib.reload(data_ticker_service)
importlib.reload(variables)

<module 'utilities.variables' from '/Users/herbishtini/Documents/UNI/Master Thesis/sustainability_portfolio_optimisation/utilities/variables.py'>

### Reading file

In [5]:
df = pd.read_csv('../data/data_1_esg_raw.csv') 
df.head(5)

Unnamed: 0,company_name,ticker_symbol,company_esg_score,company_esg_score_group,industry
0,HENSOLDT AG,ETR:HAG,14.9,Low ESG Risk,Aerospace & Defense
1,Aptiv Plc,NYS:APTV,9.1,Negligible ESG Risk,Auto Components
2,"BorgWarner, Inc.",NYS:BWA,10.0,Negligible ESG Risk,Auto Components
3,Bosch Fren Sistemleri Sanayi ve Ticaret AS,IST:BFREN.E,8.1,Negligible ESG Risk,Auto Components
4,Bosch Ltd.,BOM:500530,6.5,Negligible ESG Risk,Auto Components


### Sort by ESG score

In [6]:
df_sorted = df.sort_values('company_esg_score')
df_sorted

Unnamed: 0,company_name,ticker_symbol,company_esg_score,company_esg_score_group,industry
4184,ROADIS Transportation Holding SLU,-,4.0,Negligible ESG Risk,Transportation Infrastructure
1018,JAB Holding Co. SARL,-,4.2,Negligible ESG Risk,Diversified Financials
2037,Dexus,ASX:DXS,4.2,Negligible ESG Risk,Real Estate
4180,Entidad Pública Empresarial ADIF-Alta Velocidad,-,4.3,Negligible ESG Risk,Transportation Infrastructure
3498,RS Group Plc,LON:RS1,4.5,Negligible ESG Risk,Technology Hardware
...,...,...,...,...,...
1428,International Finance Facility for Immunisatio...,-,20.0,Low ESG Risk,Healthcare
3843,Orange SA,PAR:ORA,20.0,Low ESG Risk,Telecommunication Services
4365,Promigas SA ESP,BOG:PROMIGAS,20.0,Low ESG Risk,Utilities
3950,Li & Fung Ltd.,-,20.0,Low ESG Risk,Textiles & Apparel


In [7]:
len(df_sorted)

4386

### Group by industry and company-esg-score-group

In [8]:
# Group by the first and second columns and count the occurrences
df_grouped = df.groupby(['industry', 'company_esg_score_group']).size().reset_index(name='Count')
df_grouped = df_grouped.sort_values(by=['industry', 'company_esg_score_group'], ascending=[True, False])
df_grouped

Unnamed: 0,industry,company_esg_score_group,Count
0,Aerospace & Defense,Low ESG Risk,1
2,Auto Components,Negligible ESG Risk,12
1,Auto Components,Low ESG Risk,105
3,Automobiles,Low ESG Risk,22
5,Banks,Negligible ESG Risk,43
...,...,...,...
63,Transportation,Low ESG Risk,107
65,Transportation Infrastructure,Negligible ESG Risk,21
64,Transportation Infrastructure,Low ESG Risk,92
67,Utilities,Negligible ESG Risk,11


### Remove stocks without a valid stock exchange

In [9]:
len(df_sorted[df_sorted['ticker_symbol'] != '-'])

3801

In [10]:
df_sorted[df_sorted['ticker_symbol'] != '-']

Unnamed: 0,company_name,ticker_symbol,company_esg_score,company_esg_score_group,industry
2037,Dexus,ASX:DXS,4.2,Negligible ESG Risk,Real Estate
3498,RS Group Plc,LON:RS1,4.5,Negligible ESG Risk,Technology Hardware
3486,"Kimball Electronics, Inc.",NAS:KE,4.5,Negligible ESG Risk,Technology Hardware
2100,TAG Immobilien AG,ETR:TEG,4.6,Negligible ESG Risk,Real Estate
2105,Unibail-Rodamco-Westfield SE,PAR:URW,4.7,Negligible ESG Risk,Real Estate
...,...,...,...,...,...
1140,Nippon Life India Asset Management Ltd.,BOM:540767,20.0,Low ESG Risk,Diversified Financials
3850,"Taiwan Mobile Co., Ltd.",TAI:3045,20.0,Low ESG Risk,Telecommunication Services
4160,Union Pacific Corp.,NYS:UNP,20.0,Low ESG Risk,Transportation
3843,Orange SA,PAR:ORA,20.0,Low ESG Risk,Telecommunication Services


### Create new columns "stock_exchange" & "stock_ticker_symbol"

In [11]:
# Create new column stock_exchange by splitting ticker_symbol
df_sorted['stock_exchange'] = df_sorted['ticker_symbol'].str.split(':').str[0]

In [12]:
# Create new column stock_ticker_symbol by splitting ticker_symbol
df_sorted['stock_ticker_symbol'] = df_sorted['ticker_symbol'].str.split(':').str[1]

In [13]:
df_sorted[df_sorted['ticker_symbol'] != '-']['stock_exchange'].value_counts('')

NYS    551
NAS    408
TKS    372
LON    226
TAI    183
      ... 
LIM      1
LIT      1
BRA      1
FRA      1
BER      1
Name: stock_exchange, Length: 62, dtype: int64

## Selected Stock-Exchanges

In [14]:
len(df_sorted)

4386

### Filter Stock Exchanges

In [15]:
# drop existing column "stock_ticker_symbol"
df_sorted = df_sorted.drop(columns=['ticker_symbol'])

In [16]:
# Filter by a single column value 
df_filtered = df_sorted[df_sorted['stock_exchange'].isin(['NYS', 'NAS', 'TKS', 'LON', 'ETR'])]
len(df_filtered)

1662

In [17]:
df_filtered

Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol
3498,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1
3486,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE
2100,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG
2068,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG
521,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS
...,...,...,...,...,...,...
3676,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI
2905,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON
1963,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO
3057,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055


#### Group by industry and company-esg-score-group

In [18]:
# Group by the first and second columns and count the occurrences
df_filtered_grouped = df_filtered.groupby(['industry', 'company_esg_score_group']).size().reset_index(name='Count')
df_filtered_grouped = df_filtered_grouped.sort_values(by=['industry', 'company_esg_score_group'], ascending=[True, False])
df_filtered_grouped

Unnamed: 0,industry,company_esg_score_group,Count
0,Aerospace & Defense,Low ESG Risk,1
2,Auto Components,Negligible ESG Risk,5
1,Auto Components,Low ESG Risk,45
3,Automobiles,Low ESG Risk,4
5,Banks,Negligible ESG Risk,1
4,Banks,Low ESG Risk,15
6,Building Products,Low ESG Risk,13
7,Chemicals,Low ESG Risk,11
9,Commercial Services,Negligible ESG Risk,32
8,Commercial Services,Low ESG Risk,83


In [19]:
df_filtered.to_csv('../data/data_2_esg_filtered.csv')

### Adding market capital of companies

In [20]:
if 'df_market_cap' not in locals():
    df_market_cap = pd.read_csv('../data/data_2_esg_filtered.csv')
df_market_cap

Unnamed: 0.1,Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol
0,3498,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1
1,3486,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE
2,2100,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG
3,2068,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG
4,521,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS
...,...,...,...,...,...,...,...
1657,3676,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI
1658,2905,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON
1659,1963,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO
1660,3057,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055


#### Updating `stock_ticker_symbol` for Frankfurt and London
Frankfurt & London tickers require a postfix '.DE' & '.L' respectively for Yahoo API to recognize them.

In [21]:
# Postfix to add
frankfurt_postfix = '.DE'
london_postfix = '.L'
tokyo_postfix = '.T'

# Condition: Add postfix '.DE' to 'stock_exchange' column if the 'stock_ticker_symbol' column is 'ETR' (Frankfurt)
df_market_cap['stock_ticker_symbol'] = np.where((df_market_cap['stock_exchange'] == 'ETR'), df_market_cap['stock_ticker_symbol'] + frankfurt_postfix, df_market_cap['stock_ticker_symbol'])

# Condition: Add postfix '.L' to 'stock_exchange' column if the 'stock_ticker_symbol' column is 'Lon' (London)
df_market_cap['stock_ticker_symbol'] = np.where((df_market_cap['stock_exchange'] == 'LON'), df_market_cap['stock_ticker_symbol'] + london_postfix, df_market_cap['stock_ticker_symbol'])

# Condition: Add postfix '.T' to 'stock_exchange' column if the 'stock_ticker_symbol' column is 'TKS' (Tokyo)
df_market_cap['stock_ticker_symbol'] = np.where((df_market_cap['stock_exchange'] == 'TKS'), df_market_cap['stock_ticker_symbol'] + tokyo_postfix, df_market_cap['stock_ticker_symbol'])

In [22]:
df_market_cap[['stock_exchange', 'stock_ticker_symbol']]

Unnamed: 0,stock_exchange,stock_ticker_symbol
0,LON,RS1.L
1,NAS,KE
2,ETR,TEG.DE
3,ETR,LEG.DE
4,NYS,SCS
...,...,...
1657,NAS,KVHI
1658,LON,MOON.L
1659,NAS,NEO
1660,TKS,6055.T


## Fetch Market-Capital

In [31]:
# Fetch market cap in batches of 50 with a 3-second delay between batches
if os.path.isfile('../data/data_3_market_caps.csv'):
    df_market_caps = data_ticker_service.fetch_market_cap(df_market_cap['stock_ticker_symbol'], batch_size=50, delay=3)

df_market_caps


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/TRTN.PRE?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=TRTN.PRE&crumb=Au8nxETrnR0
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/BT.A.L?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=BT.A.L&crumb=Au8nxETrnR0


Unnamed: 0,stock_ticker_symbol,market_capital,trailing_pe,beta,return_on_equity,earnings_growth
0,RS1.L,3668790784,19.858974,0.863,0.13226,-0.368
1,KE,428135136,21.370369,1.258,0.03854,
2,TEG.DE,2765612288,,1.037,-0.03766,
3,LEG.DE,6927171072,,0.957,-0.08126,
4,SCS,1492859648,17.210526,1.314,0.10633,8.0
...,...,...,...,...,...,...
1657,KVHI,91276288,,0.594,-0.14536,
1658,MOON.L,701879680,2035.0,1.238,,-0.072
1659,NEO,1940312192,,1.191,-0.08391,
1660,6055.T,170442342400,27.933996,0.585,0.13585,0.379


In [32]:
df_market_caps.to_csv('../data/data_3_market_caps.csv')

In [50]:
df_market_caps = pd.read_csv('../data/data_3_market_caps.csv', index_col=0)

In [52]:
df_market_caps

Unnamed: 0,stock_ticker_symbol,market_capital,trailing_pe,beta,return_on_equity,earnings_growth
0,RS1.L,3.668791e+09,19.858974,0.863,0.13226,-0.368
1,KE,4.281351e+08,21.370369,1.258,0.03854,
2,TEG.DE,2.765612e+09,,1.037,-0.03766,
3,LEG.DE,6.927171e+09,,0.957,-0.08126,
4,SCS,1.492860e+09,17.210526,1.314,0.10633,8.000
...,...,...,...,...,...,...
1657,KVHI,9.127629e+07,,0.594,-0.14536,
1658,MOON.L,7.018797e+08,2035.000000,1.238,,-0.072
1659,NEO,1.940312e+09,,1.191,-0.08391,
1660,6055.T,1.704423e+11,27.933996,0.585,0.13585,0.379


Merging ESG score with market-capital of companies

In [53]:
df_market_cap = pd.merge(df_market_cap, df_market_caps, on='stock_ticker_symbol', left_index=False, right_index=False)
df_market_cap

Unnamed: 0.1,Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital_x,market_capital_euro,market_capital_y,trailing_pe,beta,return_on_equity,earnings_growth
0,3498,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1.L,3668790784,4365861032.96,3.668791e+09,19.858974,0.863,0.13226,-0.368
1,3486,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE,428135136,393884325.12,4.281351e+08,21.370369,1.258,0.03854,
2,2100,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG.DE,2765612288,2765612288,2.765612e+09,,1.037,-0.03766,
3,2068,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG.DE,6927171072,6927171072,6.927171e+09,,0.957,-0.08126,
4,521,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS,1492859648,1373430876.16,1.492860e+09,17.210526,1.314,0.10633,8.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,3676,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI,91276288,91276288,9.127629e+07,,0.594,-0.14536,
1658,2905,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON.L,701879680,701879680,7.018797e+08,2035.000000,1.238,,-0.072
1659,1963,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO,1940312192,1940312192,1.940312e+09,,1.191,-0.08391,
1660,3057,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055.T,170442342400,170442342400,1.704423e+11,27.933996,0.585,0.13585,0.379


In [64]:
# Cleaning merge generated columns
df_market_cap.rename(columns={'market_capital_x': 'market_capital'}, inplace=True)
df_market_cap.drop(['market_capital_y'], axis=1, inplace=True)

In [65]:
df_market_cap

Unnamed: 0.1,Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital,market_capital_euro,trailing_pe,beta,return_on_equity,earnings_growth
0,3498,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1.L,3668790784,3668790784,19.858974,0.863,0.13226,-0.368
1,3486,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE,428135136,428135136,21.370369,1.258,0.03854,
2,2100,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG.DE,2765612288,2765612288,,1.037,-0.03766,
3,2068,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG.DE,6927171072,6927171072,,0.957,-0.08126,
4,521,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS,1492859648,1492859648,17.210526,1.314,0.10633,8.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,3676,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI,91276288,91276288,,0.594,-0.14536,
1658,2905,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON.L,701879680,701879680,2035.000000,1.238,,-0.072
1659,1963,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO,1940312192,1940312192,,1.191,-0.08391,
1660,3057,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055.T,170442342400,170442342400,27.933996,0.585,0.13585,0.379


### Market Capital to Euro

To make sense of the market-capital value we have to first pick a preferred currency and convert them all to it.
For this purpose we will be using Euro as the standard currency.


In [66]:
df_market_cap['market_capital_euro'] = df_market_cap['market_capital']

Exchange Rate (19.08.2024)

In [67]:
exchange_rate = {
    "yen_to_euro": 0.0058,
    "us_to_euro": 0.92,
    "pound_to_euro": 1.19
}

In [68]:
for i, row in enumerate(df_market_cap):
    # Yen
    if df_market_cap['stock_exchange'][i] == 'TKS':
        df_market_cap.at[i, 'market_capital_euro'] = df_market_cap['market_capital'][i] * exchange_rate['pound_to_euro']
    # Dollar
    if df_market_cap['stock_exchange'][i] == 'NYS':
        df_market_cap.at[i, 'market_capital_euro'] = df_market_cap['market_capital'][i] * exchange_rate['us_to_euro']
    if df_market_cap['stock_exchange'][i] == 'NAS':
        df_market_cap.at[i, 'market_capital_euro'] = df_market_cap['market_capital'][i] * exchange_rate['us_to_euro']
    # Pound
    if df_market_cap['stock_exchange'][i] == 'LON':
        df_market_cap.at[i, 'market_capital_euro'] = df_market_cap['market_capital'][i] * exchange_rate['pound_to_euro']

In [69]:
df_market_cap.to_csv('../data/data_4_market_cap_euro.csv')
df_market_cap

Unnamed: 0.1,Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital,market_capital_euro,trailing_pe,beta,return_on_equity,earnings_growth
0,3498,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1.L,3668790784,4365861032.96,19.858974,0.863,0.13226,-0.368
1,3486,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE,428135136,393884325.12,21.370369,1.258,0.03854,
2,2100,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG.DE,2765612288,2765612288,,1.037,-0.03766,
3,2068,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG.DE,6927171072,6927171072,,0.957,-0.08126,
4,521,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS,1492859648,1373430876.16,17.210526,1.314,0.10633,8.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,3676,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI,91276288,91276288,,0.594,-0.14536,
1658,2905,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON.L,701879680,701879680,2035.000000,1.238,,-0.072
1659,1963,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO,1940312192,1940312192,,1.191,-0.08391,
1660,3057,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055.T,170442342400,170442342400,27.933996,0.585,0.13585,0.379


### Column normalization

Since ESG-Score and market-capital are on different scales, it's important to normalize them so they can be compared directly.

In [78]:
df_scaled = pd.read_csv('../data/data_4_market_cap_euro.csv', index_col=0)

In [71]:
# Normalize ESG-Score and market-capital
scaler = MinMaxScaler()
df_scaled[['company_esg_score_scale', 'market_capital_scale']] = scaler.fit_transform(df_scaled[['company_esg_score', 'market_capital_euro']])

In [80]:
df_scaled

Unnamed: 0.1,Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital,market_capital_euro,trailing_pe,beta,return_on_equity,earnings_growth
0,3498,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1.L,3.668791e+09,4.365861e+09,19.858974,0.863,0.13226,-0.368
1,3486,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE,4.281351e+08,3.938843e+08,21.370369,1.258,0.03854,
2,2100,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG.DE,2.765612e+09,2.765612e+09,,1.037,-0.03766,
3,2068,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG.DE,6.927171e+09,6.927171e+09,,0.957,-0.08126,
4,521,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS,1.492860e+09,1.373431e+09,17.210526,1.314,0.10633,8.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,3676,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI,9.127629e+07,9.127629e+07,,0.594,-0.14536,
1658,2905,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON.L,7.018797e+08,7.018797e+08,2035.000000,1.238,,-0.072
1659,1963,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO,1.940312e+09,1.940312e+09,,1.191,-0.08391,
1660,3057,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055.T,1.704423e+11,1.704423e+11,27.933996,0.585,0.13585,0.379


To create a final score, we use the ESG score and the market capital score, applying the following weights: the ESG score is multiplied by a coefficient of 0.25, and the market capital score is multiplied by a coefficient of 0.75.

## Introducing score column

In [73]:
# Create final score
coefficient_esg_score = variables.ESG_WEIGHT
coefficient_market_capital = variables.MARKET_CAPITAL_WEIGHT
df_scaled['score'] = df_scaled['company_esg_score_scale'] * coefficient_esg_score + df_scaled['market_capital_scale'] * coefficient_market_capital

# Sort by score
df_scaled.sort_values(by='score', ascending=False)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital,market_capital_euro,trailing_pe,beta,return_on_equity,earnings_growth,company_esg_score_scale,market_capital_scale,score
734,734,790,Sony Group Corp.,15.6,Low ESG Risk,Consumer Durables,TKS,6758.T,1.595672e+13,1.595672e+13,16.260162,0.820,0.13113,0.078,0.716129,1.000000,0.929032
456,456,3671,KEYENCE Corp.,13.7,Low ESG Risk,Technology Hardware,TKS,6861.T,1.561133e+13,1.561133e+13,41.322422,0.703,0.13956,0.099,0.593548,0.978355,0.882153
486,486,2820,"FAST RETAILING CO., LTD.",14.0,Low ESG Risk,Retailing,TKS,9983.T,1.352642e+13,1.352642e+13,36.363636,0.687,0.19893,0.374,0.612903,0.847695,0.788997
1627,1627,1930,"Daiichi Sankyo Co., Ltd.",19.8,Low ESG Risk,Pharmaceuticals,TKS,4568.T,1.097793e+13,1.097793e+13,48.309906,0.282,0.14113,0.500,0.987097,0.687982,0.762761
1631,1631,3839,KDDI Corp.,19.8,Low ESG Risk,Telecommunication Services,TKS,9433.T,9.803605e+12,9.803605e+12,15.948885,0.069,0.11472,0.036,0.987097,0.614387,0.707565
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1037,1037,679,Triton International Ltd.,17.1,Low ESG Risk,Commercial Services,NYS,TRTN.PRE,,,,,,,0.812903,,
1135,1135,341,National Rural Utilities Cooperative Finance C...,17.6,Low ESG Risk,Banks,NYS,NRUC,,,,,,,0.845161,,
1200,1200,3675,Kontron AG,17.9,Low ESG Risk,Technology Hardware,ETR,KTN.DE,,,,,,,0.864516,,
1236,1236,3829,BT Group Plc,18.1,Low ESG Risk,Telecommunication Services,LON,BT.A.L,,,,,,,0.877419,,


In [74]:
df_scaled

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital,market_capital_euro,trailing_pe,beta,return_on_equity,earnings_growth,company_esg_score_scale,market_capital_scale,score
0,0,3498,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1.L,3.668791e+09,4.365861e+09,19.858974,0.863,0.13226,-0.368,0.000000,0.000273,0.000205
1,1,3486,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE,4.281351e+08,3.938843e+08,21.370369,1.258,0.03854,,0.000000,0.000024,0.000018
2,2,2100,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG.DE,2.765612e+09,2.765612e+09,,1.037,-0.03766,,0.006452,0.000173,0.001743
3,3,2068,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG.DE,6.927171e+09,6.927171e+09,,0.957,-0.08126,,0.038710,0.000434,0.010003
4,4,521,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS,1.492860e+09,1.373431e+09,17.210526,1.314,0.10633,8.000,0.051613,0.000086,0.012968
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,1657,3676,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI,9.127629e+07,9.127629e+07,,0.594,-0.14536,,1.000000,0.000006,0.250004
1658,1658,2905,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON.L,7.018797e+08,7.018797e+08,2035.000000,1.238,,-0.072,1.000000,0.000044,0.250033
1659,1659,1963,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO,1.940312e+09,1.940312e+09,,1.191,-0.08391,,1.000000,0.000121,0.250091
1660,1660,3057,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055.T,1.704423e+11,1.704423e+11,27.933996,0.585,0.13585,0.379,1.000000,0.010681,0.258011


In [75]:
df_scaled = df_scaled.loc[:, ~df_scaled.columns.str.contains('^Unnamed')]
df_scaled

Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital,market_capital_euro,trailing_pe,beta,return_on_equity,earnings_growth,company_esg_score_scale,market_capital_scale,score
0,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1.L,3.668791e+09,4.365861e+09,19.858974,0.863,0.13226,-0.368,0.000000,0.000273,0.000205
1,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE,4.281351e+08,3.938843e+08,21.370369,1.258,0.03854,,0.000000,0.000024,0.000018
2,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG.DE,2.765612e+09,2.765612e+09,,1.037,-0.03766,,0.006452,0.000173,0.001743
3,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG.DE,6.927171e+09,6.927171e+09,,0.957,-0.08126,,0.038710,0.000434,0.010003
4,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS,1.492860e+09,1.373431e+09,17.210526,1.314,0.10633,8.000,0.051613,0.000086,0.012968
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI,9.127629e+07,9.127629e+07,,0.594,-0.14536,,1.000000,0.000006,0.250004
1658,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON.L,7.018797e+08,7.018797e+08,2035.000000,1.238,,-0.072,1.000000,0.000044,0.250033
1659,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO,1.940312e+09,1.940312e+09,,1.191,-0.08391,,1.000000,0.000121,0.250091
1660,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055.T,1.704423e+11,1.704423e+11,27.933996,0.585,0.13585,0.379,1.000000,0.010681,0.258011


In [76]:
#columns_relevant = ['company_name', 'industry', 'stock_exchange', 'stock_ticker_symbol', 'market_capital_euro', 'score']
df_scaled.to_csv('../data/data_5_scaled.csv')

In [77]:
df_scaled

Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital,market_capital_euro,trailing_pe,beta,return_on_equity,earnings_growth,company_esg_score_scale,market_capital_scale,score
0,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1.L,3.668791e+09,4.365861e+09,19.858974,0.863,0.13226,-0.368,0.000000,0.000273,0.000205
1,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE,4.281351e+08,3.938843e+08,21.370369,1.258,0.03854,,0.000000,0.000024,0.000018
2,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG.DE,2.765612e+09,2.765612e+09,,1.037,-0.03766,,0.006452,0.000173,0.001743
3,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG.DE,6.927171e+09,6.927171e+09,,0.957,-0.08126,,0.038710,0.000434,0.010003
4,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS,1.492860e+09,1.373431e+09,17.210526,1.314,0.10633,8.000,0.051613,0.000086,0.012968
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,"KVH Industries, Inc. (Delaware)",20.0,Low ESG Risk,Technology Hardware,NAS,KVHI,9.127629e+07,9.127629e+07,,0.594,-0.14536,,1.000000,0.000006,0.250004
1658,Moonpig Group Plc,20.0,Low ESG Risk,Retailing,LON,MOON.L,7.018797e+08,7.018797e+08,2035.000000,1.238,,-0.072,1.000000,0.000044,0.250033
1659,"NeoGenomics, Inc.",20.0,Low ESG Risk,Pharmaceuticals,NAS,NEO,1.940312e+09,1.940312e+09,,1.191,-0.08391,,1.000000,0.000121,0.250091
1660,"Japan Material Co., Ltd.",20.0,Low ESG Risk,Semiconductors,TKS,6055.T,1.704423e+11,1.704423e+11,27.933996,0.585,0.13585,0.379,1.000000,0.010681,0.258011


## Historical returns
Historical returns are analyzed over the past 25 years. This timeframe is chosen because it captures both short-term volatility and long-term market disruptions, while filtering out the daily or monthly fluctuations.

In [90]:
df_returns = pd.read_csv('../data/data_5_scaled.csv', index_col=0)

In [93]:
df_returns.head()

Unnamed: 0,company_name,company_esg_score,company_esg_score_group,industry,stock_exchange,stock_ticker_symbol,market_capital,market_capital_euro,trailing_pe,beta,return_on_equity,earnings_growth,company_esg_score_scale,market_capital_scale,score
0,RS Group Plc,4.5,Negligible ESG Risk,Technology Hardware,LON,RS1.L,3668791000.0,4365861000.0,19.858974,0.863,0.13226,-0.368,0.0,0.000273,0.000205
1,"Kimball Electronics, Inc.",4.5,Negligible ESG Risk,Technology Hardware,NAS,KE,428135100.0,393884300.0,21.370369,1.258,0.03854,,0.0,2.4e-05,1.8e-05
2,TAG Immobilien AG,4.6,Negligible ESG Risk,Real Estate,ETR,TEG.DE,2765612000.0,2765612000.0,,1.037,-0.03766,,0.006452,0.000173,0.001743
3,LEG Immobilien SE,5.1,Negligible ESG Risk,Real Estate,ETR,LEG.DE,6927171000.0,6927171000.0,,0.957,-0.08126,,0.03871,0.000434,0.010003
4,"Steelcase, Inc.",5.3,Negligible ESG Risk,Commercial Services,NYS,SCS,1492860000.0,1373431000.0,17.210526,1.314,0.10633,8.0,0.051613,8.6e-05,0.012968


In [94]:
ticker_list = df_returns['stock_ticker_symbol'].to_list()
# Format the new date as 'YYYY-MM-DD'
start_date = (datetime.now() - relativedelta(years=variables.max_span_years)).strftime('%Y-%m-%d')
end_date = datetime.now().strftime('%Y-%m-%d')
#
#monthly_returns = data_ticker_service.get_monthly_returns(tickers, start_date, end_date)
df_monthly_adj_close = data_ticker_service.get_returns_in_chunks(ticker_list, start_date, end_date, interval='1mo', chunk_size=20, sleep_duration=5)
df_monthly_adj_close

Downloading data for tickers: ['RS1.L', 'KE', 'TEG.DE', 'LEG.DE', 'SCS', 'HNI', 'AVT', 'ACCO', 'BRNK.DE', 'VNA.DE', '7912.T', 'KEYS', 'SGRO.L', 'CBRE', 'BRC', 'PGRE', 'PSON.L', 'BBOX.L', 'REL.L', 'TMV.DE']
Pausing to avoid overloading the API...
Downloading data for tickers: ['BHE', 'PLXS', 'GPE.L', 'HAS', 'CHGG', 'JLL', 'LAND.L', 'OLED', 'CDW', 'FLEX', 'REZI', 'TPL', 'ARW', 'FN', 'NOVT', 'RYN', 'SNX', '7911.T', 'CMPR', 'DLAR.L']
Pausing to avoid overloading the API...
Downloading data for tickers: ['2379.T', '6754.T', 'SHA.DE', 'TTMI', '3234.T', 'PBI', 'BYG.L', 'ASGN', 'COA.L', 'HOUS', 'VVV', 'PAGE.L', 'DB1.DE', 'AVB', '3309.T', 'STWD', 'GROW.L', 'MTO.L', 'KFY', 'MEI']
Pausing to avoid overloading the API...
Downloading data for tickers: ['BLND.L', 'RGLD', 'KFRC', 'IWG.L', '8954.T', 'ZIL2.DE', 'SANM', 'SXS.L', 'YETI', 'ACN', 'HPP', '7893.T', 'SVS.L', 'AGNC', 'CTS', 'IPG', 'BMI', 'FUTR.L', 'EBOX.L', 'SCSC']
Pausing to avoid overloading the API...
Downloading data for tickers: ['MAN', '


1 Failed download:
['TRTN.PRE']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Pausing to avoid overloading the API...
Downloading data for tickers: ['EXTR', 'AOF.DE', 'TGT', '2767.T', 'ASHM.L', '8985.T', 'STJ.L', '4452.T', 'RLJ', 'LNT', '2491.T', 'ATRI', 'P911.DE', '8804.T', '8802.T', 'ARHS', 'RDC.DE', 'HZO', '4751.T', 'MEDP']
Pausing to avoid overloading the API...
Downloading data for tickers: ['7751.T', 'BKNG', 'LSEG.L', 'AMN', 'ROST', 'CAL', '6951.T', '1973.T', '8439.T', 'MANH', '3492.T', '5105.T', 'EQH', 'PCA.L', 'EWCZ', '9101.T', 'HTZ', 'PAT.DE', 'TFC', 'OHI']
Pausing to avoid overloading the API...
Downloading data for tickers: ['TROW', 'FSLR', 'CCH.L', '9364.T', '7730.T', 'NEOG', 'HNR1.DE', '4619.T', 'DAY', '6592.T', 'SNDR', 'MONY.L', 'MDB', 'MEG', 'MERC', 'SHYF', 'WG.L', 'CHEF', 'HCSG', '2874.T']
Pausing to avoid overloading the API...
Downloading data for tickers: ['WU', 'APLE', '9068.T', 'MSGS', 'DBI', 'CON.DE', 'IHR.L', 'ROK', 'GNTX', 'WDAY', 'CCS', 'INFA', '9831.T', 'COHR', '9401.T', 'CATO', '9719.T', '3148.T', 'RH', '8079.T']
Pausing to avoid overl


1 Failed download:
['SYAB.DE']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1mo 1999-09-11 -> 2024-09-11)')


Pausing to avoid overloading the API...
Downloading data for tickers: ['SAIA', 'KN', '6814.T', '7241.T', 'NDX1.DE', 'LINC', 'DAKT', 'CHCT', 'PRI', 'CHKP', 'HFFG', 'ALIT', 'NETW.L', 'TEAM', 'DGII', 'EVTC', 'HHFA.DE', 'AFL', 'SBGI', 'PECO']
Pausing to avoid overloading the API...
Downloading data for tickers: ['PUM.DE', '8129.T', 'NCNO', 'SBH', 'NWG.L', 'NEXN.L', 'EVT.DE', 'FI', 'JILL', '9404.T', 'UNFI', 'CBL', 'AHH', 'SFM', 'HR', 'KMI', 'KWS.L', '8766.T', '7740.T', '2168.T']
Pausing to avoid overloading the API...
Downloading data for tickers: ['6723.T', 'TWO', '3191.T', '9076.T', 'MPW', 'AEIS', 'AMKR', 'ZIG.L', '6923.T', 'KREF', 'AER', 'PFE', '4301.T', 'NVMI', 'LADR', '3046.T', '9024.T', '2170.T', '3612.T', 'YOU.L']
Pausing to avoid overloading the API...
Downloading data for tickers: ['KTN.DE', 'LIO.L', 'ICFI', 'INFN', '7294.T', 'HURN', 'SLP', '4716.T', '7599.T', 'UCTT', 'CHPT', 'CFLT', 'ONEW', 'VMUK.L', '7735.T', 'VYX', 'B4B.DE', 'XYL', 'RBLX', 'AYI']
Pausing to avoid overloading the


1 Failed download:
['BT.A.L']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Pausing to avoid overloading the API...
Downloading data for tickers: ['SHL.DE', 'CRL', 'AUTO.L', 'ADI', 'NSA', '8114.T', 'ES', 'LGF.A', 'EEX', 'WTB.L', 'KSS', '4812.T', 'KNX', 'TRTX', 'EVOK.L', 'LAMR', '4816.T', 'CPRX', '4151.T', 'ONT.L']



1 Failed download:
['LGF.A']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Pausing to avoid overloading the API...
Downloading data for tickers: ['BELFB', '7354.T', 'TWKS', '9434.T', 'MOD', 'BEZ.L', 'WHR', 'DRVN', '8591.T', 'PRSR.L', '7844.T', 'DDS', 'RMR', '4549.T', '7575.T', 'MNDY', 'KCO.DE', 'EVRI', '3635.T', 'HSTM']
Pausing to avoid overloading the API...
Downloading data for tickers: ['DNB', 'AXP', 'ORC', 'ARRY', 'GTY.DE', '4503.T', 'FOR', 'GEF', 'EHTH', '7747.T', 'TPIC', 'NTST', '3086.T', 'PKG', 'OC', '9987.T', 'ARR', 'IMAX', '3659.T', '8601.T']
Pausing to avoid overloading the API...
Downloading data for tickers: ['DTE.DE', 'IHS', '8963.T', '5938.T', 'NBPE.L', 'TAL', '7803.T', 'PAGS', 'BIRD', 'FLGT', 'WRBY', 'PGNY', 'PFC.L', 'SRP.L', 'UTI', 'BEPC', '3481.T', 'ATG.L', 'DV', 'WINA']
Pausing to avoid overloading the API...
Downloading data for tickers: ['7451.T', 'GTY', 'HEN3.DE', 'TYL', 'PRG', 'ADV', 'BLK', 'NTCT', '7459.T', 'FAN.L', 'PDCO', '9613.T', 'TYMN.L', 'PAYC', 'GDDY', 'RMBS', 'PACB', 'CRDA.L', 'MGRC', 'DNOW']
Pausing to avoid overloading the API

Unnamed: 0_level_0,RS1.L,KE,TEG.DE,LEG.DE,SCS,HNI,AVT,ACCO,BRNK.DE,VNA.DE,...,DEQ.DE,KIDS,HALO,MATW,9842.T,KVHI,MOON.L,NEO,6055.T,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1999-10-01,192.424942,,,,5.165832,9.774066,21.209171,,,,...,,,,9.178375,,3.06250,,,,8.588829
1999-11-01,218.152206,,,,5.319274,10.770150,21.354940,,,,...,,,,8.509521,,3.03125,,,,7.250434
1999-12-01,241.588638,,,,4.910100,10.974279,23.517170,,,,...,,,,10.065028,,3.06250,,,,6.730484
2000-01-01,246.585907,,,,4.700321,9.629849,20.923582,,,,...,,,,8.509521,,3.75000,,,,6.227546
2000-02-01,246.585907,,,,5.061888,9.223397,26.063131,,,,...,,,,8.870208,,7.37500,,,,5.879422
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-01,690.306335,22.920000,14.260000,78.867676,13.556043,46.408180,54.285213,4.940793,2.480,27.895920,...,18.555727,31.580000,44.290001,27.826420,1897.316406,5.18000,160.000000,13.71,1944.0,230.274567
2024-06-01,687.853210,21.980000,13.670000,76.260002,12.861370,44.735172,51.193146,4.629781,1.998,26.549999,...,19.906872,28.760000,52.360001,24.824409,1922.033936,4.65000,190.600006,13.87,2214.0,225.062576
2024-07-01,818.000000,23.700001,13.950000,80.860001,14.379726,54.602348,53.759998,5.033655,2.145,28.410000,...,21.843512,30.780001,55.259998,28.689289,1799.435059,4.45000,218.000000,17.73,1998.0,245.424240
2024-08-01,783.000000,18.450001,14.830000,87.059998,14.140000,53.509308,55.180000,5.398128,2.070,31.180000,...,20.672520,31.950001,63.849998,25.101889,1719.350342,4.44000,210.500000,16.52,1774.0,254.734711


In [98]:
len(df_monthly_adj_close.columns)

1658

In [96]:
df_monthly_adj_close.to_csv('../data/10_monthly_adjacent_close.csv')