# Import Libraries

In [1]:
import yfinance as yf
import pandas as pd
import yesg
from datetime import datetime
import numpy as np
from tqdm import trange
import investpy as ip
import yahooquery as yq


# Récupération des données

On cherche les tickers des entreprises néerlandaises cotées en bourse. Grâce au fichier CSV, nous pouvons obtenir les tickers de ces entreprises :

In [2]:
# Load the Euronext equities listing (semicolon-separated CSV export).
tickers = pd.read_csv("./datas/Euronext_Equities_2022-12-02.csv", sep=";")
# Keep the symbols quoted in EUR and append the ".AS" suffix to each of them.
eur_symbols = tickers.loc[tickers['Currency'] == 'EUR', 'Symbol'].tolist()
tickers_amsterdam = [symbol + ".AS" for symbol in eur_symbols]
print(f"Nous avons : {len(tickers_amsterdam)} actions")

Nous avons : 168 actions


## Récupération des prix

Récupérons désormais le prix de toutes les actions disponibles.

In [24]:
# Wrap the whole ticker list in a single yfinance multi-ticker object.
# NOTE(review): this rebinds `tickers`, which held the DataFrame read from the
# Euronext CSV in the previous code cell; that frame is no longer reachable
# under this name once this cell has run.
tickers = yf.Tickers(tickers_amsterdam)
# Request the history of every ticker with period='max'. Symbols for which
# no data is found are reported in the download log printed by the call.
datas = tickers.history(period='max')
# Make sure the row index holds pandas datetimes so it can be sliced by date.
datas.index = pd.to_datetime(datas.index)

[*********************100%***********************]  168 of 168 completed

26 Failed downloads:
- VAMT.AS: No data found, symbol may be delisted
- ONWD.AS: No data found, symbol may be delisted
- SGO.AS: No data found, symbol may be delisted
- RET.AS: No data found, symbol may be delisted
- EHCW.AS: No data found, symbol may be delisted
- ENTPT.AS: No data found, symbol may be delisted
- SPR1W.AS: No data found, symbol may be delisted
- DSC2S.AS: No data found, symbol may be delisted
- FAGR.AS: No data found, symbol may be delisted
- FLEW.AS: No data found, symbol may be delisted
- SPR1T.AS: No data found, symbol may be delisted
- AED.AS: No data found, symbol may be delisted
- BHNDW.AS: No data found, symbol may be delisted
- BHNDT.AS: No data found, symbol may be delisted
- VAMW.AS: No data found, symbol may be delisted
- ADUX.AS: No data found, symbol may be delisted
- EPICW.AS: No data found, symbol may be delisted
- HEGAW.AS: No data found, symbol may be delisted
- NAIW.AS: No data

Prenons uniquement le prix de clôture ('Close').

In [25]:
# Keep only the 'Close' entry of the downloaded history (one column per ticker).
datas_price = datas['Close']

In [26]:
# First date of the study window.
date_from = pd.Timestamp('2010-01-01')
# Label-based slice on the date index: keep the rows from `date_from` onwards.
data_filter = datas_price.loc[date_from:]
# Preview the first rows of the filtered price table.
data_filter.head()

Unnamed: 0_level_0,AALB.AS,ABN.AS,ACOMO.AS,AD.AS,ADUX.AS,ADYEN.AS,AED.AS,AF.AS,AGN.AS,AJAX.AS,...,VAMW.AS,VASTN.AS,VEON.AS,VLK.AS,VPK.AS,VTA.AS,VVY.AS,WDP.AS,WHA.AS,WKL.AS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,8.183498,,3.089914,6.128687,,,,10.09,2.645846,6.127661,...,,17.244045,,17.517265,20.687563,0.684595,,,21.835215,10.764404
2010-01-05,8.148955,,3.07868,6.077343,,,,10.09,2.639794,6.165899,...,,17.331297,,17.505106,20.774899,0.441419,,,22.011976,10.760955
2010-01-06,8.310164,,3.089914,6.111786,,,,10.09,2.630441,6.165899,...,,17.114998,,17.68259,20.745789,0.41763,,,21.822359,10.733355
2010-01-07,8.267944,,3.07587,5.977906,,,,10.09,2.683808,5.974709,...,,17.162251,,17.505106,20.967762,0.420273,,,21.565245,10.726455
2010-01-08,8.283298,,3.101151,6.039648,,,,10.09,2.766887,6.070304,...,,17.216782,,17.502676,20.825846,0.422916,,,21.533104,10.691954


Supprimons les colonnes qui se terminent par un NaN, car ces actions ne sont plus échangées sur les marchés financiers. Supprimons également celles qui commencent par un NaN : ces actions sont échangées depuis trop peu de temps et nous n'avons pas assez de recul sur elles.

In [27]:
# Labels (dates) of the first and last rows of the price table.
first_date = data_filter.index[0]
last_date = data_filter.index[-1]

# One-column boolean frames (column label = row date) flagging, per ticker,
# whether the price is missing on that row.
first_row_NaN = data_filter.iloc[0].isna().to_frame()
last_row_NaN = data_filter.iloc[-1].isna().to_frame()

# Tickers without a price on the last row.
missing_price_end = last_row_NaN.index[last_row_NaN[last_date]].to_list()
# Tickers without a price on the first row, excluding those already listed
# above so that no column is dropped twice.
missing_price_begin = [
    column
    for column in first_row_NaN.index[first_row_NaN[first_date]]
    if column not in missing_price_end
]

# Remove both groups of columns from the price table.
data_filter = (
    data_filter
    .drop(columns=missing_price_end)
    .drop(columns=missing_price_begin)
)

Sauvegardons ce fichier et voici un aperçu du DataFrame que nous obtenons :

In [29]:
# Drop the columns that contain no value at all (every row is NaN).
data_filter = data_filter.dropna(axis=1, how='all')
# Write the cleaned price table to the `datas` folder as CSV.
data_filter.to_csv('./datas/prices.csv')
# Bare last expression: shows the frame as the cell output.
data_filter

Unnamed: 0_level_0,AALB.AS,ACOMO.AS,AD.AS,AGN.AS,AJAX.AS,AKZA.AS,ALX.AS,AMG.AS,AMUND.AS,ARCAD.AS,...,TOM2.AS,TWEKA.AS,URW.AS,VALUE.AS,VASTN.AS,VLK.AS,VPK.AS,VTA.AS,WHA.AS,WKL.AS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,8.183498,3.089914,6.128687,2.645846,6.127661,29.324984,78.000000,8.194963,12.383609,10.344408,...,6.600390,9.511971,68.601768,3.624460,17.244045,17.517265,20.687563,0.684595,21.835215,10.764404
2010-01-05,8.148955,3.078680,6.077343,2.639794,6.165899,29.102234,75.000000,8.169353,12.383609,10.731913,...,6.921458,9.702948,68.513435,3.983886,17.331297,17.505106,20.774899,0.441419,22.011976,10.760955
2010-01-06,8.310164,3.089914,6.111786,2.630441,6.165899,29.177525,72.800003,8.080161,12.383609,10.791023,...,6.905454,9.746504,69.109772,4.757104,17.114998,17.682590,20.745789,0.417630,21.822359,10.733355
2010-01-07,8.267944,3.075870,5.977906,2.683808,5.974709,28.703789,73.000000,8.124315,12.383609,10.804158,...,7.026479,9.602434,68.380905,5.134652,17.162251,17.505106,20.967762,0.420273,21.565245,10.726455
2010-01-08,8.283298,3.101151,6.039648,2.766887,6.070304,28.499855,74.000000,8.106654,12.383609,10.731913,...,7.065488,9.629237,68.402977,5.436690,17.216782,17.502676,20.825846,0.422916,21.533104,10.691954
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-09,38.790001,19.799999,28.549999,4.690000,11.500000,65.139999,0.672000,36.759998,1.250000,38.419998,...,7.095000,37.700001,52.490002,5.780000,20.950001,22.900000,28.200001,4.900000,12.870000,104.849998
2022-12-12,38.639999,19.400000,28.174999,4.657000,11.350000,64.139999,0.690000,35.919998,1.250000,38.119999,...,6.955000,37.299999,50.799999,5.660000,20.250000,22.250000,28.370001,4.850000,12.620000,104.650002
2022-12-13,39.509998,19.400000,28.084999,4.715000,11.300000,65.180000,0.672000,36.520000,1.250000,38.360001,...,7.035000,38.240002,52.700001,5.700000,20.600000,22.650000,28.260000,4.980000,12.740000,104.099998
2022-12-14,39.459999,19.400000,27.735001,4.707000,11.400000,65.300003,0.672000,35.939999,1.250000,38.299999,...,6.860000,38.279999,53.090000,5.700000,21.200001,22.600000,27.969999,4.980000,12.730000,104.599998


## Récupération des scores ESG

Pour la construction de notre portefeuille d'actions nous avons besoin des scores ESG de toutes les entreprises disponibles.

In [30]:
# Tickers kept after the price filtering; one ESG row is built per ticker.
tickers_price = data_filter.columns.to_list()

# Row labels read from the 'Value' column of the yfinance sustainability
# table, listed in the same order as the score columns of `esg_scores`.
esg_fields = ['environmentScore', 'socialScore', 'governanceScore', 'totalEsg']
esg_columns = ['Ticker Yahoo', 'Environment Score', 'Social Score', 'Governance Score', 'Total Score']


def _fetch_esg_row(ticker):
    """Return ``[ticker, environment, social, governance, total]`` for one ticker.

    Best effort: when the sustainability table cannot be fetched or does not
    contain the expected rows, the four scores are returned as NaN instead of
    raising, so that one failing ticker does not stop the whole collection.
    """
    try:
        sus = yf.Ticker(ticker).sustainability
        # Label-based selection, then a plain list: avoids positional
        # `scores[0]` indexing on a label-indexed Series.
        scores = sus.loc[esg_fields, 'Value'].tolist()
    except Exception:
        # `Exception` rather than a bare `except:` so that interrupting the
        # kernel (KeyboardInterrupt) still stops this slow loop.
        scores = [np.nan] * len(esg_fields)
    return [ticker, *scores]


# Collect every row first and build the DataFrame once; the default integer
# index matches the position of each ticker in `tickers_price`.
esg_scores = pd.DataFrame(
    [_fetch_esg_row(tickers_price[i]) for i in trange(len(tickers_price))],
    columns=esg_columns,
)
esg_scores.head()

100%|██████████| 66/66 [12:06<00:00, 11.00s/it]


Unnamed: 0,Ticker Yahoo,Environment Score,Social Score,Governance Score,Total Score
0,AALB.AS,,,,
1,ACOMO.AS,,,,
2,AD.AS,6.82,9.63,4.35,20.8
3,AGN.AS,0.51,7.74,6.63,14.88
4,AJAX.AS,,,,


Sauvegardons ce fichier dans le dossier datas.

In [33]:
# Write the collected ESG scores to the `datas` folder as CSV.
esg_scores.to_csv('./datas/esg_scores.csv')
# Bare last expression: shows the frame as the cell output.
esg_scores

Unnamed: 0,Ticker Yahoo,Environment Score,Social Score,Governance Score,Total Score
0,AALB.AS,,,,
1,ACOMO.AS,,,,
2,AD.AS,6.82,9.63,4.35,20.8
3,AGN.AS,0.51,7.74,6.63,14.88
4,AJAX.AS,,,,
...,...,...,...,...,...
61,VLK.AS,,,,
62,VPK.AS,10.56,8.74,3.75,23.05
63,VTA.AS,,,,
64,WHA.AS,,,,


Nous voyons bien qu'il manque énormément de scores ESG, nous allons donc être obligés d'aller chercher à la main les scores restants.

 # Pre-processing

We have to follow a few steps:

* Analyse the liquidity of all firms
    * Market capitalization
    * Average daily volume exchange
    * Free float part
* ESG filter
    * exclude x% of firms with the worst ESG score
    * keep firms with the best ESG momentum
    * take a specific KPI
* Financial analysis
    * Profit Margin
    * Return on assets

We can also analyse the correlation between our chosen stocks.

Once we have selected the stocks we will use in our portfolio, we need to find the best weights. We will use two different methods:
* Mean-variance method
* Black-Litterman method