In [3]:
!pip install yahoo_fin



In [30]:
!pip install requests_html



**We found the method below for downloading Yahoo Finance information from the statistics page on the internet, and it seems to work. Alternatively, one could go into the Yahoo Finance page of each ticker code.**


In [5]:
import yahoo_fin.stock_info as si #Get data
import pandas as pd #Data manipulation
from tqdm import tqdm #Make a progress bar because that's cool...
import requests # to get data
from bs4 import BeautifulSoup # to parse external data
from requests_html import HTMLSession
import matplotlib.pyplot as plt

In [6]:
# Scraper of the EURO STOXX 50 index constituents (Wikipedia table).
scrEURSTX = requests.get('https://en.wikipedia.org/wiki/EURO_STOXX_50', timeout=30)
# Fail here with a clear HTTP error instead of later while parsing an error page.
scrEURSTX.raise_for_status()
soupEURSTX = BeautifulSoup(scrEURSTX.text, 'lxml')

# The constituents table is located through the cell whose text is exactly "Ticker".
ticker_header = soupEURSTX.find(string="Ticker")
if ticker_header is None:
    raise ValueError("Could not find a 'Ticker' header on the EURO STOXX 50 page")
# [1:] drops the first <tr>, which is presumably the header row.
tableEURSTX = ticker_header.find_parent("table").find('tbody').find_all('tr')[1:]

# Collect one record per table row and build the DataFrame once at the end;
# concatenating inside the loop copies the whole frame on every iteration.
records = []
for row in tableEURSTX:
    cells = row.find_all('td')
    if len(cells) < 6:
        # Not a data row (too few cells to hold ticker, name and industry).
        continue
    records.append({
        "Ticker": cells[0].text.strip(),
        "Name": cells[2].text.strip(),
        "Industry": cells[5].text.strip(),
    })

if not records:
    raise ValueError("No constituent rows could be parsed from the EURO STOXX 50 table")

# One row per company, columns Ticker / Name / Industry.
EURSTX_companies = pd.DataFrame(records, columns=["Ticker", "Name", "Industry"])
EURSTX_ticker = EURSTX_companies["Ticker"]
EURSTX_ticker

0       ADS.DE
1     ADYEN.AS
2        AD.AS
3        AI.PA
4       AIR.PA
5       ALV.DE
6       ABI.BR
7      ASML.AS
8        CS.PA
9       BAS.DE
10     BAYN.DE
11     BBVA.MC
12      SAN.MC
13      BMW.DE
14      BNP.PA
15      CRG.IR
16      DAI.DE
17       BN.PA
18      DB1.DE
19      DPW.DE
20      DTE.DE
21     ENEL.MI
22      ENI.MI
23       EL.PA
24     FLTR.IR
25      IBE.MC
26      ITX.MC
27      IFX.DE
28     INGA.AS
29      ISP.MI
30      KER.PA
31    KNEBV.HE
32       OR.PA
33      LIN.DE
34       MC.PA
35     MUV2.DE
36       RI.PA
37     PHIA.AS
38      PRX.AS
39      SAF.PA
40      SAN.PA
41      SAP.DE
42       SU.PA
43      SIE.DE
44     STLA.MI
45      TTE.PA
46      UMG.AS
47       DG.PA
48      VOW.DE
49      VNA.DE
Name: Ticker, dtype: object

In [7]:
# Ticker symbols to download statistics for.
# Each symbol appears exactly once: a repeated ticker would only trigger
# redundant downloads, because the results are stored in dicts keyed by ticker.
companies = [
    "ADS.DE", "ADYEN.AS", "AD.AS", "AI.PA", "AIR.PA",
    "ALV.DE", "ABI.BR", "ASML.AS", "CS.PA", "BAS.DE",
    "BAYN.DE", "BBVA.MC", "SAN.MC", "BMW.DE", "BNP.PA",
    "CRG.IR", "DAI.DE", "BN.PA", "DB1.DE", "DPW.DE",
    "DTE.DE", "ENEL.MI", "ENI.MI", "EL.PA", "FLTR.IR",
    "IBE.MC", "ITX.MC", "IFX.DE", "INGA.AS", "ISP.MI",
    "KER.PA", "KNEBV.HE", "OR.PA", "LIN.DE", "MC.PA",
    "MUV2.DE", "RI.PA", "PHIA.AS", "PRX.AS", "SAF.PA",
    "SAN.PA", "SAP.DE", "SU.PA", "SIE.DE", "STLA.MI",
    "TTE.PA", "UMG.AS", "DG.PA", "VOW.DE", "VNA.DE",
]

In [8]:
def _to_long_frame(tables_by_ticker, column_names):
    """Stack per-ticker tables into a single long DataFrame.

    Parameters
    ----------
    tables_by_ticker : dict
        Maps each ticker symbol to the DataFrame downloaded for it.
    column_names : list of str
        Names for the resulting columns; the first one labels the ticker column.

    Returns
    -------
    pandas.DataFrame
        All tables stacked on top of each other, with the ticker as the first
        column and a fresh 0..n-1 row index.
    """
    # pd.concat on a dict uses the keys (tickers) as the outer index level;
    # reset_index() turns both index levels into ordinary columns.
    long_frame = pd.concat(tables_by_ticker).reset_index()
    # "level_1" is the row index of the individual tables; it is not needed.
    del long_frame["level_1"]
    long_frame.columns = column_names
    return long_frame


# Valuation table per ticker: keep only the first two columns
# (the name of the measure and its value).
ticker_stats = {}
for ticker in tqdm(companies):
    valuation = si.get_stats_valuation(ticker).iloc[:, :2]
    valuation.columns = ["Attribute", "Recent"]
    ticker_stats[ticker] = valuation

# Combine all the valuation tables into a single data frame.
df1 = _to_long_frame(ticker_stats, ["Ticker", "Attribute", "Recent"])

# Same, but for the additional statistics table of every ticker.
ticker_extra_stats = {}
for ticker in tqdm(companies):
    ticker_extra_stats[ticker] = si.get_stats(ticker)

df2 = _to_long_frame(ticker_extra_stats, ["Ticker", "Attribute", "Value"])

100%|██████████| 51/51 [00:47<00:00,  1.07it/s]


In [9]:
# Inspect the long-format valuation table (one row per ticker/attribute pair).
print(df1)

     Ticker                  Attribute  Recent
0    ADS.DE      Market Cap (intraday)  46.54B
1    ADS.DE           Enterprise Value  47.38B
2    ADS.DE               Trailing P/E   32.69
3    ADS.DE                Forward P/E   24.94
4    ADS.DE  PEG Ratio (5 yr expected)    0.59
..      ...                        ...     ...
445  VNA.DE  PEG Ratio (5 yr expected)     NaN
446  VNA.DE          Price/Sales (ttm)    9.45
447  VNA.DE           Price/Book (mrq)    1.41
448  VNA.DE   Enterprise Value/Revenue   11.12
449  VNA.DE    Enterprise Value/EBITDA    4.13

[450 rows x 3 columns]


In [10]:
# Inspect the long-format table of additional statistics (one row per ticker/attribute pair).
print(df2)

      Ticker                     Attribute    Value
0     ADS.DE             Beta (5Y Monthly)     0.87
1     ADS.DE              52-Week Change 3  -13.02%
2     ADS.DE       S&P500 52-Week Change 3   14.94%
3     ADS.DE                52 Week High 3   336.25
4     ADS.DE                 52 Week Low 3   231.55
...      ...                           ...      ...
2545  VNA.DE       Total Debt/Equity (mrq)   162.57
2546  VNA.DE           Current Ratio (mrq)     0.39
2547  VNA.DE    Book Value Per Share (mrq)    47.85
2548  VNA.DE     Operating Cash Flow (ttm)    1.44B
2549  VNA.DE  Levered Free Cash Flow (ttm)    8.66B

[2550 rows x 3 columns]


**Now we simply need to reshape the data into a format that is usable for the graph we wish to display:**

At the moment, we have the data in very few columns and many rows. Essentially, every ticker is repeated for every attribute. We now wish to change this such that each ticker is displayed once, in its own row, with every attribute as a separate column.

In [11]:
# Reshape long -> wide with pivot(): one row per ticker, one column per attribute.
# Both tables share the same row and column keys; only the value column differs.
pivot_keys = {"index": "Ticker", "columns": "Attribute"}
df1_wide = df1.pivot(values="Recent", **pivot_keys)
df2_wide = df2.pivot(values="Value", **pivot_keys)





In [12]:
# Inspect the wide valuation table (tickers as rows, attributes as columns).
print(df1_wide)

Attribute Enterprise Value Enterprise Value/EBITDA Enterprise Value/Revenue  \
Ticker                                                                        
ABI.BR                 NaN                     NaN                      NaN   
AD.AS                  NaN                     NaN                      NaN   
ADS.DE              47.38B                   14.17                     2.09   
ADYEN.AS            46.92B                   94.14                    10.12   
AI.PA               84.75B                   14.32                     4.02   
AIR.PA              84.00B                   10.68                     1.53   
ALV.DE             106.40B                     NaN                     0.94   
ASML.AS            239.76B                   34.22                    12.88   
BAS.DE              78.66B                    6.87                     1.05   
BAYN.DE             81.70B                   11.26                     1.90   
BBVA.MC                NaN                     NaN  

In [13]:
# Inspect the wide table of additional statistics (tickers as rows, attributes as columns).
print(df2_wide)

Attribute % Held by Insiders 1 % Held by Institutions 1  \
Ticker                                                    
ABI.BR                  48.74%                   14.19%   
AD.AS                    0.05%                   52.47%   
ADS.DE                  10.29%                   53.19%   
ADYEN.AS                23.65%                   62.53%   
AI.PA                    2.61%                   29.60%   
AIR.PA                  25.84%                   31.47%   
ALV.DE                   0.00%                   39.94%   
ASML.AS                  0.01%                   57.08%   
BAS.DE                   0.00%                   28.81%   
BAYN.DE                  0.00%                   38.21%   
BBVA.MC                  0.02%                   34.30%   
BMW.DE                  46.74%                   21.57%   
BN.PA                    2.65%                   55.72%   
BNP.PA                  13.20%                   51.26%   
CRG.IR                   0.02%                   52.93% 

The next problem we encounter is that there are a bunch of footnotes in the attribute descriptions. The reason for this is that on the Yahoo Finance website, the attributes may / may not have some additional information attached to them. 

However, since we are aiming to create a scheduled graph, we are not really interested in these additional footnotes. 

Next step would be to eliminate these footnote numbers. 
In order to do that, we need to rename all the columns affected and subsequently convert their values from strings into floats.




In [14]:
# TODO: rename every attribute column that carries a footnote digit, following this template:
#   df1_wide.rename(columns = {"NAME OF CURRENT ATTRIBUTE W/ FOOTNOTE":"NEW NAME"}, inplace = True)

# TODO: afterwards convert the string values to floats with .astype

# List the current attribute column names to see which ones need renaming.
df1_wide.columns

Index(['Enterprise Value', 'Enterprise Value/EBITDA',
       'Enterprise Value/Revenue', 'Forward P/E', 'Market Cap (intraday)',
       'PEG Ratio (5 yr expected)', 'Price/Book (mrq)', 'Price/Sales (ttm)',
       'Trailing P/E'],
      dtype='object', name='Attribute')