In [57]:
import numpy as np
import pandas as pd
import sys
import datetime as dt
from time import mktime
import requests
from bs4 import BeautifulSoup
from typing import List
import re
import io

In [194]:
lista_indices = ["%5EIBEX", "%5EBFX","%5EBVSP", "%5EDJI", "%5EFCHI", "%5EFTSE", "%5EGDAXI", "%5EHSI", "%5EIBEX", 
                "%5EMXX", "%5EJKSE", "%5EMERV", "%5EOMXSPI", "%5EOSEAX", "%5ESSMI", "%5ESTI"]

In [45]:
def convert_to_unix(date):
    """
    converts date to unix timestamp
    
    parameters: date - in format (dd-mm-yyyy)
    
    returns integer unix timestamp
    """
    datum = dt.datetime.strptime(date, '%d-%m-%Y')
    
    return int(mktime(datum.timetuple()))


In [49]:
def get_crumbs_and_cookies(ticker: str):
    # Thanks to MAIK ROSENHEINRICH
    """
    get crumb and cookies for historical data csv download from yahoo finance  
    parameters: stock - short-handle identifier of the company    
    returns a tuple of header, crumb and cookie
    """   
    url = 'https://finance.yahoo.com/quote/{}/history'.format(ticker)
    
    with requests.session():
        header = {'Connection': 'keep-alive',
                   'Expires': '-1',
                   'Upgrade-Insecure-Requests': '1',
                   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) \
                   AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
                   }        
        website = requests.get(url, headers=header)
        soup = BeautifulSoup(website.text, 'lxml')
        
        crumb = re.findall('"CrumbStore":{"crumb":"(.+?)"}', str(soup))
        output=(header, crumb[0], website.cookies)
        return output   


In [47]:
day_begin_unix = convert_to_unix("01-12-2018")
day_end_unix = convert_to_unix("21-09-2020")

In [50]:
header, crumb, cookies = get_crumbs_and_cookies(lista_indices[1])

In [70]:
with requests.session():
        
            url = 'https://query1.finance.yahoo.com/v7/finance/download/' \
                '{stock}?period1={day_begin}&period2={day_end}&interval={interval}&events=history&crumb={crumb}' \
                .format(stock=lista_indices[1], day_begin=day_begin_unix, day_end=day_end_unix, interval='1d', crumb=crumb)
                
            website = requests.get(url, headers=header, cookies=cookies)

In [93]:
def get_historical_data(tickers: List[str], day_begin: str, day_end: str, interval='1d'):

    historical_prices = None
    df_create = False


    for ticker in tickers:
        error1='404 Not Found: Timestamp data missing.' 
    
        day_begin_unix = convert_to_unix(day_begin)
        day_end_unix = convert_to_unix(day_end)   
    
        header, crumb, cookies = get_crumbs_and_cookies(ticker)
    
        with requests.session():
        
            url = 'https://query1.finance.yahoo.com/v7/finance/download/' \
                '{stock}?period1={day_begin}&period2={day_end}&interval={interval}&events=history&crumb={crumb}' \
                .format(stock=ticker, day_begin=day_begin_unix, day_end=day_end_unix, interval=interval, crumb=crumb)
                
            website = requests.get(url, headers=header, cookies=cookies)
            if website.status_code == 200:
                if not df_create:
                    historical_prices = pd.read_csv(io.StringIO(website.text))
                    historical_prices["ticker"] = ticker
                    df_create = True
                else:
                    temp_df = pd.read_csv(io.StringIO(website.text))
                    temp_df["ticker"] = ticker
                    historical_prices = historical_prices.append(temp_df, ignore_index=True)
                
                print("Obteniendo histórico para {}".format(ticker))
                print("longitud del df: {}".format(len(historical_prices.Date)))
            
            else:
                print("Error downloading data from ticker {}".format(ticker))
                print("Response was {}".format(website.text))


        

          # make request.
          # read df.
          # save each df into a single csv.

    return historical_prices 

In [146]:
def get_currency(tickers: List[str]):
    
    indice_divisa = []

    for ticker in tickers:

        header, crumb, cookies = get_crumbs_and_cookies(ticker)
            
        with requests.session():
                
            url = "https://es.finance.yahoo.com/quote/"+ticker+"/components/"            
            website = requests.get(url, headers=header, cookies=cookies)

        soup = BeautifulSoup(website.text)
        divisa = re.findall('Divisa en [a-zA-Z]{3}', str(soup))
        divisa = divisa[0][len(divisa[0])-3:]
        indice_divisa.append((ticker, divisa))

    return indice_divisa




In [201]:
def get_components(tickers=List[str]):

    componentes = {}

    for ticker in tickers:

        header, crumb, cookies = get_crumbs_and_cookies(ticker)
                    
        with requests.session():
                        
            intentos = 3
            while intentos > 0:
                url = "https://es.finance.yahoo.com/quote/"+ticker+"/components/"            
                website = requests.get(url, headers=header, cookies=cookies)

                print("Obteniendo componentes de {}".format(ticker))
                if website.status_code == 200:

                    soup = BeautifulSoup(website.text)
                    try:
                        df = pd.read_html(str(soup))[0]
                        df = df.iloc[:,[0,1]]
                        componentes[ticker] = df.to_dict()

                    except ValueError as ve:
                        print(ve)
                        print(url)
                        next
                    intentos = 0
                else:
                    print("Error al conectar con yahoo, intentos restantes: {}".format(intentos))
                    intentos -= 1
                    if intentos == 0:
                        print("No se ha conseguido descargar información para el ticker: {}".format(ticker))


        
    return componentes



In [170]:
df = pd.read_html(str(soup))[0]

In [202]:
c = get_components(lista_indices)

Obteniendo componentes de %5EIBEX
Obteniendo componentes de %5EBFX
Obteniendo componentes de %5EBVSP
Obteniendo componentes de %5EDJI
No tables found
https://es.finance.yahoo.com/quote/%5EDJI/components/
Obteniendo componentes de %5EFCHI
Obteniendo componentes de %5EFTSE
Obteniendo componentes de %5EGDAXI
Obteniendo componentes de %5EHSI
Obteniendo componentes de %5EIBEX
Obteniendo componentes de %5EMXX
Obteniendo componentes de %5EJKSE
Obteniendo componentes de %5EMERV
Obteniendo componentes de %5EOMXSPI
Obteniendo componentes de %5EOSEAX
Obteniendo componentes de %5ESSMI
Obteniendo componentes de %5ESTI


In [205]:
c.keys()

dict_keys(['%5EIBEX', '%5EBFX', '%5EBVSP', '%5EDJI', '%5EFCHI', '%5EFTSE', '%5EGDAXI', '%5EHSI', '%5EMXX', '%5EJKSE', '%5EMERV', '%5EOMXSPI', '%5EOSEAX', '%5ESSMI', '%5ESTI'])

In [None]:
df = get_historical_data(lista_indices, "01-01-2018", "01-01-2020", interval='1d')
divisas = get_currency(lista_indices)