# ParseTickers Module

## Load get_wikitable_from_url

In [343]:
import re
import os
import bs4 as bs
import pandas as pd
import requests
from datetime import date
from datetime import datetime

In [344]:
wiki_metadata = [{'market':'IBEX35','url':'https://es.wikipedia.org/wiki/IBEX_35','pos_table':{'ticker':0, 'company':1,'sector':4,'entry_date':3,'ISIN':5}},
                {'market':'DAX30','url':'https://de.wikipedia.org/wiki/DAX','pos_table':{'ticker':1, 'company':0,'sector':2,'entry_date':5}},
                {'market':'CAC40','url':'https://es.wikipedia.org/wiki/CAC_40','pos_table':{'ticker':2,'company':0, 'sector':1}},
                {'market':'SP500','url':'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies','pos_table':{'ticker':0, 'company':1,'sector':3,'sub_industry':4,'entry_date':6,'CIK':7}}
                ]

In [345]:
wiki_metadata[0]

{'market': 'IBEX35',
 'url': 'https://es.wikipedia.org/wiki/IBEX_35',
 'pos_table': {'ticker': 0,
  'company': 1,
  'sector': 4,
  'entry_date': 3,
  'ISIN': 5}}

In [358]:
def get_wikitable_from_url(wiki_metadata) -> dict:
    resp = requests.get(wiki_metadata['url'])
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    wikitable_data = []
    for row in table.findAll('tr')[1:]:
        wikitable_dict = {}
        for key in wiki_metadata['pos_table']:
            data = row.findAll('td')[wiki_metadata['pos_table'][key]].text
            wikitable_dict[key] = data.strip()
        wikitable_dict['market'] = wiki_metadata['market']
        wikitable_dict['active_type'] = 'stock'
        wikitable_data.append(wikitable_dict)
    wikitable_data.append({'ticker':wiki_metadata['market'] ,'market':wiki_metadata['market'], 'active_type':'ETF'})
    return wikitable_data

In [359]:
raw_ticker_list = []
print('Start raw ticker data extraction from Wikipedia ')
for i,metadata in enumerate(wiki_metadata):
    print('Getting data from market:{}, site:{}'.format(metadata['market'], metadata['url']))
    wikitable_data = get_wikitable_from_url(metadata)
    raw_ticker_list += wikitable_data
print('Extraction completed')

Start raw ticker data extraction from Wikipedia 
Getting data from market:IBEX35, site:https://es.wikipedia.org/wiki/IBEX_35
Getting data from market:DAX30, site:https://de.wikipedia.org/wiki/DAX
Getting data from market:CAC40, site:https://es.wikipedia.org/wiki/CAC_40
Getting data from market:SP500, site:http://en.wikipedia.org/wiki/List_of_S%26P_500_companies
Extraction completed


In [360]:
raw_ticker_list

[{'ticker': 'ANA',
  'company': 'Acciona',
  'sector': 'Construcción',
  'entry_date': '2015',
  'ISIN': 'ES0125220311',
  'market': 'IBEX35',
  'active_type': 'stock'},
 {'ticker': 'ACX',
  'company': 'Acerinox',
  'sector': 'Mineral, metales y transformación',
  'entry_date': '2015',
  'ISIN': 'ES0132105018',
  'market': 'IBEX35',
  'active_type': 'stock'},
 {'ticker': 'ACS',
  'company': 'Grupo ACS',
  'sector': 'Construcción',
  'entry_date': '1998',
  'ISIN': 'ES0167050915',
  'market': 'IBEX35',
  'active_type': 'stock'},
 {'ticker': 'AENA',
  'company': 'Aena',
  'sector': 'Transporte y distribución',
  'entry_date': '2015',
  'ISIN': 'ES0105046009',
  'market': 'IBEX35',
  'active_type': 'stock'},
 {'ticker': 'ALM',
  'company': 'Almirall',
  'sector': 'Productos farmacéuticos y biotecnología',
  'entry_date': '2020',
  'ISIN': 'ES0157097017',
  'market': 'IBEX35',
  'active_type': 'stock'},
 {'ticker': 'AMS',
  'company': 'Amadeus IT Group',
  'sector': 'Electrónica y software

## Parse_tickers minimum code


- BaseParser(): 
- WikiToYahooFinanceParser()
- get_parsed_tickers(): método de la clase TickerManager_Wiki. Es la api para parse_Tickers.


In [410]:
class BaseParser():
    
    def __init__(self, source, target):
        self.__source = source
        self.__target = target
        self.__exceptions ={('Wiki','YahooFinance'):{'IBEX35': '^IBEX','CAC40': '^FCHI','DAX30': '^GDAXI', 'SP500':'^GSPC'},
                            ('Wiki','AlphaVantage'):{}}
        
    
    def get_exceptions(self):
        return self.__exceptions
    
    def set_source(self, source):
        print("Set ticker source in parser")
        self.__source = source
    
    def get_source(self):
        return self.__source
    
    def set_target(self, target):
        print("Set ticker target in parser")
        self.__target = target
    
    def get_target(self):
        return self.__target
       
    def parse(self, name, prefix, suffix, excedent_string):
        key_tuple = (self.__source,self.__target) # Tuple key to select Source-Target exceptions
        
        exception_dict = self.__exceptions.get(key_tuple, {}) #FIX inform not properly loaded. Mirar longitud del dict.

        if name in exception_dict:
            return exception_dict.get(name, name) 
        
        name = name.replace(excedent_string,'')
        return prefix + name + suffix

In [419]:
class WikiToYahooFinance_Parser(BaseParser):
    
    def __init__(self):
        super().__init__(source='Wiki', target='YahooFinance')
        
        self.__parser_keys = {'IBEX35':{'maket':'IBEX35', 'prefix':'','suffix':'.MC','excedent_string':''},
              'CAC40':{'maket':'CAC40','prefix':'','suffix':'.PA','excedent_string':'Euronext: '},
              'DAX30':{'maket':'DAX30','prefix':'','suffix':'.DE', 'excedent_string':''},
              'SP500':{'market':'SP500','prefix':'','suffix':'', 'excedent_string':''}}
    
    def parse_markets(self, raw_ticker): # Feeds tickers for Yahoo finance downloader based on market. Uses parse from BaseParser
        if raw_ticker['market'] == 'IBEX35': #Fix: Repensar esto 
            return super().parse(raw_ticker['ticker'],
                                 self.__parser_keys['IBEX35']['prefix'], 
                                 self.__parser_keys['IBEX35']['suffix'],
                                 self.__parser_keys['IBEX35']['excedent_string'])
        if raw_ticker['market'] == 'CAC40': 
            return super().parse(raw_ticker['ticker'], 
                                 self.__parser_keys['CAC40']['prefix'], 
                                 self.__parser_keys['CAC40']['suffix'],
                                 self.__parser_keys['CAC40']['excedent_string'])
        if raw_ticker['market'] == 'DAX30':
            return  super().parse(raw_ticker['ticker'], 
                                  self.__parser_keys['DAX30']['prefix'], 
                                  self.__parser_keys['DAX30']['suffix'], 
                                  self.__parser_keys['DAX30']['excedent_string'])
        if raw_ticker['market'] == 'SP500':
            return  super().parse(raw_ticker['ticker'], 
                                  self.__parser_keys['SP500']['prefix'], 
                                  self.__parser_keys['SP500']['suffix'],
                                  self.__parser_keys['SP500']['excedent_string'])
        else:
            return raw_ticker['ticker']
        
        
    def feeder_ticker(self,feed_ticker): # Appends feeds to original dictionary, under 'feeds' label. Uses parse_market.
        feed_ticker['feeds'] = {'name':self.get_target(),'ticker':self.parse_markets(feed_ticker)}
        return feed_ticker
    
    
    def parse_all(self, raw_ticker_list): # Loop of feeder_ticker to feed all the tickers in raw_ticker_list
        feed_ticker_list = []
        for raw_ticker in raw_ticker_list:
            feed_ticker_list.append(self.feeder_ticker(raw_ticker))
        print('All tickers fed')
        return feed_ticker_list

In [417]:
def get_parsed_tickers(): # Función con la que hay que engancahr
        # Actualiza los par-value del diccionario: "tickerYahoofinance":"value", "tickerAlphaVantage":"value"
        print('Activation: get_parsed_tickers')
        my_parser_W2YF = WikiToYahooFinance_Parser()
        feeder_ticker_list = my_parser_W2YF.parse_all(raw_ticker_list)
        return feeder_ticker_list

In [418]:
get_parsed_tickers()

Activation: get_parsed_tickers
All tickers fed


[{'ticker': 'ANA',
  'company': 'Acciona',
  'sector': 'Construcción',
  'entry_date': '2015',
  'ISIN': 'ES0125220311',
  'market': 'IBEX35',
  'active_type': 'stock',
  'feeds': {'name': 'YahooFinance', 'ticker': 'ANA.MC'}},
 {'ticker': 'ACX',
  'company': 'Acerinox',
  'sector': 'Mineral, metales y transformación',
  'entry_date': '2015',
  'ISIN': 'ES0132105018',
  'market': 'IBEX35',
  'active_type': 'stock',
  'feeds': {'name': 'YahooFinance', 'ticker': 'ACX.MC'}},
 {'ticker': 'ACS',
  'company': 'Grupo ACS',
  'sector': 'Construcción',
  'entry_date': '1998',
  'ISIN': 'ES0167050915',
  'market': 'IBEX35',
  'active_type': 'stock',
  'feeds': {'name': 'YahooFinance', 'ticker': 'ACS.MC'}},
 {'ticker': 'AENA',
  'company': 'Aena',
  'sector': 'Transporte y distribución',
  'entry_date': '2015',
  'ISIN': 'ES0105046009',
  'market': 'IBEX35',
  'active_type': 'stock',
  'feeds': {'name': 'YahooFinance', 'ticker': 'AENA.MC'}},
 {'ticker': 'ALM',
  'company': 'Almirall',
  'sector': 

### Parse_tickers: Development layer

In [398]:
def parse_string_manipulation(name, prefix, suffix):
    print(name)
    name = name.replace('Euronext ','')
    return prefix + name + suffix

In [399]:
parse_string_manipulation('Euronext IBEX','','.MC')

Euronext IBEX


'IBEX.MC'

In [293]:
baseParser = BaseParser('Wiki', 'YahooFinance')
name = 'CAC40_'
ticker = baseParser.parse(name, '', '.MC')
ticker
#feeder_ticker_list = my_parser_W2YF.parse_all(raw_ticker_list)

'CAC40_.MC'

In [294]:
dict_3 ={('Wiki','YahooFinance'):{'IBEX_1': '^IBEX35_1','CAC40': '^GSPC','DAX30': '^GDAXI'},
        ('Wiki','AlphaVantage'):{'IBEX_2': '^IBEX35_2','CAC40': '^GSPC','DAX30': '^GDAXI'},
        ('Wiki','Root'):{'IBEX_3': '^IBEX35_3','CAC40': '^GSPC','DAX30': '^GDAXI'}}

In [295]:
target = 'Wiki'
source = 'YahooFinance'
#source = 'AlphaVantage'
#source = 'no'
name = 'IBEX_1'
print(name)

key_tuple = (target, source)
try:
    exception_dict = dict_3[key_tuple]
    #print(exception_dict)
except:
    exception_dict = {}
    print('!No exceptions loaded! Invalid source-target tuple key')
    print('Review possible data not loaded properly')

if name in exception_dict:
    name = exception_dict.get(name, name)

name

IBEX_1


'^IBEX35_1'

In [124]:
parser_keys = {'IBEX35':{'maket':'IBEX35', 'prefix':'','suffix':'.MC'},
              'CAC40':{'maket':'CAC40','prefix':'','suffix':'.PA'},
              'DAX30':{'maket':'DAX30','prefix':'','suffix':'.DE'},
              'SP500':{'market':'SP500','prefix':'','suffix':''}}

parser_keys

{'IBEX35': {'maket': 'IBEX35', 'prefix': '', 'suffix': '.MC'},
 'CAC40': {'maket': 'CAC40', 'prefix': '', 'suffix': '.PA'},
 'DAX30': {'maket': 'DAX30', 'prefix': '', 'suffix': '.DE'},
 'SP500': {'market': 'SP500', 'prefix': '', 'suffix': ''}}

In [125]:
parser_keys['IBEX35']['prefix']

''

In [7]:
raw_ticker_list[0]

{'ticker': 'ANA',
 'company': 'Acciona',
 'sector': 'Construcción',
 'entry_date': '2015',
 'ISIN': 'ES0125220311',
 'market': 'IBEX35',
 'active_type': 'stock'}

In [8]:
raw_ticker_list[0]['market'], raw_ticker_list[0]['ticker'] 

('IBEX35', 'ANA')

In [9]:
enriched_ticker_list = []
raw_ticker_list[0]
raw_ticker_list[0]["Yahoo_API"] = raw_ticker_list[0]['ticker'] + ".MC"
raw_ticker_list[0]

{'ticker': 'ANA',
 'company': 'Acciona',
 'sector': 'Construcción',
 'entry_date': '2015',
 'ISIN': 'ES0125220311',
 'market': 'IBEX35',
 'active_type': 'stock',
 'Yahoo_API': 'ANA.MC'}

In [10]:
enriched_ticker_list.append(raw_ticker_list[0])
enriched_ticker_list

[{'ticker': 'ANA',
  'company': 'Acciona',
  'sector': 'Construcción',
  'entry_date': '2015',
  'ISIN': 'ES0125220311',
  'market': 'IBEX35',
  'active_type': 'stock',
  'Yahoo_API': 'ANA.MC'}]

In [36]:
def parse_tickers(raw_ticker_list):
    enriched_ticker_list = []
    for i, ticker_raw_metadata in enumerate(raw_ticker_list):
        if ticker_raw_metadata['market'] == 'IBEX35':
            #enriched['market'] = wiki_metadata['market']
            print("Enriched data for IBEX35")
        else:
            print("This ticker:", raw_ticker_list[0]['ticker'], "is not clasified and enriched" )
            
            
        #print(i, ticker_raw_metadata)

In [37]:
#parse_tickers(raw_ticker_list)

In [38]:
class BaseParser():
    
    def __init__(self, source = '', target =''):
        self.__source = ''
        self.__target = ''
        self.__exceptions = {
            'exception_1': 'fixed_1',
            'exception_2': 'fixed_2',
            'exception_3': 'fixed_3',
            
        }
    
    def parse(self, name, prefix, suffix):
        name = self.__exceptions.get(name, name) #Substitute ticker exception for requested ticker
        name = prefix + name + suffix
        #print(prefix, name, suffix)
        return name
            

class WikiToYahooFinanceParser(BaseParser):
    
    def __init__(self):
        super().__init__(source='Wiki', target='YahooFinance')
        
        self.__parser_keys = [{'market':'IBEX35', 'prefix':'', 'suffix':'.MC' }]
        
    def parse(self, raw_ticker):
        if raw_ticker['market'] == 'IBEX35':
            feed = super().parse(raw_ticker['ticker'], self.__parser_keys[0]['prefix'], self.__parser_keys[0]['suffix'])
            return feed
        else:
            return raw_ticker['ticker']
        
    def feeder_ticker(self,feed_ticker):
        feed_ticker['feeds'] = {'name':'CAMBIAR','ticker':self.parse(feed_ticker)}
        return feed_ticker
    
    def parse_all(self, raw_ticker_list):
        feed_ticker_list = []
        for raw_ticker in raw_ticker_list:
            feed_ticker_list.append(self.feeder_ticker(raw_ticker))
        return feed_ticker_list

In [39]:
my_parser = BaseParser()
x = my_parser.parse('ELTON', '', '')

In [40]:
print(x)

ELTON


In [41]:
exception_1 = {'ticker': 'exception_1',
 'company': 'Acciona',
 'sector': 'Construcción',
 'entry_date': '2015',
 'ISIN': 'ES0125220311',
 'market': 'IBEX35',
 'active_type': 'stock'}

In [42]:
raw_ticker_list[0]

{'ticker': 'ANA',
 'company': 'Acciona',
 'sector': 'Construcción',
 'entry_date': '2015',
 'ISIN': 'ES0125220311',
 'market': 'IBEX35',
 'active_type': 'stock',
 'Yahoo_API': 'ANA.MC',
 'feeds': {'name': 'CAMBIAR', 'ticker': 'ANA.MC'}}

In [43]:
my_parser_W2YF = WikiToYahooFinanceParser()
x = my_parser_W2YF.parse(raw_ticker_list[0])
print(x)
x = my_parser_W2YF.parse(exception_1)
print(x)


ANA.MC
fixed_1.MC


In [44]:
my_list = raw_ticker_list[0]
print(my_list)
x = my_parser_W2YF.feeder_ticker(my_list)
x


{'ticker': 'ANA', 'company': 'Acciona', 'sector': 'Construcción', 'entry_date': '2015', 'ISIN': 'ES0125220311', 'market': 'IBEX35', 'active_type': 'stock', 'Yahoo_API': 'ANA.MC', 'feeds': {'name': 'CAMBIAR', 'ticker': 'ANA.MC'}}


{'ticker': 'ANA',
 'company': 'Acciona',
 'sector': 'Construcción',
 'entry_date': '2015',
 'ISIN': 'ES0125220311',
 'market': 'IBEX35',
 'active_type': 'stock',
 'Yahoo_API': 'ANA.MC',
 'feeds': {'name': 'CAMBIAR', 'ticker': 'ANA.MC'}}

In [45]:
my_parser_W2YF = WikiToYahooFinanceParser()
feeder_ticker_list = my_parser_W2YF.parse_all(raw_ticker_list)


In [46]:
print(feeder_ticker_list)

[{'ticker': 'ANA', 'company': 'Acciona', 'sector': 'Construcción', 'entry_date': '2015', 'ISIN': 'ES0125220311', 'market': 'IBEX35', 'active_type': 'stock', 'Yahoo_API': 'ANA.MC', 'feeds': {'name': 'CAMBIAR', 'ticker': 'ANA.MC'}}, {'ticker': 'ACX', 'company': 'Acerinox', 'sector': 'Mineral, metales y transformación', 'entry_date': '2015', 'ISIN': 'ES0132105018', 'market': 'IBEX35', 'active_type': 'stock', 'feeds': {'name': 'CAMBIAR', 'ticker': 'ACX.MC'}}, {'ticker': 'ACS', 'company': 'Grupo ACS', 'sector': 'Construcción', 'entry_date': '1998', 'ISIN': 'ES0167050915', 'market': 'IBEX35', 'active_type': 'stock', 'feeds': {'name': 'CAMBIAR', 'ticker': 'ACS.MC'}}, {'ticker': 'AENA', 'company': 'Aena', 'sector': 'Transporte y distribución', 'entry_date': '2015', 'ISIN': 'ES0105046009', 'market': 'IBEX35', 'active_type': 'stock', 'feeds': {'name': 'CAMBIAR', 'ticker': 'AENA.MC'}}, {'ticker': 'ALM', 'company': 'Almirall', 'sector': 'Productos farmacéuticos y biotecnología', 'entry_date': '202

In [47]:
def feeder_ticker(feed_ticker):
    feed_ticker['feeds'] = {'name':'AlphaVantage','ticker':my_parser_W2YF.parse(feed_ticker)}
    return feed_ticker

In [48]:
feeder_ticker(raw_ticker_list[14])

{'ticker': 'CIE',
 'company': 'CIE Automotive',
 'sector': 'Mineral, metales y transformación',
 'entry_date': '2018',
 'ISIN': 'ES0105630315',
 'market': 'IBEX35',
 'active_type': 'stock',
 'feeds': {'name': 'AlphaVantage', 'ticker': 'CIE.MC'}}

In [49]:
feed_ticker_list = []
for raw_ticker in raw_ticker_list[0:10]:
    feed_ticker_list.append(feeder_ticker(raw_ticker))

In [50]:
feed_ticker_list

[{'ticker': 'ANA',
  'company': 'Acciona',
  'sector': 'Construcción',
  'entry_date': '2015',
  'ISIN': 'ES0125220311',
  'market': 'IBEX35',
  'active_type': 'stock',
  'Yahoo_API': 'ANA.MC',
  'feeds': {'name': 'AlphaVantage', 'ticker': 'ANA.MC'}},
 {'ticker': 'ACX',
  'company': 'Acerinox',
  'sector': 'Mineral, metales y transformación',
  'entry_date': '2015',
  'ISIN': 'ES0132105018',
  'market': 'IBEX35',
  'active_type': 'stock',
  'feeds': {'name': 'AlphaVantage', 'ticker': 'ACX.MC'}},
 {'ticker': 'ACS',
  'company': 'Grupo ACS',
  'sector': 'Construcción',
  'entry_date': '1998',
  'ISIN': 'ES0167050915',
  'market': 'IBEX35',
  'active_type': 'stock',
  'feeds': {'name': 'AlphaVantage', 'ticker': 'ACS.MC'}},
 {'ticker': 'AENA',
  'company': 'Aena',
  'sector': 'Transporte y distribución',
  'entry_date': '2015',
  'ISIN': 'ES0105046009',
  'market': 'IBEX35',
  'active_type': 'stock',
  'feeds': {'name': 'AlphaVantage', 'ticker': 'AENA.MC'}},
 {'ticker': 'ALM',
  'company':