In [None]:
##  import libraries

import pandas as pd
import numpy as np
from yahooquery import Ticker
import yfinance as yf
import datetime as dt

In [None]:
##  get tickers from file

# Workbook listing the companies to look up; its rows are reused later as the
# base table that the downloaded statistics are attached to.
raw_data = pd.read_excel('tickers.xlsx')
# raw_data = pd.read_excel('All Public comps with tickers.xlsx') #  <-- loading file with tickers

# Only the first 100 entries of the Yahoo ticker column are processed.
tickers = raw_data['Public - Ticker (Yahoo)'].iloc[:100]

In [None]:
##  needed information

# Fields to extract, grouped by the lookup section they are read from.
summary_detail_keys = [
    'previousClose',
    'marketCap',
    'currency',
]
financial_data_keys = [
    'totalRevenue',
    'totalCash',
    'totalDebt',
    'ebitda',
    'freeCashflow',
    'operatingCashflow',
    'grossProfits',
    'revenueGrowth',
]
key_stats_keys = [
    'enterpriseValue',
    'sharesOutstanding',
    'floatShares',
    'lastFiscalYearEnd',
    'profitMargins',
]

# Full column order of the result table; 'growthFromFeb' is appended last
# because it is not part of any of the three field groups above.
keys = [*summary_detail_keys, *financial_data_keys, *key_stats_keys, 'growthFromFeb']

In [None]:
## parsing
    
def to_dict(x):
    """Return ``x`` unchanged when it is a dict, otherwise a new empty dict.

    Lets callers use ``.get`` on a lookup result without first checking
    whether the lookup actually produced a mapping.
    """
    return x if isinstance(x, dict) else {}
    
    
def parse(tickers):
    """Collect the configured statistics for every ticker into one DataFrame.

    Exactly one output row is produced per entry of ``tickers``, in input
    order, so the result lines up positionally with the input. Entries that
    are missing (NaN/None) or that ``Ticker`` rejects with a ``TypeError``
    produce a row filled with NaN.

    Parameters
    ----------
    tickers : iterable
        Ticker symbols to look up; may contain missing values.

    Returns
    -------
    pandas.DataFrame
        Columns are the module-level ``keys`` (in that order), index is
        0..n-1. ``growthFromFeb`` is ``previousClose`` divided by the first
        close returned for the 2020-02-04..2020-02-07 download window,
        minus 1, or NaN when that cannot be computed.
    """
    empty_row = dict.fromkeys(keys, np.nan)
    # Rows are gathered in a list and turned into a frame once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and re-copied the
    # whole frame on every iteration.
    records = []
    counter = 0

    for ticker in tickers:
        # pd.isna also catches None and NaN objects that are not the np.nan
        # singleton, which an identity check (`is np.nan`) would let through.
        if pd.isna(ticker):
            records.append(dict(empty_row))
            continue

        try:
            ticker_info = Ticker(ticker)
        except TypeError:
            records.append(dict(empty_row))
            continue

        # Anything that is not a dict is treated as "no data" for that section.
        summary_detail = to_dict(ticker_info.summary_detail.get(ticker))
        financial_data = to_dict(ticker_info.financial_data.get(ticker))
        key_stats = to_dict(ticker_info.key_stats.get(ticker))

        row = {}
        row.update({key: summary_detail.get(key) for key in summary_detail_keys})
        row.update({key: financial_data.get(key) for key in financial_data_keys})
        row.update({key: key_stats.get(key) for key in key_stats_keys})

        try:
            history = yf.download(ticker, start=dt.date(2020, 2, 4), end=dt.date(2020, 2, 7))
            # Positional lookup of the first close; np.ravel copes with
            # 'Close' being either a Series or a one-column DataFrame.
            price = np.ravel(history['Close'])[0]
            row['growthFromFeb'] = row['previousClose'] / price - 1
        except Exception:
            # Best effort: no price history, a missing previousClose, or a
            # failed download all leave the growth figure undefined. Unlike
            # a bare `except`, this still lets Ctrl-C / kernel interrupt stop
            # the loop.
            row['growthFromFeb'] = np.nan

        records.append(row)
        counter += 1
        if counter % 100 == 0:
            print(f'{counter} tickers downloaded')

    # columns=keys pins the column order and keeps the columns present even
    # when `tickers` is empty.
    return pd.DataFrame(records, columns=keys)
     
# Runs the lookup for every selected ticker. parse() goes through yahooquery's
# Ticker and yf.download for each symbol, so this cell presumably needs network
# access and can take a while.
parsed_df = parse(tickers)

In [None]:
def to_dollar(df=None):
    """Divide amounts quoted in 1/100 currency units by 100, in place.

    Rows whose ``currency`` is one of ``'GBp'``, ``'ZAc'`` or ``'ILA'`` have
    every column outside ``not_dividing`` divided by 100. Despite the name,
    no exchange-rate conversion is performed and the ``currency`` label is
    left unchanged.

    The operation is not idempotent: because the currency label stays the
    same, calling the function again divides the same rows by 100 again.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame to modify in place. Defaults to the module-level ``parsed_df``.

    Returns
    -------
    None
    """
    if df is None:
        df = parsed_df

    # NOTE(review): 'previousClose' is excluded here, so the share price of a
    # minor-unit row is left as-is while e.g. 'marketCap' is divided. This
    # list mirrors the one used for the divide-by-a-million step - confirm the
    # exclusion is intended for this step as well.
    not_dividing = ['previousClose', 'currency', 'lastFiscalYearEnd', 'revenueGrowth', 'profitMargins', 'growthFromFeb']
    currencies = ['GBp', 'ZAc', 'ILA']

    def remove_cents(x):
        # Values that cannot be read as a number (None, text) pass through.
        try:
            return float(x) / 100
        except (TypeError, ValueError):
            return x

    if 'currency' not in df.columns:
        return

    minor_unit_rows = df['currency'].isin(currencies)
    if not minor_unit_rows.any():
        return

    for column in df.columns:
        if column in not_dividing:
            continue
        converted = df[column].apply(remove_cents)
        # Rebuild the whole column rather than writing into a filtered slice:
        # this avoids SettingWithCopyWarning and lets an integer column become
        # float without an incompatible-dtype row assignment.
        df[column] = converted.where(minor_unit_rows, df[column])
        
# Modifies parsed_df in place. Run this cell only once per parse: the currency
# labels are left unchanged, so a second run divides the same rows by 100 again.
to_dollar()

In [None]:
## adding data to file

def devide(x):
    """Scale ``x`` down by one million, e.g. ``2500000`` -> ``2.5``.

    Values for which ``float(x)`` raises ``TypeError`` (such as ``None``) are
    returned unchanged.
    """
    try:
        in_millions = float(x) / 1_000_000
    except TypeError:
        return x
    return in_millions
    
# Columns copied over as-is; every other column is passed through devide(),
# i.e. expressed in millions.
not_dividing = ['previousClose', 'currency', 'lastFiscalYearEnd', 'revenueGrowth', 'profitMargins', 'growthFromFeb']
for column in parsed_df.columns:
    values = parsed_df[column]
    raw_data[column] = values if column in not_dividing else values.apply(devide)

In [None]:
## Save result

# Write the ticker table, now carrying the added statistics columns, to a new
# workbook at a path relative to the working directory.
raw_data.to_excel('Comps_statistics.xlsx')

In [None]:
# currencies = ['GBp', 'ZAc', 'ILA']
# parsed_df[parsed_df['currency'] == 'ILA']