# ATH finder

Script to find new all-time highs (ATH) among the companies in the WIG20, mWIG40 and sWIG80 indices.

In [1]:
import pandas as pd
from yahoofinancials import YahooFinancials
import numpy as np
import requests
from bs4 import BeautifulSoup
from lxml import html
import re
from datetime import datetime
import os

Get the tickers.

In [2]:
def get_wig_tickers(wig='wig20'):
    '''Return the component tickers of a WIG index with the '.WA' suffix.

    The symbol elements come from ``parse_wig_comps_data``. For each one the
    first character of its text is dropped and '.WA' is appended.
    Only the wig20, mwig40 and swig80 are supported.
    '''
    tickers = []
    for symbol_element in parse_wig_comps_data(wig=wig):
        # The element text carries one leading character that is not part of
        # the symbol, hence the [1:] slice.
        symbol = symbol_element.get_text()[1:]
        tickers.append(symbol + '.WA')
    return tickers

def parse_wig_comps_data(wig):
    ''' Parse data from https://strefainwestorow.pl/notowania/gpw/{wig}/komponenty

    Downloads the components page of the given index and returns every
    ``<a class="instrument-symbol">`` element found in it.

    Parameters
    ----------
    wig : str
        Index name understood by ``get_wig_sublink``.

    Returns
    -------
    list
        The matching BeautifulSoup elements (empty if the page has none).

    Raises
    ------
    requests.RequestException
        On a network failure, a timeout, or a non-success HTTP status.
    '''
    sublink = get_wig_sublink(wig)
    url = f'https://strefainwestorow.pl/notowania/gpw/{sublink}/komponenty'
    # Without a timeout the call can block forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page, which would
    # silently produce an empty ticker list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return soup.find_all('a', class_="instrument-symbol")

def get_wig_sublink(wig):
    '''Map an index name to the URL fragment used for its components page.

    Parameters
    ----------
    wig : str
        One of 'wig20', 'mwig40' or 'swig80'.

    Returns
    -------
    str
        The matching sublink, e.g. 'wig20-wig20'.

    Raises
    ------
    ValueError
        If ``wig`` is not one of the supported index names.
    '''
    sublinks = {
        'wig20': 'wig20-wig20',
        'mwig40': 'mwig40-mwig40',
        'swig80': 'swig80-swig80',
    }
    try:
        return sublinks[wig]
    except (KeyError, TypeError):
        # KeyError: unknown name; TypeError: unhashable argument.
        supported = ', '.join(sublinks)
        raise ValueError(
            f'Unsupported index {wig!r}; expected one of: {supported}'
        ) from None

In [4]:
# Fetch the components of the three indices (one page request each) and
# concatenate them in wig20, mwig40, swig80 order.
three_wigs_tickers = get_wig_tickers('wig20')+get_wig_tickers('mwig40')+get_wig_tickers('swig80')

In [5]:
three_wigs_tickers[:3]

['ALE.WA', 'ACP.WA', 'CCC.WA']

Gather the data.

In [2]:
def get_data(start_date, end_date, tickers, interval):
    '''Fetch historical price data for ``tickers`` via YahooFinancials.

    ``start_date`` and ``end_date`` are passed through unchanged and
    ``interval`` is forwarded as ``time_interval``. The result of
    ``get_historical_price_data`` is returned as-is.
    '''
    client = YahooFinancials(tickers)
    return client.get_historical_price_data(
        start_date,
        end_date,
        time_interval=interval,
    )

In [7]:
# Request window and sampling interval handed to get_data.
start_date = '1971-01-01'  # presumably early enough to cover all available history - TODO confirm
end_date = '2022-04-27'  # NOTE(review): hard-coded date; adjust before re-running
interval = 'daily'
data = get_data(start_date, end_date, three_wigs_tickers, interval)

In [8]:
data[three_wigs_tickers[0]]['prices'][:3]

[{'date': 1602572400,
  'high': 80.0,
  'low': 71.08000183105469,
  'open': 72.0,
  'close': 80.0,
  'volume': 18452974,
  'adjclose': 80.0,
  'formatted_date': '2020-10-13'},
 {'date': 1602658800,
  'high': 90.30000305175781,
  'low': 71.25,
  'open': 84.0,
  'close': 75.95999908447266,
  'volume': 16731591,
  'adjclose': 75.95999908447266,
  'formatted_date': '2020-10-14'},
 {'date': 1602745200,
  'high': 80.0,
  'low': 71.5199966430664,
  'open': 76.0,
  'close': 79.22000122070312,
  'volume': 6235974,
  'adjclose': 79.22000122070312,
  'formatted_date': '2020-10-15'}]

Create the combined data with close prices.

In [3]:
def get_close_df(dist_data):
    '''Combine per-ticker close prices into one date-indexed DataFrame.

    Parameters
    ----------
    dist_data : dict
        Mapping ``ticker -> history``. Each history is expected to be a dict
        whose 'prices' entry is a list of records with 'close' and
        'formatted_date' keys. Entries that are not dicts or that lack
        'prices' are skipped.

    Returns
    -------
    pandas.DataFrame
        One column per usable ticker (named after the ticker), indexed by the
        parsed 'formatted_date' values. Dates missing for a ticker are NaN.
        An empty DataFrame is returned when no ticker has price data.
    '''
    frames = []
    for ticker, hist_data in dist_data.items():
        # Skip tickers for which no price history is available.
        if not isinstance(hist_data, dict) or 'prices' not in hist_data:
            continue
        prices = pd.DataFrame(hist_data['prices'], columns=['close', 'formatted_date'])
        prices = (
            prices
            .assign(formatted_date=pd.to_datetime(prices['formatted_date']))
            .set_index('formatted_date')
            .rename(columns={'close': f'{ticker}'})
        )
        frames.append(prices)

    # pd.concat raises on an empty list, so keep the original "empty frame"
    # result for that case.
    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of growing the frame inside the loop, which
    # copied all accumulated data on every iteration.
    return pd.concat(frames, axis=1)

In [10]:
close_df = get_close_df(data)

In [11]:
close_df

Unnamed: 0_level_0,ALE.WA,ACP.WA,CCC.WA,CDR.WA,CPS.WA,DNP.WA,JSW.WA,KGH.WA,LTS.WA,LPP.WA,...,TOA.WA,TRK.WA,UNT.WA,VRC.WA,VGO.WA,VOX.WA,VRG.WA,WWL.WA,WLT.WA,ZEP.WA
formatted_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03,,73.323006,,38.848099,,,,26.700001,,,...,,,,,,,1.415743,,,
2000-01-04,,73.323006,,38.012699,,,,25.400000,,,...,,,,,,,1.346441,,,
2000-01-05,,69.619820,,37.803799,,,,25.299999,,,...,,,,,,,,,,
2000-01-06,,72.088608,,37.970901,,,,25.000000,,,...,,,,,,,1.227637,,,
2000-01-07,,76.779305,,43.443001,,,,27.900000,,,...,,,,,,,1.376142,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-20,27.905001,79.449997,51.720001,137.440002,26.580000,327.000000,68.900002,168.100006,72.260002,11240.0,...,6.25,1.836,47.799999,44.599998,686.0,41.299999,3.740000,471.0,7.41,17.799999
2022-04-21,28.045000,78.150002,55.080002,128.639999,26.040001,318.899994,69.019997,161.699997,70.000000,10400.0,...,6.26,1.840,46.799999,44.799999,696.0,41.200001,3.850000,470.0,7.42,17.760000
2022-04-22,27.000000,76.900002,53.020000,127.599998,25.820000,316.500000,64.339996,150.750000,69.739998,10100.0,...,6.30,1.756,47.000000,44.599998,678.0,40.500000,3.920000,468.0,7.50,17.340000
2022-04-25,27.055000,77.949997,52.599998,130.720001,25.799999,315.600006,63.860001,140.600006,68.220001,9990.0,...,6.05,1.682,48.000000,44.700001,676.0,40.500000,3.830000,468.0,7.25,16.200001


For the WIG companies, Yahoo only provides quotes from 2000 onwards, so the ATHs of companies with a longer history than 2000–present have to be defined manually. Let's take them from Strategies_Simulations/ATH.ipynb.

In [12]:
# Manually defined reference ATH per ticker for companies whose history is
# longer than the downloaded quotes. Used by update_ath_by_dict to raise the
# computed ATH wherever the downloaded data does not exceed these values.
aths_for_older = {
    'ACP.WA': 70.00,
    'CDR.WA': 125.5,
    'KGH.WA': 9.95,
    'MBK.WA': 121.3,
    'OPL.WA': 13.8,
    'PEO.WA': 33.15,
    'PKN.WA': 18.09,
    'BDX.WA': 36.87,
    'CMR.WA': 56,
    'BHW.WA': 29.7,
    'ING.WA': 26.94,
    'KTY.WA': 35,
    'MIL.WA': 9.93,
    'AGO.WA': 42.77,
    'AMC.WA': 52.2,
    'BRS.WA': 0.28,
    'BOS.WA': 74.53,
    'ECH.WA': 0.28,
    'FTE.WA': 12.56,
    'RFK.WA': 13.55,
    'SNK.WA': 3.79,
    'STX.WA': 37.09,
    'VRG.WA': 7.41}

Create the ATH list based on historical data.

In [13]:
# Column-wise maximum of the close prices: one value per ticker column.
ath_ser = close_df.max()

In [14]:
ath_ser

ALE.WA      94.639999
ACP.WA     176.765228
CCC.WA     309.000000
CDR.WA     460.799988
CPS.WA      37.860001
             ...     
VOX.WA      55.599998
VRG.WA      16.790915
WWL.WA    1459.000000
WLT.WA      18.320000
ZEP.WA      33.500000
Length: 140, dtype: float64

If any value from aths_for_older is greater than the one in ath_ser for a particular company, the value in ath_ser needs to be overwritten.

In [4]:
def update_ath_by_dict(ser_in, ath_dict):
    '''Raise ATH values in a Series to the reference values from ``ath_dict``.

    For every ticker present in both ``ser_in`` and ``ath_dict``, the Series
    value is kept only when it is strictly greater than the reference value.
    Otherwise (smaller, equal, or NaN) it is replaced by the reference value.

    Parameters
    ----------
    ser_in : pandas.Series
        ATH values indexed by ticker. It is not modified.
    ath_dict : dict
        Mapping ``ticker -> reference ATH``. Tickers absent from ``ser_in``
        are ignored, so a hard-coded reference table does not break the
        update when the set of tickers changes.

    Returns
    -------
    pandas.Series
        An updated copy of ``ser_in``.
    '''
    ser = ser_in.copy()
    for ticker, reference_ath in ath_dict.items():
        if ticker not in ser.index:
            continue
        # "not >" rather than "<=" so that NaN entries are replaced too.
        if not ser.at[ticker] > reference_ath:
            ser.at[ticker] = reference_ath
    return ser

In [16]:
ath_ser = update_ath_by_dict(ath_ser, aths_for_older)

In [19]:
ath_ser

ALE.WA      94.639999
ACP.WA     176.765228
CCC.WA     309.000000
CDR.WA     460.799988
CPS.WA      37.860001
             ...     
VOX.WA      55.599998
VRG.WA      16.790915
WWL.WA    1459.000000
WLT.WA      18.320000
ZEP.WA      33.500000
Length: 140, dtype: float64

Add column names and save to CSV.

In [5]:
def save_ath_data(series, file_path):
    '''Write an ATH Series to CSV with 'Ticker' and 'ATH' as column headers.

    Parameters
    ----------
    series : pandas.Series
        ATH values indexed by ticker. It is not modified; the names are
        applied to a renamed copy.
    file_path : str
        Destination CSV path. Missing parent directories are created.
    '''
    target_dir = os.path.dirname(file_path)
    # dirname is '' for a bare file name; makedirs('') would raise.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    series.rename('ATH').rename_axis('Ticker').to_csv(file_path)

In [30]:
save_ath_data(ath_ser, 'ATH_Data/ATH_WIG20_40_80.csv')

Let's also save the list of tickers.

In [98]:
with open("ATH_Data/WIG_tickers.txt", "w") as output:
    # One ticker per line. All elements are already strings, so no str()
    # conversion is needed before joining.
    output.write('\n'.join(three_wigs_tickers))

Read the saved ATH data to avoid gathering all the historical data every time. This is the starting point for finding potential new ATHs each day.

In [7]:
def read_csv_data(file_path):
    '''Load a CSV file into a DataFrame indexed by its 'Ticker' column.'''
    frame = pd.read_csv(file_path, index_col='Ticker')
    return frame

In [8]:
def last_saved_csv(path):
    '''Return the CSV path chosen by ``get_last_csv_path`` among the entries of ``path``.

    Every name returned by ``os.listdir(path)`` is joined with ``path`` and
    the resulting list is handed to ``get_last_csv_path``.
    '''
    candidates = []
    for entry in os.listdir(path):
        candidates.append(os.path.join(path, entry))
    return get_last_csv_path(candidates)

In [9]:
def get_last_csv_path(paths):
    '''Return the '.csv' path with the greatest ``os.path.getctime`` value.

    Parameters
    ----------
    paths : list of str
        Candidate file paths. The list is not modified.

    Returns
    -------
    str
        The path ending in '.csv' with the largest ctime. On ties, the first
        one in ``paths`` wins.

    Raises
    ------
    ValueError
        If none of the paths ends in '.csv'.
    '''
    # Filter first instead of repeatedly removing the newest non-CSV entry
    # from the caller's list and recursing.
    csv_paths = [path for path in paths if path.endswith('.csv')]
    if not csv_paths:
        raise ValueError('No .csv file found among the given paths.')
    # NOTE: per the Python docs, getctime is the creation time on Windows
    # but the last metadata change time on Unix.
    return max(csv_paths, key=os.path.getctime)

In [10]:
# NOTE: despite its name, this holds the file path returned by
# last_saved_csv, not a DataFrame.
last_ath_df = last_saved_csv('ATH_Data/')

In [11]:
ath_df = read_csv_data(last_ath_df)

In [12]:
ath_df

Unnamed: 0_level_0,ATH
Ticker,Unnamed: 1_level_1
ALE.WA,94.639999
ACP.WA,176.765228
CCC.WA,309.000000
CDR.WA,460.799988
CPS.WA,37.860001
...,...
VOX.WA,55.599998
VRG.WA,16.790915
WWL.WA,1459.000000
WLT.WA,18.320000


Get the tickers. If there were no changes in the index compositions, skip the next two cells; otherwise skip the third one.

In [84]:
three_wigs_tickers = get_wig_tickers('wig20')+get_wig_tickers('mwig40')+get_wig_tickers('swig80')

In [None]:
with open("ATH_Data/WIG_tickers.txt", "w") as output:
    output.write('\n'.join(three_wigs_tickers))

In [2]:
# Load the ticker list from the text file: one ticker per line.
with open("ATH_Data/WIG_tickers.txt", "r") as file:
    three_wigs_tickers = file.read().splitlines()

In [3]:
three_wigs_tickers[:3]

['ALE.WA', 'ACP.WA', 'CCC.WA']

Get data from today. Make sure the market is already closed.

In [4]:
def get_current_prices_dict(tickers):
    '''Return the result of YahooFinancials' ``get_current_price`` for ``tickers``.'''
    client = YahooFinancials(tickers)
    current_prices = client.get_current_price()
    return current_prices

In [5]:
today_price_dict = get_current_prices_dict(three_wigs_tickers)

In [6]:
len(today_price_dict)

140

Compare with ATH data frame and show new ATH (if any).

In [13]:
def get_new_ATHs(cur_ath_df, new_prices_dict):
    '''List the tickers whose new price is strictly above the stored ATH.

    Parameters
    ----------
    cur_ath_df : pandas.DataFrame
        Frame indexed by ticker with an 'ATH' column.
    new_prices_dict : dict
        Mapping ``ticker -> price``. Tickers that are absent from
        ``cur_ath_df`` or whose price is None are skipped, because there is
        nothing to compare.

    Returns
    -------
    list
        Tickers with a new ATH, in the iteration order of ``new_prices_dict``.
    '''
    stored_ath = cur_ath_df['ATH']
    new_highs = []
    for ticker, price in new_prices_dict.items():
        if price is None or ticker not in stored_ath.index:
            continue
        if stored_ath.at[ticker] < price:
            new_highs.append(ticker)
    return new_highs

In [14]:
# Tickers whose price in today_price_dict is above the stored ATH.
new_ath = get_new_ATHs(ath_df, today_price_dict)
f'There is new ATH for: {new_ath}'

'There is new ATH for: []'

If there are any new ATHs, update the data and save a new ATH CSV file.

In [68]:
def update_ath_df_by_dict(df_in, ath_dict):
    '''Return a copy of the ATH frame with higher prices written into 'ATH'.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame indexed by ticker with an 'ATH' column. It is not modified.
    ath_dict : dict
        Mapping ``ticker -> price``. A price replaces the stored ATH only
        when it is strictly greater. Tickers absent from ``df_in`` and
        prices that are None are skipped.

    Returns
    -------
    pandas.DataFrame
        Updated copy of ``df_in``. Only the 'ATH' column is ever written.
    '''
    df = df_in.copy()
    for ticker, price in ath_dict.items():
        if price is None or ticker not in df.index:
            continue
        if df.at[ticker, 'ATH'] < price:
            # Target the 'ATH' column explicitly. Assigning to the whole row
            # would overwrite any other column as well.
            df.at[ticker, 'ATH'] = price
    return df

In [69]:
def update_and_save(cur_ath_df, new_prices_dict, file_path):
    '''Update the ATH frame with the new prices, write it to CSV and return it.

    The update is delegated to ``update_ath_df_by_dict``. The resulting frame
    is saved to ``file_path`` with ``DataFrame.to_csv`` and then returned.
    '''
    updated_ath_df = update_ath_df_by_dict(cur_ath_df, new_prices_dict)
    updated_ath_df.to_csv(file_path)
    return updated_ath_df

In [70]:
# Write a new, date-stamped CSV only when at least one ticker set a new ATH.
if new_ath: 
    today = datetime.today().strftime('%Y-%m-%d')  # current local date as YYYY-MM-DD
    new_ATH_df = update_and_save(ath_df, today_price_dict, f'ATH_Data/ATH_WIG20_40_80_{today}.csv')
else:
    print('There is no need to save updated ATH csv because of none new ATH.')

There is no need to save updated ATH csv because of none new ATH.
