In [18]:
import os

import numpy as np
import pandas as pd

In [19]:
data_dir = './data_stocks'
stocks_csv_files = [os.path.join(data_dir, x) for x in os.listdir(data_dir) if x.endswith('.L.csv')]
BARC = os.path.join(data_dir, 'BARC.L.csv')
barc_df = pd.read_csv(BARC)

In [20]:
barc_df.head()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2014-01-02,274.651001,268.350006,273.0,271.049988,30563664.0,235.943085
1,2014-01-03,273.910004,270.399994,271.25,272.850006,23678963.0,237.51001
2,2014-01-06,278.666992,271.200012,271.549988,277.5,47008835.0,241.557693
3,2014-01-07,282.778015,275.0,276.950012,280.950012,44374624.0,244.560913
4,2014-01-08,285.899994,281.25,282.350006,283.700012,45068534.0,246.954666


In [41]:
def get_data(code):
    return pd.read_csv(data_dir + '/' + code + '.csv')

In [21]:
def highest_close_price(df):
    return df.loc[df['Close'].idxmax(), 'Close']
highest_close_price(barc_df)

296.5

In [22]:
def highest_close_date(df):
    return df.loc[df['Close'].idxmax(), 'Date']
highest_close_date(barc_df)

'2014-01-15'

In [23]:
def latest_close_price(df):
    return df.loc[df.index[-1], 'Close']
latest_close_price(barc_df)

165.94000244140625

In [24]:
def latest_close_date(df):
    return df.loc[df.index[-1], 'Date']
latest_close_date(barc_df)

'2019-03-15'

In [25]:
def highest_current_percentage(df):
    """
    Returns the percentage (2 d.p.) difference between
    the highest and current closing price
    """
    highest_close = df.loc[df['Close'].idxmax(), 'Close']
    current_close = df.loc[df.index[-1], 'Close']
    return round((current_close - highest_close) / current_close, 2) * 100

highest_current_percentage(barc_df)

-79.0

In [26]:
# Percentage difference between highest close and current close for all stocks
result = []

for file in stocks_csv_files:
    df = pd.read_csv(file)
    percentage = highest_current_percentage(df)
    code = file.rsplit('/')[-1].replace('.csv', '')
    result.append((percentage, code))

In [27]:
agg_df = pd.DataFrame(result, columns=['percentage', 'code'])

In [28]:
agg_df = agg_df.sort_values(by='percentage', ascending=False)

In [55]:
highest_close_price(barc_df)
latest_close_price(barc_df)
agg_df[(agg_df['percentage'] < -10) & (agg_df['percentage'] > -200)].sort_values(by='percentage')

Unnamed: 0,percentage,code
139,-186.0,CNA.L
148,-169.0,WMH.L
216,-166.0,IPO.L
50,-164.0,ESNT.L
159,-157.0,SGC.L
345,-157.0,ISAT.L
222,-154.0,PLUS.L
21,-150.0,FRES.L
314,-149.0,CTEC.L
68,-146.0,AGK.L


In [48]:
mslh_df = get_data('MSLH.L')
highest_close_price(mslh_df)

598.5

In [56]:
latest_close_price(mslh_df)

598.5

In [66]:
barc_df['MA_50'] = barc_df['Close'].rolling(window=50).mean()
barc_df['MA_200'] = barc_df['Close'].rolling(window=200).mean()

In [67]:
barc_df.head(30)

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,MA_50,MA_200
0,2014-01-02,274.651001,268.350006,273.0,271.049988,30563664.0,235.943085,,
1,2014-01-03,273.910004,270.399994,271.25,272.850006,23678963.0,237.51001,,
2,2014-01-06,278.666992,271.200012,271.549988,277.5,47008835.0,241.557693,,
3,2014-01-07,282.778015,275.0,276.950012,280.950012,44374624.0,244.560913,,
4,2014-01-08,285.899994,281.25,282.350006,283.700012,45068534.0,246.954666,,
5,2014-01-09,289.710999,282.477997,283.899994,284.399994,57522092.0,247.564011,,
6,2014-01-10,287.136993,282.199005,286.0,283.600006,66050186.0,246.867661,,
7,2014-01-13,294.25,286.5,286.5,291.700012,57764266.0,253.918503,,
8,2014-01-14,293.153992,286.149994,287.600006,291.75,39872294.0,253.962021,,
9,2014-01-15,298.031006,292.350006,293.299988,296.5,53319972.0,258.096802,,


In [30]:
agg_df.to_csv('./data_aggregated/highest_close_percentage.csv')