In [1]:
import os

import numpy as np
import pandas as pd

os.listdir('./data_symbols/')

['.DS_Store',
 'ftse100_symbols.txt',
 'ftse_all_share_symbols_yahoo.txt',
 'ftse350_symbols.txt',
 'lse_codes_companies.csv',
 'ftse250_symbols.txt']

In [2]:
# Get paths of ftse100 stocks data
data_dir = './data_stocks/ftse100/'
stocks_csv_files = [os.path.join(data_dir, x) for x in os.listdir(data_dir) if x.endswith('.L.csv')]
companies_df = pd.read_csv('./data_symbols/lse_codes_companies.csv', usecols=[0, 1])

# read barclays data for testing
barc_df = pd.read_csv('./data_stocks/ftse100/BARC.L.csv')

In [3]:
def file_to_symbol(file):
    """Takes a filepath and returns the symbol"""
    return file.rsplit('/', 1)[1].rsplit('.', 1)[0]

In [4]:
def get_data(code):
    return pd.read_csv(data_dir + '/' + code + '.csv')

In [5]:
def highest_close_price(df):
    return df.loc[df['Close'].idxmax(), 'Close']
highest_close_price(barc_df)

296.5

In [6]:
def highest_close_date(df):
    return df.loc[df['Close'].idxmax(), 'Date']
highest_close_date(barc_df)

'2014-01-15'

In [7]:
def latest_close_price(df):
    return df.loc[df.index[-1], 'Close']
latest_close_price(barc_df)

154.67999267578125

In [8]:
def latest_close_date(df):
    return df.loc[df.index[-1], 'Date']
latest_close_date(barc_df)

'2019-03-29'

In [11]:
def highest_current_percentage_diff(df):
    """
    Returns the percentage (2 d.p.) difference between
    the highest and current closing price
    """
    highest_close = df.loc[df['Close'].idxmax(), 'Close']
    current_close = df.loc[df.index[-1], 'Close']
    return round((current_close - highest_close) / current_close, 2) * 100

highest_current_percentage_diff(barc_df)

-92.0

In [14]:
# Percentage difference between highest close and current close for all stocks
def percentage_diff_df(stocks_csv_files):
    result = []
    for file in stocks_csv_files:
        df = pd.read_csv(file)
        percentage = highest_current_percentage_diff(df)
        code = file.rsplit('/')[-1].replace('.csv', '')
        result.append((percentage, code))
    agg_df = (
        pd.DataFrame(result, columns=['percentage', 'code'])
        .sort_values(by='percentage', ascending=True)
    )
    return agg_df

agg_df = percentage_diff_df(stocks_csv_files)
agg_df.head()

Unnamed: 0,percentage,code
37,-202.0,CNA.L
40,-146.0,TUI.L
88,-137.0,WPP.L
5,-131.0,FRES.L
89,-124.0,BT-A.L


In [16]:
# Merge company names
if 'company' not in agg_df.columns:
    agg_df = pd.merge(agg_df, companies_df, on='code', how='left', validate='one_to_one')
agg_df.head()

Unnamed: 0,percentage,code,Company_x,Company_y
0,-202.0,CNA.L,Centrica plc,Centrica plc
1,-146.0,TUI.L,TUI AG,TUI AG
2,-137.0,WPP.L,WPP plc,WPP plc
3,-131.0,FRES.L,Fresnillo PLC,Fresnillo PLC
4,-124.0,BT-A.L,BT Group plc,BT Group plc


In [17]:
# # Export agg_df to csv file
# agg_df.to_csv('./data_aggregated/highest_close_percentage_difference.csv')

In [18]:
def add_moving_average(df, days):
    column = 'ma_{}'.format(days)
    df[column] = df['Close'].rolling(window=days).mean()
    return df

In [19]:
def add_ma_position(df, ma_1=50, ma_2=200):
    """
    Creates a new column 'ma_position' with possible
    values -1, +1.
    -1 when shorter SMA is below longer SMA
    +1 when shorter SMA is above longer SMA
    """
    column = 'ma_position'
    ma_1_df = df['Close'].rolling(window=ma_1).mean()
    ma_2_df = df['Close'].rolling(window=ma_2).mean()
    df[column] = np.where(ma_1_df > ma_2_df, 1, -1)
    return df

In [20]:
def add_ma_crossover(df, ma_1=50, ma_2=200, close_column='Close'):
    """
    Takes stock data and adds a column 'ma_crossover'
    which is 1 on the day where two moving averages cross over. 
    """
    column = 'ma_crossover'
    
    # Get moving averages
    ma_1_df = df['Close'].rolling(window=ma_1).mean()
    ma_2_df = df['Close'].rolling(window=ma_2).mean()

    # MA_1 below MA_2 >>> -1, MA_1 above MA_2 >>> +1
    position = pd.Series(np.where(ma_1_df > ma_2_df, 1, -1))
    
    # 1 if there is a change in position, otherwise 0
    df[column] = np.where(position != position.shift(1).fillna(method='bfill'), 1, 0)
    
    return df

def ma_crossover_dates(df):
    df = add_ma_crossover(df)
    return df[df['ma_crossover'] == 1]

In [23]:
# Name of stock and dates of moving average crossover
def get_crossovers_df(stocks_csv_files):
    """
    Loop over all csv files containing stock data
    and return a dataframe on days where crossover
    took place.
    """
    crossover_df = pd.DataFrame()
    for file in stocks_csv_files:
        df = pd.read_csv(file)
        df = add_ma_crossover(df)
        df = add_ma_position(df)
        df = df[(df['ma_crossover'] == 1) & (df['ma_position'] == 1)]
        df['code'] = file_to_symbol(file)
        crossover_df = pd.concat([crossover_df, df])
    return crossover_df

crossover_df = get_crossovers_df(stocks_csv_files)
crossover_df.sort_values(by='Date').tail()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,ma_crossover,ma_position,code
1251,2019-03-15,751.599976,736.599976,743.0,742.599976,12936967.0,742.599976,1,1,JE.L
1316,2019-03-15,7160.0,7070.0,7110.0,7080.0,235280.0,7080.0,1,1,SPX.L
1317,2019-03-18,66.209999,64.849998,65.0,65.370003,230980421.0,65.370003,1,1,LLOY.L
1318,2019-03-22,4647.5,4529.5,4640.0,4538.5,354587.0,4459.984863,1,1,IHG.L
1320,2019-03-22,930.400024,912.200012,929.0,913.400024,2141005.0,913.400024,1,1,LAND.L


In [None]:
# crossover_df.sort_values(by='Date').to_csv('./data_aggregated/moving_average_crossovers.csv')

In [22]:
crossover_df.sort_values(by='Date').tail()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close,ma_crossover,ma_position,code
1251,2019-03-15,751.599976,736.599976,743.0,742.599976,12936967.0,742.599976,1,1,JE.L
1316,2019-03-15,7160.0,7070.0,7110.0,7080.0,235280.0,7080.0,1,1,SPX.L
1317,2019-03-18,66.209999,64.849998,65.0,65.370003,230980421.0,65.370003,1,1,LLOY.L
1318,2019-03-22,4647.5,4529.5,4640.0,4538.5,354587.0,4459.984863,1,1,IHG.L
1320,2019-03-22,930.400024,912.200012,929.0,913.400024,2141005.0,913.400024,1,1,LAND.L


### Largest gap between long term moving average and very short term moving average

Long term MA = 200 days<br>
Short term MA = 5 days

There is bound to be a pull back or short term price reversal when the gap between MA 200 and MA 5 is large.

Strategy is to itterate through all stocks and find the ones with the largest gap between the two moving averages.

The gap needs to be in percentage, so that we can compare the gap between different stocks e.g. 

    symbol: LLOY.L
    date: 28/12/2018
    MA 200 = 61.01 
    MA 5 = 50.94
    gap = 16.5%
    calculation: (61.01 - 50.94)/61.01