In [1]:
# Dependencies
import os
import time

import numpy as np
import pandas as pd

from dLoader import DataLoader

# Load Russell 2000 CSV

In [16]:
# Load the constituent list; the first CSV row is the header and the first
# CSV column becomes the row index.
r2000 = pd.read_csv(
    'Russell2000.csv',
    index_col=0,
    header=0,
)

In [17]:
# Show the loaded table (columns: Ticker, Company, Sector, Skipped).
r2000

Unnamed: 0,Ticker,Company,Sector,Skipped
0,AMC,AMC ENTERTAINMENT HOLDINGS INC CLA,Communication,False
1,OVV,OVINTIV INC,Energy,False
2,NTLA,INTELLIA THERAPEUTICS INC,Health Care,False
3,LSCC,LATTICE SEMICONDUCTOR CORP,Information Technology,False
4,TTEK,TETRA TECH INC,Industrials,False
...,...,...,...,...
2029,GBL,GAMCO INVESTORS INC,Financials,False
2030,PDLI,PDL BIOPHARMA INC,Health Care,True
2031,GTXI,GTXI INC - CVR,Health Care,True
2032,P5N994,Petrocorp Inc Escrow,Financials,True


# Download Data

In [18]:
# Every ticker in the constituent list, in file order.
tickers = np.array(r2000['Ticker'])
# Tickers already handled in this session (filled in by the download loop).
done = list()
# Tickers whose 'Skipped' flag is already True in the CSV.
skip = r2000.loc[r2000['Skipped'], 'Ticker'].to_list()

In [19]:
# Folder that holds one '<ticker>.csv' file per symbol. Computed once here
# because it does not change between retry passes; `check_for_update` reads
# this same module-level name.
db = os.path.abspath('Database')
# Pause between passes after an 'Unauthorized' error (three minutes).
RETRY_DELAY_SECONDS = 60 * 3

# Sweep the ticker list until a full pass finishes without an
# 'Unauthorized' error. Tickers in `done` or `skip` are never retried.
while True:
    wait = False

    for ticker in tickers:
        if ticker in done or ticker in skip:
            continue

        csv_path = os.path.join(db, ticker + '.csv')
        if os.path.exists(csv_path):
            # Already on disk from an earlier run.
            done.append(ticker)
            continue

        print('Downloading...', end='')
        try:
            DataLoader(ticker)
            if os.path.exists(csv_path):
                print(ticker, 'Download finish!')
            # Marked as handled whether or not a file appeared, so the
            # ticker is not requested again in a later pass.
            done.append(ticker)
        except Exception as e:
            print(ticker, e)
            if 'Unauthorized' in str(e):
                # Stop this pass, wait, then start again; the current
                # ticker stays pending.
                wait = True
                break
            # Any other failure: give up on this ticker.
            skip.append(ticker)

    if not wait:
        break
    time.sleep(RETRY_DELAY_SECONDS)

### Save the modified Russell 2000 DataFrame to CSV

In [8]:
# Recompute the flag from the current skip list: True where the row's ticker
# appears in `skip`.
skipped_mask = np.isin(r2000['Ticker'], skip)
r2000['Skipped'] = skipped_mask

In [9]:
# r2000.to_csv('Russell2000.csv')

# Create leverage and risk data

In [10]:
# Generating gain and loss percentage data
def clip_dataframe(df, p):
    """Return a copy of the trailing rows of ``df`` that form whole batches.

    The result has ``(len(df) // p) * p`` rows, taken from the end of ``df``,
    so it can be reshaped into rows of ``p`` values. When ``df`` has fewer
    than ``p`` rows the result is empty.

    Parameters:
        df: DataFrame to trim.
        p: Batch size (rows per period).

    Returns:
        A new DataFrame (copy) with the leading remainder rows dropped.
    """
    usable = (len(df) // p) * p
    # Slice from an explicit start offset: ``df.iloc[-usable:]`` would return
    # the whole frame when ``usable`` is 0, because ``-0`` is the same as ``0``.
    return df.iloc[len(df) - usable:].copy()

def simple_gain_loss(data, period):
    """Average best-case gain and worst-case loss per period.

    ``data`` is clipped with ``clip_dataframe`` and its 'Close' column is
    split into consecutive rows of ``period`` values. For each row the first
    close is the base price; the gain/loss is the max/min of the closes
    shifted one step forward, relative to that base.

    Parameters:
        data: DataFrame with a 'Close' column.
        period: Number of rows per period.

    Returns:
        ``(mean_gain, mean_loss)`` as fractions of the base price.
    """
    ndf = clip_dataframe(data, period)
    # Base price of each period: its first close.
    base = np.array(ndf['Close']).reshape(-1, period)[:, 0]
    # Shift one step forward so each period's window starts after its base
    # price. The last value has no successor, so it is forward-filled.
    # ``.ffill()`` replaces the deprecated ``fillna(method='ffill')``.
    shifted_close = ndf['Close'].shift(-1).ffill()
    target = np.array(shifted_close).reshape(-1, period)
    # Highest and lowest shifted close per period, relative to the base.
    gain = target.max(axis=1) / base - 1
    loss = target.min(axis=1) / base - 1
    return gain.mean(), loss.mean()

def average_daily_fluctuation(data, minute=30):
    """Mean relative High/Low range, scaled down to one ``minute``-long bar.

    Parameters:
        data: DataFrame with 'High' and 'Low' columns.
        minute: Bar length in minutes.

    Returns:
        ``mean(High / Low - 1)`` divided by ``7.5 * (60 / minute)``.
    """
    relative_range = (data['High'] / data['Low'] - 1).mean()
    # Number of bars in 7.5 hours; presumably the assumed session length
    # (TODO confirm).
    bars_per_day = 7.5 * (60 / minute)
    return relative_range / bars_per_day

def get_values(data, period, minute=30):
    """Collect the three back-test parameters derived from ``data``.

    Returns:
        ``(sell_limit, stop_loss, fluct)``: the mean gain and mean loss from
        ``simple_gain_loss(data, period)``, then the result of
        ``average_daily_fluctuation(data, minute)``.
    """
    gain_loss = simple_gain_loss(data, period)
    sell_limit, stop_loss = gain_loss
    return sell_limit, stop_loss, average_daily_fluctuation(data, minute)

In [11]:
# Static back test
def not_so_simple_gain_loss(data, period):
    """Per-step Open/High/Low/Close gains relative to each period's base price.

    ``data`` is clipped with ``clip_dataframe`` and split into consecutive
    rows of ``period`` values. The base price of a period is its first
    'Close'. The price columns are shifted one step forward before being
    compared with the base.

    Parameters:
        data: DataFrame with 'Open', 'High', 'Low' and 'Close' columns.
        period: Number of rows per period.

    Returns:
        ``(open_gains, high_gains, low_gains, close_gains, base)``. The four
        gain arrays have shape ``(n_periods, period)``; ``base`` is 1-D with
        one price per period.
    """
    ndf = clip_dataframe(data, period)
    # Column vector (n_periods, 1) so it broadcasts across each period's row.
    base = np.array(ndf['Close']).reshape(-1, period)[:, :1]
    # Shift one step forward; the last row has no successor and is
    # forward-filled. ``.ffill()`` replaces the deprecated
    # ``fillna(method='ffill')``.
    shifted = ndf.shift(-1).ffill()

    def _gains(column):
        # Relative change of one shifted price column against the base.
        return np.array(shifted[column]).reshape(-1, period) / base - 1

    open_gains = _gains('Open')
    high_gains = _gains('High')
    low_gains = _gains('Low')
    close_gains = _gains('Close')
    return open_gains, high_gains, low_gains, close_gains, base.reshape(-1)

def calculate_gains(data, period, sell_limit, stop_loss, fluct):
    """Replay the sell-limit / stop-loss rule over every period of ``data``.

    Within each period the steps are checked in order, and the first rule
    that fires decides the period's gain:
      1. open gain below ``stop_loss``  -> the open gain itself;
      2. high gain above ``sell_limit`` -> the high gain minus ``fluct``;
      3. low gain below ``stop_loss``   -> ``stop_loss``.
    If no rule fires, the gain is the period's last close gain.

    Returns:
        ``(base, gains)``: the base price of each period and a numpy array
        with one realised gain per period.
    """
    open_gains, high_gains, low_gains, close_gains, base = not_so_simple_gain_loss(data, period)
    realised = []
    for row in range(len(open_gains)):
        for step in range(period):
            # Main strategy
            opened = open_gains[row, step]
            if opened < stop_loss:
                realised.append(opened)
                break
            if high_gains[row, step] > sell_limit:
                realised.append(high_gains[row, step] - fluct)
                break
            if low_gains[row, step] < stop_loss:
                realised.append(stop_loss)
                break
        else:
            # No rule fired during the period: take its final close gain.
            realised.append(close_gains[row, -1])
    return base, np.array(realised)

def calculate_capital_gain(base, gains, capital=1000, max_share=1000):
    """Compound per-period gains into an overall return on capital.

    For every period, as many whole shares as the current capital allows are
    bought at the base price (capped at ``max_share``), and the profit or
    loss ``base * gain * shares`` is added to the capital.

    Parameters:
        base: Sequence of base prices, one per period.
        gains: Sequence of fractional gains, aligned with ``base``.
        capital: Starting capital. If the first base price exceeds it, the
            starting capital becomes ten times that price.
        max_share: Upper bound on the shares traded per period.

    Returns:
        Total return as a fraction of the starting capital, or ``0.0`` when
        ``base`` is empty.
    """
    # No periods means no trades. Without this guard ``base[0]`` raises
    # IndexError on empty input.
    if len(base) == 0:
        return 0.0
    if base[0] > capital:
        capital = base[0] * 10
    original_capital = capital
    for i, price in enumerate(base):
        gain = price * gains[i]
        share = min(capital // price, max_share)
        capital += gain * share
    return capital / original_capital - 1

def check_for_update(ticker, days=30):
    """Refresh a ticker's CSV when its last row is at least ``days`` days old.

    Reads ``<db>/<ticker>.csv`` (``db`` is the module-level database path)
    and compares the date of its last index label with today's date minus
    ``days``. If the data is that old or older, it calls
    ``DataLoader(ticker).update_database()`` and prints a confirmation.

    Parameters:
        ticker: Symbol whose CSV file is checked.
        days: Age threshold in days. Previously ignored in favour of a
            hard-coded 30.
    """
    cutoff = pd.Timestamp.now().date() - pd.Timedelta(days=days)
    # Check if csv file need to be updated
    df = pd.read_csv(os.path.join(db, ticker + '.csv'), index_col=0)
    last_date = pd.to_datetime(df.index[-1]).date()
    if last_date <= cutoff:
        del df
        DataLoader(ticker).update_database()
        print(ticker, 'updated!')

In [12]:
# Get the stock symbols that have a CSV file in the Database folder
folder = os.path.abspath('Database')
files = os.listdir(folder)
# Keep only '<symbol>.csv' entries and strip just the final extension, so a
# dotted symbol keeps its full name and stray non-CSV files are ignored.
symbols = sorted(
    stem for stem, ext in map(os.path.splitext, files) if ext == '.csv'
)
len(symbols)

1747