In [1]:
# Denpendances
import numpy as np
import pandas as pd
from dLoader import DataLoader

# Load Russell 2000 CSV

In [2]:
r2000 = pd.read_csv('Russell2000.csv', header=0, index_col=0)

In [3]:
r2000

Unnamed: 0,Ticker,Company,Sector,Skipped
0,AMC,AMC ENTERTAINMENT HOLDINGS INC CLA,Communication,False
1,OVV,OVINTIV INC,Energy,False
2,NTLA,INTELLIA THERAPEUTICS INC,Health Care,False
3,LSCC,LATTICE SEMICONDUCTOR CORP,Information Technology,False
4,TTEK,TETRA TECH INC,Industrials,False
...,...,...,...,...
2029,GBL,GAMCO INVESTORS INC,Financials,False
2030,PDLI,PDL BIOPHARMA INC,Health Care,True
2031,GTXI,GTXI INC - CVR,Health Care,True
2032,P5N994,Petrocorp Inc Escrow,Financials,True


# Download Data

In [31]:
tickers = np.array(r2000['Ticker'])
done = []
skip = []

In [32]:
m = pd.Timestamp.now().date() - pd.Timedelta(days=30)

In [42]:
import time

while True:    
    wait = False

    db = os.path.abspath('Database')
    for ticker in tickers:
        if ticker in done or ticker in skip:
            continue
        elif os.path.exists(os.path.join(db, ticker + '.csv')):
            done.append(ticker)
            continue
        print('Downloading...', end='')
        try:
            DataLoader(ticker)
            if os.path.exists(os.path.join(db, ticker + '.csv')):
                print(ticker, 'Download finish!')
            done.append(ticker)
        except Exception as e:
            if 'Unauthorized' in str(e):
                wait = True
                print(e)
                break
            else:
                skip.append(ticker)
                print(e)
    
    if wait:
        time.sleep(60*5)
    else:
        break

### Save modify Russell  2000 DataFrame to csv

In [6]:
# r2000.insert(3, 'Skipped', np.isin(r2000['Ticker'], skip))

In [7]:
# r2000.to_csv('Russell2000.csv')

# Create leverage and risk data

In [43]:
# Generating gain and loss percentage data
def clip_dataframe(df, p):
    # Batching for test data
    batches = len(df) // p
    max_length = batches * p
    return df.iloc[-max_length:].copy()

def simple_gain_loss(data, period):
    # Get gain and loss values base on start of period and
    # end of period prices
    ndf = clip_dataframe(data, period)
    # Base period prices
    base = np.array(ndf['Close']).reshape(-1, period)[:, 0]
    # Shifting one day to avoid the end period price is the
    # base price price
    shifted = ndf.shift(-1).fillna(method='ffill')
    target = np.array(shifted['Close']).reshape(-1, period)
    # Calculate gain and loss price array
    gain = target.max(1) / base - 1
    loss = target.min(1) / base - 1
    return gain.mean(), loss.mean()

def average_daily_fluctuation(data, minute=30):
    # Calculate an average daily fluctuation percentage value
    hl = data['High'] / data['Low'] - 1
    minutes = 7.5 * (60 / minute)
    return hl.mean() / minutes

def get_values(data, period, minute=30):
    # Getting values of sell limit, stop loss and fluctuation percentage
    sell_limit, stop_loss = simple_gain_loss(data, period)
    fluct = average_daily_fluctuation(data, minute)
    return sell_limit, stop_loss, fluct

In [65]:
# Static back test
def not_so_simple_gain_loss(data, period):
    ndf = clip_dataframe(data, period)
    # Base
    base = np.array(ndf['Close']).reshape(-1, period)[:, 0]
    # Expanding dimension
    base = np.expand_dims(base, 1)
    ndf = clip_dataframe(data, period)
    # base
    base = np.array(ndf['Close']).reshape(-1, period)[:, 0]
    base = np.expand_dims(base, 1)
    # shifted
    shift = ndf.shift(-1).fillna(method='ffill')
    # High
    high = np.array(shift['High']).reshape(-1, period)
    # Low
    low = np.array(shift['Low']).reshape(-1, period)
    # Open
    Open = np.array(shift['Open']).reshape(-1, period)
    # Close
    close = np.array(shift['Close']).reshape(-1, period)
    # Gaining percentage
    high_gains = high / base - 1
    low_gains = low / base - 1
    close_gains = close / base - 1
    open_gains = Open / base - 1
    return open_gains, high_gains, low_gains, close_gains, base.reshape(-1)

def calculate_gains(data, period, sell_limit, stop_loss, fluct):
    open_gains, high_gains, low_gains, close_gains, base = not_so_simple_gain_loss(data, period)
    # Getting gained percentage 
    length = len(open_gains)
    arr = []
    for i in range(length):
        sold = False
        for p in range(period):
            # Main stategy
            if open_gains[i, p] < stop_loss:
                arr.append(open_gains[i, p])
                sold = True
                break
            elif high_gains[i, p] > sell_limit:
                arr.append(high_gains[i, p] - fluct)
                sold = True
                break
            elif low_gains[i, p] < stop_loss:
                arr.append(stop_loss)
                sold = True
                break
        if not sold:
            arr.append(close_gains[i, -1])
    return base, np.array(arr)

def calculate_capital_gain(base, gains, capital=1000, max_share=1000):
    if base[0] > capital:
        capital = base[0] * 10
    original_capital = capital
    length = len(base)
    for i in range(length):
        gain = base[i] * gains[i]
        share = capital // base[i]
        if share > max_share:
            share = max_share
        capital += gain * share
    return capital / original_capital - 1

def check_for_update(ticker, days=30):
    m = pd.Timestamp.now().date() - pd.Timedelta(days=30)
    # Check if csv file need to be updated
    df = pd.read_csv(os.path.join(db, ticker + '.csv'), index_col=0)
    if pd.to_datetime(df.index[-1]).date() <= m:
        del df
        DataLoader(ticker).update_database()
        print(ticker, 'updated!')

In [66]:
# Get stock symbol in Database
import os
folder = os.path.abspath('Database')
files = os.listdir(folder)
symbols = [f.split('.')[0] for f in files]
symbols = sorted(symbols)
len(symbols)

2002

In [67]:
df = {}
dic = {}
for symbol in symbols:
    check_for_update(symbol)
    # Loading Data
    d1 = DataLoader(symbol).get_data('2018-01-01', '2020-12-31')
    d2 = DataLoader(symbol).get_data('2021-01-01', '2021-12-31')
    # Check if data meet minimum length
    if len(d1) < (5 * 52) or len(d2) < (5 * 50):
        continue
    # Create table
    d = {}
    for i in range(3, 21):
        sell_limit, stop_loss, fluct = get_values(d1, i)
        base, gains = calculate_gains(d2, i, sell_limit, stop_loss, fluct)
        capital_gain_percentage = calculate_capital_gain(base, gains)
        # Update dictionary
        d[i] = {'Sell Limit': sell_limit,
                'Stop Loss': stop_loss,
                'Fluctuation': fluct,
                'Cap Gain': capital_gain_percentage}
    # Storing data
    df[symbol] = pd.DataFrame.from_dict(d, orient='index')
    risk = (df[symbol]['Cap Gain'] < 0).mean()
    leverage = df[symbol]['Cap Gain'].max() / df[symbol]['Cap Gain'].mean()
    dic[symbol] = {'Risk': risk,
                   'Leverage': leverage}

AMD updated!


  share = capital // base[i]


FCEL updated!
MDP updated!
MSFT updated!
NVDA updated!
TSLA updated!


In [68]:
rldf = pd.DataFrame.from_dict(dic, orient='index')

In [69]:
# Create new DataFrame 
new_df = None
for k in df.keys():
    d = df[k].copy()
    name = d.columns
    col = pd.MultiIndex.from_product([[k], name], names=['Ticker', 'Data'])
    d.columns = col
    if new_df is None:
        new_df = d
    else:
        new_df = pd.concat([new_df, d], axis=1)
print(new_df)

Ticker       AAON                                       AAPL            \
Data   Sell Limit Stop Loss Fluctuation  Cap Gain Sell Limit Stop Loss   
3        0.014991 -0.012151    0.002185  0.337040   0.015163 -0.009744   
4        0.019921 -0.015262    0.002185  0.170887   0.020324 -0.012035   
5        0.026294 -0.019279    0.002185 -0.027333   0.026250 -0.015426   
6        0.027649 -0.023289    0.002185  0.605341   0.028170 -0.019179   
7        0.037034 -0.020663    0.002185  0.421614   0.036528 -0.017724   
8        0.036419 -0.025190    0.002185  0.184958   0.039084 -0.018844   
9        0.044228 -0.028377    0.002185  0.211877   0.043520 -0.022489   
10       0.045045 -0.031158    0.002185 -0.117041   0.046778 -0.024328   
11       0.042776 -0.037804    0.002185  0.273413   0.045636 -0.029224   
12       0.044186 -0.040533    0.002185  0.110166   0.046699 -0.031943   
13       0.049947 -0.035075    0.002185  0.380124   0.051845 -0.034283   
14       0.061275 -0.031352    0.00218

In [70]:
new_df['TSLA']

Data,Sell Limit,Stop Loss,Fluctuation,Cap Gain
3,0.032704,-0.018921,0.003113,0.452545
4,0.043373,-0.026941,0.003113,0.272965
5,0.057816,-0.028874,0.003113,0.377399
6,0.061486,-0.039063,0.003113,0.294844
7,0.077501,-0.037629,0.003113,-0.304194
8,0.085184,-0.045427,0.003113,-0.099145
9,0.094559,-0.049812,0.003113,0.485274
10,0.109724,-0.042715,0.003113,0.11001
11,0.104649,-0.063695,0.003113,0.428105
12,0.109732,-0.064256,0.003113,0.18298


In [71]:
# Update Skip list
r2000['Skipped'] = ~np.isin(r2000['Ticker'], rldf.index)

In [72]:
# Write DataFrame to csv
# new_df.to_csv('statictest.csv')
# rldf.to_csv('riskmanagement.csv')
# r2000.to_csv('Russell2000.csv')