In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from random import choices, sample
import matplotlib.pyplot as plt
from IPython.display import clear_output
import os

In [2]:
def return_stat(df, start, period):
    end = start - period
    filt = (df['Date'].max() - df['Date'] <= pd.Timedelta(days = 30 * (start))) & (df['Date'].max() - df['Date'] >= pd.Timedelta(days = (end) * 30))
    one_month = df.loc[filt]
    if one_month.shape[0] < 2:
        return (None, None)
    volatility = one_month['Close'].std()
    if volatility < 0.00001:
        return (None, None)
    return_ = (one_month['Close'].iloc[-1] - one_month['Close'].iloc[0])/one_month['Close'].iloc[0]*100 
    sharpe_ratio = return_/volatility
    return (return_, sharpe_ratio)

In [3]:
def cum_daily_returns(df, start):
    filt = (df['Date'].max() - df['Date'] <= pd.Timedelta(days = 30 * (start))) \
        & (df['Date'].max() - df['Date'] >= pd.Timedelta(days = (start - 1) * 30))
    one_month = df.loc[filt]
    if one_month.shape[0] < 2:
        return []
    volatility = one_month['Close'].std()
    if volatility < 0.00001:
        return []
    daily_returns = []
    for i in range(len(one_month)):
        daily_returns.append((one_month['Close'].iloc[i] - one_month['Close'].iloc[0])/one_month['Close'].iloc[0]*100 )
    return daily_returns

In [4]:
def last_ten_days(df, start):
    filt = (df['Date'].max() - df['Date'] <= pd.Timedelta(days = 30 * (start) + 10)) & (df['Date'].max() - df['Date'] >= pd.Timedelta(days = (start) * 30))
    ten_days = df.loc[filt]
    if ten_days.shape[0] < 2:
        return (None, None)
    volatility = ten_days['Close'].std()
    if volatility < 0.00001:
        return (None, None)
    return_ = (ten_days['Close'].iloc[-1] - ten_days['Close'].iloc[0])/ten_days['Close'].iloc[0]*100 
    sharpe_ratio = return_/volatility
    return (return_, sharpe_ratio)

In [5]:
# Filters
def is_legit(df):
    dates = pd.to_datetime(df['Date']).tolist()
#     print(dates[-1].month)
#     is_recent = (dates[-1].month == pd.Timestamp.today().month)
    is_recent = (dates[-1].month == 3)
    in_business = (df.iloc[:-100]['Volume'].mean() > 1000)
    return is_recent and in_business

In [6]:
r = requests.get('https://scanner.tradingview.com/america/scan')
soup = BeautifulSoup(r.text, 'lxml')

ticker_list = soup.p.text

ticker_list = ticker_list.split('"s":')
ticker_list = [x.split(',')[0].lstrip('"').rstrip('"') for x in ticker_list[1:]]
ticker_list = [x.split(':')[1] for x in ticker_list]
ticker_list = sorted(ticker_list)
ticker_list = [x.replace('/','-') for x in ticker_list]

In [6]:
# Dictionary of stock data
ticker_dict={}
files = os.listdir('Stock data/')
ticker_list = [file.split('.')[0] for file in files]
n_stocks = len(ticker_list)
for counter, ticker in enumerate(ticker_list):
    if counter % 1000 == 0:
        print(f'{int(counter/n_stocks*100)}% completed ...')
    try:
        df = pd.read_csv(f'Stock data/{ticker}.csv')
#         print(ticker)
        if is_legit(df):
            ticker_dict[ticker] = df
    except:
        continue
#     if is_legit(df):
#         ticker_dict[ticker] = df
print('Finished')
print(f'\n Number of tickers: {len(ticker_dict)}')

0% completed ...
5% completed ...
10% completed ...
16% completed ...
21% completed ...
27% completed ...
32% completed ...
37% completed ...
43% completed ...
48% completed ...
54% completed ...
59% completed ...
64% completed ...
70% completed ...
75% completed ...
81% completed ...
86% completed ...
92% completed ...
97% completed ...
Finished

 Number of tickers: 15780


In [None]:
# Backtesting strategy
expected_annual_return = 0
for month in range(14, 26):
    summary = pd.DataFrame(columns = ['Ticker', 'SR_10d', 'Return_10d', 'SR_1m', 'Return_1m', 'SR_1m_recent', 'Return_1m_recent', 'Max_1m_recent'])
    mths = month
    n_stocks = len(ticker_dict)
    sample_size = 30
    for ticker, df in ticker_dict.items():
        if df.shape[0]<2:
            continue
        df['Date'] = pd.to_datetime(df['Date'])
        (ten_day_return, ten_day_SR) = last_ten_days(df,mths - 1)
        (one_month_return, one_month_SR) = return_stat(df,mths, 1)
        (one_month_return_recent, one_month_SR_recent) = return_stat(df,mths - 1, 1)
        daily_returns = cum_daily_returns(df, mths - 1)
        if len(daily_returns) > 0:
            max_daily_returns = max(daily_returns)
        else:
            max_daily_returns = None    
        summary.loc[summary.shape[0]] = [ticker, ten_day_SR, ten_day_return, one_month_SR,  \
                                         one_month_return, one_month_SR_recent,  one_month_return_recent, max_daily_returns]
        
    analysis_test = summary.dropna().sort_values('Return_10d', ascending = False)
    filt = (analysis_test['SR_1m'].abs() <= 10) & (analysis_test['SR_1m'].abs() > 0) & (analysis_test['Return_10d'] > 0) 
    filtered_df = analysis_test.loc[filt]
    # print(filtered_df.head(50))
    list_of_indices = list(filtered_df.index)

    max_return_list = []
    net_return_list = []
    success_rate_list = []

    for _ in range(100):
        sampled_list = sample(list_of_indices[:100], k = sample_size)
    #     sampled_list = list_of_indices[:30]
        sampled_stock_list = filtered_df.loc[sampled_list].sort_values('Return_10d', ascending = False)


        num_success = len(sampled_stock_list.loc[(sampled_stock_list['Return_1m_recent'] >= 0)])
        total = len(sampled_stock_list)
        success_pc = num_success/total*100
        returns_list = sampled_stock_list['Return_1m_recent'].to_list()
        returns_list = [return_ if return_> -10 else -10 for return_ in returns_list  ]
        max_return = max(returns_list)
        return_net = sum(returns_list)/sample_size
        success_rate_list.append(success_pc)
        max_return_list.append(max_return)
        net_return_list.append(return_net)

    #     print(f'Max return: {max_return}')
    #     print(f'Net return: {return_net}')
    #     print(f'Success rate: {success_pc}')
    # plt.plot(max_return_list)
#     print(sampled_stock_list)
#     plt.plot(net_return_list)
#     plt.plot(success_rate_list)
#     plt.legend(['Net return', 'Success rate'])

    expected_return = sum(net_return_list)/100
    expected_annual_return += expected_return
    max_net_return = max(net_return_list)
    print(f'Month: {month}\n---------------')
    print(f'Max Net return: {max_net_return}\nExpected return: {expected_return}\n---------------\n')
print(f'\nExpected Annual Return: {expected_annual_return}')

Month: 14
---------------
Max Net return: 2.9665472824106613
Expected return: -2.6645080365156506
---------------

Month: 15
---------------
Max Net return: 5.004174178160428
Expected return: 0.1682326819127675
---------------

Month: 16
---------------
Max Net return: 9.649960334740616
Expected return: 0.04048847652146148
---------------



  return_ = (one_month['Close'].iloc[-1] - one_month['Close'].iloc[0])/one_month['Close'].iloc[0]*100


Month: 17
---------------
Max Net return: 1.9464747949197474
Expected return: -3.9141733805023438
---------------



  return_ = (one_month['Close'].iloc[-1] - one_month['Close'].iloc[0])/one_month['Close'].iloc[0]*100
  daily_returns.append((one_month['Close'].iloc[i] - one_month['Close'].iloc[0])/one_month['Close'].iloc[0]*100 )
  daily_returns.append((one_month['Close'].iloc[i] - one_month['Close'].iloc[0])/one_month['Close'].iloc[0]*100 )


Month: 18
---------------
Max Net return: 11.494780855074376
Expected return: 5.732758980035369
---------------

Month: 19
---------------
Max Net return: 2.735365120061069
Expected return: -1.1454376087452736
---------------

Month: 20
---------------
Max Net return: 10.189807347833042
Expected return: 4.591125742793942
---------------

Month: 21
---------------
Max Net return: 3.2721892112127935
Expected return: -0.6937544631886806
---------------

Month: 22
---------------
Max Net return: 1.1414292232168108
Expected return: -1.897912900603369
---------------

Month: 23
---------------
Max Net return: 20.5040476855259
Expected return: 12.775438817360111
---------------



In [9]:
# Apply strategy
summary = pd.DataFrame(columns = ['Ticker', 'SR_10d', 'Return_10d', 'SR_1m', 'Return_1m'])
n_stocks = len(ticker_dict)
counter = 0
for ticker, df in ticker_dict.items():
    counter += 1
    if counter % 1000 == 0:
        print(f'{int(counter/n_stocks*100)}% completed ...')
    if df.shape[0]<2:
        continue
    df['Date'] = pd.to_datetime(df['Date'])
    (ten_day_return, ten_day_SR) = last_ten_days(df, 0)
    (one_month_return, one_month_SR) = return_stat(df, 1, 1)
#     (one_month_return_recent, one_month_SR_recent) = one_month_recent_stat(df)
    summary.loc[summary.shape[0]] = [ticker, ten_day_SR, ten_day_return, one_month_SR,  \
                                     one_month_return]
print('Finished')

5% completed ...
10% completed ...
16% completed ...
21% completed ...
27% completed ...
32% completed ...
37% completed ...
43% completed ...
48% completed ...
54% completed ...
59% completed ...
64% completed ...
70% completed ...
75% completed ...
81% completed ...
86% completed ...
92% completed ...
97% completed ...
Finished


In [10]:
analysis = summary.dropna().sort_values('Return_10d', ascending = False)
filt = (analysis['SR_1m'].abs() <= 10.0) & (analysis['SR_1m'].abs() > 0.0) & (analysis['Return_10d'] > 0) 
filtered_df = analysis.loc[filt]
# print(filtered_df.head(100))
list_of_indices = list(filtered_df.index)
sampled_list = sample(list_of_indices[:100], k = 30)
sampled_stock_list = filtered_df.loc[sampled_list].sort_values('Return_10d', ascending = False)
print(sampled_stock_list)

      Ticker      SR_10d  Return_10d     SR_1m  Return_1m
12979  PSTNY  126.491106   36.666667  3.962782   0.985222
6849   GLAPY    3.986040   34.815251  5.337317  34.815251
13577    RGF   84.745366   29.487179  0.448755   0.198413
12663   PMTS    6.838525   25.689498  8.300233  22.352937
11586   NVEI    7.638705   22.479612  3.397347   7.718804
9548    LCFY   21.270861   22.473246 -7.901137  -9.728309
13149    PYR  274.168249   21.783439 -3.876368  -0.416667
2986   CFNCF    2.918288   18.584626  4.562661  18.584626
14907   SOBR   88.200408   18.562874 -5.401341  -1.980198
16278   TSLI   10.624209   17.791742  9.170647  13.068241
14271     SE    2.301614   17.538798  1.272128   7.461593
76      ABEO   55.656852   17.467249  1.510035   0.373134
10335   MGLD  132.383578   16.666667  5.188327   0.574713
13517   RERE  108.243714   16.064257  8.624183   2.120141
1939    BITI    9.228260   15.737701  7.599295  12.739532
753     AMEN    0.662750   15.044248  0.593525  23.809524
17250    VXX  

In [24]:
print(filtered_df.head(60))

      Ticker      SR_10d  Return_10d     SR_1m  Return_1m
6774   GECFF    2.936858   62.114545  4.180579  62.114545
5783   FCODF    0.185471   56.402737  0.227896  56.402737
15395   SSOK   87.326841   52.307692 -1.862743       -1.0
16382  TRPCF    7.663310   52.267251  9.416222  52.267251
9941   LPRRF    2.314068   35.102035  3.467887  35.102035
7995   HVRRF    1.540093   33.208824  2.307999  33.208824
17486  WBTMU    3.334981   32.352941  4.997837  32.352941
7713   HLAGF    1.186827   32.272727  1.908563       45.5
3899    CTLT    3.766070   30.690947  4.872651  41.498185
17276  VOYJF    7.439646   30.193808  8.160329  30.193808
7846   HPGLY    2.305758   28.650359  3.541051  38.363367
17092  VGPBF    2.818564   27.967681  4.224219  27.967681
13984    RPD    6.953648   26.561325  5.755211   44.70892
5737     FBL    4.534121   26.006292  6.328517  48.389393
12663  PIEJF    4.917903   25.882358  7.027495  29.926644
5116     ELF    4.275962   24.830577  5.743325  46.016249
14912   SMCI  

In [27]:
ticker_dict['GLAPY'].iloc[:-20]['Volume'].mean()

155.79961464354528