# テクニカル分析パターンの自動選別

## ライブラリのインポート

In [2]:
# ライブラリのインポート
import pandas as pd
import datetime
from datetime import date
from datetime import timedelta
import matplotlib.pyplot as plt
import yahoo_fin.stock_info as si
import talib
import numpy as np
import mplfinance as mpf
from talib import abstract
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
#from pycaret.classification import *


## Tickerリストの作成

In [3]:
top_100 = ['AAPL','MSFT','GOOGL','AMZN','NVDA','TSLA','META','V','XOM','JPM',\
           'WMT','JNJ','MA','AVGO','PG','ORCL','COST','ADBE','KO','SHEL','CSCO',\
           'BAC','AZN','NVS','CRM','PFE','MCD','NFLX','DIS','INTC','NKE']

## 原著者による関数群

In [4]:

##Getting Patterns

def get_patterns(data=None,ticker_column = 'ticker',mute=False):
    #from talib import abstract
    result = pd.DataFrame()
    tickers = data[ticker_column].unique()
    data = data.sort_index(ascending=True)
    x=[]
    attr = talib.get_function_groups()['Pattern Recognition']
    for i in tickers:
        df = data[data[ticker_column]==i]
        for a in attr:
            df[a] = (getattr(abstract,a)(df))/100
        result = pd.concat([result,df])
        x.append(i)
        if mute is False:
            print(i + " Patterns Appended Successfully")
            print(result.shape)
            print(str(len(x)) + " of "+ str(len(tickers)) + " Done ")
        else:
            pass
    return result


In [5]:
#Creating Indicators

def get_indicators(data=None,ticker_column = 'ticker',price_column="close",mute=False):
    result = pd.DataFrame()
    tickers = data[ticker_column].unique()
    x=[]
    data = data.sort_index(ascending=True)
    for i in tickers:
        df = data[data[ticker_column]==i]
        df["Vol_over_30DAvg"] = df["volume"]/df["volume"].rolling(window=30).mean()
        df["Vol_over_10DAvg"] = df["volume"]/df["volume"].rolling(window=10).mean()
        df["Perc_From_200D_H"] = ((df[price_column]/df[price_column].rolling(window=200).max())-1)*100
        df["Perc_From_200D_L"] = ((df[price_column]/df[price_column].rolling(window=200).min())-1)*100
        df["Retracement_200D"] = df[price_column]/(df[price_column].rolling(window=200).max()-df[price_column].rolling(window=30).min())
        df["Perc_From_30D_H"] = ((df[price_column]/df[price_column].rolling(window=30).max())-1)*100
        df["Perc_From_30D_L"] = ((df[price_column]/df[price_column].rolling(window=30).min())-1)*100
        df["Retracement_30D"] = df[price_column]/(df[price_column].rolling(window=30).max()-df[price_column].rolling(window=30).min())
        df["30D/200D_High"] = df[price_column].rolling(window=30).max()/df[price_column].rolling(window=200).max()
        df["30D/200D_Low"] = df[price_column].rolling(window=30).min()/df[price_column].rolling(window=200).min()
        df["Perc_from 60DEMAVG"] = ((df[price_column]/df[price_column].rolling(window=60).mean())-1)*100
        df["Perc_from 30DEMAVG"] = ((df[price_column]/df[price_column].rolling(window=30).mean())-1)*100
        df["Perc_from 200DEMAVG"] = ((df[price_column]/df[price_column].rolling(window=200).mean())-1)*100
        df["Variand_30D/200D"] = df[price_column].rolling(window=30).var()/df[price_column].rolling(window=200).var()
        df = df.dropna()
        df = df.sort_index(ascending=True)
        result = pd.concat([result,df])
        
        if mute is False:
            print(i + " Indicators Appended Successfully")
            print(result.shape)
            x.append(i)
            print(str(len(x)) + " of "+ str(len(tickers)) + " Done ")
        else:
            pass

    return result


In [6]:
## Getting Labels
# 列Changeへの2つ目の代入文を無効にすることにより閾値と比率評価が外されて、回帰分析に適したデータとなる

def get_labels(data = None,ticker_column="ticker", fwd_window=1,price_column="close",threshold_perc=1,mute=False):
    result = pd.DataFrame()
    tickers = data[ticker_column].unique()
    x=[]
    data = data.sort_index(ascending=True)
    for i in tickers:
        df = data[data[ticker_column]==i]
        df.sort_index(ascending=False)
        df["Change"] = df[price_column].pct_change(periods=fwd_window).shift(-fwd_window)
        #df["Change"]=np.where(abs(df["Change"])>=(threshold_perc/100),df["Change"]/abs(df["Change"]),0)
        result= pd.concat([result,df],axis=0)
        if mute is False:    
            print(i + " Labels Appended Successfully")
            print(result.shape)
            x.append(i)
            print(str(len(x)) + " of "+ str(len(tickers)) + " Done ")
        else:
            pass
    result.dropna(inplace=True)
    return result


In [7]:
## Compile Data

def compile_data(data=None,ticker_column = 'ticker',price_column="close",train=True,
fwd_window=1,threshold_perc=1,mute=False):
    print("Compiling pattern Signals")
    data_patterns = get_patterns(data=data,ticker_column=ticker_column,mute=mute)
    print ("---- Pattern Signals Attached")
    print("Compiling Indicators")
    data_indicators = get_indicators(data=data_patterns,ticker_column=ticker_column,
    price_column=price_column,mute=mute)
    print ("---- Indicators Attached")
    print ("Creating Labels")
    if train is True:
        data_labelled = get_labels(data=data_indicators,ticker_column=ticker_column,
        price_column="close", fwd_window=fwd_window,threshold_perc=1,mute=mute)
        print("---- All Labels Attached")
        return data_labelled
    else:
        return data_indicators


In [8]:
#Getting Pattern based signals

def get_pattern_signals(tickers=None,top=5):
    d=pd.DataFrame()
    start_date = date.today() - timedelta(days=300)
    start_date= start_date.strftime('%m/%d/%Y')
    end_date = date.today().strftime('%m/%d/%Y')
    for i in tickers:
        
        a = si.get_data(ticker = i, start_date=start_date,end_date=end_date)
        z = get_patterns(data=a,mute=True).iloc[-1,6:]
        
        d= pd.concat((d,z),axis=1)
        print(i + "   ",end="\r")
    d.columns = d.iloc[0]
    d=d.drop("ticker")
    d=d.sum()/d.count()
    d=d.sort_values(ascending=False)
    top_buy = d[d>0].sort_values(ascending=False).head(top)
    top_sell = d[d<0].sort_values(ascending=True).tail(top)
    print("Top Buy Ideas")
    print(top_buy)
    print("Top Sell Ideas")
    print(top_sell)
    return top_buy,top_sell


In [9]:
# Getting predictions

def get_signals (data=None,tickers=None,model=None,ticker_column = 'ticker',price_column="close",top=5):
    
    if data is None:
        data=get_hist_data(tickers=tickers,time_window=300,delta=0)
        
    else:
        data=data
        tickers=data[ticker_column].unique()
   
    result = pd.DataFrame()
    for i in tickers:
        compiled_data = compile_data(data=data[data[ticker_column]==i],ticker_column = 'ticker',
        price_column="close",train=False,mute=True)
        prediction = predict_model(model,data=compiled_data).iloc[-1:,-2:]
        print('Ticker' + i +'is processed')
        result=pd.concat([result,prediction],axis=0)
        print(i + "- Prediction Attached")
    result["ticker"] = tickers
    top_buy = result[result["Label"]>0].sort_values(by=['Score'],ascending=False).head(top)
    top_sell = result[result["Label"]<0].sort_values(by=['Score'],ascending=False).head(top)
    
    return top_buy,top_sell


In [10]:
## Getting Top MCap Tickers

def get_top_tickers (index_name = 'sp500',top=50, tickers = None):
    ''' For index_name, you can choose from the following:\n
        'sp500'   - S&P 500 Index \n
        'dow'     - Dow Jones Industrial Average \n
        'nasdaq'  - Companies listed on Nasdaq\n
        'nifty50' - For NIFTY50\n
        'ftse100' - For FTSE100\n
        'ftse250' - For FTSE250\n
        Or for any other index not listed above, a list of all the tickers can be passed to 
        filter out Top N companies by Market Capitalization '''
    mcap=[]
    if tickers is None:
        t = "tickers_"+index_name
        tickers = getattr(si,t)()
    else:
        pass
    for i in tickers:
        a= si.get_quote_data(i)
        b=round(a['sharesOutstanding']*a['regularMarketPrice']/1000000)
        mcap.append(b)
    mcap_df = pd.DataFrame()
    mcap_df["Tickers"] = tickers
    mcap_df["MCap_mn"] = mcap
    mcap_df=mcap_df.sort_values(by=["MCap_mn"],ascending=False)
    top_mcap_tickers = mcap_df.head(top)["Tickers"]
    return top_mcap_tickers.to_list()


In [11]:
## Getting Historical Bulk Data for Download

def get_hist_data (tickers=None,time_window =2000,start_date=None,delta =1,end_date=None):
    i_list=[]
    data = pd.DataFrame()
    
    if end_date is None:
        end_date = date.today() - timedelta(days=delta)
    else:
        end_date= end_date.strftime('%m/%d/%Y')

    if start_date is None:
        start_date = end_date - timedelta(days= time_window)
        start_date= start_date.strftime('%m/%d/%Y')
    else:
        start_date= start_date.strftime('%m/%d/%Y')
    
    for i in tickers:
        a = si.get_data(ticker = i, start_date=start_date,end_date=end_date)
        data = pd.concat([data,a],axis=0)
        print(i+" Appended Successfully")
        print(data.shape)
        i_list.append(i)
        print(str(len(i_list)) + " of "+ str(len(tickers)) + " Done ")
        data.drop_duplicates(inplace = True)
        data.dropna(inplace=True)
    return data


## 過去データの取り出し

In [12]:
data = get_hist_data(tickers=top_100, time_window=2000, delta=10)


AAPL Appended Successfully
(1378, 7)
1 of 31 Done 
MSFT Appended Successfully
(2756, 7)
2 of 31 Done 
GOOGL Appended Successfully
(4134, 7)
3 of 31 Done 
AMZN Appended Successfully
(5512, 7)
4 of 31 Done 
NVDA Appended Successfully
(6890, 7)
5 of 31 Done 
TSLA Appended Successfully
(8268, 7)
6 of 31 Done 
META Appended Successfully
(9646, 7)
7 of 31 Done 
V Appended Successfully
(11024, 7)
8 of 31 Done 
XOM Appended Successfully
(12402, 7)
9 of 31 Done 
JPM Appended Successfully
(13780, 7)
10 of 31 Done 
WMT Appended Successfully
(15158, 7)
11 of 31 Done 
JNJ Appended Successfully
(16536, 7)
12 of 31 Done 
MA Appended Successfully
(17914, 7)
13 of 31 Done 
AVGO Appended Successfully
(19292, 7)
14 of 31 Done 
PG Appended Successfully
(20670, 7)
15 of 31 Done 
ORCL Appended Successfully
(22048, 7)
16 of 31 Done 
COST Appended Successfully
(23426, 7)
17 of 31 Done 
ADBE Appended Successfully
(24804, 7)
18 of 31 Done 
KO Appended Successfully
(26182, 7)
19 of 31 Done 
SHEL Appended Success

## 評価結果を追加

In [13]:
print(data.shape)
compiled_data = compile_data(data=data, mute=True)
print(compiled_data.shape)


(42718, 7)
Compiling pattern Signals
---- Pattern Signals Attached
Compiling Indicators
---- Indicators Attached
Creating Labels
---- All Labels Attached
(36518, 83)


In [14]:
compiled_data.head()


Unnamed: 0,open,high,low,close,adjclose,volume,ticker,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,...,Perc_From_30D_H,Perc_From_30D_L,Retracement_30D,30D/200D_High,30D/200D_Low,Perc_from 60DEMAVG,Perc_from 30DEMAVG,Perc_from 200DEMAVG,Variand_30D/200D,Change
2019-01-25,38.869999,39.532501,38.580002,39.439999,37.904816,134142000,AAPL,0.0,0.0,0.0,...,-7.715708,10.950131,5.485397,0.736631,1.0,-8.77612,1.281206,-17.731358,0.080552,-0.009255
2019-01-28,38.947498,39.0825,38.415001,39.075001,37.554016,104768400,AAPL,0.0,0.0,0.0,...,-8.569754,9.923342,5.434633,0.736631,1.0,-9.130459,0.619513,-18.458398,0.06803,-0.010365
2019-01-29,39.0625,39.532501,38.5275,38.669998,37.164776,166348800,AAPL,0.0,0.0,-1.0,...,-6.858562,8.784014,6.477385,0.715603,1.0,-9.558585,-0.074511,-19.262573,0.05119,0.068335
2019-01-30,40.8125,41.537498,40.057499,41.3125,39.704426,244439200,AAPL,0.0,0.0,0.0,...,-0.493772,16.217735,6.920015,0.715603,1.0,-2.870869,6.759152,-13.724061,0.050684,0.007201
2019-01-31,41.5275,42.25,41.139999,41.610001,39.990345,162958400,AAPL,0.0,0.0,0.0,...,0.0,17.054645,6.863505,0.717197,1.0,-1.633917,7.47009,-13.081486,0.053802,0.000481


In [15]:
# 分析に用いるために、オリジナルの株価データの列を削除して、各種指標(説明変数)とChange(目的変数)のみにする
df1 = compiled_data.drop(["open","high","low","close","adjclose","volume","ticker"],axis=1)
df1 = df1.reset_index(drop=True)
df1.head()


Unnamed: 0,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,...,Perc_From_30D_H,Perc_From_30D_L,Retracement_30D,30D/200D_High,30D/200D_Low,Perc_from 60DEMAVG,Perc_from 30DEMAVG,Perc_from 200DEMAVG,Variand_30D/200D,Change
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-7.715708,10.950131,5.485397,0.736631,1.0,-8.77612,1.281206,-17.731358,0.080552,-0.009255
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-8.569754,9.923342,5.434633,0.736631,1.0,-9.130459,0.619513,-18.458398,0.06803,-0.010365
2,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-6.858562,8.784014,6.477385,0.715603,1.0,-9.558585,-0.074511,-19.262573,0.05119,0.068335
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.493772,16.217735,6.920015,0.715603,1.0,-2.870869,6.759152,-13.724061,0.050684,0.007201
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,17.054645,6.863505,0.717197,1.0,-1.633917,7.47009,-13.081486,0.053802,0.000481


## 回帰分析

In [17]:
X = df1.drop('Change', axis=1)
y = df1['Change']
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:                 Change   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                     3.571
Date:                Thu, 12 Oct 2023   Prob (F-statistic):           1.15e-21
Time:                        09:06:03   Log-Likelihood:                 88069.
No. Observations:               36518   AIC:                        -1.760e+05
Df Residuals:                   36448   BIC:                        -1.754e+05
Df Model:                          69                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                  -0.0285    