### 상대모멘텀

- s = 종목, p = 산다, r = 판다
- return 값이 음수가 나오는 경우는 종목들이 다 -이고, 그 중에서 그나마 나은걸 산 것

- 과거의 데이터에 첫 날과 마지막 날의 종가를 비교해서 수익률이 높은 종목을 산다.
- 샀던 종목이 높은 종목이면 그대로 놔두고, 다른 종목이면 자동으로 팔고 다른 종목을 산다.

In [735]:
import os
import pandas as pd
import numpy as np
import datetime
import glob

In [736]:
# 파일 목록 불러오기

files = glob.glob('./csv/*.csv')           # '*' 표시는 해당 파일에서 모두 불러온다는 뜻

files

['./csv/AMZN.csv', './csv/BND.csv', './csv/AAPL.csv']

In [737]:
# 'month_last_df'라는 빈 데이터프레임 지정(컬럼 이름만 지정해둠)

month_last_df = pd.DataFrame(columns = ['Date', 'CODE', '1M_RET'])

month_last_df

Unnamed: 0,Date,CODE,1M_RET


In [738]:
# 종목 데이터프레임 생성

stock_df = pd.DataFrame(columns = ['Date', 'CODE', 'Adj Close'])

In [739]:
for file in files :

    if os.path.isdir(file) :

        print('%s <DIR>' %file)
    
    else :

        folder, name = os.path.split(file)
        head, tail = os.path.splitext(name)

        print(folder, name, head, tail)

        read_df = pd.read_csv(file)

        print(read_df.head(1))

./csv AMZN.csv AMZN .csv
         Date    Open  High       Low     Close  Adj Close    Volume
0  1997-05-15  2.4375   2.5  1.927083  1.958333   1.958333  72156000
./csv BND.csv BND .csv
         Date   Open       High   Low      Close  Adj Close  Volume
0  2007-04-10  75.07  75.260002  75.0  75.239998  51.523693   35000
./csv AAPL.csv AAPL .csv
         Date      Open      High       Low     Close  Adj Close       Volume
0  1980-12-12  0.513393  0.515625  0.513393  0.513393   0.410525  117258400.0


In [740]:
def data_preprocessing(sample, ticker, base_date) :

    # sample = AAPL.csv를 읽은 후 데이터프레임화
    # ticker = AAPL
    # base_date = '2010-01-01'
    
    sample['CODE'] = ticker     # 종목코드 추가

    sample = sample[sample['Date'] >= base_date][ ['Date', 'CODE', 'Adj Close'] ]

    # 'inplace = True'는 기준이 되는 데이터프레임을 수정
    # 'drop = True'는 기존 인덱스를 삭제
    sample.reset_index(inplace = True, drop = True)

    # 기준년월
    sample['STD-YM'] = sample['Date'].map(lambda x : datetime.datetime.strptime(x, '%Y-%m-%d').strftime('%Y-%m'))
    # 수익률
    sample['1M_RET'] = 0.0

    # 기준년월의 중복 데이터를 삭제한 리스트를 출력
    ym_keys = list(sample['STD-YM'].unique())

    return sample, ym_keys

In [741]:
df = pd.read_csv('./csv/AAPL.csv')

price_df, ym_keys = data_preprocessing(df, 'AAPL', '2010-01-01')

print(price_df)
print(ym_keys)

            Date  CODE   Adj Close   STD-YM  1M_RET
0     2010-01-04  AAPL   26.782711  2010-01     0.0
1     2010-01-05  AAPL   26.829010  2010-01     0.0
2     2010-01-06  AAPL   26.402260  2010-01     0.0
3     2010-01-07  AAPL   26.353460  2010-01     0.0
4     2010-01-08  AAPL   26.528664  2010-01     0.0
...          ...   ...         ...      ...     ...
2379  2019-06-18  AAPL  198.449997  2019-06     0.0
2380  2019-06-19  AAPL  197.869995  2019-06     0.0
2381  2019-06-20  AAPL  199.460007  2019-06     0.0
2382  2019-06-21  AAPL  198.779999  2019-06     0.0
2383  2019-06-24  AAPL  199.169998  2019-06     0.0

[2384 rows x 5 columns]
['2010-01', '2010-02', '2010-03', '2010-04', '2010-05', '2010-06', '2010-07', '2010-08', '2010-09', '2010-10', '2010-11', '2010-12', '2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06', '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12', '2012-01', '2012-02', '2012-03', '2012-04', '2012-05', '2012-06', '2012-07', '2012-08'

In [742]:
def create_trade_book(sample, sample_codes) :

    book = pd.DataFrame()
    book = sample[sample_codes]
    book['STD-YM'] = book.index.map(lambda x : datetime.datetime.strptime(x, '%Y-%m-%d').strftime('%Y-%m'))

    for i in sample_codes :
        
        book['p ' + i] = ''
        book['r ' + i] = ''

    return book

In [743]:
# trading 함수 생성

def tradings(book, s_codes) :

    std_ym = ''
    buy_phase = ''

    for i in s_codes :

        print(i)

        # 종목코드의 인덱스를 기준으로 반복
        for j in book.index :

            if book.loc[j, 'p ' + i] == '' and book.shift(1).loc[j, 'p ' + i] == 'ready ' + i :

                std_ym = book.loc[j, 'STD-YM']
                buy_phase = True


            if book.loc[j, 'p ' + i] == '' and book.loc[j, 'STD-YM'] == std_ym and buy_phase == True :

                book.loc[j, 'p ' + i] = 'buy ' + i


            if book.loc[j, 'p ' + i] == '' :        # 반복문 빠져나오기

                std_ym = None
                buy_phase = False            

    return book

In [744]:
def multi_returns(book, s_codes) :

    rtn = 1.0
    num = len(s_codes)
    buy_dict = {}
    sell_dict = {}

    for i in book.index :

        for s in s_codes :        # s는 종목 ex) AAPL, AMZN..

            if book.loc[i, 'p ' + s] == 'buy ' + s and \
            book.shift(1).loc[i, 'p ' + s] == 'ready ' + s and \
            book.shift(2).loc[i, 'p ' + s] == '' :          # 중복매수 방지

                buy_dict[s] = book.loc[i, s]

                print('진입일: ', i, '종목코드: ', s, 'long 진입 가격: ', buy_dict[s])

            elif book.loc[i, 'p ' + s] == '' and \
                book.shift(1).loc[i, 'p ' + s] == 'buy ' + s :

                sell_dict[s] = book.loc[i, s]

                # 손익계산
                rtn = (sell_dict[s] / buy_dict[s]) - 1
                book.loc[i, 'r ' + s] = rtn

                print('개별 청산일: ', i, '종목코드: ', s, 'long 진입 가격: ', \
                    buy_dict[s], 'long 청산 가격: ', sell_dict[s], 'return: ', round(rtn * 100, 2))


            if book.loc[i, 'p ' + s] == '' :

                buy_dict[s] = 0.0
                sell_dict[s] = 0.0

    acc_rtn = 1.0

    for i in book.index :

        rtn = 0.0
        count = 0

        for s in s_codes :

            if book.loc[i, 'p ' + s] == '' and \
                book.shift(1).loc[i, 'p ' + s] == 'buy ' + s :

                count += 1
                rtn += book.loc[i, 'r ' + s]


        if (rtn != 0.0) and (count != 0) :

            acc_rtn *= (rtn / count) + 1

            print('누적 청산일: ', i, '청산 종목 수: ', count, \
                '청산 수익률: ', round((rtn / count), 4), '누적 수익률: ', round(acc_rtn, 4))

            book.loc[i, 'acc_rtn'] = acc_rtn

    print('누적 수익률: ', round(acc_rtn, 4))

In [745]:
for file in files :
    
    if os.path.isdir(file) :

        print('%s <DIR>' %file)
    
    else :

        # file = './csv\AAPL.csv'
        # split() → folder = './csv', name = 'AAPL.csv'
        # splitext() → head = 'AAPL', tail = '.csv'

        folder, name = os.path.split(file)
        head, tail = os.path.splitext(name)

        #print(folder, name, head, tail)

        read_df = pd.read_csv(file, encoding = 'CP949')

        #print(read_df.head(1))
        
        price_df, ym_keys = data_preprocessing(read_df, head, '2010-01-01')

        stock_df = pd.concat([stock_df, price_df[['Date', 'CODE', 'Adj Close']]])

        for ym in ym_keys :

            # 기준년월의 마지막 종가 / 기준년월의 첫 종가를 나누는 기준

            # m_ret이라는 변수를 만들어서
            # ym = '2020-01'

            # [price_df['STD-YM'] == '2020-01] → 'STD-YM'의 값이 2020-01이면 T, 아니면 F
            # price_df.loc[price_df[price_df['STD-YM'] == '2020-01'] → [] 안의 조건식이 T 출력, F 미출력
            # price_df.loc[price_df[price_df['STD-YM'] == '2020-01'].index[-1] → 'STD-YM'의 값이 2020-01인 값들 중에 가장 마지막에 있는 인덱스 값
            
            # price_df.loc[price_df[price_df['STD-YM'] == '2020-01'].index[-1], 'Adj Close']
            # price_df.loc[index, col] → index = price_df[price_df['STD-YM'] == '2020-01].index[-1], col = Adj Close

            # 결과적으로 price_df에 있는 STD-YM 칼럼의 값이
            # 2020-01인 행들 중에 가장 마지막에 있는 행의 Adj Close 값

            # 기준년월의 제일 마지막에 있는 값에 인덱스
            m_ret = price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], 'Adj Close'] \
                    / price_df.loc[price_df[price_df['STD-YM'] == ym].index[0], 'Adj Close']

            price_df.loc[price_df['STD-YM'] == ym, ['1M_RET']] = m_ret

            month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])

  month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])
  month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])
  month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])
  month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])
  month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])
  month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])
  month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])
  month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD-YM'] == ym].index[-1], ['Date', 'COD

In [746]:
month_last_df.head(10)

Unnamed: 0,Date,CODE,1M_RET
18,2010-01-29,AMZN,0.936595
37,2010-02-26,AMZN,0.996046
60,2010-03-31,AMZN,1.090172
81,2010-04-30,AMZN,1.040134
101,2010-05-28,AMZN,0.912503
123,2010-06-30,AMZN,0.886563
144,2010-07-30,AMZN,1.062455
166,2010-08-31,AMZN,1.039644
187,2010-09-30,AMZN,1.185448
208,2010-10-29,AMZN,1.074946


In [747]:
month_ret_df = month_last_df.pivot('Date', 'CODE', '1M_RET')

month_ret_df

CODE,AAPL,AMZN,BND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-29,0.897435,0.936595,1.011692
2010-02-26,1.050789,0.996046,1.003909
2010-03-31,1.124456,1.090172,0.996728
2010-04-30,1.106454,1.040134,1.011023
2010-05-28,0.964445,0.912503,1.011821
...,...,...,...
2019-02-28,1.044274,1.008363,1.001883
2019-03-29,1.085615,1.065214,1.022547
2019-04-30,1.049310,1.061917,1.003719
2019-05-31,0.834810,0.928617,1.018200


In [748]:
# 투자종목 선택할 rank

month_ret_df = month_ret_df.rank(axis = 1, ascending = False, method = 'max', pct = True)

In [749]:
# 상위 40%에 드는 종목을 체크

month_ret_df = month_ret_df.where(month_ret_df < 0.4, np.nan).fillna(0)

month_ret_df[month_ret_df != 0] = 1

month_ret_df

CODE,AAPL,AMZN,BND
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-29,0.0,0.0,1.0
2010-02-26,1.0,0.0,0.0
2010-03-31,1.0,0.0,0.0
2010-04-30,1.0,0.0,0.0
2010-05-28,0.0,0.0,1.0
...,...,...,...
2019-02-28,1.0,0.0,0.0
2019-03-29,1.0,0.0,0.0
2019-04-30,0.0,1.0,0.0
2019-05-31,0.0,0.0,1.0


In [750]:
stock_list = list(stock_df['CODE'].unique())

stock_list

['AMZN', 'BND', 'AAPL']

In [751]:
# 비어있는 딕셔너리 형태 데이터

sig_dict = dict()

for date in month_ret_df.index :

    ticker_list = list(month_ret_df.loc[date, month_ret_df.loc[date, :] >= 1.0].index)

    # 날짜별로 종목코드 저장
    sig_dict[date] = ticker_list

stock_c_matrix = stock_df.pivot('Date', 'CODE', 'Adj Close')

book = create_trade_book(stock_c_matrix, stock_list)

book.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  book['STD-YM'] = book.index.map(lambda x : datetime.datetime.strptime(x, '%Y-%m-%d').strftime('%Y-%m'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  book['p ' + i] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  book['r ' + i] = ''
A value is trying to be set on a copy of a slice from a DataF

CODE,AMZN,BND,AAPL,STD-YM,p AMZN,r AMZN,p BND,r BND,p AAPL,r AAPL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010-01-04,133.899994,60.611969,26.782711,2010-01,,,,,,
2010-01-05,134.690002,60.789135,26.82901,2010-01,,,,,,
2010-01-06,132.25,60.766037,26.40226,2010-01,,,,,,
2010-01-07,130.0,60.719822,26.35346,2010-01,,,,,,
2010-01-08,133.520004,60.78141,26.528664,2010-01,,,,,,


In [752]:
# 반복문이 처음 시작하는 경우
# date = 2010-01-29
# values = ['BND']
# stock = 'BND'(리스트에서 빠져나온 str 형태의 데이터로 바뀜)
# bool.loc['2010-01-29', 'p BND'] = 'readyBND'

for date, values in sig_dict.items() :

    for stock in values :

        book.loc[date, 'p ' + stock] = 'ready ' + stock 

book.value_counts('p AAPL')

p AAPL
              2345
ready AAPL      39
dtype: int64

In [753]:
stock_list

['AMZN', 'BND', 'AAPL']

In [754]:
book = tradings(book, stock_list)

AMZN
BND
AAPL


In [755]:
multi_returns(book, stock_list)

진입일:  2010-02-01 종목코드:  BND long 진입 가격:  61.280487
개별 청산일:  2010-03-01 종목코드:  BND long 진입 가격:  61.280487 long 청산 가격:  61.585163 return:  0.5
진입일:  2010-03-01 종목코드:  AAPL long 진입 가격:  26.154476
진입일:  2010-06-01 종목코드:  BND long 진입 가격:  62.79007
개별 청산일:  2010-06-01 종목코드:  AAPL long 진입 가격:  26.154476 long 청산 가격:  32.642082 return:  24.8
진입일:  2010-08-02 종목코드:  AMZN long 진입 가격:  120.07
개별 청산일:  2010-08-02 종목코드:  BND long 진입 가격:  62.79007 long 청산 가격:  64.18457 return:  2.22
개별 청산일:  2011-02-01 종목코드:  AMZN long 진입 가격:  120.07 long 청산 가격:  172.110001 return:  43.34
진입일:  2011-02-01 종목코드:  AAPL long 진입 가격:  43.179466
진입일:  2011-04-01 종목코드:  AMZN long 진입 가격:  180.130005
개별 청산일:  2011-04-01 종목코드:  AAPL long 진입 가격:  43.179466 long 청산 가격:  43.120655 return:  -0.14
개별 청산일:  2011-06-01 종목코드:  AMZN long 진입 가격:  180.130005 long 청산 가격:  192.399994 return:  6.81
진입일:  2011-06-01 종목코드:  BND long 진입 가격:  66.480408
진입일:  2011-07-01 종목코드:  AMZN long 진입 가격:  209.490005
개별 청산일:  2011-07-01 종목코드:  BND long 진입 가

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  book.loc[i, 'acc_rtn'] = acc_rtn
