In [2]:
import pandas as pd
import statistics as st # 표준편차 계산용 모듈

# 1. Data Manufacturing

In [3]:
bdf = pd.read_csv('./dummyData/testdata.csv',encoding='cp949')
bdf = bdf[['candel_date_time', 'open', 'high', 'low', 'close', 'volume']]
bdf

Unnamed: 0,candel_date_time,open,high,low,close,volume
0,2020-10-11 22:00:00,12936000.0,12962000.0,12910000.0,12941000.0,77.927065
1,2020-10-11 23:00:00,12945000.0,12993000.0,12906000.0,12945000.0,88.762869
2,2020-10-12 00:00:00,12945000.0,12994000.0,12931000.0,12953000.0,61.051636
3,2020-10-12 01:00:00,12968000.0,12977000.0,12921000.0,12950000.0,54.494628
4,2020-10-12 02:00:00,12950000.0,12980000.0,12944000.0,12974000.0,20.422461
...,...,...,...,...,...,...
994,2020-11-22 08:00:00,20309000.0,20460000.0,20200000.0,20351000.0,516.917593
995,2020-11-22 09:00:00,20348000.0,20595000.0,20196000.0,20274000.0,809.041188
996,2020-11-22 10:00:00,20276000.0,20402000.0,19525000.0,20034000.0,1357.390535
997,2020-11-22 11:00:00,20034000.0,20399000.0,20032000.0,20393000.0,496.604723


## 1-1. MA(Moving Average)

In [59]:
# 종가기준 5일 이동평균선
def mac(ndf):
    ndf['MA_C5D'] = 0
    csum = 0
    n = 0
    for i in range(119,len(ndf),1):
        for j in range(0+n,120+n,1):
            csum = csum + ndf['close'][j]
        ma = int(csum / 120)
        ndf['MA_C5D'][i] = ma
        csum = 0
        n = n + 1
    return ndf

## 1-2. MACD(Moving Average Convergence Divergence)

In [7]:
# MACD : 12일 지수이동평균 - 26일 지수이동평균
# 시그널 : MACD의 9일 지수이동평균
# 오실레이터 : MACD값 - 시그널값
def macd(ndf):
    ndf['MACD'] = 0
    i = 0
    for l in range(623, len(ndf), 1):
        if l > 997:
            print(l)
        sum12 = 0   
        sum26 = 0 
        avg12 = 0
        avg26 = 0
        macd = 0
        for n in range(0+i,624+i,1):
            sum26 = sum26 + ndf['close'][n]
        for m in range(336+i,624+i,1):
            sum12 = sum12 + ndf['close'][m]
        avg12 = sum12 / 12
        avg26 = sum26 / 26
        macd = avg12 - avg26
        ndf['MACD'][l] = int(macd)
        i = i + 1
    return ndf

## 1-3. RSI(Relative Strength Index)

In [9]:
# RSI 계산공식
# 가격이 전일 가격보다 상승한 날의 상승분은 U(up) 값이라고 하고,
# 가격이 전일 가격보다 하락한 날의 하락분은 D(down) 값이라고 한다.
# U값과 D값의 평균값을 구하여 그것을 각각 AU(average ups)와 AD(average downs)라 한다.
# AU를 AD값으로 나눈 것을 RS(relative strength) 값이라고 한다. RS 값이 크다는 것은 일정 기간 하락한 폭보다 상승한 폭이 크다는 것을 의미한다.
# RSI = AU / (AU + AD)
# RSI는 15일 기준, 가격은 종가를 기준으로 한다.

def rsi(ndf):
    ndf['RSI'] = 0
    day = 24
    k = 0
    for n in range(360,len(ndf),1):
        # if n >= 998: # check point
        #     print(n)
        U = []
        D = []
        AU = 0
        AD = 0
        RSI = 0
        for m in range(0+k,360+k,day):
            if k >= (len(ndf)-360):
                break
            else:
                value = ndf['close'][m+day] - ndf['close'][m]
            if value >= 0:
                U.append(value)
            else:
                D.append(abs(value))
        if sum(U) == 0 and sum(D) == 0:
            break
        else:
            if len(U) == 0:
                AU = 0
            else:
                AU = sum(U)/len(U)
            if len(D) == 0:
                AD = 0
            else:
                AD = sum(D)/len(D)
            RSI = AU / (AU + AD) * 100
            ndf['RSI'][n] = RSI
            k = k + 1
    return ndf

## 1-4. BB(Bollinger Bands)

In [62]:
# BB 계산공식
# 종가의 20일 이동평균선(MA)을 계산합니다.
# 종가의 20일 표준편차(SD)를 계산합니다.
# 상한선을 계산합니다: 상한선 = MA + (2 x 표준편차)
# 하한선을 계산합니다: 하한선 = MA - (2 x 표준편차)
# 종가기준으로 계산
def bb(ndf):
    ndf['BB+'] = 0
    ndf['BB-'] = 0
    k = 0
    for i in range(479,len(ndf),1):
        al = []
        for j in range(0+k,480+k,1):
            al.append(ndf['close'][j])
        MA = sum(al) / len(al)
        SD = st.stdev(al)
        ndf['BB+'][i] = MA + (2 * SD)
        ndf['BB-'][i] = MA - (2 * SD)
        k = k + 1
    return ndf

## 1-5. Make Data

In [6]:
tdf = bdf.copy()
tdf = macd(tdf)
tdf

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf['RSI'][n] = RSI


998


Unnamed: 0,candel_date_time,open,high,low,close,volume,RSI
0,2020-10-11 22:00:00,12936000.0,12962000.0,12910000.0,12941000.0,77.927065,0.000000
1,2020-10-11 23:00:00,12945000.0,12993000.0,12906000.0,12945000.0,88.762869,0.000000
2,2020-10-12 00:00:00,12945000.0,12994000.0,12931000.0,12953000.0,61.051636,0.000000
3,2020-10-12 01:00:00,12968000.0,12977000.0,12921000.0,12950000.0,54.494628,0.000000
4,2020-10-12 02:00:00,12950000.0,12980000.0,12944000.0,12974000.0,20.422461,0.000000
...,...,...,...,...,...,...,...
994,2020-11-22 08:00:00,20309000.0,20460000.0,20200000.0,20351000.0,516.917593,65.432630
995,2020-11-22 09:00:00,20348000.0,20595000.0,20196000.0,20274000.0,809.041188,62.951930
996,2020-11-22 10:00:00,20276000.0,20402000.0,19525000.0,20034000.0,1357.390535,66.733552
997,2020-11-22 11:00:00,20034000.0,20399000.0,20032000.0,20393000.0,496.604723,65.721074


In [77]:
tdf.to_csv('test.csv', index=False, encoding='cp949')

# 2. Data Preprocessing

In [3]:
bdf = pd.read_csv('./dummyData/dummydata.csv',encoding='cp949')
bdf.head()

Unnamed: 0,open,high,low,close,volume,candel_date_time
0,8892000.0,8900000.0,8818000.0,8844000.0,187.461975,2020-04-07 20:00:00
1,8844000.0,8880000.0,8839000.0,8839000.0,96.131647,2020-04-07 20:30:00
2,8839000.0,8910000.0,8828000.0,8853000.0,280.787491,2020-04-07 21:00:00
3,8853000.0,8854000.0,8771000.0,8789000.0,263.36859,2020-04-07 21:30:00
4,8790000.0,8822000.0,8783000.0,8800000.0,165.317984,2020-04-07 22:00:00


In [13]:
df = []
bdf.columns
for i in range(0,len(bdf.columns)-1, 1):
    text = str(bdf.columns[i])
    ndf = bdf[['candel_date_time',text]]
    df.append(ndf)

In [16]:
df[2].head()

Unnamed: 0,candel_date_time,low
0,2020-04-07 20:00:00,8818000.0
1,2020-04-07 20:30:00,8839000.0
2,2020-04-07 21:00:00,8828000.0
3,2020-04-07 21:30:00,8771000.0
4,2020-04-07 22:00:00,8783000.0


In [17]:
for j in range(0,len(df),1):
    df[j].to_csv(str(bdf.columns[j])+'.csv',index=False,encoding='cp949')