In [24]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV paths read by later cells live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import os

In [26]:
# Read the CP949-encoded CSV from the mounted drive.
raw_csv_data = pd.read_csv(
    "/content/drive/MyDrive/Colab Notebooks/Data/삼성전자_3M_NonST_Version1.csv",
    encoding='CP949',
)

# Build the working frame without the CSV's first column (presumably a saved
# row index -- confirm against the file). `drop` returns a new object, so
# `raw_csv_data` itself is left untouched.
df_comp = raw_csv_data.drop(columns=raw_csv_data.columns[0])
df_comp

Unnamed: 0,날짜,종가
0,2021-05-01 00:00:00,81700.0
1,2021-05-01 01:00:00,81700.0
2,2021-05-01 02:00:00,81700.0
3,2021-05-01 03:00:00,81700.0
4,2021-05-01 04:00:00,81700.0
...,...,...
2180,2021-07-30 20:00:00,78600.0
2181,2021-07-30 21:00:00,78600.0
2182,2021-07-30 22:00:00,78600.0
2183,2021-07-30 23:00:00,78600.0


In [27]:
# Convert the '날짜' (date) column from text to real datetime values.
# The timestamps are written year-first (e.g. "2021-05-01 00:00:00"), so
# `dayfirst=True` must not be passed: for this layout it is either ignored with
# a warning or, depending on the parser path, may swap day and month.
df_comp['날짜'] = pd.to_datetime(df_comp['날짜'])

df_comp.head()

Unnamed: 0,날짜,종가
0,2021-05-01 00:00:00,81700.0
1,2021-05-01 01:00:00,81700.0
2,2021-05-01 02:00:00,81700.0
3,2021-05-01 03:00:00,81700.0
4,2021-05-01 04:00:00,81700.0


In [28]:
# Use the timestamp column as the index. The guard makes the cell safe to
# re-run: once '날짜' has become the index it is no longer a column, and an
# unconditional set_index would raise KeyError.
if '날짜' in df_comp.columns:
    df_comp = df_comp.set_index('날짜')

df_comp.head()

Unnamed: 0_level_0,종가
날짜,Unnamed: 1_level_1
2021-05-01 00:00:00,81700.0
2021-05-01 01:00:00,81700.0
2021-05-01 02:00:00,81700.0
2021-05-01 03:00:00,81700.0
2021-05-01 04:00:00,81700.0


In [29]:
# Keep only the '종가' (closing price) column as a one-column frame.
# `df` and `df_comp` are bound to the same object.
df = df_comp = df_comp[['종가']]

df

Unnamed: 0_level_0,종가
날짜,Unnamed: 1_level_1
2021-05-01 00:00:00,81700.0
2021-05-01 01:00:00,81700.0
2021-05-01 02:00:00,81700.0
2021-05-01 03:00:00,81700.0
2021-05-01 04:00:00,81700.0
...,...
2021-07-30 20:00:00,78600.0
2021-07-30 21:00:00,78600.0
2021-07-30 22:00:00,78600.0
2021-07-30 23:00:00,78600.0


In [30]:
# ADF Test

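# The Augmented Dickey-Fuller (ADF) test is used to check whether a time series is stationary.
# Null hypothesis: the series has a unit root, i.e. it is non-stationary (this is what we conclude when we fail to reject).
# Alternative hypothesis: the series is stationary (supported when the null hypothesis is rejected).
# Here p-value > 0.05, so the null hypothesis cannot be rejected; the series is therefore treated as non-stationary.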

# Hold out the last n_obs rows as the test split; every row before them forms
# the training split.
n_obs = 20
df_train = df.iloc[:-n_obs]
df_test = df.iloc[-n_obs:]

from statsmodels.tsa.stattools import adfuller

def adf_test(df):
    """Run the Augmented Dickey-Fuller test on ``df`` and print a short report.

    ``df.values`` is handed to ``adfuller``. From the returned sequence the
    test statistic (element 0), the p-value (element 1) and the mapping of
    critical values (element 4) are printed. Nothing is returned.
    """
    outcome = adfuller(df.values)
    statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    print(f'ADF Statistics: {statistic:f}')
    print(f'p-value: {p_value:f}')
    print('Critical values:')
    for level, threshold in critical_values.items():
        print(f'\t{level}: {threshold:.3f}')
    
# Report the ADF result for the '종가' column of the currently loaded dataset.
print('ADF Test: Samsung 3M 종가 Dataset')
adf_test(df.loc[:, '종가'])

ADF Test: Samsung 3M 종가 Dataset
ADF Statistics: -2.412502
p-value: 0.138239
Critical values:
	1%: -3.433
	5%: -2.863
	10%: -2.567


In [31]:
# KPSS Test

from statsmodels.tsa.stattools import kpss

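# The KPSS test checks whether a time series is stationary around a mean or a linear trend, or non-stationary because of a unit root.
# Null hypothesis: the series is stationary.
# Alternative hypothesis: the series is not stationary.
# Here p-value < 0.05, so the null hypothesis is rejected; the series is non-stationary.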

def kpss_test(df):
    """Run the KPSS test on ``df`` and print a short report.

    ``df.values`` is handed to ``kpss``; the four returned items (statistic,
    p-value, number of lags, critical-value mapping) are printed one per line,
    followed by one line per critical value. Nothing is returned.
    """
    stat, pval, lags, crit = kpss(df.values)

    report = [
        f'KPSS Statistic: {stat}',
        f'p-value: {pval}',
        f'num lags: {lags}',
        'Critial Values:',
    ]
    report.extend(f'{level} : {threshold}' for level, threshold in crit.items())
    # A single print of newline-joined lines emits the same text as one print per line.
    print('\n'.join(report))
        
# Report the KPSS result for the '종가' column of the currently loaded dataset.
print('KPSS Test: Samsung 3M 종가 Dataset')
kpss_test(df.loc[:, '종가'])

KPSS Test: Samsung 3M 종가 Dataset
KPSS Statistic: 2.616896546078708
p-value: 0.01
num lags: 26
Critial Values:
10% : 0.347
5% : 0.463
2.5% : 0.574
1% : 0.739




In [14]:
# Read the CP949-encoded CSV from the mounted drive.
raw_csv_data = pd.read_csv(
    "/content/drive/MyDrive/Colab Notebooks/Data/삼성전자_3M_ST_Version1.csv",
    encoding='CP949',
)

# Build the working frame without the CSV's first column (presumably a saved
# row index -- confirm against the file). `drop` returns a new object, so
# `raw_csv_data` itself is left untouched.
df_comp = raw_csv_data.drop(columns=raw_csv_data.columns[0])
df_comp

Unnamed: 0,날짜,종가
0,2021-05-01 0:00,0.611995
1,2021-05-01 1:00,0.611995
2,2021-05-01 2:00,0.611995
3,2021-05-01 3:00,0.611995
4,2021-05-01 4:00,0.611995
...,...,...
2180,2021-07-30 20:00,-0.127065
2181,2021-07-30 21:00,-0.127065
2182,2021-07-30 22:00,-0.127065
2183,2021-07-30 23:00,-0.127065


In [15]:
# Convert the '날짜' (date) column from text to real datetime values.
# The timestamps are written year-first (e.g. "2021-05-01 0:00"), so
# `dayfirst=True` must not be passed: for this layout it is either ignored with
# a warning or, depending on the parser path, may swap day and month.
df_comp['날짜'] = pd.to_datetime(df_comp['날짜'])

df_comp.head()

Unnamed: 0,날짜,종가
0,2021-05-01 00:00:00,0.611995
1,2021-05-01 01:00:00,0.611995
2,2021-05-01 02:00:00,0.611995
3,2021-05-01 03:00:00,0.611995
4,2021-05-01 04:00:00,0.611995


In [16]:
# Use the timestamp column as the index. The guard makes the cell safe to
# re-run: once '날짜' has become the index it is no longer a column, and an
# unconditional set_index would raise KeyError.
if '날짜' in df_comp.columns:
    df_comp = df_comp.set_index('날짜')

df_comp.head()

Unnamed: 0_level_0,종가
날짜,Unnamed: 1_level_1
2021-05-01 00:00:00,0.611995
2021-05-01 01:00:00,0.611995
2021-05-01 02:00:00,0.611995
2021-05-01 03:00:00,0.611995
2021-05-01 04:00:00,0.611995


In [17]:
# Keep only the '종가' column as a one-column frame.
# `df` and `df_comp` are bound to the same object.
df = df_comp = df_comp[['종가']]

df

Unnamed: 0_level_0,종가
날짜,Unnamed: 1_level_1
2021-05-01 00:00:00,0.611995
2021-05-01 01:00:00,0.611995
2021-05-01 02:00:00,0.611995
2021-05-01 03:00:00,0.611995
2021-05-01 04:00:00,0.611995
...,...
2021-07-30 20:00:00,-0.127065
2021-07-30 21:00:00,-0.127065
2021-07-30 22:00:00,-0.127065
2021-07-30 23:00:00,-0.127065


In [22]:
# ADF Test

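# The Augmented Dickey-Fuller (ADF) test is used to check whether a time series is stationary.
# Null hypothesis: the series has a unit root, i.e. it is non-stationary (this is what we conclude when we fail to reject).
# Alternative hypothesis: the series is stationary (supported when the null hypothesis is rejected).
# Here p-value < 0.05, so the null hypothesis is rejected; the series is stationary.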

# Hold out the last n_obs rows as the test split; every row before them forms
# the training split.
n_obs = 20
df_train = df.iloc[:-n_obs]
df_test = df.iloc[-n_obs:]

from statsmodels.tsa.stattools import adfuller

def adf_test(df):
    """Run the Augmented Dickey-Fuller test on ``df`` and print a short report.

    ``df.values`` is handed to ``adfuller``. From the returned sequence the
    test statistic (element 0), the p-value (element 1) and the mapping of
    critical values (element 4) are printed. Nothing is returned.
    """
    outcome = adfuller(df.values)
    statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    print(f'ADF Statistics: {statistic:f}')
    print(f'p-value: {p_value:f}')
    print('Critical values:')
    for level, threshold in critical_values.items():
        print(f'\t{level}: {threshold:.3f}')
    
# Report the ADF result for the '종가' column of the currently loaded dataset.
print('ADF Test: Samsung 3M 수익률 Dataset')
adf_test(df.loc[:, '종가'])

ADF Test: Samsung 3M 수익률 Dataset
ADF Statistics: -7.333311
p-value: 0.000000
Critical values:
	1%: -3.433
	5%: -2.863
	10%: -2.567


In [23]:
# KPSS Test

from statsmodels.tsa.stattools import kpss

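# The KPSS test checks whether a time series is stationary around a mean or a linear trend, or non-stationary because of a unit root.
# Null hypothesis: the series is stationary.
# Alternative hypothesis: the series is not stationary.
# Here p-value > 0.05, so the null hypothesis cannot be rejected; the series is treated as stationary.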

def kpss_test(df):
    """Run the KPSS test on ``df`` and print a short report.

    ``df.values`` is handed to ``kpss``; the four returned items (statistic,
    p-value, number of lags, critical-value mapping) are printed one per line,
    followed by one line per critical value. Nothing is returned.
    """
    stat, pval, lags, crit = kpss(df.values)

    report = [
        f'KPSS Statistic: {stat}',
        f'p-value: {pval}',
        f'num lags: {lags}',
        'Critial Values:',
    ]
    report.extend(f'{level} : {threshold}' for level, threshold in crit.items())
    # A single print of newline-joined lines emits the same text as one print per line.
    print('\n'.join(report))
        
# Report the KPSS result for the '종가' column of the currently loaded dataset.
print('KPSS Test: Samsung 3M 수익률 Dataset')
kpss_test(df.loc[:, '종가'])

KPSS Test: Samsung 3M 수익률 Dataset
KPSS Statistic: 0.35114602671234807
p-value: 0.09821291952053962
num lags: 26
Critial Values:
10% : 0.347
5% : 0.463
2.5% : 0.574
1% : 0.739


