In [2]:
import os
import pandas as pd
from pandas_datareader import data
import plotly.express as px
import plotly.graph_objects as go

In [3]:
path_dir = os.getcwd() + '\\기초 데이터\\'
file_list = os.listdir(path_dir)

file_list

['Clarkson 신조선가 지수.xlsx',
 '리튬 가격.csv',
 '미국 10년물 국채 금리 채권 수익율.csv',
 '미국 2년 채권 수익율.csv',
 '미국 소비자물가지수(CPI).csv',
 '미국 연준 금리.csv',
 '한국 경기종합지수.csv',
 '한국 금리.csv']

In [5]:
df_index = pd.read_csv('{0}{1}'.format(path_dir, file_list[6]))

df_index['일자'] = pd.to_datetime(df_index['일자'], format="%Y년 %m월 %d일")

df_index

Unnamed: 0,일자,동행지수,선행지수
0,2022-04-01,102.1,99.3
1,2022-03-01,102.4,99.6
2,2022-02-01,102.6,99.8
3,2022-01-01,102.4,100.1
4,2021-12-01,101.8,100.2
...,...,...,...
623,1970-05-01,103.2,100.5
624,1970-04-01,103.2,100.7
625,1970-03-01,101.9,100.8
626,1970-02-01,101.5,101.0


In [6]:
import plotly.express as px


df_index_test = df_index.drop(columns='일자')

fig = px.imshow(df_index_test, text_auto=True, aspect="auto")
fig.show()

In [7]:
import plotly.graph_objects as go
import plotly.express as px

fig = px.line(df_index, x='일자', y='동행지수', hover_data={"일자": "|%Y %B %d"})

fig.add_trace(go.Scatter(
    name="선행지수",
    mode="markers+lines", x=df_index['일자'], y=df_index["선행지수"],
    xperiod="M1",
    xperiodalignment="middle"
))

fig.update_xaxes(
    rangeslider_visible=False,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)

In [29]:
df_us_rate = pd.read_csv('{0}{1}'.format(path_dir, file_list[5]))
# 데이터 전처리 : 결측치 제거
df_us_rate = df_us_rate.dropna(subset = ['발표'])
df_us_rate['발표일'] = pd.to_datetime(df_us_rate['발표일'], format="%Y-%m-%d")

df_us_rate

Unnamed: 0,발표일,발표,예측,이전
4,2022-06-16,1.75,1.50%,1.00%
5,2022-05-05,1.00,1.00%,0.50%
6,2022-03-17,0.50,0.50%,0.25%
7,2022-01-27,0.25,0.25%,0.25%
8,2021-12-16,0.25,0.25%,0.25%
...,...,...,...,...
265,1982-12-15,8.50,,9.00%
266,1982-11-20,9.00,,9.50%
267,1982-10-08,9.50,,10.00%
268,1982-10-02,10.00,,10.25%


In [12]:
df_ko_rate = pd.read_csv('{0}{1}'.format(path_dir, file_list[7]))
# 데이터 전처리 : 결측치 제거
df_ko_rate = df_ko_rate.dropna(subset = ['발표'])
df_ko_rate['발표일'] = pd.to_datetime(df_ko_rate['발표일'], format="%Y-%m-%d")

df_ko_rate

Unnamed: 0,발표일,발표,예측,이전
1,2022-05-26,1.75,1.75%,1.50%
2,2022-04-14,1.50,1.25%,1.25%
3,2022-02-24,1.25,1.25%,1.25%
4,2022-01-14,1.25,1.25%,1.00%
5,2021-11-25,1.00,1.00%,0.75%
...,...,...,...,...
252,1999-10-01,4.75,,4.75%
253,1999-09-01,4.75,,4.75%
254,1999-08-01,4.75,,4.75%
255,1999-07-01,4.75,,4.75%


In [13]:
from datetime import datetime
import time

now = time.strftime('%Y-%m-%d')
start = datetime(1975,1,1)
end = datetime(2022,6,14)

df_kospi = data.DataReader("KOSPI", "naver", start='1975-01-01', end=now)
df_kospi = df_kospi.reset_index()

df_kospi

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1990-01-04,911.21,933.24,911.21,928.82,18094
1,1990-01-05,926.56,931.56,913.66,915.11,22179
2,1990-01-06,915.13,921.51,912.86,912.86,12519
3,1990-01-08,916.33,920.48,909.22,919.46,17577
4,1990-01-09,923.82,923.82,914.64,920.21,18646
...,...,...,...,...,...,...
8433,2022-06-21,2402.99,2423.48,2385.6,2408.93,479678
8434,2022-06-22,2417.11,2418.05,2342.81,2342.81,532138
8435,2022-06-23,2347.84,2361.23,2306.48,2314.32,749660
8436,2022-06-24,2327.11,2373.48,2319.99,2366.6,654445


In [20]:
df_kospi = df_kospi.astype({'Open':'float', 'High':'float', 'Low':'float', 'Close':'float', 'Volume':'int'})

In [21]:
df_kospi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8438 entries, 0 to 8437
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    8438 non-null   datetime64[ns]
 1   Open    8438 non-null   float64       
 2   High    8438 non-null   float64       
 3   Low     8438 non-null   float64       
 4   Close   8438 non-null   float64       
 5   Volume  8438 non-null   int32         
dtypes: datetime64[ns](1), float64(4), int32(1)
memory usage: 362.6 KB


In [30]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=df_kospi['Date'], y=df_kospi['Close'], name="지수"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df_ko_rate['발표일'], y=df_ko_rate["발표"], name="한국 금리"),
    secondary_y=True,
)

# fig.add_trace(
#     go.Scatter(x=df_index['일자'], y=df_index["동행지수"], name="경기지수"),
#     secondary_y=True,
# )

fig.update_layout(
    margin=dict(l=20, r=20, t=60, b=20),
)

fig.update_xaxes(
    rangeslider_visible=False,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)

# Add figure title
# fig.update_layout(
#     title_text="Double Y Axis Example"
# )

# Set x-axis title
# fig.update_xaxes(title_text="xaxis title")


In [67]:
path_dir_1 = os.getcwd() + '\\가공 데이터\\'
file_list_1 = os.listdir(path_dir_1)

df_corr = pd.read_csv('{0}{1}'.format(path_dir_1, file_list_1[0]))
df_corr.set_index('일자', inplace=True)

In [68]:
df_corr.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 360 entries, 202206.0 to nan
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   종가지수    277 non-null    float64
 1   평균지수    360 non-null    float64
 2   한국금리    276 non-null    float64
 3   미국금리    277 non-null    float64
 4   동행지수    275 non-null    float64
 5   선행지수    275 non-null    float64
dtypes: float64(6)
memory usage: 19.7 KB


In [72]:
df_corr.corr().style.background_gradient()

Unnamed: 0,종가지수,평균지수,한국금리,미국금리,동행지수,선행지수
종가지수,1.0,0.997886,-0.78517,-0.506168,0.184118,-0.039744
평균지수,0.997886,1.0,-0.785675,-0.505074,0.19215,-0.036623
한국금리,-0.78517,-0.785675,1.0,0.685668,0.12743,0.163555
미국금리,-0.506168,-0.505074,0.685668,1.0,0.14044,0.254227
동행지수,0.184118,0.19215,0.12743,0.14044,1.0,0.367815
선행지수,-0.039744,-0.036623,0.163555,0.254227,0.367815,1.0


In [54]:
import plotly.graph_objects as go


fig = go.Figure()

# Add traces
fig.add_trace(
    go.Scatter(x=df_kospi['Date'], y=df_kospi['Close'], name="지수")
)

fig.add_trace(
    go.Scatter(x=df_ko_rate['발표일'], y=df_ko_rate["발표"], name="한국 금리", yaxis="y2")
)

fig.add_trace(
    go.Scatter(x=df_us_rate['발표일'], y=df_us_rate["발표"], name="미국 금리", yaxis="y3")
)

fig.add_trace(
    go.Scatter(x=df_index['일자'], y=df_index["선행지수"], name="경기지수", yaxis="y4")
)

fig.update_layout(
    margin=dict(l=20, r=20, t=50, b=20),
)

# Create axis objects
fig.update_layout(
    xaxis=dict(
        domain=[0.1, 0.9]
    ),
    yaxis=dict(
        # title="yaxis title",
        titlefont=dict(
            color="#1f77b4"
        ),
        tickfont=dict(
            color="#1f77b4"
        )
    ),
    yaxis2=dict(
        # title="yaxis2 title",
        titlefont=dict(
            color="#d62728"
        ),
        tickfont=dict(
            color="#d62728"
        ),
        anchor="free",
        overlaying="y",
        side="left",
        position=0.04
    ),
    yaxis3=dict(
        # title="yaxis3 title",
        titlefont=dict(
            color="#00cc96"
        ),
        tickfont=dict(
            color="#00cc96"
        ),
        anchor="x",
        overlaying="y",
        side="right"
    ),
    yaxis4=dict(
        # title="yaxis4 title",
        titlefont=dict(
            color="#9467bd"
        ),
        tickfont=dict(
            color="#9467bd"
        ),
        anchor="free",
        overlaying="y",
        side="right",
        position=0.95
    )
)

fig.update_xaxes(
    rangeslider_visible=False,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)

fig.show()

In [16]:
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
import numpy as np


testx = pd.DataFrame(df_ko_rate["발표"])
testy = pd.DataFrame(df_index['동행지수'])
testy = testy.reset_index()
# testy = testy.drop(columns=["일자"])

testxydf = pd.concat([testx, testy], axis=1)
testxydf['intercept'] = 1 #intercept(절편) 추가
testxydf = testxydf.replace([np.inf, -np.inf], np.nan) # 무한값(inf) 행 제거
testxydf = testxydf.astype(float)
testxydf = testxydf.dropna(axis=0) # 결측값(NaN) 행(가로) 제거

model1 = sm.OLS(df_ko_rate["발표"], testxydf[['intercept', '동행지수']])
result1 = model1.fit()
print(result1.summary())

                            OLS Regression Results                            
Dep. Variable:                     발표   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.029
Method:                 Least Squares   F-statistic:                     8.512
Date:                Mon, 27 Jun 2022   Prob (F-statistic):            0.00384
Time:                        10:00:10   Log-Likelihood:                -440.73
No. Observations:                 256   AIC:                             885.5
Df Residuals:                     254   BIC:                             892.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
intercept     26.1659      7.940      3.295      0.0