In [None]:
# matplotlib rebuild
import matplotlib as mpl
import FinanceDataReader as fdr
import pandas as pd
import numpy as np
import pandas_datareader as pdr
import yfinance as yf
import datetime

# mpl.font_manager._rebuild()

# matplotlib 환경 설정
import matplotlib.pyplot as plt
# Global matplotlib defaults for every figure in this notebook.
# mpl.rcParams and plt.rcParams are the same object, so one update covers all keys.
plt.rcParams.update({
    # 'Malgun Gothic' is a Windows font (per the original "# window" note); it lets Korean labels render.
    'font.family': 'Malgun Gothic',
    "axes.grid": True,
    "figure.figsize": (12, 6),
    # Show absolute tick values instead of an offset.
    "axes.formatter.useoffset": False,
    'axes.unicode_minus': False,
    # Exponent limits for switching tick labels to scientific notation.
    "axes.formatter.limits": (-10000, 10000),
})


## 데이터 가져오기

### Treasury pandas core ndarray dataframe 형태

In [None]:
# Download the daily 'Close' of the ticker(s) in `dx` (DX-Y.NYB) for the given date window.
dx = ['DX-Y.NYB']
start_date = "2020-01-01"
end_date = "2023-06-27"

# yf.download accepts the whole ticker list in a single call. The former
# `for tn in dx:` loop never used `tn` and just re-issued this same request.
data_dx = yf.download(dx, start=start_date, end=end_date)['Close']
print(data_dx.tail(10))

In [None]:
# Download the daily 'Close' of the tickers in `Treasury_list` (^TNX, ^TYX) for the given date window.
Treasury_list = ['^TNX','^TYX']
start_date = "2020-01-01"
end_date = "2023-06-27"

# One call fetches every ticker in the list. The former per-ticker loop passed
# the full list on each iteration, so the identical request was sent twice.
data_treasury = yf.download(Treasury_list, start=start_date, end=end_date)['Close']
print(data_treasury.tail(10))

### ETF pandas core ndarray dataframe 

In [None]:
# Download the daily 'Close' of every ETF in `Etf_list` for the given date window.
Etf_list = ['SOXX','SOXL','QQQ','TQQQ','SPY','TLT']
start_date = "2020-01-01"
end_date = "2023-06-27"

# One call fetches every ticker in the list. The former per-ticker loop passed
# the full list on each iteration, so the identical request was sent six times.
data_etf = yf.download(Etf_list, start=start_date, end=end_date)['Close']
# Print only the most recent rows, matching the other download cells.
print(data_etf.tail(10))

### 개별 pandas core ndarray dataframe 병합

In [None]:
# Join the dollar-index and treasury close frames side by side (column-wise),
# aligning rows on their index.
df_list = [data_dx, data_treasury]
merged_df = pd.concat(objs=df_list, axis="columns")
merged_df

In [None]:
# Keep only rows where every column has a value, then show the null mask of what remains.
merged_df = merged_df.dropna(axis=0, how="any")
merged_df.isna()

# Fetch FX rate history from '2020' onward for each currency pair via FinanceDataReader.
usdkrw = fdr.DataReader('USD/KRW', '2020')
usdeur = fdr.DataReader('USD/EUR', '2020')
usdjpy = fdr.DataReader('USD/JPY', '2020')
eurjpy = fdr.DataReader('EUR/JPY', '2020')
usdgbp = fdr.DataReader('USD/GBP', '2020')

# A notebook cell only renders its last expression, so the per-pair
# `...['Close'].tail()` lines that used to sit between the downloads were
# silently discarded. Show the latest closes of all pairs in one table instead.
pd.DataFrame({
    'USD/KRW': usdkrw['Close'],
    'USD/EUR': usdeur['Close'],
    'USD/JPY': usdjpy['Close'],
    'EUR/JPY': eurjpy['Close'],
    'USD/GBP': usdgbp['Close'],
}).tail()

## 데이터 시각화

In [None]:
# Line chart of every column in merged_df; '^TNX' is drawn against a secondary y-axis.
# NOTE(review): the remaining columns share the primary axis — confirm their scales are comparable.
merged_df.plot(secondary_y=['^TNX'])

## y 보조축 만들기

In [None]:
# Plot SOXL and TQQQ closes together, with TQQQ on a secondary y-axis.
# The ETF closes live in `data_etf`; no cell in this notebook defines `df`,
# so indexing `df` raised NameError on a fresh kernel.
data_etf[['SOXL', 'TQQQ']].plot(secondary_y=['TQQQ'])

## 비교를 위해 데이터 정규화
- 전체 데이터를 첫 번째 행(row)으로 나눈 뒤 1.0을 빼서, 첫 행 대비 변화율로 변환한다.

In [None]:
# NOTE(review): `df` is not defined in any visible cell, and none of the visible
# downloads produce 'LG화학' / '삼성전자' columns — presumably this cell depends on
# data loaded elsewhere; confirm or replace the source frame.
df2 = df[['LG화학', '삼성전자']]

# Divide every row by the first row and subtract 1.0, so each column starts at 0
# and shows its change relative to the first row.
df_plot = df2 / df2.iloc[0] - 1.0
df_plot.plot()

## 여러 종목 정규화 및 비교 차트

In [None]:
# Rebase every column to its first row: value / first_value - 1,
# so all series start at 0 and can be compared on one chart.
first_row = merged_df.iloc[0]
merged_df_norm = merged_df.div(first_row).sub(1)
merged_df_norm.plot()

In [None]:
# Take the last row of the normalized frame and rank the columns from largest to smallest.
merged_df_norm.iloc[-1].sort_values(ascending=False)

## 회귀분석 하기
### DX/TNX,TYX
- 상관관계를 분석하면 업종 지수와 비교할 수 있고, 특정 종목이 지수를 따라가지 못하는 경우 내·외부 요인을 판단할 수 있다.

In [None]:
from sklearn.linear_model import LinearRegression #sklearn 싸이킷런에서 회귀모델을 가져오겠다.
import numpy as np
from sklearn.impute import SimpleImputer

In [None]:
# Feature matrix: the first column of merged_df, kept 2-D (n_samples, 1) by
# selecting with a one-element list. data_dx is the first frame concatenated,
# so this is presumably the dollar-index close — confirm.
X = merged_df.iloc[:, [0]].to_numpy()
# Target vector: the last column of merged_df as a 1-D array
# (presumably ^TYX, matching the later plot labels — confirm column order).
y = merged_df.iloc[:, -1].to_numpy()
X, y

In [None]:
# Show the array shapes of the target and the feature matrix.
y.shape, X.shape

In [None]:
# Replace any missing target values with the mean of the observed values.
imputer = SimpleImputer(strategy='mean')
# SimpleImputer only accepts 2-D input, while y is a 1-D array (one column taken
# with iloc[:, -1]); passing it directly raises ValueError. Reshape to a single
# column for the transform, then flatten back so y stays 1-D for the later cells.
y = imputer.fit_transform(y.reshape(-1, 1)).ravel()

In [None]:
# Create the linear-regression estimator and train it on the full data set.
# fit() returns the estimator itself, so construction and fitting can be chained.
reg = LinearRegression().fit(X, y)
reg

In [None]:
# Predicted target values for the same X the model was fitted on.
y_pred = reg.predict(X)
y_pred

In [None]:
# Show the array shapes of the feature matrix and the target.
X.shape, y.shape

In [None]:
# Scatter of the actual values (blue) with the fitted regression line (green).
fig, ax = plt.subplots()
ax.scatter(X, y, color='blue')      # actual observations
ax.plot(X, y_pred, color='green')   # predictions from the fitted model
ax.set_title('^TYX BY DX')
ax.set_xlabel('DX')
ax.set_ylabel('^TYX')
plt.show()

In [None]:
# Predict the target for a single feature value of 103.
# The printed label now names DX and ^TYX, the same variables used in the
# regression plot labels; it previously said SOXL/TNX, which this model does not use.
print('DX에 따른 ^TYX 예측 값 :', reg.predict([[103]]))

In [None]:
# Slope(s) and intercept of the fitted linear model.
reg.coef_, reg.intercept_

## 데이터 세트 분리

In [None]:
# Data splitting lives in sklearn.model_selection (LinearRegression comes from sklearn.linear_model).
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing (80:20 split); random_state=0 fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0)

In [None]:
# Number of rows in the training and test feature sets.
len(X_train), len(X_test)

In [None]:
# Number of values in the training and test targets.
len(y_train), len(y_test)

### 분리된 데이터를 통해 모델링

In [None]:
# Re-create the linear model and train it on the training split only.
# fit() returns the estimator itself, so construction and fitting can be chained.
reg = LinearRegression().fit(X_train, y_train)
reg

In [None]:
# Training split: actual values (blue scatter) against the model's fitted line (green).
plt.scatter(X_train, y_train, color='blue')               # actual training observations
plt.plot(X_train, reg.predict(X_train), color='green')    # predictions on the training inputs
# Title and axis labels now match the other regression plots of this model (DX vs ^TYX);
# they previously read SOXL/^TNX, which is not the data being plotted.
plt.title('^TYX BY DX (train data)')
plt.xlabel('DX')
plt.ylabel('^TYX')
plt.show()

In [None]:
# Test split observations (blue) against the regression line learned from the training split (green).
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color='blue')
train_line = reg.predict(X_train)
ax.plot(X_train, train_line, color='green')
ax.set_title('^TYX BY DX')
ax.set_xlabel('DX')
ax.set_ylabel('^TYX')
plt.show()

In [None]:
# Slope(s) and intercept of the model fitted on the training split.
reg.coef_, reg.intercept_

### 모델평가

In [None]:
# Model score on the training split.
reg.score(X_train, y_train)

In [None]:
# Model score on the held-out test split.
reg.score(X_test, y_test)

### 경사하강법

In [None]:
# SGD = stochastic gradient descent.
from sklearn.linear_model import SGDRegressor

# Exponent notation reminder:
#   1e-3 = 0.001    1e-4 = 0.0001
#   1e+3 = 1000     1e+4 = 10000

# Up to 1000 passes (epochs) over the data, initial learning rate 1e-4, fixed seed,
# and verbose=1 so progress is printed while training.
# NOTE(review): SGD is sensitive to feature scale and X is not standardized here — confirm it converges.
sr = SGDRegressor(
    max_iter=1000,
    eta0=1e-4,
    random_state=0,
    verbose=1,
)
sr.fit(X_train, y_train)

In [None]:
# Test split observations (blue) against the SGD model's line over the training inputs (green).
plt.scatter(X_test, y_test, color='blue')
plt.plot(X_train, sr.predict(X_train), color='green')
# Title and axis labels now name the variables used by the other regression plots (DX vs ^TYX);
# they previously named two unrelated stocks and said "train data" although the scatter is the test split.
plt.title('^TYX BY DX (SGD, test data)')
plt.xlabel('DX')
plt.ylabel('^TYX')
plt.show()

In [None]:
# Slope(s) and intercept learned by the SGD regressor.
sr.coef_, sr.intercept_

In [None]:
# Evaluate the SGD model on the test split.
sr.score(X_test, y_test)

In [None]:
# Evaluate the SGD model on the training split.
sr.score(X_train, y_train)

In [None]:
pip install pandas-datareader

In [None]:
import pandas as pd
import numpy as np
import pandas_datareader as pdr
import yfinance as yf
import datetime

# Fetch gold futures (GC=F) price history for the first half of 2023
# and print the ten most recent rows.
commodity_symbol = "GC=F"
start_date, end_date = "2023-01-01", "2023-06-27"

data = yf.download(tickers=commodity_symbol, start=start_date, end=end_date)
print(data.iloc[-10:])