In [1]:
from abc import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import statsmodels as sm
import xlwings as xw
import openpyxl
from fredapi import Fred
from datetime import datetime, timedelta
from tqdm import tqdm
import time
import investpy as inv
import sqlite3

class DataMachine(metaclass=ABCMeta):
    """Abstract base for the data-collection classes in this notebook.

    Holds the state the concrete collectors share: the ``contents`` mapping,
    the requested date window, an optional API key and the collected frame.
    Subclasses must implement :meth:`parser`.
    """

    def __init__(self):
        # Nothing collected yet and no credentials by default.
        self._apiKey = None
        self._dataFrame = None
        # Fresh mapping per instance.
        self._contents = {}
        # Default window: fixed start date up to "today" as YYYY-MM-DD text.
        self._startDate = "2000-01-01"
        self._endDate = f"{datetime.now():%Y-%m-%d}"

    @abstractmethod
    def parser(self):
        """Collect the data; every concrete subclass has to provide this."""

    @property
    def contents(self):
        """The mapping currently stored on the instance."""
        return self._contents

    @contents.setter
    def contents(self, value):
        # Stored as-is, without copying or validation.
        self._contents = value

class FRED(DataMachine):
    """
    ------------------------------------
    Crawler for FRED macro-economic series.

    Every entry of ``contents`` becomes one column of the collected frame,
    indexed by observation date, e.g.:

                 T10Y    T2Y
    2020-01-01    2.5    1.5
    2020-01-02    2.4    1.3
    2020-01-03    2.3    1.5

    Variables:
        apiKey: str, personal API key used to build the fredapi client
        contents: dict, {'user-chosen column name': 'FRED series code'}
        dataFrame: pandas DataFrame or None, previously collected data; when
            a non-empty frame is given, only observations dated after its
            last index entry are requested and appended
    ------------------------------------
    """
    def __init__(self, apiKey, contents, dataFrame=None):
        super().__init__()
        self._apiKey = apiKey
        self._frd = Fred(api_key=apiKey)
        # `contents` is a property, so it must be assigned through the setter;
        # calling it would try to call the stored dict and raise TypeError.
        self.contents = contents

        # Identity check: `!= None` on a DataFrame compares element-wise and
        # the resulting frame has no unambiguous truth value.
        if dataFrame is not None:
            self._dataFrame = dataFrame

        self.parser()

    def _fetch(self, start, end, desc):
        """Download every series in ``contents`` for the window and return one frame."""
        series_by_name = {
            name: self._frd.get_series(code, start, end)
            for name, code in tqdm(self._contents.items(), desc=desc)
        }
        # Building the frame from the whole dict keeps the union of all
        # observation dates; filling an empty frame column by column would
        # re-index every later series to the dates of the first one.
        return pd.DataFrame(series_by_name)

    def parser(self):
        """Fill ``dataFrame``: incremental update if data exists, full crawl otherwise."""
        existing = self._dataFrame

        # update
        if existing is not None and len(existing.index) > 0:
            lastUpdate = datetime.strftime(existing.index[-1] + timedelta(days=1), '%Y-%m-%d')
            # ISO-formatted dates order correctly as plain strings; skip the
            # request when the window would start after it ends.
            if lastUpdate > self._endDate:
                return
            tmpFrame = self._fetch(lastUpdate, self._endDate, 'Update')
            if not tmpFrame.empty:
                self._dataFrame = pd.concat([existing, tmpFrame])

        # parsing
        else:
            # No usable previous data: build the frame from scratch instead of
            # assigning columns into a `None` placeholder.
            self._dataFrame = self._fetch(self._startDate, self._endDate, 'Crawling')

    @property
    def dataFrame(self):
        """The collected pandas DataFrame."""
        return self._dataFrame

class KRX(DataMachine):
    """
    ------------------------------------
    Crawler for OHLCV data of KRX-listed domestic stocks and bonds.

                  Open   High   Low  Close   Volume
    2020-01-01      1     2      3     4       5
    2020-01-02      1     2      3     4       5
    2020-01-03      1     2      3     4       5

    Variables:
        contents: dict, {'user-chosen name': 'company code'}
        dataFrame: pandas DataFrame or None, previously collected data

    NOTE(review): this class has no data client yet. ``parser`` was copied
    from the FRED collector and used ``self._frd``, which is never created
    here, so it could not run. The download is now isolated in
    ``_fetch_series`` and fails with an explicit error until a KRX source is
    wired in. ``parser`` stores one column per ``contents`` entry, which does
    not match the OHLCV layout sketched above -- TODO confirm the intended
    frame shape.
    ------------------------------------
    """
    def __init__(self, contents, dataFrame=None):
        super().__init__()
        # `contents` is a property, so it must be assigned through the setter;
        # calling it would try to call the stored dict and raise TypeError.
        self.contents = contents

        # Identity check: `!= None` on a DataFrame compares element-wise and
        # the resulting frame has no unambiguous truth value.
        if dataFrame is not None:
            self._dataFrame = dataFrame

    def _fetch_series(self, code, start, end):
        """Return the series for one code over the window (not implemented yet)."""
        raise NotImplementedError(
            "KRX has no data client configured; implement _fetch_series "
            "to download data for code %r" % (code,)
        )

    def _fetch(self, start, end, desc):
        """Collect one series per ``contents`` entry and return them as one frame."""
        series_by_name = {
            name: self._fetch_series(code, start, end)
            for name, code in tqdm(self._contents.items(), desc=desc)
        }
        # Building from the whole dict keeps the union of all index values.
        return pd.DataFrame(series_by_name)

    def parser(self):
        """Fill ``dataFrame``: incremental update if data exists, full crawl otherwise."""
        existing = self._dataFrame

        # update
        if existing is not None and len(existing.index) > 0:
            lastUpdate = datetime.strftime(existing.index[-1] + timedelta(days=1), '%Y-%m-%d')
            # ISO-formatted dates order correctly as plain strings; skip the
            # request when the window would start after it ends.
            if lastUpdate > self._endDate:
                return
            tmpFrame = self._fetch(lastUpdate, self._endDate, 'Update')
            if not tmpFrame.empty:
                self._dataFrame = pd.concat([existing, tmpFrame])

        # parsing
        else:
            # No usable previous data: build the frame from scratch instead of
            # assigning columns into a `None` placeholder.
            self._dataFrame = self._fetch(self._startDate, self._endDate, 'Crawling')

    @property
    def dataFrame(self):
        """The collected pandas DataFrame."""
        return self._dataFrame


In [2]:
from pykrx import stock, bond

# Request the OHLCV history of index code '1001' for the window
# 20000101..20221218 (dates passed as YYYYMMDD strings). The bare expression
# is the cell's display value; the rendered table below labels the index KOSPI.
stock.get_index_ohlcv_by_date('20000101','20221218','1001')

코스피,시가,고가,저가,종가,거래량,거래대금,상장시가총액
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-04,1028.33,1066.18,1016.59,1059.04,195898660,3771013000000,0
2000-01-05,1006.87,1047.70,984.05,986.31,257696170,4798126000000,0
2000-01-06,1013.95,1014.90,953.50,960.79,203523570,3726898000000,0
2000-01-07,949.17,970.16,930.84,948.65,215664150,3696686000000,0
2000-01-10,979.67,994.94,965.02,987.24,240175480,3815037000000,0
...,...,...,...,...,...,...,...
2022-12-12,2373.58,2381.87,2368.47,2373.02,499224684,5822259612423,1875700660827307
2022-12-13,2385.05,2388.27,2364.87,2372.40,447565388,6659361034224,1874838727660788
2022-12-14,2380.81,2400.18,2379.44,2399.25,409799402,5724107192073,1895655040773681
2022-12-15,2383.83,2392.11,2360.95,2360.97,375386441,6134217159398,1865842784271843


In [3]:
# Same index request as the previous cell, keeping only the '종가' column
# (shown as a float64 Series in the output; '종가' is the close price).
# NOTE(review): this repeats the download instead of reusing a stored result.
a = stock.get_index_ohlcv_by_date('20000101','20221218','1001')['종가']
# Bare name so the notebook displays the Series.
a

날짜
2000-01-04    1059.04
2000-01-05     986.31
2000-01-06     960.79
2000-01-07     948.65
2000-01-10     987.24
               ...   
2022-12-12    2373.02
2022-12-13    2372.40
2022-12-14    2399.25
2022-12-15    2360.97
2022-12-16    2360.02
Name: 종가, Length: 5669, dtype: float64

In [4]:
import FinanceDataReader as fdr
# Read symbol 'GC=F' for 2018-01-02..2018-03-29 as displayed below and show
# only its 'Adj Close' column.
# NOTE(review): 'GC=F' is presumably the gold-futures ticker -- confirm.
fdr.DataReader('GC=F','2018-01-01', '2018-03-30')['Adj Close']

Date
2018-01-02    1313.699951
2018-01-03    1316.199951
2018-01-04    1319.400024
2018-01-05    1320.300049
2018-01-08    1318.599976
                 ...     
2018-03-23    1349.300049
2018-03-26    1354.400024
2018-03-27    1341.300049
2018-03-28    1324.199951
2018-03-29    1322.800049
Name: Adj Close, Length: 61, dtype: float64

In [5]:
import pandas as pd
import datetime as dt
import pandas_datareader.data as web

# Requested window for the query below.
start_time = dt.datetime(2000, 1, 1)
end_time = dt.datetime(2022, 2, 1)
# Load the 'MEI_CTRY_WEIGHTS' dataset from the 'oecd' source through
# pandas-datareader; later cells index this frame by country name.
df = web.DataReader('MEI_CTRY_WEIGHTS', 'oecd', start_time, end_time)

# Quick inspection: shape, column/dtype summary, then the first rows
# (only the final expression is rendered as a rich table).
print(df.shape)
df.info()
df.head()

(21, 82)
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 21 entries, 2000-01-01 to 2020-01-01
Data columns (total 82 columns):
 #   Column                                                                                                   Non-Null Count  Dtype  
---  ------                                                                                                   --------------  -----  
 0   (Greece, PPI country weights in percentage of OECD Total, in percentage of OECD Total, Annual)           21 non-null     float64
 1   (Greece, CPI country weights in percentage of OECD Total, in percentage of OECD Total, Annual)           21 non-null     float64
 2   (Switzerland, PPI country weights in percentage of OECD Total, in percentage of OECD Total, Annual)      21 non-null     float64
 3   (Switzerland, CPI country weights in percentage of OECD Total, in percentage of OECD Total, Annual)      21 non-null     float64
 4   (Czech Republic, PPI country weights in percentage of OECD 

Country,Greece,Greece,Switzerland,Switzerland,Czech Republic,Czech Republic,Australia,Australia,Netherlands,Netherlands,...,Chile,Chile,Latvia,Latvia,Lithuania,Lithuania,Colombia,Colombia,Costa Rica,Costa Rica
Country weights,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,...,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total
Measure,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,...,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total,in percentage of OECD Total
Frequency,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,...,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual
Time,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
2000-01-01,0.719193,0.744075,0.898936,0.823315,0.567801,0.453541,1.83813,1.724117,1.73093,1.487708,...,,0.419814,0.064912,0.060601,0.100797,0.100532,0.896242,0.965766,0.109418,0.113779
2001-01-01,0.747185,0.773662,0.8974,0.820695,0.590974,0.470074,1.871561,1.716521,1.747754,1.473084,...,,0.417201,0.069392,0.063891,0.107511,0.104118,0.89642,0.982731,0.111353,0.112683
2002-01-01,0.779334,0.8138,0.891217,0.811645,0.588295,0.459063,1.898395,1.74556,1.758328,1.489761,...,,0.416122,0.073614,0.067343,0.114061,0.109694,0.898752,0.997795,0.112561,0.118147
2003-01-01,0.795389,0.812828,0.86585,0.791145,0.606634,0.471653,1.94379,1.748197,1.687636,1.41312,...,,0.426609,0.07708,0.070183,0.125528,0.121837,0.916689,1.015396,0.115171,0.124375
2004-01-01,0.801133,0.811302,0.852714,0.779116,0.613069,0.472032,1.945083,1.775367,1.674649,1.39843,...,,0.447028,0.079814,0.074016,0.126391,0.126592,0.93507,1.013443,0.116369,0.126293


In [6]:
# Top-level column label to pull out of the multi-level column index of `df`.
country = "Japan"
# Selecting by the top-level label leaves the remaining column levels
# (Country weights / Measure / Frequency), as the output below shows.
df_japan_total = df[country]
# Preview the first rows.
df_japan_total.head()

Country weights,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total
Measure,in percentage of OECD Total,in percentage of OECD Total
Frequency,Annual,Annual
Time,Unnamed: 1_level_3,Unnamed: 2_level_3
2000-01-01,11.80374,9.457836
2001-01-01,11.66288,9.408059
2002-01-01,11.53052,9.486904
2003-01-01,11.43365,9.516927
2004-01-01,11.31362,9.353314


In [7]:
# Wrap the selected country data in a DataFrame and display it in full.
# NOTE(review): `df_japan_total` already renders as a multi-column table in
# the previous cell, so this wrapper looks redundant -- confirm before removing.
pd.DataFrame(df_japan_total)

Country weights,PPI country weights in percentage of OECD Total,CPI country weights in percentage of OECD Total
Measure,in percentage of OECD Total,in percentage of OECD Total
Frequency,Annual,Annual
Time,Unnamed: 1_level_3,Unnamed: 2_level_3
2000-01-01,11.80374,9.457836
2001-01-01,11.66288,9.408059
2002-01-01,11.53052,9.486904
2003-01-01,11.43365,9.516927
2004-01-01,11.31362,9.353314
2005-01-01,11.1939,9.413013
2006-01-01,10.90938,9.332022
2007-01-01,10.76494,9.174304
2008-01-01,10.48933,9.095966
2009-01-01,10.19044,9.194797
