<h2>해외 지수 수집하기</h2>

In [1]:
import pandas as pd
from urllib.request import urlopen
import json #해외지수는 json형태로 표출

In [2]:
symbol = 'NII@NI225'
page = 1

In [3]:
url = 'https://finance.naver.com/world/worldDayListJson.nhn?symbol='+symbol+'&fdtc=0&page='+str(page)
raw = urlopen(url)
data = json.load(raw)

In [4]:
data[0]

{'clos': 21713.21,
 'diff': 207.9,
 'gvol': 70125300,
 'high': 21722.72,
 'low': 21489.95,
 'open': 21563.64,
 'rate': 0.97,
 'symb': 'NII@NI225',
 'xymd': '20190403'}

In [5]:
data[0]['symb']

'NII@NI225'

In [6]:
data[0]['xymd']

'20190403'

In [7]:
data[0]['clos']

21713.21

In [8]:
len(data)

9

In [9]:
d = dict()
for n in range(len(data)):
    date = pd.to_datetime(data[n]['xymd']).date()
    price = float(data[n]['clos'])
    d[date] = price
print(d)

{datetime.date(2019, 4, 3): 21713.21, datetime.date(2019, 4, 2): 21505.31, datetime.date(2019, 4, 1): 21509.03, datetime.date(2019, 3, 29): 21205.81, datetime.date(2019, 3, 28): 21033.76, datetime.date(2019, 3, 27): 21378.73, datetime.date(2019, 3, 26): 21428.39, datetime.date(2019, 3, 25): 20977.11, datetime.date(2019, 3, 22): 21627.34}


In [10]:
def read_json(d, symbol, page=1):
    url = 'https://finance.naver.com/world/worldDayListJson.nhn?symbol='+symbol+'&fdtc=0&page='+str(page)
    raw = urlopen(url)
    data = json.load(raw)
    
    for n in range(len(data)):
        date = pd.to_datetime(data[n]['xymd']).date()
        price = float(data[n]['clos'])
        d[date] = price
        
    if len(data) == 10 and page<3:
        page += 1
        read_json(d, symbol, page)
        
    return (d)

In [11]:
historical_index = pd.Series()
historical_index = read_json(historical_index, symbol, page)

In [12]:
historical_index.head()

2019-04-03    21713.21
2019-04-02    21505.31
2019-04-01    21509.03
2019-03-29    21205.81
2019-03-28    21033.76
dtype: float64

In [13]:
indices = {
    'SPI@SPX' : 'S&P 500', 
    'NAS@NDX' : 'Nasdaq 100', 
    'NII@NI225' : 'Nikkei 225'
}

In [14]:
historical_indices = dict()
for key, value in indices.items():
    print (key, value)
    s = dict()
    s = read_json(s, key, 1)
    historical_indices[value] = s
prices_df = pd.DataFrame(historical_indices)

SPI@SPX S&P 500
NAS@NDX Nasdaq 100
NII@NI225 Nikkei 225


In [15]:
prices_df.tail()

Unnamed: 0,Nasdaq 100,Nikkei 225,S&P 500
2019-03-28,7320.47,21033.76,2815.44
2019-03-29,7378.77,21205.81,2834.4
2019-04-01,7478.42,21509.03,2867.19
2019-04-02,7499.64,21505.31,2867.24
2019-04-03,7544.97,21713.21,2873.4


In [16]:
def date_format(d=''):
    if d != '':
        this_date = pd.to_datetime(d).date()
    else:
        this_date = pd.Timestamp.today().date() #오늘 날짜를 지정
    return this_date

In [17]:
def index_global(d, symbol, start_date ='', end_date='', page=1):
    
    end_date = date_format(end_date)
    if start_date =='':
        start_date = end_data = pd.DateOffset(months=1)
    start_date = date_format(start_date)
    
    url = 'https://finance.naver.com/world/worldDayListJson.nhn?symbol='+symbol+'&fdtc=0&page='+str(page)
    raw = urlopen(url)
    data = json.load(raw)
    
    if len(data) > 0 :
        
        for n in range(len(data)):
            date = pd.to_datetime(data[n]['xymd']).date()
            
            if date <= end_date and date >= start_date:
            #start_date와 end_date 사이에서 데이터를 저장
                #종가 처리
                price = float(data[n]['clos'])
                #딕셔너리에 저장
                d[date] = price
            elif date < start_date:
            #start_date 이전이면 함수 종료
                return d
            
            if len(data) == 10:
                page += 1
                index_global(d, symbol, start_date, end_date, page)
                
    return d

In [18]:
historical_indices = dict()
start_date = '2019-01-01'
end_date = '2019-3-31'
for key, value in indices.items():
    s= dict()
    s = index_global(s, key, start_date)
    historical_indices[value] =  s
prices_df = pd.DataFrame(historical_indices)

In [19]:
prices_df

Unnamed: 0,Nasdaq 100,Nikkei 225,S&P 500
2019-01-02,6360.87,,2510.03
2019-01-03,6147.13,,2447.89
2019-01-04,6422.67,,2531.94
2019-01-07,6488.25,,2549.69
2019-01-08,6551.85,,2574.41
2019-01-09,6600.69,,2584.96
2019-01-10,6620.94,,2596.64
2019-01-11,6601.40,,2596.26
2019-01-14,6541.04,,2582.61
2019-01-15,6669.64,,2610.30
