In [1]:
import io
import os
from datetime import datetime, timedelta

import boto3
import pandas as pd
import yfinance as yf
# from pykrx import stock

### 미국 주식 주가 데이터 크롤링

In [2]:
# Load the US stock list (ticker, company, industry, market) from a local CSV file
usa = pd.read_csv('해외_산업분류.csv')
usa

Unnamed: 0,Ticker,Company,Industrials,Market
0,AA,Alcoa Corporation,Aluminum,NYSE
1,ACNT,Ascent Industries Co.,Steel,NASDAQ
2,ALB,Albemarle Corporation,Specialty Chemicals,NYSE
3,ALTO,"Alto Ingredients, Inc.",Specialty Chemicals,NASDAQ
4,AMR,"Alpha Metallurgical Resources, Inc.",Coking Coal,NYSE
...,...,...,...,...
4878,OBDC,Owl Rock Capital Corporation,Credit Services,NYSE
4879,HEI.A,HEICO Corporation,Aerospace & Defense,NYSE
4880,MOG.A,Moog Inc.,Aerospace & Defense,NYSE
4881,UHAL.B,U-Haul Holding Company,Rental & Leasing Services,NYSE


In [3]:
# Tickers have to be reshaped before they can be used to query yfinance:
# the '.A' / '.B' share-class markers become '-A' / '-B'. `usa` itself is left
# untouched; the converted tickers live in a separate frame.
yf_usa = usa.assign(
    Ticker=usa['Ticker']
    .str.replace('.A', '-A', regex=False)
    .str.replace('.B', '-B', regex=False)
)

In [4]:
# Show the converted frame to check the ticker rewrite (e.g. the share-class rows at the end)
yf_usa

Unnamed: 0,Ticker,Company,Industrials,Market
0,AA,Alcoa Corporation,Aluminum,NYSE
1,ACNT,Ascent Industries Co.,Steel,NASDAQ
2,ALB,Albemarle Corporation,Specialty Chemicals,NYSE
3,ALTO,"Alto Ingredients, Inc.",Specialty Chemicals,NASDAQ
4,AMR,"Alpha Metallurgical Resources, Inc.",Coking Coal,NYSE
...,...,...,...,...
4878,OBDC,Owl Rock Capital Corporation,Credit Services,NYSE
4879,HEI-A,HEICO Corporation,Aerospace & Defense,NYSE
4880,MOG-A,Moog Inc.,Aerospace & Defense,NYSE
4881,UHAL-B,U-Haul Holding Company,Rental & Leasing Services,NYSE


In [5]:
# Show the original frame again: it was copied before the rewrite, so its tickers keep the dotted form
usa

Unnamed: 0,Ticker,Company,Industrials,Market
0,AA,Alcoa Corporation,Aluminum,NYSE
1,ACNT,Ascent Industries Co.,Steel,NASDAQ
2,ALB,Albemarle Corporation,Specialty Chemicals,NYSE
3,ALTO,"Alto Ingredients, Inc.",Specialty Chemicals,NASDAQ
4,AMR,"Alpha Metallurgical Resources, Inc.",Coking Coal,NYSE
...,...,...,...,...
4878,OBDC,Owl Rock Capital Corporation,Credit Services,NYSE
4879,HEI.A,HEICO Corporation,Aerospace & Defense,NYSE
4880,MOG.A,Moog Inc.,Aerospace & Defense,NYSE
4881,UHAL.B,U-Haul Holding Company,Rental & Leasing Services,NYSE


In [7]:
def collect_and_upload_stock_data(tickers, start_date, end_date, s3_client, bucket_name, endpoint_url, access_key, secret_key):
    """Download daily prices for each ticker and upload them to object storage as .xlsx files.

    For every ticker the frame returned by ``yf.download`` gets a ``Change``
    column (percent change of ``Adj Close`` versus the previous row), loses the
    ``Adj Close`` column, gains a ``Ticker`` column and drops its first row
    (whose ``Change`` is undefined). The result is written to an in-memory
    Excel file and uploaded as ``usa_stock_crawling/<name>_주가데이터.xlsx``.
    ``<name>`` is the ticker spelling from the module-level ``usa`` frame at
    the row where ``yf_usa`` holds ``ticker``; when no such row exists the
    ticker itself is used.

    Args:
        tickers: Iterable of ticker symbols in the spelling yfinance accepts.
        start_date: Start of the download window, passed to ``yf.download``.
        end_date: End of the download window, passed to ``yf.download``.
        s3_client: Client object exposing ``upload_fileobj(fileobj, bucket, key)``.
        bucket_name: Name of the destination bucket.
        endpoint_url: Unused; kept so existing callers keep working.
        access_key: Unused; kept so existing callers keep working.
        secret_key: Unused; kept so existing callers keep working.

    Returns:
        list: Tickers that were skipped (nothing uploaded), in encounter order.
    """
    failed_ls = []

    for ticker in tickers:
        # 'Adj Close' is only delivered when auto_adjust is False, and the
        # default of that flag is not the same in every yfinance release.
        data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)

        # A failed download comes back as an empty frame. Detect it explicitly
        # instead of relying on `.all()` being vacuously True on empty data.
        if data.empty:
            print(f"No price data returned for {ticker}. Skipping upload.")
            failed_ls.append(ticker)
            continue

        data['Change'] = data['Adj Close'].pct_change() * 100  # percent change vs. previous row
        data = data.drop('Adj Close', axis = 1)
        data['Ticker'] = ticker
        data = data[1:]  # first row has no previous row, so its Change is NaN

        # A single downloaded row leaves nothing after the first row is dropped.
        if data.empty:
            print(f"Data for {ticker} has fewer than two rows. Skipping upload.")
            failed_ls.append(ticker)
            continue

        if data['Open'].eq(0).all():
            print(f"Data for {ticker} has all 'Open' values as 0. Skipping upload.")
            failed_ls.append(ticker)
            continue

        # Map the yfinance spelling back to the original ticker for the file
        # name; fall back to the ticker itself when the row cannot be found.
        matches = yf_usa.index[yf_usa['Ticker'] == ticker]
        if len(matches) and matches[0] in usa.index:
            title_ticker = usa.at[matches[0], 'Ticker']
        else:
            title_ticker = ticker

        # Serialise the frame to an in-memory Excel file
        xlsx_filename = f'{title_ticker}_주가데이터.xlsx'
        xlsx_buffer = io.BytesIO()
        data.to_excel(xlsx_buffer)
        xlsx_buffer.seek(0)  # rewind so the upload reads from the start

        # Upload the Excel file to the bucket
        object_name = f'usa_stock_crawling/{xlsx_filename}'
        s3_client.upload_fileobj(xlsx_buffer, bucket_name, object_name)
        print(f"Uploaded {xlsx_filename} to S3")

    print('Failed tickers: ', failed_ls)
    return failed_ls

def main(tickers=None):
    """Collect two years of daily prices for the US tickers and upload them to object storage.

    Args:
        tickers: Optional iterable of yfinance-style tickers to process.
            When omitted, every unique value of ``yf_usa['Ticker']`` is used.

    Raises:
        RuntimeError: If the storage credentials are not set in the environment.
    """
    # Object storage settings (S3-compatible endpoint)
    service_name = 's3'
    endpoint_url = 'https://kr.object.ncloudstorage.com'
    bucket_name = 'bucket1chaegpt'

    # Credentials are read from the environment instead of being written into
    # the notebook. The keys that used to be embedded here were exposed to
    # anyone with access to the file and should be rotated.
    try:
        access_key = os.environ['NCLOUD_ACCESS_KEY']
        secret_key = os.environ['NCLOUD_SECRET_KEY']
    except KeyError as missing:
        raise RuntimeError(
            f'Missing environment variable {missing}; set NCLOUD_ACCESS_KEY and '
            'NCLOUD_SECRET_KEY before running this cell.'
        ) from missing

    # Ticker list: select the column by name rather than by position so an
    # extra leading column in the CSV cannot silently change what is iterated.
    usa_tickers = yf_usa['Ticker'].unique() if tickers is None else tickers

    # Download window: yfinance returns data up to the day before `end`.
    end_date = datetime.today()
    # One extra day at the start so the percent change of the first day of the
    # two-year window can be computed.
    start_date = end_date - timedelta(days=365*2 + 1)

    # Create the storage client
    s3 = boto3.client(service_name, endpoint_url=endpoint_url, aws_access_key_id=access_key,
                      aws_secret_access_key=secret_key)

    # Create the "folder" (an empty object whose key ends with '/')
    object_name = 'usa_stock_crawling/'
    s3.put_object(Bucket=bucket_name, Key=object_name)

    # Download the price data and upload one file per ticker
    collect_and_upload_stock_data(usa_tickers, start_date, end_date, s3, bucket_name, endpoint_url,
                                  access_key, secret_key)

if __name__ == '__main__':
    main()

[*********************100%%**********************]  1 of 1 completed
Uploaded AA_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded ACNT_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded ALB_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded ALTO_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded AMR_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded APD_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded ARCH_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded AREC_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded ASH_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded ASIX_주가데이터.xlsx to S3
[*****


1 Failed download:
['GOGO']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2021-08-31 09:55:51.593623 -> 2023-09-01 09:55:51.593623)')



Data for GOGO has all 'Open' values as 0. Skipping upload.
[*********************100%%**********************]  1 of 1 completed
Uploaded GOOG_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded GOOGL_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded GROM_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded GRPN_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded GSAT_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded GTN_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded HOFV_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded IAC_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded IAS_주가데이터.xlsx to S3
[*********************100%%***************


1 Failed download:
['HLX']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2021-08-31 09:55:51.593623 -> 2023-09-01 09:55:51.593623)')



Data for HLX has all 'Open' values as 0. Skipping upload.
[*********************100%%**********************]  1 of 1 completed
Uploaded HNRG_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded HP_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded HPK_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded HUSA_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded ICD_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded IEP_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded INSW_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded KGS_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded KLXE_주가데이터.xlsx to S3
[*********************100%%********************


1 Failed download:
['BCML']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2021-08-31 09:55:51.593623 -> 2023-09-01 09:55:51.593623)')



Data for BCML has all 'Open' values as 0. Skipping upload.
[*********************100%%**********************]  1 of 1 completed
Uploaded BCOW_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BCSF_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BCV_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BCX_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BDJ_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BEN_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BENF_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BFAC_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BFC_주가데이터.xlsx to S3
[*********************100%%******************

In [8]:
# Re-run the tickers that were skipped in the first pass
failed_ls = ['GOGO', 'HLX', 'BCML', 'BREZR']

# NOTE: this redefines `main` and shadows the definition from the previous cell.
def main(tickers=None):
    """Retry the download and upload for tickers that were skipped earlier.

    Args:
        tickers: Optional iterable of yfinance-style tickers to retry.
            When omitted, the module-level ``failed_ls`` is used.

    Raises:
        RuntimeError: If the storage credentials are not set in the environment.
    """
    # Object storage settings (S3-compatible endpoint)
    service_name = 's3'
    endpoint_url = 'https://kr.object.ncloudstorage.com'
    bucket_name = 'bucket1chaegpt'

    # Credentials are read from the environment instead of being written into
    # the notebook. The keys that used to be embedded here were exposed to
    # anyone with access to the file and should be rotated.
    try:
        access_key = os.environ['NCLOUD_ACCESS_KEY']
        secret_key = os.environ['NCLOUD_SECRET_KEY']
    except KeyError as missing:
        raise RuntimeError(
            f'Missing environment variable {missing}; set NCLOUD_ACCESS_KEY and '
            'NCLOUD_SECRET_KEY before running this cell.'
        ) from missing

    # Tickers to retry
    retry_tic = failed_ls if tickers is None else tickers

    # Download window: yfinance returns data up to the day before `end`.
    end_date = datetime.today()
    # One extra day at the start so the percent change of the first day of the
    # two-year window can be computed.
    start_date = end_date - timedelta(days=365*2 + 1)

    # Create the storage client
    s3 = boto3.client(service_name, endpoint_url=endpoint_url, aws_access_key_id=access_key,
                      aws_secret_access_key=secret_key)

    # Create the "folder" (an empty object whose key ends with '/')
    object_name = 'usa_stock_crawling/'
    s3.put_object(Bucket=bucket_name, Key=object_name)

    # Download the price data and upload one file per ticker
    collect_and_upload_stock_data(retry_tic, start_date, end_date, s3, bucket_name, endpoint_url,
                                  access_key, secret_key)

if __name__ == '__main__':
    main()

[*********************100%%**********************]  1 of 1 completed
Uploaded GOGO_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded HLX_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Uploaded BCML_주가데이터.xlsx to S3
[*********************100%%**********************]  1 of 1 completed
Data for BREZR has all 'Open' values as 0. Skipping upload.
Failed tickers:  ['BREZR']


In [10]:
# Remove the BREZR row: it has only a single day of data in the two-year window
usa = usa.loc[usa['Ticker'].ne('BREZR')]
usa

Unnamed: 0,Ticker,Company,Industrials,Market
0,AA,Alcoa Corporation,Aluminum,NYSE
1,ACNT,Ascent Industries Co.,Steel,NASDAQ
2,ALB,Albemarle Corporation,Specialty Chemicals,NYSE
3,ALTO,"Alto Ingredients, Inc.",Specialty Chemicals,NASDAQ
4,AMR,"Alpha Metallurgical Resources, Inc.",Coking Coal,NYSE
...,...,...,...,...
4878,OBDC,Owl Rock Capital Corporation,Credit Services,NYSE
4879,HEI.A,HEICO Corporation,Aerospace & Defense,NYSE
4880,MOG.A,Moog Inc.,Aerospace & Defense,NYSE
4881,UHAL.B,U-Haul Holding Company,Rental & Leasing Services,NYSE


In [11]:
# Save the current `usa` table under a new file name.
# NOTE(review): to_csv also writes the row index as a leading unnamed column by
# default; confirm that downstream readers expect it, otherwise pass index=False.
usa.to_csv('해외_산업분류(추가제거).csv')