## Import

In [1]:
import numpy as np
import pandas as pd
import FinanceDataReader as fdr
import pandas_datareader.data as pdr
import yfinance as yf
import requests
from bs4 import BeautifulSoup
import pywt
import talib
import seaborn as sns
import matplotlib.pyplot as plt
plt.rc('font', family='Malgun Gothic')
plt.rc('axes', unicode_minus=False)

# ignore warnings
import warnings ; warnings.filterwarnings('ignore')

## Read Data

In [2]:
# 분석 시점은 2013년 10월부터 2023년 6월까지로 설정한다.
start, end = '2013-10-01', '2023-07-02'

- #### 주식 데이터
  Microsoft, Amazon, Intel, McDonald, Apple 주식을 수집한다.

In [3]:
msft = fdr.DataReader('MSFT', start, end)
amzn = fdr.DataReader('AMZN', start, end)
intc = fdr.DataReader('INTC', start, end)
mcd = fdr.DataReader('MCD', start, end)    
aapl = fdr.DataReader('AAPL', start, end)
display(msft)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-01,33.349998,33.610001,33.299999,33.580002,28.120390,36718700
2013-10-02,33.360001,34.029999,33.290001,33.919998,28.405107,46946800
2013-10-03,33.880001,34.000000,33.419998,33.860001,28.354864,38703800
2013-10-04,33.689999,33.990002,33.619999,33.880001,28.371611,33008100
2013-10-07,33.599998,33.709999,33.200001,33.299999,27.885908,35069300
...,...,...,...,...,...,...
2023-06-26,333.720001,336.109985,328.489990,328.600006,327.905762,21520600
2023-06-27,331.859985,336.149994,329.299988,334.570007,333.863159,24354100
2023-06-28,334.660004,337.980011,333.809998,335.850006,335.140442,20259500
2023-06-29,334.709991,336.109985,332.619995,335.049988,334.342102,16997000


- #### 경제지표
GDP(국내총생산), CPI(소비자물가지수), PPI(생산자물가지수), 실업률, 3년금리, 환율,<br>
소매 판매 지수, 무역 수지, S&P500의 변동지수, 나스닥, 원유가격, S&P500, SOX(반도체지수)
   - 경기선행지수는 결측치로 제외한다.

In [19]:
econ = ['GDP', 'CPIAUCNS', 'PPIACO', 'UNRATE', 'GS3', 'DEXUSEU', 
        'RSAFS', 'NETEXC', 'VIXCLS', 'NASDAQCOM', 'DCOILWTICO']

economics = pd.DataFrame({'DATE': pd.date_range('2013-10-01','2023-07-01')})
for code in econ:
    economics = economics.merge(pdr.DataReader(code, 'fred', start, end), on='DATE', how='left')

In [20]:
# S&P500, SOX(반도체지수)는 별도로 수집한다.
economics = economics.merge(pd.concat([fdr.DataReader('S&P500', start, end)['Adj Close'], 
                                       yf.download("^SOX", start, end)['Adj Close']], axis=1).reset_index(),
                            left_on='DATE', right_on='Date', how='left')
economics.set_index('DATE', inplace=True)
economics.drop(['Date'], axis=1, inplace=True)
economics.columns=['GDP','CPI','PPI','UNRATE', 'IR', 'ER', 'RS', 'TB', 'VIX', 'NASDAQ', 'OP', 'SP500', 'SOX']
economics = economics.fillna(method='backfill')
display(economics)

[*********************100%%**********************]  1 of 1 completed


  economics = economics.fillna(method='backfill')


Unnamed: 0_level_0,GDP,CPI,PPI,UNRATE,IR,ER,RS,TB,VIX,NASDAQ,OP,SP500,SOX
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-10-01,17192.019,233.546,202.500,7.2,0.63,1.3534,419288.0,-275.552,15.54,3817.98,102.09,1695.000000,495.769989
2013-10-02,17197.738,233.069,201.200,6.9,0.58,1.3592,420468.0,-329.736,16.60,3815.02,104.15,1693.869995,495.100006
2013-10-03,17197.738,233.069,201.200,6.9,0.58,1.3642,420468.0,-329.736,17.67,3774.34,103.29,1678.660034,493.799988
2013-10-04,17197.738,233.069,201.200,6.9,0.58,1.3582,420468.0,-329.736,16.74,3807.75,103.83,1690.500000,497.720001
2013-10-05,17197.738,233.069,201.200,6.9,0.58,1.3569,420468.0,-329.736,19.41,3770.38,103.07,1676.119995,495.089996
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-27,27623.543,305.691,253.889,3.5,4.47,1.0961,694415.0,-937.655,13.74,13555.67,67.68,4378.410156,3642.409912
2023-06-28,27623.543,305.691,253.889,3.5,4.47,1.0904,694415.0,-937.655,13.43,13591.75,69.54,4376.859863,3609.479980
2023-06-29,27623.543,305.691,253.889,3.5,4.47,1.0881,694415.0,-937.655,13.54,13591.33,69.85,4396.439941,3614.179932
2023-06-30,27623.543,305.691,253.889,3.5,4.47,1.0920,694415.0,-937.655,13.59,13787.92,70.66,4450.379883,3673.060059


- #### 재무제표
  stockanalysis 사이트를 통해 10년치 재무제표를 크롤링한다.

In [30]:
def FS(ticker):
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
    url = f"https://stockanalysis.com/stocks/{ticker}/financials/?p=quarterly"
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    element_tables = soup.select("table[data-test='financials']")
    # 매출성장률, EPS 성장률
    df = pd.read_html(str(element_tables))[0].set_index('Quarter Ended')
    df = df.loc[['Revenue Growth (YoY)', 'EPS Growth']]

    url = f"https://stockanalysis.com/stocks/{ticker}/financials/ratios/?p=quarterly"
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    element_tables = soup.select("table[class='contain']")
    df2 = pd.read_html(str(element_tables))[0].set_index('Quarter Ended')
    df2 = df2.loc[['PE Ratio', 'PB Ratio','Debt / Equity Ratio', 'Quick Ratio', 'Dividend Yield']]
    df = pd.concat([df, df2])
    return df

In [34]:
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
url = "https://stockanalysis.com/stocks/aapl/financials/ratios/?p=quarterly"
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')

In [46]:
soup.select('id'='main')
# //*[@id="main"]

SyntaxError: expression cannot contain assignment, perhaps you meant "=="? (92998088.py, line 1)

In [11]:

amzn_fs = FS('amzn')
intc_fs = FS('intc')
mcd_fs = FS('mcd')
aapl_fs = FS('aapl')
display(msft_fs)

  return pd.read_html(str(element_tables))[0]
  return pd.read_html(str(element_tables))[0]
  return pd.read_html(str(element_tables))[0]
  return pd.read_html(str(element_tables))[0]
  return pd.read_html(str(element_tables))[0]


Unnamed: 0,Quarter Ended,2023-09-30,2023-06-30,2023-03-31,2022-12-31,2022-09-30,2022-06-30,2022-03-31,2021-12-31,2021-09-30,...,2015-12-31,2015-09-30,2015-06-30,2015-03-31,2014-12-31,2014-09-30,2014-06-30,2014-03-31,2013-12-31,+84 Quarters
0,Revenue,56517,56189,52857,52747,50122,51865,49360,51728,45317,...,23796,20379,22180,21729,26470,23201,23382,20403,24519,Upgrade
1,Revenue Growth (YoY),12.76%,8.34%,7.08%,1.97%,10.60%,12.38%,18.35%,20.09%,21.97%,...,-10.10%,-12.16%,-5.14%,6.50%,7.96%,25.21%,17.52%,-0.42%,14.28%,Upgrade
2,Cost of Revenue,16302,16795,16128,17488,15452,16429,15615,16960,13646,...,9872,7207,7468,7161,10136,8273,7633,5978,8322,Upgrade
3,Gross Profit,40215,39394,36729,35259,34670,35436,33745,34768,31671,...,13924,13172,14712,14568,16334,14928,15749,14425,16197,Upgrade
4,"Selling, General & Admin",6661,8401,7393,8016,6524,8053,7075,6763,5834,...,4998,4417,5233,4800,5412,4879,6017,4708,5480,Upgrade
5,Research & Development,6659,6739,6984,6844,6628,6849,6306,5758,5599,...,2900,2962,3094,2984,2903,3065,3123,2743,2748,Upgrade
6,Other Operating Expenses,0,0,0,0,0,0,0,0,0,...,0,0,8438,190,243,1140,127,0,0,Upgrade
7,Operating Expenses,13320,15140,14377,14860,13152,14902,13381,12521,11433,...,7898,7379,16765,7974,8558,9084,9267,7451,8228,Upgrade
8,Operating Income,26895,24254,22352,20399,21518,20534,20364,22247,20238,...,6026,5793,-2053,6594,7776,5844,6482,6974,7969,Upgrade
9,Interest Expense / Income,525,482,496,490,500,496,503,525,539,...,309,249,247,211,162,161,169,175,135,Upgrade


## Feature Generation

- #### 주가 데이터

In [28]:
def StockFeatures(DATA):
    DATA['next_rtn'] = DATA['Close']/DATA['Open'] - 1
    DATA['log_return'] = np.log(1+DATA['Adj Close'].pct_change())
    
    # 이동평균
    DATA['MA5'] = talib.SMA(DATA['Close'], timeperiod=5)
    DATA['RASD5'] = talib.SMA(talib.STDDEV(DATA['Close'], timeperiod=5, nbdev=1), timeperiod=5)
    
    return DATA

In [30]:
import numpy as np

In [31]:
StockFeatures(msft)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,next_rtn,log_return,MA5,RASD5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2013-10-01,33.349998,33.610001,33.299999,33.580002,28.120386,36718700,0.006897,,,
2013-10-02,33.360001,34.029999,33.290001,33.919998,28.405090,46946800,0.016786,0.010074,,
2013-10-03,33.880001,34.000000,33.419998,33.860001,28.354856,38703800,-0.000590,-0.001770,,
2013-10-04,33.689999,33.990002,33.619999,33.880001,28.371599,33008100,0.005640,0.000590,,
2013-10-07,33.599998,33.709999,33.200001,33.299999,27.885904,35069300,-0.008929,-0.017267,33.708000,
...,...,...,...,...,...,...,...,...,...,...
2023-06-26,333.720001,336.109985,328.489990,328.600006,327.905762,21520600,-0.015342,-0.019349,334.987994,4.316865
2023-06-27,331.859985,336.149994,329.299988,334.570007,333.863159,24354100,0.008166,0.018005,334.291998,4.069054
2023-06-28,334.660004,337.980011,333.809998,335.850006,335.140442,20259500,0.003556,0.003818,334.750000,3.789904
2023-06-29,334.709991,336.109985,332.619995,335.049988,334.342102,16997000,0.001016,-0.002385,333.817999,3.355740


- 재무지표

In [22]:
# 매출성장률
msft_fs.set_index('Quarter Ended', inplace=True)

In [23]:
msft_fs

Unnamed: 0_level_0,2023-09-30,2023-06-30,2023-03-31,2022-12-31,2022-09-30,2022-06-30,2022-03-31,2021-12-31,2021-09-30,2021-06-30,...,2015-12-31,2015-09-30,2015-06-30,2015-03-31,2014-12-31,2014-09-30,2014-06-30,2014-03-31,2013-12-31,+84 Quarters
Quarter Ended,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Revenue,56517,56189,52857,52747,50122,51865,49360,51728,45317,46152,...,23796,20379,22180,21729,26470,23201,23382,20403,24519,Upgrade
Revenue Growth (YoY),12.76%,8.34%,7.08%,1.97%,10.60%,12.38%,18.35%,20.09%,21.97%,21.35%,...,-10.10%,-12.16%,-5.14%,6.50%,7.96%,25.21%,17.52%,-0.42%,14.28%,Upgrade
Cost of Revenue,16302,16795,16128,17488,15452,16429,15615,16960,13646,13991,...,9872,7207,7468,7161,10136,8273,7633,5978,8322,Upgrade
Gross Profit,40215,39394,36729,35259,34670,35436,33745,34768,31671,32161,...,13924,13172,14712,14568,16334,14928,15749,14425,16197,Upgrade
"Selling, General & Admin",6661,8401,7393,8016,6524,8053,7075,6763,5834,7379,...,4998,4417,5233,4800,5412,4879,6017,4708,5480,Upgrade
Research & Development,6659,6739,6984,6844,6628,6849,6306,5758,5599,5687,...,2900,2962,3094,2984,2903,3065,3123,2743,2748,Upgrade
Other Operating Expenses,0,0,0,0,0,0,0,0,0,0,...,0,0,8438,190,243,1140,127,0,0,Upgrade
Operating Expenses,13320,15140,14377,14860,13152,14902,13381,12521,11433,13066,...,7898,7379,16765,7974,8558,9084,9267,7451,8228,Upgrade
Operating Income,26895,24254,22352,20399,21518,20534,20364,22247,20238,19095,...,6026,5793,-2053,6594,7776,5844,6482,6974,7969,Upgrade
Interest Expense / Income,525,482,496,490,500,496,503,525,539,553,...,309,249,247,211,162,161,169,175,135,Upgrade


In [None]:
msft_fs.loc[['Revenue Growth (YoY)', 'EPS Growth'

In [24]:
msft_fs.index

Index(['Revenue', 'Revenue Growth (YoY)', 'Cost of Revenue', 'Gross Profit',
       'Selling, General & Admin', 'Research & Development',
       'Other Operating Expenses', 'Operating Expenses', 'Operating Income',
       'Interest Expense / Income', 'Other Expense / Income', 'Pretax Income',
       'Income Tax', 'Net Income', 'Net Income Growth',
       'Shares Outstanding (Basic)', 'Shares Outstanding (Diluted)',
       'Shares Change', 'EPS (Basic)', 'EPS (Diluted)', 'EPS Growth',
       'Free Cash Flow', 'Free Cash Flow Per Share', 'Dividend Per Share',
       'Dividend Growth', 'Gross Margin', 'Operating Margin', 'Profit Margin',
       'Free Cash Flow Margin', 'Effective Tax Rate', 'EBITDA',
       'EBITDA Margin', 'Depreciation & Amortization', 'EBIT', 'EBIT Margin'],
      dtype='object', name='Quarter Ended')

## Wavelet Transformation

In [None]:
cwtmatr, freqs = pywt.cwt(, np.arange(-1, 1, 0.0001), "morl")

## Auto-encoder