## (함수) csv 파일을 데이터프레임으로 불러오는 함수

In [1]:
import pandas as pd 
def openDataframe(fileName):
    return pd.read_csv(fileName)

In [2]:
import pandas as pd
sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
sp500 = sp500[['Symbol', 'Security']]
sp500 = sp500.rename(columns={'Symbol': 'Ticker', 'Security': 'Company'})
sp500

Unnamed: 0,Ticker,Company
0,MMM,3M
1,AOS,A. O. Smith
2,ABT,Abbott
3,ABBV,AbbVie
4,ACN,Accenture
...,...,...
498,YUM,Yum! Brands
499,ZBRA,Zebra Technologies
500,ZBH,Zimmer Biomet
501,ZION,Zions Bancorporation


# (함수) 기능 설명: 입력->출력 

## (함수) 개별 기업의 데이터셋을 불러오는 함수: 기업 티커심볼->데이터프레임

In [95]:
def openCorpDataframe(ticker):
    folder_path = './SP500-weekly-price/'
    file_name = f'weekly_{ticker}.csv'
    df = pd.read_csv(folder_path + file_name)
    df.reset_index(drop=True, inplace=True)

    return df

## (함수) 주가 정보의 각 날짜에 해당하는 S&P index를 추가하는 함수: 데이터프레임->데이터프레임

In [96]:
import numpy as np

def getDataframe_withSPX(df_ticker):
    df = df_ticker.copy()
    df_SP = pd.read_csv('dataset-SP500index-daily.csv')
    
    bucket_SPX = []
    for i in range(len(df)):
        if df_SP['SP500'][df_SP['DATE']==df['timestamp'].loc[i]].empty:
            bucket_SPX.append(np.NaN)        
        else:
            bucket_SPX.append(float(df_SP['SP500'][df_SP['DATE']==df['timestamp'].loc[i]]))
    df['SPX'] = bucket_SPX
    df.dropna(inplace=True)

    return df

## (함수) 주가 정보에 가격수익률과 인덱스 수익률을 추가하는 함수: 데이터프레임->데이터프레임

In [160]:
def getDataframe_withReturns(df_ticker):
    df = df_ticker.copy()
    df = df.iloc[::-1]
    df.reset_index(drop=True, inplace=True)
    df['return_price'] = round(df['close'].pct_change(), 7)
    df['return_SPX'] = round(df['SPX'].pct_change(), 7)
    df.dropna(inplace=True)
    df = df[df['return_price'] > -0.9] # 예외처리(주식분할 후 수정주가 미반영에 의한 하락 삭제)
    df = df[df['timestamp'] > '2018-01-01']
    df.reset_index(drop=True, inplace=True)

    return df

## (함수) 주가 정보에 정규화된 가격과 정규화된 인덱스를 추가하는 함수: 데이터프레임->데이터프레임

In [183]:
def getDataframe_withNormed(df_ticker):
    df = df_ticker.copy()
    df.reset_index(drop=True, inplace=True)
    df['normed_price'] = df['close']/df.loc[0]['close']
    df['normed_SPX'] = df['SPX']/df.loc[0]['SPX']

    return df

## (함수) : 주가 정보 데이터셋으로부터 주가 캔들차트, 거래량 막대차트를 출력하는 함수: 데이터프레임->플롯

In [368]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime

def showCandleStickChart(df_ticker, ticker='company'):
    # 데이터셋
    df = df_ticker.copy()
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

    # 서브플롯 생성
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05)

    # 캔들스틱 차트 생성
    fig.add_trace(go.Candlestick(x=df.index,
                                 open=df['open'],
                                 high=df['high'],
                                 low=df['low'],
                                 close=df['close'],
                                 name='Price'), row=1, col=1)

    # 거래량 바차트 생성
    fig.add_trace(go.Bar(x=df.index, y=df['volume'], name='Volume'), row=2, col=1)

    fig.update_layout(
        title=f'{ticker}: Stock Price Chart with Volume (week)',
        yaxis=dict(title='Price', domain=[0.4, 1]),
        xaxis=dict(rangeslider=dict(visible=False)),
        yaxis2=dict(title='Volume', domain=[0, 0.35]),
        xaxis2=dict(rangeslider=dict(visible=True)),  # 스크롤 기능 활성화
        height=600
    )

    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    filename = f"{ticker}-price-vol_{current_time}.png"
    fig.write_image(filename)   
    fig.show()
    
    

## (함수) : 주가 정보 데이터셋으로부터 주가와 인덱스 변화를 비교 시각화하는 함수: 데이터프레임->플롯

In [369]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime

def showPriceSPXPlot(df_ticker, ticker='company'):
    # 주어진 데이터프레임 생성
    df = df_ticker

    # 서브플롯 생성
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Trace 생성
    trace_price = go.Scatter(x=df['timestamp'], y=df['normed_price'], name='normed_price')
    trace_spx = go.Scatter(x=df['timestamp'], y=df['normed_SPX'], name='normed_SPX')

    # 데이터 추가
    fig.add_trace(trace_price, secondary_y=False)
    fig.add_trace(trace_spx, secondary_y=False)

    # 축 설정
    fig.update_xaxes(title_text='timestamp')
    fig.update_yaxes(title_text='Normalized value', secondary_y=False)

    # 레이아웃 설정
    fig.update_layout(
        title=f'{ticker}: Price vs SPX',
        legend=dict(
            x=0.01,
            y=0.99,
            traceorder='normal',
            font=dict(family='sans-serif', size=12, color='black')
        )
    )
    
    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    filename = f"{ticker}-vsSPX_{current_time}.png"
    fig.write_image(filename)   

    # 그래프 출력
    fig.show()


## (함수) 데이터로부터 CAPM beta를 analytic하게 구하기: 데이터프레임->계산결과

### CAPM 계산에 필요한 risk free asset의 수익률은 미 국채1년을 기본 설정으로 함 (수익률 4.79%, 변수 입력으로 설정 변경 가능)

In [347]:
def getCAPMvalues(df_ticker, ticker='company', r_f=4.79/100):
    covariance = df_ticker['return_price'].cov(df_ticker['return_SPX'])
    variance = df_ticker['return_SPX'].var()
    expectation = df_ticker['return_SPX'].mean()
    beta = covariance_AMZN/varianceSPX_AMZN
    r_M = expectation
    r = r_f + beta*(r_M - r_f)
    return {'ticker': ticker, 'beta': beta, 'r_CAPM' : r, 'cov': covariance, 'r_M': r_M, 'r_f': r_f}

## (함수) 데이터로부터 CAPM beta를 선형회귀로 추정하기: 데이터프레임->플롯, 계산결과

In [370]:
import plotly.graph_objects as go
import statsmodels.api as sm
from datetime import datetime

def showLinREgCAPMPlot(df_ticker, ticker='company', r_f = 0.0479):
    # 데이터프레임 설정
    df = df_ticker

    # 회귀 분석
    y = df['return_price']-r_f
    X = df['return_SPX']-r_f
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()

    # 산점도 그리기
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df['return_SPX'],
        y=df['return_price'],
        mode='markers',
        name='data'
    ))

    # 회귀선 그리기
    fig.add_trace(go.Scatter(
        x=df['return_SPX'],
        y=results.fittedvalues,
        mode='lines',
        line=dict(color='red'),
        name='lin.reg. line'
    ))

    # 축 레이블 설정
    fig.update_layout(
        title=f'{ticker}: CAPM by Linear Regression',
        xaxis_title='Return_SPX',
        yaxis_title='Return_Price'
    )

    # 회귀식 텍스트 추가
    fig.add_annotation(
        x=0.01,
        y=0.03,
        text=f"r_Price - r_f = {results.params['return_SPX']:.4f} * (r_SPX - r_f) + {results.params['const']:.4f}",
        showarrow=False,
        font=dict(size=12),
        align='left',
        xref='paper',
        yref='paper',
        xanchor='left',
        yanchor='middle'
    )

    # 그래프 출력
    fig.show()
    
    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    filename = f"{ticker}-linreg_{current_time}.png"
    fig.write_image(filename)   
    
    print(f'{ticker}: CAPM beta = {results.params["return_SPX"]:.4f}')


# (함수) 데이터셋 변환, 계산 등 모든 기능을 한번에 실행해주는 함수: 티커심볼->데이터프레임과 계산결과

In [279]:
def operateCAPM(ticker):
    df_ticker = openCorpDataframe(ticker)
    df_ticker = getDataframe_withSPX(df_ticker)
    df_ticker = getDataframe_withReturns(df_ticker)
    df_ticker = getDataframe_withNormed(df_ticker)
    dict_ticker = getCAPMvalues(df_ticker, ticker)
    return {'df': df_ticker, 'capm': dict_ticker}

# (실행 예시) CAPM: V(비자)

In [343]:
# 단 한줄로 CAPM 분석 실행
capm_V = operateCAPM('V')

In [344]:
# 위 출력에 ['df']로 데이터프레임 조회 가능
df_V = capm_V['df']
df_V

Unnamed: 0,timestamp,open,high,low,close,volume,SPX,return_price,return_SPX,normed_price,normed_SPX
0,2018-01-05,114.57,119.000,113.95,118.86,28091199,2743.15,0.042449,0.026010,1.000000,1.000000
1,2018-01-12,118.61,120.480,117.46,120.09,31608196,2786.24,0.010348,0.015708,1.010348,1.015708
2,2018-01-19,121.18,123.650,119.91,122.70,32530633,2810.30,0.021734,0.008635,1.032307,1.024479
3,2018-01-26,122.87,126.380,122.40,126.32,31242833,2872.87,0.029503,0.022264,1.062763,1.047289
4,2018-02-02,126.74,126.880,120.70,120.91,45770049,2762.13,-0.042828,-0.038547,1.017247,1.006919
...,...,...,...,...,...,...,...,...,...,...,...
273,2023-03-31,222.59,225.840,218.65,225.46,33005510,4109.31,0.019996,0.034833,1.896853,1.498026
274,2023-04-06,225.23,230.050,224.12,225.99,20775193,4105.02,0.002351,-0.001044,1.901312,1.496462
275,2023-04-14,225.56,234.990,224.32,234.02,31235537,4137.64,0.035533,0.007946,1.968871,1.508354
276,2023-04-21,234.02,235.568,230.94,234.05,24375871,4133.52,0.000128,-0.000996,1.969123,1.506852


In [346]:
# 위 출력에 ['camp']으로 analytic CAMP 정보 조회 가능
dict_V = capm_V['capm']
dict_V

{'ticker': 'V',
 'beta': 0.9834667722932491,
 'r_CAPM': 0.002728302424427509,
 'cov': 0.0008042703452127653,
 'r_M': 0.001968913309352517,
 'r_f': 0.0479}

In [365]:
showCandleStickChart(df_V, 'V')

<Figure size 640x480 with 0 Axes>

In [366]:
showPriceSPXPlot(df_V, 'V')

<Figure size 640x480 with 0 Axes>

In [367]:
showLinREgCAPMPlot(df_V, 'V')

V: CAPM beta = 1.0166


<Figure size 640x480 with 0 Axes>

# (실행 예시) S&P500 기업들 목록에서 무작위 5개 기업을 추출해 순식간에 CAPM 결과를 출력하고 다양한 차트를 그려내기

## 1. S&P500 기업 중 5개 기업을 무작위 추출

In [355]:
import random 
tickers = random.sample(list(sp500['Ticker']), 5)
tickers

['PLD', 'DISH', 'PTC', 'PM', 'ZION']

## 2. 반복문으로 자동화 함수 operateCAPM 반복실행, 결과 저장

In [350]:
result_bucket = []
for ticker in tickers:
    capm_result = operateCAPM(ticker)
    result_bucket.append(capm_result)

## 3. 첫번째 기업에 대해 데이터프레임과 계산 결과를 조회

In [353]:
result_bucket[0]['df']

Unnamed: 0,timestamp,open,high,low,close,volume,SPX,return_price,return_SPX,normed_price,normed_SPX
0,2018-01-05,49.66,50.7200,47.870,50.41,2049474,2743.15,0.017972,0.026010,1.000000,1.000000
1,2018-01-12,50.40,50.4000,46.590,49.27,2348624,2786.24,-0.022615,0.015708,0.977385,1.015708
2,2018-01-19,49.60,51.7300,48.760,51.64,1543937,2810.30,0.048102,0.008635,1.024400,1.024479
3,2018-01-26,51.50,52.6500,50.150,51.14,1571069,2872.87,-0.009682,0.022264,1.014481,1.047289
4,2018-02-02,51.05,51.9000,48.270,49.24,1768476,2762.13,-0.037153,-0.038547,0.976790,1.006919
...,...,...,...,...,...,...,...,...,...,...,...
273,2023-03-31,113.09,115.9199,103.110,108.01,6546749,4109.31,-0.028862,0.034833,2.142630,1.498026
274,2023-04-06,109.18,110.0700,97.150,99.80,5598556,4105.02,-0.076011,-0.001044,1.979766,1.496462
275,2023-04-14,99.44,104.5700,98.795,101.73,4916296,4137.64,0.019339,0.007946,2.018052,1.508354
276,2023-04-21,102.04,105.0000,101.010,102.78,3192196,4133.52,0.010321,-0.000996,2.038881,1.506852


In [354]:
result_bucket[0]['capm']

{'ticker': 'GNRC',
 'beta': 0.9834667722932491,
 'r_CAPM': 0.0026603827233198235,
 'cov': 0.001075440321211676,
 'r_M': 0.0018998517985611505,
 'r_f': 0.0479}

## 4. 여러 기업들에 대해 다양한 시각화 플롯들을 한꺼번에 출력해보기

In [371]:
for i in range(len(tickers)):
    showCandleStickChart(result_bucket[i]['df'], tickers[i])
    showPriceSPXPlot(result_bucket[i]['df'], tickers[i])    
    showLinREgCAPMPlot(result_bucket[i]['df'], tickers[i])        

PLD: CAPM beta = 1.3568


DISH: CAPM beta = 0.8090


PTC: CAPM beta = 0.8360


PM: CAPM beta = 0.8707


ZION: CAPM beta = 0.9913
