In [None]:
# Ticker symbol passed to yfinance and reused in the plot titles below.
SYMBOL = '^GSPC'

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
import matplotlib.pyplot as plt
import warnings

# Matplotlib renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# on releases that dropped the old alias, plt.style.use('seaborn') raises.
# Use the new name when the installed Matplotlib offers it, otherwise fall
# back to the legacy name so older installations behave exactly as before.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Default figure geometry and resolution for every Matplotlib figure below.
plt.rcParams['figure.figsize'] = [16, 9]
plt.rcParams['figure.dpi'] = 300

# Silence FutureWarning messages for the rest of the notebook.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf

In [None]:
# Download daily price history for SYMBOL.
# auto_adjust is pinned to False because the next cell selects the separate
# 'Adj Close' column; yfinance releases that default to auto_adjust=True
# fold the adjustment into 'Close' and do not return 'Adj Close' at all.
df = yf.download(SYMBOL,
                 start='2020-01-01',
                 end='2022-12-31',
                 auto_adjust=False,
                 progress=False)

In [None]:
# Newer yfinance releases may return two-level (field, ticker) columns even
# for a single symbol; collapse them to the field names so the attribute
# access used later (df.adj_close) yields a Series. Flat columns, as returned
# by older releases, are left untouched.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Keep only the adjusted close and give it a snake_case column name.
df = df.loc[:, ["Adj Close"]].rename(columns={"Adj Close": "adj_close"})

In [None]:
# Add simple (arithmetic) and logarithmic returns of the adjusted close.
df = df.assign(
    simple_rtn=df['adj_close'].pct_change(),
    log_rtn=np.log(df['adj_close'] / df['adj_close'].shift(1)),
)

# The first row has no previous price, so both returns are NaN there;
# drop every row that contains a missing value.
df = df.dropna(how='any')
df.head()

In [None]:
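# Realized volatility is aggregated per calendar day (Grouper freq='D') and annualized by multiplying by sqrt(250) (250 presumably approximates the number of trading days per year).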
def realized_volatility(x):
    """Return the square root of the sum of squared values in ``x``.

    Parameters
    ----------
    x : numpy array, pandas Series or pandas DataFrame
        Values to aggregate (the notebook passes groups of return columns).

    Returns
    -------
    scalar or pandas.Series
        A scalar for arrays and Series. For a DataFrame, a Series holding
        one value per column.
    """
    squared = x ** 2
    if isinstance(squared, pd.DataFrame):
        # np.sum(DataFrame) forwards axis=None to DataFrame.sum, whose meaning
        # differs between pandas versions (per-column vs. a single scalar).
        # Reduce over rows explicitly so the result is always per-column.
        return np.sqrt(squared.sum(axis=0))
    return np.sqrt(np.sum(squared))
# Apply realized_volatility to every daily group of df, then expose the
# aggregated log-return column under the name 'rv'.
df_rv = (
    df.groupby(pd.Grouper(freq='D'))
      .apply(realized_volatility)
      .rename(columns={'log_rtn': 'rv'})
)

# Annualize by scaling with sqrt(250).
df_rv['rv'] = df_rv['rv'] * np.sqrt(250)


# Four stacked panels sharing the x axis: price, simple returns,
# log returns and annualized realized volatility.
fig, ax = plt.subplots(nrows=4, ncols=1, figsize=(11, 9), sharex=True)

# Panel 1: adjusted close price.
df['adj_close'].plot(ax=ax[0])
ax[0].set_title(SYMBOL + " Time Series")
ax[0].set_ylabel('Stock Price($)')

# Panel 2: simple returns.
df['simple_rtn'].plot(ax=ax[1])
ax[1].set_ylabel('Simple Returns(%)')

# Panel 3: log returns (drawn with Matplotlib directly).
ax[2].plot(df['log_rtn'])
ax[2].set_ylabel('Log Returns(%)')

# Panel 4: realized volatility.
ax[3].plot(df_rv['rv'])
ax[3].set_ylabel('Realized_Volatility\n(Annualized)')

# Enlarge the major tick labels on the shared x axis.
ax[3].tick_params(axis='x', which='major', labelsize=12)


In [None]:
# Display the first rows of df.
df.head()

In [None]:
import cufflinks as cf
from plotly.offline import iplot, init_notebook_mode

# Configure cufflinks: 'pearl' theme, offline mode, world_readable flag set.
cf.set_config_file(world_readable=True, theme='pearl', offline=True)

# Initialise plotly's notebook mode (imported from plotly.offline in this cell).
init_notebook_mode()

In [None]:
# Interactive chart of df: subplots laid out on a 3x1 grid with a shared x axis.
df.iplot(
    subplots=True,
    shape=(3, 1),
    shared_xaxes=True,
    title=SYMBOL + " Time Series",
)

In [None]:
# Outlier detection, step 1: rolling mean and standard deviation of the
# simple returns, computed over a window of the last 21 observations.
df_rolling = (
    df[['simple_rtn']]
    .rolling(window=21)
    .agg(['mean', 'std'])
)

# agg() with a list yields two-level columns; drop the outer level so the
# columns are simply 'mean' and 'std'.
df_rolling.columns = df_rolling.columns.droplevel(0)

In [None]:
# Attach the rolling 'mean' and 'std' columns to the returns frame.
df_outliers = df.join(df_rolling)

In [None]:
def identify_outliers(row, n_sigmas = 2):
    """Flag a return lying more than ``n_sigmas`` standard deviations from the mean.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the entries 'simple_rtn', 'mean' and 'std'.
    n_sigmas : int or float, default 2
        Half-width of the acceptance band, in units of ``row['std']``.

    Returns
    -------
    int
        1 when the return falls outside
        ``[mean - n_sigmas * std, mean + n_sigmas * std]``, otherwise 0.
        Rows whose mean/std are NaN yield 0 because comparisons with NaN
        are False.
    """
    x = row['simple_rtn']
    mu = row['mean']
    sigma = row['std']

    # Use the caller-supplied width; it was previously hard-coded to 2,
    # which silently ignored the n_sigmas argument.
    upper = mu + n_sigmas * sigma
    lower = mu - n_sigmas * sigma

    return 1 if (x > upper or x < lower) else 0

In [None]:
# Label every row with 1 (outlier) or 0 using identify_outliers.
df_outliers['outlier'] = df_outliers.apply(identify_outliers, axis=1)

# Keep only the simple returns of the rows flagged as outliers.
outliers = df_outliers.loc[df_outliers['outlier'].eq(1), ['simple_rtn']]

In [None]:
# Simple returns as a line, with the flagged outliers overlaid as red dots.
fig, ax = plt.subplots()
ax.plot(
    df_outliers.index,
    df_outliers['simple_rtn'],
    color='blue',
    label='Normal',
)
ax.scatter(
    outliers.index,
    outliers['simple_rtn'],
    color='red',
    label='Anomaly',
)
ax.set_title(SYMBOL + " Stock Returns")
ax.legend(loc='lower right')

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import seaborn as sns
import scipy.stats as scs
import statsmodels.api as sm
import statsmodels.tsa.api as smt

In [None]:
# Sample mean and standard deviation of the log returns.
mu, sigma = df.log_rtn.mean(), df.log_rtn.std()

# 1000 evenly spaced points spanning the observed range of log returns.
r_range = np.linspace(min(df.log_rtn), max(df.log_rtn), num=1000)

# Normal density with the sample mean and standard deviation, evaluated on that grid.
norm_pdf = scs.norm.pdf(r_range, loc=mu, scale=sigma)

In [None]:
# Two side-by-side panels comparing the log returns with a normal distribution.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))

# Left panel: normalized histogram with the fitted normal pdf drawn on top.
# NOTE(review): sns.distplot may be deprecated in the installed seaborn - confirm.
sns.distplot(df['log_rtn'], kde=False, norm_hist=True, ax=ax[0])
ax[0].set_title('Distribution of ' + SYMBOL + ' Returns', fontsize=16)
ax[0].plot(
    r_range,
    norm_pdf,
    'g',
    lw=2,
    label=f'N({mu:.2f}, {sigma**2:.4f})',
)
ax[0].legend(loc='upper left');

# Right panel: Q-Q plot of the log returns.
qq = sm.qqplot(df['log_rtn'].values, line='s', ax=ax[1])
ax[1].set_title('Q-Q Plot', fontsize=16)
plt.show()

In [None]:
# Jarque-Bera test on the log returns; element 0 is reported below as the
# test statistic and element 1 as the p-value.
jb_test = scs.jarque_bera(df.log_rtn.values)

# Print a plain-text summary of the log-return distribution.
print('---------- Descriptive Statistics ----------')
# First and last calendar dates present in the index.
print('Range of dates:', min(df.index.date), '-', max(df.index.date))
print('Number of observations:', df.shape[0])
print(f'Mean: {df.log_rtn.mean():.4f}')
print(f'Median: {df.log_rtn.median():.4f}')
print(f'Min: {df.log_rtn.min():.4f}')
print(f'Max: {df.log_rtn.max():.4f}')
print(f'Standard Deviation: {df.log_rtn.std():.4f}')
print(f'Skewness: {df.log_rtn.skew():.4f}')
print(f'Kurtosis: {df.log_rtn.kurtosis():.4f}') 
print(f'Jarque-Bera statistic: {jb_test[0]:.2f} with p-value: {jb_test[1]:.2f}')

In [None]:
# Volatility clustering: plot the daily log returns over time.
df['log_rtn'].plot(
    figsize=(10, 6),
    title="Daily " + SYMBOL + " Returns",
)
plt.show()

In [None]:
# Absence of autocorrelation in returns
# N_LAGS and SIGNIFICANCE_LEVEL are passed to plot_acf as `lags` and `alpha`.
N_LAGS = 50
SIGNIFICANCE_LEVEL = 0.05

In [None]:
# Autocorrelation plot of the log returns for N_LAGS lags, with
# SIGNIFICANCE_LEVEL passed as alpha; the call's return value is kept in `acf`.
acf = smt.graphics.plot_acf(df.log_rtn, lags = N_LAGS, alpha = SIGNIFICANCE_LEVEL)

In [None]:
# Rolling standard deviation of the log returns over 252 and 21 observations.
df['moving_std_252'] = df['log_rtn'].rolling(window=252).std()
df['moving_std_21'] = df['log_rtn'].rolling(window=21).std()

In [None]:
# Three stacked panels sharing the x axis: price, log returns and the two
# rolling volatility series.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(18, 15), sharex=True)

# Panel 1: adjusted close price.
df['adj_close'].plot(ax=ax[0])
ax[0].set_title(SYMBOL+' Time Series')
ax[0].set_ylabel('Price ($)')

# Panel 2: log returns.
df['log_rtn'].plot(ax=ax[1])
ax[1].set_ylabel('Log returns (%)')

# Panel 3: 252- and 21-observation rolling standard deviations.
df['moving_std_252'].plot(ax=ax[2], color='r', label='Moving Volatility 252d')
df['moving_std_21'].plot(ax=ax[2], color='g', label='Moving Volatility 21d')
ax[2].set_ylabel('Moving Volatility')
ax[2].set_xlabel('Date')
ax[2].legend()

plt.show()

In [None]:
# Correlation between SOXL and the VIX.
# NOTE: this rebinds `df`, replacing the frame built for SYMBOL above.
# auto_adjust is pinned to False because the 'Adj Close' field is selected
# below; yfinance releases that default to auto_adjust=True do not return it.
df = yf.download(['SOXL', '^VIX'],
                 start='2020-01-01',
                 end='2022-12-31',
                 auto_adjust=False,
                 progress=False)

# With several tickers the columns are two-level (field, ticker); selecting
# the 'Adj Close' field leaves one column per ticker, renamed to lowercase.
df = df['Adj Close'].rename(columns={'SOXL': 'soxl', '^VIX': 'vix'})

In [None]:
# Log returns of SOXL and log changes of the VIX.
df['log_rtn'] = np.log(df['soxl'] / df['soxl'].shift(1))
df['vol_rtn'] = np.log(df['vix'] / df['vix'].shift(1))

# Drop every row that contains a missing value (the first row at minimum).
df = df.dropna(how='any', axis=0)

In [None]:
# Correlation between the two log-return series, shown in the title of a
# scatter plot with a fitted regression line (drawn in red).
corr_coeff = df['log_rtn'].corr(df['vol_rtn'])
ax = sns.regplot(
    data=df,
    x='log_rtn',
    y='vol_rtn',
    line_kws={'color': 'red'},
)
ax.set(
    title=f'SOXL vs. VIX ($\\rho$ = {corr_coeff:.2f})',
    xlabel='SOXL Log Returns',
    ylabel='VIX Log Returns',
)