In [None]:
import yfinance as yf
import os
import pandas as pd
import random
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

In [None]:
# Getting Financial Data from the Financial Data Source
# Each ticker's history is cached as data/<SYMBOL>.csv; tickers that already
# have a file on disk are not downloaded again.
Symbols  = ["MSFT", "AMZN", "GOOG"]

# exist_ok makes the cell safely re-runnable (no check-then-create race).
os.makedirs('data', exist_ok = True)

for symbol in Symbols:
    csv_path = f"data/{symbol}.csv"
    if not os.path.exists(csv_path):
        data = yf.download(symbol, start = "2010-01-01", end = "2018-12-31")
        if data.size > 0:
            data.to_csv(csv_path)
        else:
            print("Not Saving...")

# Remove cached files that are too short (fewer than 10 lines) to be usable.
for symbol in Symbols:
    csv_path = f"data/{symbol}.csv"
    if not os.path.exists(csv_path):
        # Nothing was saved for this ticker (empty download), so there is
        # nothing to validate; opening the path would raise FileNotFoundError.
        continue
    with open(csv_path) as fh:
        line_count = sum(1 for _ in fh)
    if line_count < 10:
        # os.remove works on every platform, unlike shelling out to `rm`.
        os.remove(csv_path)

In [None]:
# Combine every cached CSV under data/ into one long DataFrame, tagging each
# row with the file it came from in a 'name' column.
files = glob('data/*.csv')
frames = []

for f in files:
    print(f)
    df = pd.read_csv(f)

    # Take the file name without directory or extension. os.path handles both
    # '/' and '\\' separators and keeps dots inside the stem (e.g. 'BRK.B').
    symbol = os.path.splitext(os.path.basename(f))[0]
    df['name'] = symbol
    frames.append(df)

# DataFrame.append was removed in pandas 2.0; a single concat is also linear
# instead of re-copying the accumulated frame on every iteration.
# full_df stays None when no CSV files were found, as before.
full_df = pd.concat(frames, ignore_index = True) if frames else None

In [None]:
# Load the all_stocks_5yr table with default parsing and preview its first rows.
stocks_csv = "all_stocks_5yr.csv"
df = pd.read_csv(stocks_csv)
df.head()

In [None]:
# Show the distinct values found in the 'Name' column.
df['Name'].unique()

In [None]:
# Preview the first rows whose 'Name' equals 'IBM'.
df[df['Name'] == 'IBM'].head()

In [None]:
# Keep only the rows named 'IBM' and chart their 'close' column.
is_ibm = df['Name'] == 'IBM'
ibm = df[is_ibm]
ibm['close'].plot()

In [None]:
# Smallest and largest values of the 'date' column, shown as a tuple.
df['date'].min(), df['date'].max()

In [None]:
# Build a DatetimeIndex running from the first to the last value of 'date'
# (pd.date_range steps one calendar day at a time by default).
first_day = df['date'].min()
last_day = df['date'].max()
dates = pd.date_range(start = first_day, end = last_day)
print(dates)

In [None]:
# Changing dataframe for close price
close_price = pd.DataFrame(index = dates)
symbols = df['Name'].unique()

df2 = pd.DataFrame(data = ibm['close'].to_numpy(),
                   index=ibm['date'], columns = ['IBM'])
df2.head()

In [None]:
# Fill in our close prices dataframe
for symbol in symbols:
  df_sym = df[df['Name'] == symbol]
  df_tmp = pd.DataFrame(data = df_sym['close'].to_numpy(),
                   index= df_sym['date'], columns = [symbol])
  close_price = close_price.join(df_tmp) # left-join by defualt

In [None]:
# Preview the first rows of the assembled close_price frame.
close_price.head()

In [None]:
# Peek at the raw file. The leading '!' hands the line to the system shell,
# so this requires a `head` command to be available there.
!head sp500_close.csv

In [None]:
# Load sp500_close.csv using its first column as the index, asking pandas to
# parse that index as dates, then preview the first rows.
close = pd.read_csv(
    'sp500_close.csv',
    parse_dates = True,
    index_col = 0,
)
close.head()

In [None]:
# Plot the 'IBM' column against the frame's index.
close['IBM'].plot()

In [None]:
# How to Deal with Missing Data
close.dropna(axis = 0, how = "all", inplace = True)
close['IBM'].plot()

In [None]:
# Number of missing values in each column.
close.isna().sum()

In [None]:
# find any backwards filled stock data
close.iloc[0,:].isna().sum()
# fill NA values
close.fillna(method = 'ffill', inplace = True)

In [None]:
# Total number of missing values remaining across the whole frame.
close.isna().sum().sum()

In [None]:
# Plot every column in a single 10x10 figure with the legend turned off.
close.plot(legend = False, figsize=(10,10))

In [None]:
# normalized data
close_norm = close / close.iloc[0]
close_norm.plot(legend = False, figsize = (10,10))

In [None]:
# Adjusted Close, Using sp500sub.csv if possible
df = pd.read_csv('sp500sub.csv', index_col = 'Date', parse_dates = True)
df.head()

goog = df[df["Name"] =="GOOG"]
goog[['Close', 'Adj Close']].plot(figsize = (10,10))

aapl = df[df['Name'] == 'AAPL']
aapl[['Close', 'Adj Close']].plot(figsize = (10,10))

In [None]:
# Returns
data = pd.read_csv('all_stocks_5yr.csv', parse_dates = True)
sbux = data[data["Name"]=='SBUX'].copy()
sbux.head()
sbux['close'].plot()

# Calulate Return
sbux['prev_close'] = sbux['close'].shift(1)
sbux.head()

sbux['return'] = sbux["close"]/sbux['prev_close']-1
sbux.head()

sbux['return2'] = sbux['close'].pct_change(1)
sbux.head()

sbux["return"].hist(bins = 100)

sbux['log_return'] =np.log(sbux['return']+1)
sbux.head()

sbux['log_return'].hist(bins = 1000)

sbux['log_return'].mean(), sbux['log_return'].std()

In [None]:
# QQ-Plot
x_list = np.linspace(sbux['return'].min(),sbux['return'].max(),100)
y_list = norm.pdf(x_list, loc = sbux['return'].mean(),
                  scale = sbux['return'].std())

plt.plot(x_list, y_list)
sbux['return'].hist(bins = 100, density = True)

In [None]:
# T-Distribution

In [None]:
# Confidence Intervals

In [None]:
# Statistical Testing

In [None]:
# Covariance and Correlation

In [None]:
# Alpha and Beta

In [None]:
# Mixture of Gaussians

In [None]:
# Price Simulation
