In [90]:
import numpy as np
import pandas as pd
from pandas_datareader import data as web

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline

import datetime as dt
import mplfinance as mpf

import os
from pathlib import Path

# Name of the folder (relative to the current working directory) that holds
# the cached per-ticker CSV files.
directory = "data"
dataPath = Path().absolute()/directory

# Create the folder if it is missing. `exist_ok=True` makes re-running the
# cell harmless, while real problems (no write permission, a regular file
# already named "data") still raise instead of being silently swallowed.
dataPath.mkdir(parents=True, exist_ok=True)

# Functions

## Utils

In [68]:
def get_ticker_path(ticker):
    """Return the path of the CSV file for ``ticker`` inside ``dataPath``.

    The file name is the ticker symbol with a ``.csv`` suffix appended.
    """
    csv_name = ticker + ".csv"
    return dataPath / csv_name

def save_to_csv_from_yahoo(ticker, sY, sM, sD, eY, eM, eD):
    """Download price data for ``ticker`` and store it as a CSV file.

    Parameters
    ----------
    ticker : str
        Symbol passed to the 'yahoo' reader of ``pandas_datareader``.
    sY, sM, sD : int
        Year, month and day of the start date.
    eY, eM, eD : int
        Year, month and day of the end date.

    Returns
    -------
    The frame returned by ``web.DataReader``; the same frame is also
    written to the path given by ``get_ticker_path(ticker)``.
    """
    period_start = dt.datetime(sY, sM, sD)
    period_end = dt.datetime(eY, eM, eD)

    prices = web.DataReader(ticker, 'yahoo', period_start, period_end)
    prices.to_csv(get_ticker_path(ticker))

    return prices

def get_multiple_stocks(sY,sM,sD,eY,eM,eD,*tickers):
    """Download and save a CSV for every ticker over the same date range.

    The first six arguments are the start year/month/day and the end
    year/month/day; every remaining positional argument is a ticker symbol
    forwarded to ``save_to_csv_from_yahoo``.
    """
    date_range = (sY, sM, sD, eY, eM, eD)
    for symbol in tickers:
        save_to_csv_from_yahoo(symbol, *date_range)

def get_df_from_csv(ticker):
    """Load the saved CSV for ``ticker`` into a DataFrame.

    Returns the frame read by ``pd.read_csv``, or ``None`` (after printing
    a message) when the CSV file does not exist.
    """
    try:
        frame = pd.read_csv(get_ticker_path(ticker))
    except FileNotFoundError:
        print("File doesn't exists")
        return None

    return frame
    
def merge_df_by_column(col_name,sY,sM,sD,eY,eM,eD,*tickers):
    """Build a frame with one column per ticker, taken from ``col_name``.

    Each ticker is downloaded from the 'yahoo' reader for the given start
    (``sY``/``sM``/``sD``) and end (``eY``/``eM``/``eD``) dates, and its
    ``col_name`` column is stored under the ticker symbol.

    Columns are assigned one at a time into the same frame, so pandas
    aligns every later ticker on the index already present in the frame.
    """
    end = f"{eY}-{eM}-{eD}"
    start = f"{sY}-{sM}-{sD}"

    merged = pd.DataFrame()
    for symbol in tickers:
        history = web.DataReader(symbol, 'yahoo', start, end)
        merged[symbol] = history[col_name]

    return merged
    

## Metrics Functions

In [76]:
def add_daily_return_to_df(df, ticker):
    """Add a 'Daily return' column to ``df`` and re-save the ticker's CSV.

    The daily return of a row is its 'Adj Close' divided by the previous
    row's 'Adj Close', minus one; the first row therefore has no value.
    ``df`` is modified in place and also returned.
    """
    adj_close = df['Adj Close']
    previous_close = adj_close.shift(1)
    df['Daily return'] = (adj_close / previous_close) - 1

    # NOTE(review): to_csv also writes the index; for a frame that carries a
    # plain integer index this shows up as an extra unnamed column on reload.
    df.to_csv(get_ticker_path(ticker))

    return df

def get_return_over_period(df, sY, sM, sD, eY, eM, eD):
    """Return the summed daily return of ``df`` over an inclusive date range.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Date' column (anything ``pd.to_datetime`` accepts)
        and a 'Daily return' column.
    sY, sM, sD :
        Year, month and day of the first date to include.
    eY, eM, eD :
        Year, month and day of the last date to include.

    Returns
    -------
    float
        Sum of the non-missing daily returns whose date falls in the range
        (``0.0`` when no row qualifies). This is the "number of days times
        mean daily return" figure, counting only the days that actually
        have a return.
    """
    start = f"{sY}-{sM}-{sD}"
    end = f"{eY}-{eM}-{eD}"

    # Parse into a local Series so the caller's 'Date' column is left as is.
    dates = pd.to_datetime(df['Date'])

    in_period = (dates >= start) & (dates <= end)
    period_returns = df.loc[in_period, 'Daily return']

    # Multiplying mean() (which skips NaN) by the raw row count would inflate
    # the result whenever the window holds a NaN row, e.g. the first row of
    # the data set. Summing the valid values keeps count and mean consistent.
    return period_returns.sum()

def get_stock_mean_sd(stock_df, ticker):
    """Return ``(mean, standard deviation)`` of column ``ticker``."""
    values = stock_df[ticker]
    average = values.mean()
    spread = values.std()
    return average, spread

def get_multi_stock_mean_sd(stock_df):
    """Print mean, standard deviation and their ratio for every column.

    For each column of ``stock_df`` two lines are printed: one with the
    mean and standard deviation, one with the coefficient of variation
    (standard deviation divided by mean).
    """
    summary_line = "stock: {:4} Mean: {:7.2f} Standard Deviation {:2.2f}"
    variation_line = "Coefficient of Variation: {}\n"

    for symbol in stock_df:
        average, spread = get_stock_mean_sd(stock_df, symbol)
        variation = spread / average
        print(summary_line.format(symbol, average, spread))
        print(variation_line.format(variation))

## Plotting Functions

In [77]:
def mplfinance_plot(ticker, chart_type, sY, sM, sD, eY, eM, eD):
    """Draw several mplfinance charts for the saved data of ``ticker``.

    The ticker's CSV is loaded with ``get_df_from_csv``; nothing is drawn
    when that returns ``None``. The rows between the start date
    (``sY``/``sM``/``sD``) and end date (``eY``/``eM``/``eD``) are plotted
    as a candle chart, a line chart and an OHLC chart with a 4-period
    moving average, followed by one figure of type ``chart_type`` with
    3/5/7-period moving averages and a volume panel underneath.
    """
    start = f"{sY}-{sM}-{sD}"
    end = f"{eY}-{eM}-{eD}"

    df = get_df_from_csv(ticker)
    if df is None:
        return

    # Index by date so the frame can be sliced with date strings.
    df.index = pd.DatetimeIndex(df['Date'])
    df_sub = df.loc[start:end]

    mpf.plot(df_sub, type='candle')
    mpf.plot(df_sub, type='line')
    mpf.plot(df_sub, type='ohlc', mav=4)

    s = mpf.make_mpf_style(base_mpf_style='charles', rc={'font.size':8})
    fig = mpf.figure(figsize=(12,8), style=s)
    # Price goes in the top slot and volume in the bottom slot of a 2x1
    # grid; putting both in slot 2 would stack the two axes on top of each
    # other.
    ax = fig.add_subplot(2,1,1)
    av = fig.add_subplot(2,1,2, sharex=ax)
    mpf.plot(df_sub, type=chart_type, mav=(3,5,7), ax=ax, volume=av, show_nontrading=True)
        
def price_plot(ticker, sY, sM, sD, eY, eM, eD):
    """Plot the 'Adj Close' series of ``ticker`` over a date range.

    The ticker's CSV is loaded with ``get_df_from_csv``; nothing is drawn
    when that returns ``None``. Rows between the start date
    (``sY``/``sM``/``sD``) and end date (``eY``/``eM``/``eD``) are plotted
    with the 'Date' column on the x axis.
    """
    start = f"{sY}-{sM}-{sD}"
    end = f"{eY}-{eM}-{eD}"

    df = get_df_from_csv(ticker)
    if df is None:
        return

    # Index by date so the frame can be sliced with date strings.
    df.index = pd.DatetimeIndex(df['Date'])
    df_sub = df.loc[start:end]

    # Select by column name: positional indices change with the CSV layout
    # (for example when an extra index column has been written), which made
    # fixed positions pick up a different column than intended.
    date_arr = df_sub['Date'].to_numpy()
    np_adj_close = df_sub['Adj Close'].to_numpy()

    fig = plt.figure(figsize=(12,8), dpi=100)
    axes = fig.add_axes([0,0,1,1])

    axes.plot(date_arr, np_adj_close, color='navy')

    # Limit the number of date ticks on the x axis.
    axes.xaxis.set_major_locator(plt.MaxNLocator(8))

    axes.grid(True, color="0.6", dashes=(5, 2, 1, 2))
    axes.set_facecolor('#FAEBD7')
        
        
def plot_return_mult_stock(investment, stock_df):
    """Plot how ``investment`` would have evolved in each column of ``stock_df``.

    Every column is divided by its first row and scaled by ``investment``
    before being drawn as a line chart.
    """
    first_row = stock_df.iloc[0]
    scaled = stock_df / first_row * investment
    scaled.plot(figsize=(15,6))
    


# Test functions

In [78]:
# Smoke test of the helpers above. Needs network access: the download helpers
# call the 'yahoo' reader of pandas_datareader.

# Download AMZN for 2020-01-01 .. 2021-01-01 and write it to the data folder.
save_to_csv_from_yahoo('AMZN',2020,1,1,2021,1,1)

# Reload the saved CSV (None is returned if the file is missing).
amzn = get_df_from_csv('AMZN')

# Adds the 'Daily return' column in place and re-saves the CSV.
add_daily_return_to_df(amzn,'AMZN')

# Result is not assigned and, not being the last expression, not displayed.
get_return_over_period(amzn,2020,1,1,2021,1,1)

# mplfinance_plot('AMZN', 'ohlc',2020,1,1,2021,1,1)

# price_plot('AMZN', 2020,1,1,2021,1,1)

tickers = ['FB',"AMZN","AAPL","NFLX","GOOG"]

# get_multiple_stocks(2020,1,1,2021,1,1, *tickers)

# One 'Adj Close' column per ticker; downloads every ticker again.
mult_df = merge_df_by_column('Adj Close', 2020,1,1,2021,1,1, *tickers)

# plot_return_mult_stock(100, mult_df)

# Prints mean / standard deviation / coefficient of variation per ticker;
# the trailing bare `mult_df` is what the cell renders as its output table.
get_multi_stock_mean_sd(mult_df)
mult_df

stock: FB   Mean:  234.55 Standard Deviation 38.57
Coefficient of Variation: 0.1644238149761194

stock: AMZN Mean: 2680.86 Standard Deviation 545.76
Coefficient of Variation: 0.20357722570815504

stock: AAPL Mean:   94.46 Standard Deviation 21.82
Coefficient of Variation: 0.23099015711227702

stock: NFLX Mean:  446.83 Standard Deviation 65.61
Coefficient of Variation: 0.14684116488739776

stock: GOOG Mean: 1481.40 Standard Deviation 175.97
Coefficient of Variation: 0.11878452933446867



Unnamed: 0_level_0,FB,AMZN,AAPL,NFLX,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-02,209.779999,1898.010010,73.988487,329.809998,1367.369995
2020-01-03,208.669998,1874.969971,73.269157,325.899994,1360.660034
2020-01-06,212.600006,1902.880005,73.852997,335.829987,1394.209961
2020-01-07,213.059998,1906.859985,73.505646,330.750000,1393.339966
2020-01-08,215.220001,1891.969971,74.688087,339.260010,1404.319946
...,...,...,...,...,...
2020-12-24,267.399994,3172.689941,131.161407,513.969971,1738.849976
2020-12-28,277.000000,3283.959961,135.852509,519.119995,1776.089966
2020-12-29,276.779999,3322.000000,134.043655,530.869995,1758.719971
2020-12-30,271.869995,3285.850098,132.900696,524.590027,1739.520020
