# Imports

In [1]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd
from pandas_datareader import data as web # Reads stock data 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

import time

# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

import warnings
warnings.filterwarnings("ignore")

# Default Values

In [2]:
# Folder that holds one CSV file per ticker
path = "./Stocks/"

# First day of the default analysis window, as parts, datetime and string
S_YEAR, S_MONTH, S_DAY = 2017, 1, 3
S_DATE_DATETIME = dt.datetime(S_YEAR, S_MONTH, S_DAY)
S_DATE_STR = f"{S_YEAR}-{S_MONTH}-{S_DAY}"

# Last day of the default analysis window, as parts, datetime and string
E_YEAR, E_MONTH, E_DAY = 2021, 8, 19
E_DATE_DATETIME = dt.datetime(E_YEAR, E_MONTH, E_DAY)
E_DATE_STR = f"{E_YEAR}-{E_MONTH}-{E_DAY}"

# Get Stock File Names in a List

In [3]:
# Collect the stock CSV files found directly inside `path`.
# - isfile() skips sub-directories.
# - Only names ending in ".csv" are kept: get_df_from_csv() always appends
#   ".csv" to the ticker, so any other file (e.g. a hidden OS file) would
#   produce a ticker that cannot be loaded.
# - listdir() returns entries in arbitrary order, so sort for a stable,
#   reproducible ticker order.
files = sorted(
    x for x in listdir(path)
    if isfile(join(path, x)) and x.endswith('.csv')
)

# splitext splits a file name into (name, extension); keep only the name,
# which is the ticker symbol
tickers = [os.path.splitext(x)[0] for x in files]
print(tickers)

['A', 'AA', 'AAL', 'AAME', 'AAOI', 'AAON', 'AAP', 'AAPL', 'AAT', 'AAWW', 'ABBV', 'ABC', 'ABCB', 'ABEO', 'ABG', 'ABIO', 'ABM', 'ABMD', 'ABR', 'ABT', 'ABTX', 'AC', 'ACAD', 'ACBI', 'ACC', 'ACCO', 'ACER', 'ACGL', 'ACHC', 'ACHV', 'ACIW', 'ACLS', 'ACM', 'ACMR', 'ACN', 'ACNB', 'ACOR', 'ACRE', 'ACRS', 'ACRX', 'ACTG', 'ACU', 'ACY', 'ADBE', 'ADC', 'ADES', 'ADI', 'ADM', 'ADMA', 'ADMP', 'ADMS', 'ADNT', 'ADP', 'ADS', 'ADSK', 'ADTN', 'ADUS', 'ADVM', 'ADXS', 'AE', 'AEE', 'AEHR', 'AEIS', 'AEL', 'AEMD', 'AEO', 'AEP', 'AERI', 'AES', 'AEY', 'AFG', 'AFI', 'AFL', 'AGCO', 'AGEN', 'AGFS', 'AGIO', 'AGLE', 'AGM', 'AGNC', 'AGO', 'AGR', 'AGRX', 'AGTC', 'AGX', 'AGYS', 'AHH', 'AHT', 'AIG', 'AIMC', 'AIN', 'AINC', 'AIR', 'AIRG', 'AIRI', 'AIRT', 'AIT', 'AIV', 'AIZ', 'AJG', 'AJRD', 'AJX', 'AKAM', 'AKBA', 'AKR', 'AKTS', 'AL', 'ALB', 'ALBO', 'ALCO', 'ALDX', 'ALE', 'ALEX', 'ALG', 'ALGN', 'ALGT', 'ALJJ', 'ALK', 'ALKS', 'ALL', 'ALLE', 'ALLY', 'ALNA', 'ALNY', 'ALOT', 'ALPN', 'ALRM', 'ALSN', 'ALT', 'ALTR', 'ALV', 'ALX', 'ALX

# Create a DataFrame from our List

In [4]:
# One row per ticker symbol, held in a single 'Ticker' column
stock_df = pd.DataFrame(data=tickers, columns=["Ticker"])
stock_df

Unnamed: 0,Ticker
0,A
1,AA
2,AAL
3,AAME
4,AAOI
...,...
2902,ZSAN
2903,ZTS
2904,ZUMZ
2905,ZYNE


# Function that Returns a DataFrame from a CSV

In [5]:
def get_df_from_csv(ticker, folder=None):
    """Load the CSV stored for ``ticker`` into a DataFrame.

    Parameters
    ----------
    ticker : str
        File name without the ``.csv`` extension.
    folder : str, optional
        Directory to read from. Defaults to the notebook-level ``path``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed file, or ``None`` (after printing a message) when the
        file does not exist. Callers must check for ``None``.
    """
    folder = path if folder is None else folder
    # join() copes with `folder` given with or without a trailing separator
    csv_path = join(folder, ticker + '.csv')
    try:
        return pd.read_csv(csv_path)
    except FileNotFoundError:
        # Best-effort lookup: report which file is missing instead of raising
        print("File Doesn't Exist :", csv_path)
        return None

# Function that Saves DataFrame to CSV

In [6]:
def save_dataframe_to_csv(df, ticker, folder=None):
    """Write ``df`` to ``<folder>/<ticker>.csv``, replacing any existing file.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to save.
    ticker : str
        File name without the ``.csv`` extension.
    folder : str, optional
        Directory to write to. Defaults to the notebook-level ``path``.
    """
    folder = path if folder is None else folder
    # A default RangeIndex is only a row counter. Writing it out is what makes
    # read_csv() create an extra "Unnamed: 0" column on every reload, so it is
    # dropped; any other index is kept because it may carry real data.
    write_index = not isinstance(df.index, pd.RangeIndex)
    df.to_csv(join(folder, ticker + '.csv'), index=write_index)

# Delete Unnamed Columns in CSV Files

In [7]:
def delete_unnamed_cols(df):
    """Return a copy of ``df`` without columns whose label starts with 'Unnamed'.

    Such columns appear when a CSV that was written with its index is read
    back. Labels are compared through ``str()`` so frames with non-string
    column labels are handled too. The input frame is left untouched, and the
    result is an independent copy so later column assignments by the caller
    act on their own frame rather than on a selection of the input.
    """
    keep = [not str(label).startswith('Unnamed') for label in df.columns]
    return df.loc[:, keep].copy()

# Add Daily Return to DataFrame

In [8]:
# We calculate a percentage rate of return for each day to compare investments.
# Simple Rate of Return = (End Price - Beginning Price) / Beginning Price OR (EP / BP) - 1

def add_daily_return_to_df(df, ticker, folder=None):
    """Add a ``daily_return`` column to ``df`` and save the frame to CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``Adj Close`` column. Modified in place.
    ticker : str
        File name (without ``.csv``) the updated frame is written to.
    folder : str, optional
        Directory to write to. Defaults to the notebook-level ``path``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now with the ``daily_return`` column.
    """
    folder = path if folder is None else folder
    prices = df['Adj Close']
    # shift(1) pairs each price with the previous row's price; the first row
    # has no previous price, so its return is NaN
    df['daily_return'] = (prices / prices.shift(1)) - 1
    # Skip a default RangeIndex when saving: writing it is what produces the
    # extra "Unnamed: 0" column the next time the file is read
    write_index = not isinstance(df.index, pd.RangeIndex)
    df.to_csv(join(folder, ticker + '.csv'), index=write_index)
    return df

# Return on Investment over a Defined Time Period

In [9]:
# Return on Investment (ROI) is the gain you received from your investment
# This amount does not include your initial investment
# If you invest 100 and have 200 after 5 years
# ROI = (End Value (200) - Initial Value (100)) / Initial Value (100) = 1
# Your new total is Initial Investment + 1 * Initial Investment = 200

def get_roi_defined_time(df, start_date=None, end_date=None):
    """Return the ROI of ``Adj Close`` between two dates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Date`` and ``Adj Close`` columns. The ``Date`` column
        is converted to datetime in place.
    start_date, end_date : optional
        Dates to compare against the ``Date`` column. Default to the
        notebook-level ``S_DATE_STR`` and ``E_DATE_STR``.

    Returns
    -------
    pandas.Series
        One-element Series (labelled with the end-date row's index) holding
        ``(end - start) / start``. Call ``.item()`` for a plain float.

    Raises
    ------
    KeyError
        If no row matches ``start_date``.
    ValueError
        If the number of rows matching ``end_date`` is not exactly one.
    """
    start_date = S_DATE_STR if start_date is None else start_date
    end_date = E_DATE_STR if end_date is None else end_date

    # Set as a datetime
    df['Date'] = pd.to_datetime(df['Date'])

    start_rows = df.loc[df['Date'] == start_date, 'Adj Close']
    if start_rows.empty:
        raise KeyError(f"No row found for start date {start_date}")
    # Positional access: the matching row's index label is not necessarily 0
    start_val = start_rows.iloc[0]
    print("Initial Price :", start_val)

    end_val = df.loc[df['Date'] == end_date, 'Adj Close']
    if len(end_val) != 1:
        raise ValueError(
            f"Expected exactly one row for end date {end_date}, found {len(end_val)}"
        )
    print("Final Price :", end_val.item())

    # Calculate return on investment
    roi = (end_val - start_val) / start_val

    # Return the total return between 2 dates
    return roi

# Get Coefficient of Variation

In [10]:
def get_cov(stock_df):
    """Return the coefficient of variation of the ``Adj Close`` column.

    This is the column's standard deviation divided by its mean.
    """
    prices = stock_df["Adj Close"]
    return prices.std() / prices.mean()

# Test Functions

In [11]:
# Show the first ticker symbol in the list; it is used as the test case below
tickers[0]

'A'

In [12]:
# Load the CSV for the first ticker into a DataFrame
# (get_df_from_csv returns None if the file is missing)
stock_a = get_df_from_csv(tickers[0])
stock_a

Unnamed: 0.1,Unnamed: 0,Date,Adj Close,daily_return
0,0,2017-01-03,44.773853,
1,1,2017-01-04,45.361336,0.013121
2,2,2017-01-05,44.822021,-0.011889
3,3,2017-01-06,46.218483,0.031156
4,4,2017-01-09,46.362946,0.003126
...,...,...,...,...
1161,1161,2021-08-13,159.229996,0.006829
1162,1162,2021-08-16,162.070007,0.017836
1163,1163,2021-08-17,160.910004,-0.007157
1164,1164,2021-08-18,163.020004,0.013113


In [13]:
# Add the daily_return column to this dataframe.
# The function modifies stock_a in place and also rewrites the ticker's CSV,
# so the returned frame does not need to be reassigned.
add_daily_return_to_df(stock_a, tickers[0])
stock_a

Unnamed: 0.1,Unnamed: 0,Date,Adj Close,daily_return
0,0,2017-01-03,44.773853,
1,1,2017-01-04,45.361336,0.013121
2,2,2017-01-05,44.822021,-0.011889
3,3,2017-01-06,46.218483,0.031156
4,4,2017-01-09,46.362946,0.003126
...,...,...,...,...
1161,1161,2021-08-13,159.229996,0.006829
1162,1162,2021-08-16,162.070007,0.017836
1163,1163,2021-08-17,160.910004,-0.007157
1164,1164,2021-08-18,163.020004,0.013113


In [14]:
# Drop the columns whose names start with "Unnamed" and keep the result
stock_a = delete_unnamed_cols(stock_a)
stock_a

Unnamed: 0,Date,Adj Close,daily_return
0,2017-01-03,44.773853,
1,2017-01-04,45.361336,0.013121
2,2017-01-05,44.822021,-0.011889
3,2017-01-06,46.218483,0.031156
4,2017-01-09,46.362946,0.003126
...,...,...,...
1161,2021-08-13,159.229996,0.006829
1162,2021-08-16,162.070007,0.017836
1163,2021-08-17,160.910004,-0.007157
1164,2021-08-18,163.020004,0.013113


In [15]:
# Save the cleaned dataframe back to the first ticker's CSV file
save_dataframe_to_csv(stock_a, tickers[0])

# Add Daily Returns & Clean Up All Files

In [16]:
# Cycle through all tickers: load each CSV, add the daily return column,
# drop the "Unnamed" columns and write the cleaned frame back to disk
for ticker in tickers:
    print("Working on :", ticker)

    # Get a dataframe for that ticker
    stock_df = get_df_from_csv(ticker)
    if stock_df is None:
        # get_df_from_csv has already reported the missing file; skip this
        # ticker instead of failing on None and aborting the whole run
        continue

    # Add daily return to this dataframe (modifies stock_df in place)
    add_daily_return_to_df(stock_df, ticker)

    # Delete unnamed columns in dataframe
    stock_df = delete_unnamed_cols(stock_df)

    # Save cleaned dataframe to csv
    save_dataframe_to_csv(stock_df, ticker)

Working on : A
Working on : AA
Working on : AAL
Working on : AAME
Working on : AAOI
Working on : AAON
Working on : AAP
Working on : AAPL
Working on : AAT
Working on : AAWW
Working on : ABBV
Working on : ABC
Working on : ABCB
Working on : ABEO
Working on : ABG
Working on : ABIO
Working on : ABM
Working on : ABMD
Working on : ABR
Working on : ABT
Working on : ABTX
Working on : AC
Working on : ACAD
Working on : ACBI
Working on : ACC
Working on : ACCO
Working on : ACER
Working on : ACGL
Working on : ACHC
Working on : ACHV
Working on : ACIW
Working on : ACLS
Working on : ACM
Working on : ACMR
Working on : ACN
Working on : ACNB
Working on : ACOR
Working on : ACRE
Working on : ACRS
Working on : ACRX
Working on : ACTG
Working on : ACU
Working on : ACY
Working on : ADBE
Working on : ADC
Working on : ADES
Working on : ADI
Working on : ADM
Working on : ADMA
Working on : ADMP
Working on : ADMS
Working on : ADNT
Working on : ADP
Working on : ADS
Working on : ADSK
Working on : ADTN
Working on : ADUS

# Get Stock Return over Time Period & Coefficient of Variation

In [17]:
# Get the total return of stock_a between the default start and end dates
# Sanity check: Final Price 167.67 = (44.77 * 2.745) + 44.77
get_roi_defined_time(stock_a)

Initial Price : 44.77385330200195
Final Price : 167.6699981689453


1165    2.744819
Name: Adj Close, dtype: float64

In [18]:
# Get the coefficient of variation (standard deviation / mean) of Adj Close
# This is higher than normal because I'm using many years instead of one
get_cov(stock_a)

0.32362810321585683