In [1]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates

import time
# In Powershell Prompt : conda install -c conda-forge multitasking
# pip install -i https://pypi.anaconda.org/ranaroussi/simple yfinance

import yfinance as yf

# To show all your output File -> Preferences -> Settings Search for Notebook
# Notebook Output Text Line Limit and set to 100

# Used for file handling like deleting files
import os

# conda install -c conda-forge cufflinks-py
# conda install -c plotly plotly
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Use Plotly locally
cf.go_offline()

from plotly.subplots import make_subplots

# New Imports
# Used to get data from a directory

from os import listdir
from os.path import isfile, join

import warnings
warnings.simplefilter("ignore")

Constants

In [2]:
PATH = "C:/Users/davez/Desktop/Investing with Data Science/"
S_DATE = "2017-02-01"
E_DATE = "2022-12-06"
S_DATE_DT = pd.to_datetime(S_DATE)
E_DATE_DT = pd.to_datetime(E_DATE)


Get Column Data from CSV

In [3]:
def get_column_from_csv(file, col_name):
    # Try to get the file and if it doesnt exist issue a warning
    try:
        df = pd.read_csv(file)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df[col_name]

Get Stock Tickers

In [4]:
tickers = get_column_from_csv("C:/Users/davez/Desktop/Investing with Data Science/Wilshire-5000-Stocks-New.csv", "Ticker")
print(len(tickers))
print(tickers)

3481
0          A
1         AA
2        AAL
3       AAME
4        AAN
        ... 
3476    ZUMZ
3477     ZUO
3478    ZYNE
3479    ZYXI
3480    ZNGA
Name: Ticker, Length: 3481, dtype: object


Get Dataframe from CSV

In [5]:
# Reads a dataframe from the CSV file, changes index to date and returns it
def get_stock_df_from_csv(ticker):
    
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(PATH + ticker + '.csv', index_col=0)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df

In [6]:
print(tickers)

0          A
1         AA
2        AAL
3       AAME
4        AAN
        ... 
3476    ZUMZ
3477     ZUO
3478    ZYNE
3479    ZYXI
3480    ZNGA
Name: Ticker, Length: 3481, dtype: object


In [7]:

files = [x for x in listdir(PATH) if isfile(join(PATH, x))]
tickers = [os.path.splitext(x)[0] for x in files]
tickers
# tickers.remove('.ds_Store') MacOS Only
tickers.sort()
len(tickers)

3262

In [8]:
# Add Daily Returns
def add_daily_return_to_df(df):
    df['daily_return'] = (df['Close'] / df['Close'].shift(1)) - 1
    #df.to_csv(PATH + ticker + '.csv')
    return df

In [9]:
# Add Cumulative Returns
def add_cum_return_to_df(df):
    df['cum_return'] = (1 + df['daily_return']).cumprod()
    #df.to_csv(PATH + ticker + '.csv')
    return df

In [10]:
# Add Bollinger Bands
def add_bollinger_bands(df):
    df['middle_band'] = df['Close'].rolling(window=20).mean()
    df['upper_band'] = df['middle_band'] + 1.96 * df['Close'].rolling(window=20).std()
    df['lower_band'] = df['middle_band'] - 1.96 * df['Close'].rolling(window=20).std()
    # df.to_csv(PATH + ticker + '.csv')

In [11]:
# Add Ichimoku Data to Dataframe
def add_Ichimoku(df):
    # Conversion Line = (Highest value in period + Lowest value in period)/2 (9 sessions)
    hi_val = df['High'].rolling(window=9).max()
    low_val = df['Low'].rolling(window=9).min()
    df['Conversion'] = (hi_val + low_val) / 2
    
    # Base Line = (Highest value in period + Lowest value in period)/2 (26 sessions)
    hi_val2 = df['High'].rolling(window=26).max()
    low_val2 = df['Low'].rolling(window=26).min()
    df['Baseline'] = (hi_val2 + low_val2) / 2
    
    # Span A = (Conversion Value + Base Value)/2 (26 sessions)
    df['SpanA'] = ((df['Conversion'] + df['Baseline']) / 2)
    
    # Span B = (Conversion Value + Base Value)/2 (52 sessions)
    hi_val3 = df['High'].rolling(window=52).max()
    low_val3 = df['Low'].rolling(window=52).min()
    df['SpanB'] = ((hi_val3 + low_val3) / 2).shift(26)
    
    # Lagging Span = Price shifted back 26 periods
    df['Lagging'] = df['Close'].shift(-26)
    return df
    # df.to_csv(PATH + ticker + '.csv')

In [12]:
try:
    print("Working on :", "A")
    new_df = get_stock_df_from_csv("A")
    new_df = add_daily_return_to_df(new_df)
    new_df = add_cum_return_to_df(new_df)
    new_df = add_bollinger_bands(new_df)
    new_df = add_Ichimoku(new_df)
    new_df.to_csv(PATH + 'A' + '.csv')
except Exception as ex:
    print(ex)

Working on : A
'NoneType' object is not subscriptable


In [13]:
 new_df = add_Ichimoku(new_df)

TypeError: 'NoneType' object is not subscriptable