In [1]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

# pip install numpy
# conda install -c anaconda pandas
# conda install -c conda-forge matplotlib

import datetime as dt # For defining dates

import time

# In Powershell Prompt : conda install -c conda-forge multitasking
# pip install -i https://pypi.anaconda.org/ranaroussi/simple yfinance

import yfinance as yf

# To show all your output File -> Preferences -> Settings Search for Notebook
# Notebook Output Text Line Limit and set to 100

# Used for file handling like deleting files
import os

# conda install -c conda-forge cufflinks-py
# conda install -c plotly plotly
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Use Plotly locally
cf.go_offline()

from plotly.subplots import make_subplots

# New Imports
# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join
import warnings
warnings.simplefilter("ignore")

constants

In [2]:
PATH = "C:/Users/davez/Desktop/Investing with Data Science/"

# Start end date defaults
S_DATE = "2017-02-01"
E_DATE = "2022-12-06"
S_DATE_DT = pd.to_datetime(S_DATE)
E_DATE_DT = pd.to_datetime(E_DATE)

In [3]:
# Reads a dataframe from the CSV file, changes index to date and returns it
def get_stock_df_from_csv(ticker):
    
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(PATH + ticker + '.csv', index_col=0)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df

In [4]:
# Used to generate the red and green fill for the Ichimoku cloud
def get_fill_color(label):
    if label >= 1:
        return 'rgba(0,250,0,0.4)'
    else:
        return 'rgba(250,0,0,0.4)'

In [5]:
def get_Ichimoku(df):

    candle = go.Candlestick(x=df.index, open=df['Open'],
    high=df['High'], low=df["Low"], close=df['Close'], name="Candlestick")

    df1 = df.copy()
    fig = go.Figure()
    df['label'] = np.where(df['SpanA'] > df['SpanB'], 1, 0)
    df['group'] = df['label'].ne(df['label'].shift()).cumsum()

    df = df.groupby('group')

    dfs = []
    for name, data in df:
        dfs.append(data)

    for df in dfs:
        fig.add_traces(go.Scatter(x=df.index, y=df.SpanA,
        line=dict(color='rgba(0,0,0,0)')))

        fig.add_traces(go.Scatter(x=df.index, y=df.SpanB,
        line=dict(color='rgba(0,0,0,0)'),
        fill='tonexty',
        fillcolor=get_fill_color(df['label'].iloc[0])))

    baseline = go.Scatter(x=df1.index, y=df1['Baseline'], 
    line=dict(color='pink', width=2), name="Baseline")

    conversion = go.Scatter(x=df1.index, y=df1['Conversion'], 
    line=dict(color='black', width=1), name="Conversion")

    lagging = go.Scatter(x=df1.index, y=df1['Lagging'], 
    line=dict(color='purple', width=2), name="Lagging")

    span_a = go.Scatter(x=df1.index, y=df1['SpanA'], 
    line=dict(color='green', width=2, dash='dot'), name="Span A")

    span_b = go.Scatter(x=df1.index, y=df1['SpanB'], 
    line=dict(color='red', width=1, dash='dot'), name="Span B")
    
    fig.add_trace(candle)
    fig.add_trace(baseline)
    fig.add_trace(conversion)
    fig.add_trace(lagging)
    fig.add_trace(span_a)
    fig.add_trace(span_b)
    
    fig.update_layout(height=1200, width=1800, showlegend=True)

    fig.show()

In [7]:
sec_df = pd.read_csv("C:/Users/davez/Desktop/Investing with Data Science/big_stock_sectors.csv")

indus_df = sec_df.loc[sec_df['Sector'] == "Industrial"]
health_df = sec_df.loc[sec_df['Sector'] == "Healthcare"]
it_df = sec_df.loc[sec_df['Sector'] == "Information Technology"]
comm_df = sec_df.loc[sec_df['Sector'] == "Communication"]
staple_df = sec_df.loc[sec_df['Sector'] == "Staples"]
discretion_df = sec_df.loc[sec_df['Sector'] == "Discretionary"]
utility_df = sec_df.loc[sec_df['Sector'] == "Utilities"]
financial_df = sec_df.loc[sec_df['Sector'] == "Financials"]
material_df = sec_df.loc[sec_df['Sector'] == "Materials"]
restate_df = sec_df.loc[sec_df['Sector'] == "Real Estate"]
energy_df = sec_df.loc[sec_df['Sector'] == "Energy"]

In [8]:
sec_df

Unnamed: 0,Ticker,Name,Description,Mrkt Cap,Sector
0,A,Agilent Technologies,Life Sciences Tools & Services,48.21B,Healthcare
1,AA,Alcoa,Metals & Mining,11.15B,Materials
2,AAC,Ares Acquisition,Blank Check / SPAC,1.22B,SPAC
3,AACG,ATA Creativity Global,Diversified Consumer Services,33.59M,Discretionary
4,AACI,Armada Acquisition I,Blank Check / SPAC,202.75M,SPAC
...,...,...,...,...,...
6098,ZY,Zymergen,Chemicals,685.06M,Materials
6099,ZYME,Zymeworks,Biotechnology,763.02M,Healthcare
6100,ZYNE,Zynerba Pharmaceuticals,Pharmaceuticals,118.71M,Healthcare
6101,ZYXI,Zynex,Health Care Equipment & Supplies,363.44M,Healthcare


In [9]:
def get_cum_ret_for_stocks(stock_df):
    tickers = []
    cum_rets = []

    for index, row in stock_df.iterrows():
        df = get_stock_df_from_csv(row['Ticker'])
        if df is None:
            pass
        else:
            tickers.append(row['Ticker'])
            cum = df['cum_return'].iloc[-1]
            cum_rets.append(cum)
    return pd.DataFrame({'Ticker':tickers, 'CUM_RET':cum_rets})

In [11]:
health_care = get_cum_ret_for_stocks(health_df)

IndexError: single positional indexer is out-of-bounds

In [12]:
health_df

Unnamed: 0,Ticker,Name,Description,Mrkt Cap,Sector
0,A,Agilent Technologies,Life Sciences Tools & Services,48.21B,Healthcare
5,AADI,Aadi Bioscience,Biotechnology,504.59M,Healthcare
22,ABBV,AbbVie,Biotechnology,239.37B,Healthcare
23,ABC,AmerisourceBergen,Health Care Providers & Services,27.66B,Healthcare
25,ABCL,AbCellera Biologics,Biotechnology,4.03B,Healthcare
...,...,...,...,...,...
6088,ZSAN,Zosano Pharma,Health Care Equipment & Supplies,55.62M,Healthcare
6091,ZTS,Zoetis,Pharmaceuticals,115.46B,Healthcare
6099,ZYME,Zymeworks,Biotechnology,763.02M,Healthcare
6100,ZYNE,Zynerba Pharmaceuticals,Pharmaceuticals,118.71M,Healthcare
