In [None]:
# Install libraries
!pip install yfinance
!pip install get-all-tickers

import yfinance as yf, pandas as pd, numpy as np, shutil, time
import requests
from get_all_tickers import get_tickers as gt
from statistics import mean



In [15]:
# List of tickers (keep format consistent)
tickers = ["FB", "AMZN"]
print("Number of stocks to observe: " + str(len(tickers)))

Number of stocks to observe: 2


In [20]:
# Limit API calls on one ticker that could be having issues
Amount_of_API_Calls = 0
Stock_Failure = 0
Stocks_Not_Imported = 0

index = 0
while (index < len(tickers)) and (Amount_of_API_Calls < 1800): #Cap the number of calls otherwise IP gets blocked for pulling too much data
    try:
        print("Iteration = " + str(index))
        stock = tickers[index]  # Gets the current stock ticker
        temp = yf.Ticker(str(stock))
        Hist_data = temp.history(period="max")  # Get historical data
        time.sleep(2)  # Each ticker has 2 second puase since there is a limit for pulling data
        Amount_of_API_Calls += 1 
        Stock_Failure = 0
        index += 1  
    except ValueError:
        print("Yahoo Finance Backend Error, trying to pull data again")  
        if Stock_Failure > 5:  # Move on to the next ticker if the current ticker fails more than 5 times
            index += 1
            Stocks_Not_Imported += 1
        Amount_of_API_Calls += 1
        Stock_Failure += 1
    # Handle SSL error
    except requests.exceptions.SSLError as e:
        print("Yahoo Finance Backend Error, trying to pull data again")  # An error occured on Yahoo Finance's backend. We will attempt to retreive the data again
        if Stock_Failure > 5:  
            index += 1
            Stocks_Not_Imported += 1
        Amount_of_API_Calls += 1
        Stock_Failure += 1

print("The amount of stocks we successfully imported: " + str(index - Stocks_Not_Imported))

Iteration = 0
Iteration = 1
The amount of stocks we successfully imported: 2


In [21]:
# Create the dataframe that we will be adding the final analysis of each stock to
Compare_Stocks = pd.DataFrame(columns=["Company", "Days_Observed", "Crosses", "True_Positive", "False_Positive", "True_Negative", "False_Negative", "Sensitivity", 
"Specificity", "Accuracy", "TPR", "FPR"])

count = 0
for stock in list_files:
    # Dataframe to hold the historical data of the stock we are interested in.
    Hist_data = pd.read_csv(stock)

    # Initialization
    Days_Observed = 0
    Crosses = 0
    True_Positive = 0
    False_Positive = 0
    True_Negative = 0
    False_Negative = 0
    Sensitivity = 0
    Specificity = 0
    Accuracy = 0
    prices = []

    c = 0
    # Add the closing prices to the prices list to start at more than 2 dollars to reduce outlier interference
    while c < len(Hist_data):
        if Hist_data.iloc[c,4] > float(2.00):  
            prices.append(Hist_data.iloc[c,4])
        c += 1

    # Make a dataframe for price list
    prices_df = pd.DataFrame(prices) 

    # Calculate exponentialy weighted moving averages
    day12 = prices_df.ewm(span=12).mean()  
    day26 = prices_df.ewm(span=26).mean()
    macd = []  # Holds the MACD line values

    counter=0  # Loop to substantiate the MACD line
    while counter < (len(day12)):
        macd.append(day12.iloc[counter,0] - day26.iloc[counter,0])  # Subtract the 26 day EW moving average from the 12 day.
        counter += 1
    macd_df = pd.DataFrame(macd)
    signal_df = macd_df.ewm(span=9).mean() # Create the signal line, which is a 9 day EW moving average
    signal = signal_df.values.tolist()  # Add the signal line values to a list.
    
    #  Loop to Compare the expected MACD crosses results to the actual results
    Day = 1
    while Day < len(macd)-5:  # Subtract 5 to look at the 5 day post average
        Prev_Day = Day-1      # Subtract 1 to use the last day for prediction

        # Decisions are based on a 5 day average
        Avg_Closing_Next_Days = (prices[Day+1] + prices[Day+2] + prices[Day+3] + prices[Day+4] + prices[Day+5])/5 
        
        Days_Observed += 1  # Count how many days were observed
        if ((signal[Prev_Day] > macd[Prev_Day]) and (signal[Day] <= macd[Day])):  # Check if the signal line dips below the MACD line (Expected increase over the next x days)
            Crosses += 1   # register that a cross occurred
            if (prices[Day] < Avg_Closing_Next_Days):  # Tests if the price increases over the next x days
                True_Positive += 1
            else:
                False_Negative += 1

        if ((signal[Prev_Day] < macd[Prev_Day]) and (signal[Day] >= macd[Day])): # Check if the signal line moves above the MACD line (Expected dip over the next x days)
            Crosses += 1
            if (prices[Day] > Avg_Closing_Next_Days):  # Tests if the price decreases over the next x days
                True_Negative += 1
            else:
                False_Positive += 1
        Day += 1
    try:
        Sensitivity = (True_Positive / (True_Positive + False_Negative)) # Calculate sensitivity
    except ZeroDivisionError:  # Catch the divide by zero error
        Sensitivity = 0
    try:
        Specificity = (True_Negative / (True_Negative + False_Positive)) # Calculate specificity
    except ZeroDivisionError:
        Specificity
    try:
        Accuracy = (True_Positive + True_Negative) / (True_Negative + True_Positive + False_Positive + False_Negative) # Calculate accuracy
    except ZeroDivisionError:
        Accuracy = 0

    TPR = Sensitivity  # Calculate the true positive rate
    FPR = 1 - Specificity  # Calculate the false positive rate

    # Dictionary of observation parameters
    add_row = {'Company' : Company, 'Days_Observed' : Days_Observed, 'Crosses' : Crosses, 'True_Positive' : True_Positive, 'False_Positive' : False_Positive, 
    'True_Negative' : True_Negative, 'False_Negative' : False_Negative, 'Sensitivity' : Sensitivity, 'Specificity' : Specificity, 'Accuracy' : Accuracy, 'TPR' : TPR, 'FPR' : FPR} 

    # Add the parameters as features of the stock
    Compare_Stocks = Compare_Stocks.append(add_row, ignore_index = True) 
    count += 1

In [22]:
# Delete stocks that don't have enough crosses observed (currently capped at 100)
Not_Enough_Records = []  
row = 0
while row < (len(Compare_Stocks)):
    if Compare_Stocks.iloc[row, 2] < 100:
        Not_Enough_Records.append(row)  
    row += 1

# Remove records that do not have enough crosses
Compare_Stocks = Compare_Stocks.drop(Not_Enough_Records)  

Avg_Accuracy = []  
i=0
while i < (len(Compare_Stocks)):
    Avg_Accuracy.append(Compare_Stocks.iloc[i,9])
    i += 1

In [23]:
# Create a dataframe to rank and sort the stocks by their accuracy
df = Compare_Stocks[['Company','Days_Observed', 'Crosses', 'True_Positive', 'False_Positive', 'True_Negative', 'False_Negative', 'Sensitivity', 'Specificity', 'TPR', 'FPR', 'Accuracy']]
df["Companies_Ranked"] = df["Accuracy"].rank(ascending = False)  
df.sort_values("Accuracy", inplace = True, ascending = False)  

# Average accuracy
from statistics import mean
print("The average accuracy of all stocks observed: " + str(mean(Avg_Accuracy)))  

The average accuracy of all stocks observed: 0.5240465416936005


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
