In [1]:
import os
from pandas_datareader import data as pdr
import pandas as pd
import numpy as np
import yfinance as yf
from tqdm import tqdm
import requests
import urllib.request
import re
import time
import tensorflow as tf
import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from IPython.display import display, HTML
import warnings

In [16]:
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
yf.pdr_override()
pd.set_option('display.max_columns', None)
START_DATE = "2003-08-01"
END_DATE = "2015-01-01"

In [3]:
#Utility function to reformat yahoo finance numbers
def data_string_to_float(number_string):
    if ("N/A" in number_string) or ("NaN" in number_string):
        return "N/A"
    elif number_string == ">0":
        return 0
    elif "B" in number_string:
        return float(number_string.replace("B", "")) * 1000000000
    elif "M" in number_string:
        return float(number_string.replace("M", "")) * 1000000
    elif "K" in number_string:
        return float(number_string.replace("K", "")) * 1000
    else:
        return float(number_string)


In [4]:
#Get list of tickers and find price data
statspath = "C://Users//nickd//Downloads//intraQuarter//_KeyStats//"
ticker_list = os.listdir(statspath)
all_data = pdr.get_data_yahoo(ticker_list, START_DATE, END_DATE)
stock_data = all_data["Adj Close"]

[*********************100%***********************]  560 of 560 completed

141 Failed downloads:
- S: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
- NTRI: No data found, symbol may be delisted
- HCN: No data found for this date range, symbol may be delisted
- CCE: No data found for this date range, symbol may be delisted
- STI: No data found, symbol may be delisted
- ARG: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
- TWC: No data found for this date range, symbol may be delisted
- WPX: No data found, symbol may be delisted
- TSO: No data found for this date range, symbol may be delisted
- PX: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
- SNDK: No data found for this date range, symbol may be delisted
- SAI: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
- ARO: No data found for this date range, symbol may be delisted
- DPS: No data found for this date range, symbol may be delisted
- DTV: No data 

In [5]:
stock_data.dropna(how="all", axis=1, inplace=True)
missing_tickers = [
    ticker for ticker in ticker_list if ticker.upper() not in stock_data.columns
]
print(f"{len(missing_tickers)} tickers are missing: \n {missing_tickers} ")
stock_data.ffill(inplace=True)
stock_data.to_csv("stock_prices.csv")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stock_data.dropna(how="all", axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stock_data.ffill(inplace=True)


141 tickers are missing: 
 ['ace', 'aci', 'act', 'adt', 'agn', 'altr', 'alxn', 'ann', 'apc', 'apol', 'arg', 'arna', 'aro', 'avp', 'bbry', 'bbt', 'bcr', 'beam', 'bhi', 'bks', 'bll', 'brcm', 'btu', 'cab', 'cam', 'cbg', 'cbs', 'cce', 'celg', 'cern', 'cfn', 'chk', 'cog', 'coh', 'ctl', 'cvc', 'dell', 'df', 'disca', 'dlph', 'dnb', 'dnr', 'do', 'dow', 'dps', 'dsw', 'dtv', 'esv', 'etfc', 'fb', 'fdo', 'flir', 'frx', 'ftr', 'gmcr', 'hcbk', 'hcn', 'hcp', 'hot', 'hrs', 'hsp', 'htz', 'ir', 'jcp', 'jdsu', 'jec', 'josb', 'joy', 'krft', 'ksu', 'life', 'lltc', 'lm', 'lo', 'ltd', 'luk', 'mhfi', 'mjn', 'mon', 'mwv', 'myl', 'nbl', 'ne', 'ntri', 'nu', 'pbct', 'pcl', 'pcln', 'pcp', 'petm', 'pll', 'pvtb', 'px', 'qep', 'rai', 'rdc', 'rht', 'rtn', 's', 'sai', 'scg', 'sd', 'se', 'sfly', 'shld', 'sial', 'sndk', 'sne', 'spls', 'sti', 'stj', 'swy', 'symc', 'te', 'teg', 'tibx', 'tif', 'tmk', 'tso', 'tss', 'twc', 'tyc', 'ua', 'utx', 'var', 'viab', 'vitc', 'vprt', 'wag', 'wfm', 'win', 'wlp', 'wpo', 'wpx', 'wyn', 'xl'

In [6]:
index_data = pdr.get_data_yahoo("SPY", start=START_DATE, end=END_DATE)
index_data.to_csv("sp500_index.csv")

[*********************100%***********************]  1 of 1 completed


In [7]:
df = pd.DataFrame()

for ticker in ticker_list:
    ticker = ticker.upper()

    stock_ohlc = pdr.get_data_yahoo(ticker, START_DATE, END_DATE)
    if stock_ohlc.empty:
        print(f"No data for {ticker}")
        continue
    adj_close = stock_ohlc["Adj Close"].rename(ticker)
    df = pd.concat([df, adj_close], axis=1)
df.to_csv("stock_prices.csv")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- ACE: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
No data for ACE
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- ACI: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
No data for ACI
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- ACT: Data doesn't exist for startDate = 1059714000, endDate = 1420

[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CAB: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
No data for CAB
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CAM: No data found for this date range, symbol may be delisted
No data for CAM
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CBG: No data found for this date range, symbol may be delisted
No data for CBG
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CBS: No data found, symbol may be delisted
N

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- ESV: No data

[*********************100%***********************]  1 of 1 completed

1 Failed download:
- IR: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
No data for IR
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- JCP: No data found, symbol may be delisted
No data for JCP
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- JDSU: No data found for this date range, symbol may be delisted
No data for JDSU
[*********************100%***********************]  1 of 1 completed

1 Failed downloa

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- NTRI: No data found, symbol may be delisted
No data for NTRI
[*******************

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- SFLY: No data found, symbol may be delisted
No data for SFLY
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- SHLD: Data doesn't exist for startDate = 1059714000, endDate = 1420092000
No data for SHLD
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- SIAL: No data found for this date range, symbol may be delisted
No data for SIAL
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[**********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- WFM: No data found for this date range, symbol may be delisted
No data for WFM
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- WIN: No data found, symbol may be delisted
No data for WIN
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- WLP: No data found for this date range, symbol may be delisted
No data for WLP
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*

In [8]:
features = ["Total Debt/Equity",
                      'Trailing P/E',
                      'Price/Book',
                      'Profit Margin',
                      'Operating Margin',
                      'Return on Assets',
                      'Return on Equity',
                      'Revenue Per Share',
                      'Market Cap',
                        'Enterprise Value',
                        'Forward P/E',
                        'PEG Ratio',
                        'Enterprise Value/Revenue',
                        'Enterprise Value/EBITDA',
                        'Revenue',
                        'Gross Profit',
                        'EBITDA',
                        'Net Income Avl to Common ',
                        'Diluted EPS',
                        'Qtrly Earnings Growth',
                        'Qtrly Revenue Growth',
                        'Total Cash',
                        'Total Cash Per Share',
                        'Total Debt',
                        'Current Ratio',
                        'Book Value Per Share',
                        'Cash Flow',
                        'Beta',
                        '% Held by Insiders',
                        '% Held by Institutions',
                        'Shares Short (as of',
                        'Short Ratio',
                        'Short % of Float',
                        'Shares Short (prior ',
                        'Current Price',
                        'Shares Outstanding' ]


In [11]:
# Read in SP500 data and stock data, parsing the dates.
sp500_raw_data = pd.read_csv("sp500_index.csv", index_col="Date", parse_dates=True)
stock_raw_data = pd.read_csv("stock_prices.csv", index_col="Date", parse_dates=True)

In [12]:
# We will reindex to include the weekends.
start_date = str(stock_raw_data.index[0])
end_date = str(stock_raw_data.index[-1])
idx = pd.date_range(start_date, end_date)
sp500_raw_data = sp500_raw_data.reindex(idx)
stock_raw_data = stock_raw_data.reindex(idx)

In [13]:
# Now the weekends are NaN, so we fill forward these NaNs
# (i.e weekends take the value of Friday's adjusted close).
sp500_raw_data.ffill(inplace=True)
stock_raw_data.ffill(inplace=True)
# The tickers whose data is to be parsed.
stock_list = [x[0] for x in os.walk(statspath)]
stock_list = stock_list[1:]

In [14]:
 # Creating a new dataframe which we will later fill.
df_columns = [
    "Date",
    "Unix",
    "Ticker",
    "Price",
    "stock_p_change",
    "SP500",
    "SP500_p_change",
] + features

df = pd.DataFrame(columns=df_columns)

In [18]:
# tqdm is a simple progress bar
for stock_directory in tqdm(stock_list, desc="Parsing progress:", unit="tickers"):
    keystats_html_files = os.listdir(stock_directory)

    ticker = stock_directory.split(statspath)[1]

    for file in keystats_html_files:
            # Convert the datetime format of our file to unix time
        date_stamp = datetime.strptime(file, "%Y%m%d%H%M%S.html")
        unix_time = time.mktime(date_stamp.timetuple())

            # Read in the html file as a string.
        full_file_path = stock_directory + "/" + file

            # This will store the parsed values
        value_list = []

        with open(full_file_path, "r") as source:
            source = source.read()
                # Remove commas from the html to make parsing easier.
            source = source.replace(",", "")

                # Regex search for the different variables in the html file, then append to value_list
            for variable in features:
                if variable == 'Current Price':
                    try:
                        value_list.append(0)
                    except KeyError:
                        continue
                else:
                
                    
                    # Search for the table entry adjacent to the variable name.
                    try:
                        regex = (
                            r">"
                            + re.escape(variable)
                             + r".*?(\-?\d+\.*\d*K?M?B?|N/A[\\n|\s]*|>0|NaN)%?"
                             r"(</td>|</span>)"
                        )
                        value = re.search(regex, source, flags=re.DOTALL).group(1)

                        # Dealing with number formatting
                        value_list.append(data_string_to_float(value))

                    # The data may not be present. Process accordingly
                    except AttributeError:
                        # In the past, 'Avg Vol' was instead named 'Average Volume'
                        # If 'Avg Vol' fails, search for 'Average Volume'.
                        if variable == "Avg Vol (3 month)":
                            try:
                                new_variable = ">Average Volume (3 month)"
                                regex = (
                                    re.escape(new_variable)
                                    + r".*?(\-?\d+\.*\d*K?M?B?|N/A[\\n|\s]*|>0)%?"
                                    r"(</td>|</span>)"
                                )
                                value = re.search(regex, source, flags=re.DOTALL).group(
                                       1
                                )
                                value_list.append(data_string_to_float(value))
                            except AttributeError:
                                value_list.append("N/A")
                        else:
                            value_list.append("N/A")

            current_date = datetime.fromtimestamp(unix_time).strftime("%Y-%m-%d")
            one_year_later = datetime.fromtimestamp(unix_time + 31536000).strftime(
                 "%Y-%m-%d"
            )

            # SP500 prices now and one year later, and the percentage change
            sp500_price = float(sp500_raw_data.loc[current_date, "Adj Close"])
            sp500_1y_price = float(sp500_raw_data.loc[one_year_later, "Adj Close"])
            sp500_p_change = round(
                ((sp500_1y_price - sp500_price) / sp500_price * 100), 2
            )

            # Stock prices now and one year later. We need a try/except because some data is missing
            stock_price, stock_1y_price = "N/A", "N/A"
            try:
                stock_price = float(stock_raw_data.loc[current_date, ticker.upper()])
                stock_1y_price = float(stock_raw_data.loc[one_year_later, ticker.upper()])
            except KeyError:
                # If stock data is missing, we must skip this datapoint
                continue

            stock_p_change = round(
                ((stock_1y_price - stock_price) / stock_price * 100), 2
            )
        # Append all our data to the dataframe.
            new_df_row = [
                date_stamp,
                unix_time,
                ticker,
                stock_price,
                stock_p_change,
                sp500_price,
                sp500_p_change,
            ] + value_list
            df = df.append(dict(zip(df_columns, new_df_row)), ignore_index=True)
    # Remove rows with missing stock price data
    df.dropna(axis=0, subset=["Price", "stock_p_change"], inplace=True)
    # Output the CSV
    df.to_csv("keystats.csv", index=False)

Parsing progress:: 100%|████████████████████████████████████████████████████████| 560/560 [07:18<00:00,  1.28tickers/s]


In [19]:
def status_calc(stock, sp500, outperformance=10):
    """A simple function to classify whether a stock outperformed the S&P500
    :param stock: stock price
    :param sp500: S&P500 price
    :param outperformance: stock is classified 1 if stock price > S&P500 price + outperformance
    :return: true/false
    """
    if outperformance < 0:
        raise ValueError("outperformance must be positive")
    return stock - sp500 >= outperformance

In [20]:
def Build_Data_Set():
    data_df = pd.read_csv("keystats.csv")
    data_df.drop_duplicates()
    data_df.dropna(inplace=True)
    data_df = data_df.reindex(np.random.permutation(data_df.index))
    features = data_df.columns[4:]
    X = np.array(data_df[features].values)
    X = preprocessing.scale(X)

    y = list(
        status_calc(
            data_df["stock_p_change"], data_df["SP500_p_change"], outperformance=10
        )
    )


    Z = np.array(data_df[["stock_p_change","SP500_p_change"]])


    return X,y,Z

test_size = 1000

invest_amount = 10000
total_invests = 0
if_market = 0
if_strat = 0



    
X, y, Z = Build_Data_Set()
print(len(X))

    
clf = RandomForestClassifier(n_estimators=100, random_state=1)
clf.fit(X[:-test_size],y[:-test_size])

correct_count = 0

for x in range(1, test_size+1):
    if clf.predict([X[-x]])[0] == y[-x]:
        correct_count += 1

    if clf.predict([X[-x]])[0] == 1:
        invest_return = invest_amount + (invest_amount * (Z[-x][0]/100))
        market_return = invest_amount + (invest_amount * (Z[-x][1]/100))
        total_invests += 1
        if_market += market_return
        if_strat += invest_return
            

print("Accuracy:", (correct_count/test_size) * 100.00)

print("Total Trades:", total_invests)
print("Ending with Strategy:",if_strat)
print("Ending with Market:",if_market)

compared = ((if_strat - if_market) / if_market) * 100.0
do_nothing = total_invests * invest_amount

avg_market = ((if_market - do_nothing) / do_nothing) * 100.0
avg_strat = ((if_strat - do_nothing) / do_nothing) * 100.0


    
print("Compared to market, we earn",str(compared)+"% more")
print("Average investment return:", str(avg_strat)+"%")
print("Average market return:", str(avg_market)+"%")



3532
Accuracy: 92.4
Total Trades: 300
Ending with Strategy: 4344890.0
Ending with Market: 3410055.0
Compared to market, we earn 27.414073966548923% more
Average investment return: 44.82966666666667%
Average market return: 13.6685%


In [21]:
data_df = pd.read_csv("keystats.csv")
data_df.drop_duplicates()
data_df.dropna(inplace=True)
data_df = data_df.reindex(np.random.permutation(data_df.index))
features = data_df.columns[4:]
X = np.array(data_df[features].values)
sc = MinMaxScaler(feature_range=(0,1))
X = sc.fit_transform(X)
y = np.array(
    status_calc(
        data_df["stock_p_change"], data_df["SP500_p_change"], outperformance=10
    )
)
y.astype(int)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))
model = Sequential()
model.add(LSTM(units=50,return_sequences=True,input_shape=(X.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam',loss='mean_squared_error')
model.fit(X,y,epochs=100,batch_size=32)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1a941ee3d60>

In [22]:
if not os.path.exists("forward_json/"):
    os.makedirs("forward_json/")

    # Retrieve a list of tickers from the fundamental data folder
ticker_list = os.listdir(statspath)

In [23]:
for ticker in tqdm(ticker_list, desc="Download progress:", unit="tickers"):
        try:
            link = f"https://query2.finance.yahoo.com/v10/finance/quoteSummary/"+ticker.upper()+"?modules=assetProfile,financialData,defaultKeyStatistics,calendarEvents,incomeStatementHistory,cashflowStatementHistory,balanceSheetHistory"
            resp = urllib.request.urlopen(link).read()

            # Write results to forward/
            save = f"forward_json/"+str(ticker)+".json"
            store = open(save,"w")
            store.write(str(resp))
            store.close()
        except Exception as e:
            print(f"{ticker}: {str(e)}\n")
            time.sleep(2)

Download progress::   1%|▌                                                        | 6/560 [00:01<02:55,  3.15tickers/s]

ace: HTTP Error 404: Not Found



Download progress::   4%|██                                                      | 20/560 [00:07<02:33,  3.52tickers/s]

aet: HTTP Error 404: Not Found



Download progress::   4%|██▏                                                     | 22/560 [00:10<05:55,  1.51tickers/s]

agn: HTTP Error 404: Not Found



Download progress::   5%|██▉                                                     | 29/560 [00:13<03:20,  2.64tickers/s]

alxn: HTTP Error 404: Not Found



Download progress::   7%|███▊                                                    | 38/560 [00:18<02:40,  3.26tickers/s]

ann: HTTP Error 404: Not Found



Download progress::   7%|████                                                    | 41/560 [00:20<04:59,  1.73tickers/s]

apc: HTTP Error 404: Not Found



Download progress::   8%|████▍                                                   | 44/560 [00:23<05:51,  1.47tickers/s]

apol: HTTP Error 404: Not Found



Download progress::   8%|████▌                                                   | 45/560 [00:25<09:40,  1.13s/tickers]

arg: HTTP Error 404: Not Found



Download progress::   8%|████▌                                                   | 46/560 [00:28<12:30,  1.46s/tickers]

arna: HTTP Error 404: Not Found



Download progress::   8%|████▋                                                   | 47/560 [00:30<14:38,  1.71s/tickers]

aro: HTTP Error 404: Not Found



Download progress::   9%|█████                                                   | 51/560 [00:33<06:37,  1.28tickers/s]

avp: HTTP Error 404: Not Found



Download progress::  11%|█████▉                                                  | 59/560 [00:37<02:44,  3.04tickers/s]

bbry: HTTP Error 404: Not Found



Download progress::  11%|██████                                                  | 60/560 [00:39<07:34,  1.10tickers/s]

bbt: HTTP Error 404: Not Found



Download progress::  11%|██████▏                                                 | 62/560 [00:42<08:38,  1.04s/tickers]

bcr: HTTP Error 404: Not Found



Download progress::  12%|██████▋                                                 | 67/560 [00:45<04:13,  1.95tickers/s]

bhi: HTTP Error 404: Not Found



Download progress::  13%|███████                                                 | 71/560 [00:48<04:06,  1.99tickers/s]

bks: HTTP Error 404: Not Found



Download progress::  13%|███████▎                                                | 73/560 [00:50<06:31,  1.24tickers/s]

bll: HTTP Error 404: Not Found



Download progress::  13%|███████▍                                                | 74/560 [00:53<10:04,  1.24s/tickers]

bmc: HTTP Error 404: Not Found



Download progress::  14%|███████▋                                                | 77/560 [00:55<07:00,  1.15tickers/s]

brcm: HTTP Error 404: Not Found



Download progress::  15%|████████▍                                               | 84/560 [00:59<02:57,  2.68tickers/s]

ca: HTTP Error 404: Not Found



Download progress::  15%|████████▌                                               | 85/560 [01:01<07:26,  1.06tickers/s]

cab: HTTP Error 404: Not Found



Download progress::  16%|████████▊                                               | 88/560 [01:04<06:08,  1.28tickers/s]

cam: HTTP Error 404: Not Found



Download progress::  16%|█████████▏                                              | 92/560 [01:07<04:51,  1.61tickers/s]

cbg: HTTP Error 404: Not Found



Download progress::  17%|█████████▎                                              | 93/560 [01:09<08:28,  1.09s/tickers]

cbs: HTTP Error 404: Not Found



Download progress::  17%|█████████▍                                              | 94/560 [01:12<11:10,  1.44s/tickers]

cce: HTTP Error 404: Not Found



Download progress::  17%|█████████▋                                              | 97/560 [01:14<07:11,  1.07tickers/s]

celg: HTTP Error 404: Not Found



Download progress::  18%|█████████▊                                              | 98/560 [01:16<10:00,  1.30s/tickers]

cern: HTTP Error 404: Not Found



Download progress::  18%|█████████▊                                             | 100/560 [01:19<09:14,  1.21s/tickers]

cfn: HTTP Error 404: Not Found



Download progress::  21%|███████████▋                                           | 119/560 [01:26<01:50,  4.00tickers/s]

cog: HTTP Error 404: Not Found



Download progress::  21%|███████████▊                                           | 120/560 [01:28<06:08,  1.19tickers/s]

coh: HTTP Error 404: Not Found



Download progress::  22%|███████████▉                                           | 121/560 [01:31<09:11,  1.26s/tickers]

col: HTTP Error 404: Not Found



Download progress::  22%|████████████▏                                          | 124/560 [01:34<07:05,  1.03tickers/s]

cov: HTTP Error 404: Not Found



Download progress::  23%|████████████▍                                          | 127/560 [01:36<05:29,  1.31tickers/s]

csc: HTTP Error 404: Not Found



Download progress::  23%|████████████▊                                          | 131/560 [01:39<04:10,  1.71tickers/s]

ctl: HTTP Error 404: Not Found



Download progress::  24%|█████████████▏                                         | 134/560 [01:42<04:56,  1.44tickers/s]

cvc: HTTP Error 404: Not Found



Download progress::  26%|██████████████                                         | 143/560 [01:46<02:28,  2.82tickers/s]

df: HTTP Error 404: Not Found



Download progress::  27%|██████████████▋                                        | 150/560 [01:51<02:38,  2.58tickers/s]

disca: HTTP Error 404: Not Found



Download progress::  27%|██████████████▉                                        | 152/560 [01:53<04:56,  1.37tickers/s]

dlph: HTTP Error 404: Not Found



Download progress::  28%|███████████████▎                                       | 156/560 [01:56<03:54,  1.72tickers/s]

dnr: HTTP Error 404: Not Found



Download progress::  29%|███████████████▋                                       | 160/560 [01:59<03:43,  1.79tickers/s]

dps: HTTP Error 404: Not Found



Download progress::  29%|███████████████▉                                       | 162/560 [02:02<05:27,  1.21tickers/s]

dsw: HTTP Error 404: Not Found



Download progress::  29%|████████████████                                       | 164/560 [02:04<06:19,  1.04tickers/s]

dtv: HTTP Error 404: Not Found



Download progress::  31%|████████████████▊                                      | 171/560 [02:08<02:46,  2.34tickers/s]

ecyt: HTTP Error 404: Not Found



Download progress::  31%|█████████████████▎                                     | 176/560 [02:12<02:37,  2.44tickers/s]

emc: HTTP Error 404: Not Found



Download progress::  32%|█████████████████▉                                     | 182/560 [02:15<02:28,  2.55tickers/s]

esrx: HTTP Error 404: Not Found



Download progress::  33%|█████████████████▉                                     | 183/560 [02:17<05:55,  1.06tickers/s]

esv: HTTP Error 404: Not Found



Download progress::  33%|██████████████████                                     | 184/560 [02:20<08:18,  1.33s/tickers]

etfc: HTTP Error 404: Not Found



Download progress::  35%|███████████████████                                    | 194/560 [02:25<02:01,  3.00tickers/s]

fb: HTTP Error 404: Not Found



Download progress::  35%|███████████████████▎                                   | 196/560 [02:27<04:17,  1.41tickers/s]

fdo: HTTP Error 404: Not Found



Download progress::  37%|████████████████████▏                                  | 205/560 [02:32<02:03,  2.87tickers/s]

flir: HTTP Error 404: Not Found



Download progress::  38%|████████████████████▋                                  | 211/560 [02:35<02:00,  2.89tickers/s]

frx: HTTP Error 404: Not Found



Download progress::  38%|█████████████████████                                  | 214/560 [02:38<03:27,  1.67tickers/s]

ftr: HTTP Error 404: Not Found



Download progress::  38%|█████████████████████                                  | 215/560 [02:40<06:09,  1.07s/tickers]

gas: HTTP Error 404: Not Found



Download progress::  40%|██████████████████████                                 | 224/560 [02:44<02:01,  2.78tickers/s]

gmcr: HTTP Error 404: Not Found



Download progress::  42%|███████████████████████▎                               | 237/560 [02:50<01:22,  3.91tickers/s]

har: HTTP Error 404: Not Found



Download progress::  43%|███████████████████████▌                               | 240/560 [02:52<02:54,  1.83tickers/s]

hcbk: HTTP Error 404: Not Found



Download progress::  43%|███████████████████████▋                               | 241/560 [02:55<05:33,  1.05s/tickers]

hcn: HTTP Error 404: Not Found



Download progress::  44%|████████████████████████▎                              | 248/560 [02:58<02:05,  2.49tickers/s]

hot: HTTP Error 404: Not Found



Download progress::  45%|████████████████████████▉                              | 254/560 [03:02<02:09,  2.36tickers/s]

hrs: HTTP Error 404: Not Found



Download progress::  46%|█████████████████████████                              | 255/560 [03:05<05:06,  1.01s/tickers]

hsp: HTTP Error 404: Not Found



Download progress::  49%|███████████████████████████                            | 275/560 [03:12<01:19,  3.61tickers/s]

jcp: HTTP Error 404: Not Found



Download progress::  49%|███████████████████████████                            | 276/560 [03:14<04:05,  1.16tickers/s]

jdsu: HTTP Error 404: Not Found



Download progress::  49%|███████████████████████████▏                           | 277/560 [03:16<05:59,  1.27s/tickers]

jec: HTTP Error 404: Not Found



Download progress::  50%|███████████████████████████▌                           | 280/560 [03:19<04:26,  1.05tickers/s]

josb: HTTP Error 404: Not Found



Download progress::  50%|███████████████████████████▌                           | 281/560 [03:21<06:09,  1.32s/tickers]

joy: HTTP Error 404: Not Found



Download progress::  52%|████████████████████████████▊                          | 293/560 [03:27<01:27,  3.05tickers/s]

krft: HTTP Error 404: Not Found



Download progress::  53%|████████████████████████████▉                          | 295/560 [03:29<03:18,  1.34tickers/s]

ksu: HTTP Error 404: Not Found



Download progress::  54%|█████████████████████████████▋                         | 302/560 [03:33<01:42,  2.51tickers/s]

lltc: HTTP Error 404: Not Found



Download progress::  54%|█████████████████████████████▊                         | 304/560 [03:36<03:05,  1.38tickers/s]

lm: HTTP Error 404: Not Found



Download progress::  55%|██████████████████████████████▏                        | 307/560 [03:38<02:53,  1.46tickers/s]

lo: HTTP Error 404: Not Found



Download progress::  56%|██████████████████████████████▌                        | 311/560 [03:42<02:31,  1.64tickers/s]

ltd: HTTP Error 404: Not Found



Download progress::  56%|██████████████████████████████▋                        | 312/560 [03:44<04:33,  1.10s/tickers]

luk: HTTP Error 404: Not Found



Download progress::  58%|███████████████████████████████▉                       | 325/560 [03:49<01:09,  3.37tickers/s]

mcp: HTTP Error 404: Not Found



Download progress::  59%|████████████████████████████████▍                      | 330/560 [03:53<01:42,  2.24tickers/s]

mhfi: HTTP Error 404: Not Found



Download progress::  59%|████████████████████████████████▌                      | 331/560 [03:55<03:49,  1.00s/tickers]

mjn: HTTP Error 404: Not Found



Download progress::  60%|█████████████████████████████████                      | 337/560 [03:59<01:54,  1.95tickers/s]

molx: HTTP Error 404: Not Found



Download progress::  62%|██████████████████████████████████▎                    | 349/560 [04:04<00:56,  3.74tickers/s]

mwv: HTTP Error 404: Not Found



Download progress::  62%|██████████████████████████████████▍                    | 350/560 [04:06<02:59,  1.17tickers/s]

myl: HTTP Error 404: Not Found



Download progress::  63%|██████████████████████████████████▍                    | 351/560 [04:09<04:25,  1.27s/tickers]

nbl: HTTP Error 404: Not Found



Download progress::  64%|███████████████████████████████████▏                   | 358/560 [04:12<01:22,  2.45tickers/s]

nfx: HTTP Error 404: Not Found



Download progress::  66%|████████████████████████████████████▏                  | 369/560 [04:17<00:57,  3.35tickers/s]

ntri: HTTP Error 404: Not Found



Download progress::  67%|█████████████████████████████████████                  | 377/560 [04:21<01:01,  2.99tickers/s]

nyx: HTTP Error 404: Not Found



Download progress::  69%|█████████████████████████████████████▋                 | 384/560 [04:25<00:54,  3.20tickers/s]

p: HTTP Error 404: Not Found



Download progress::  69%|█████████████████████████████████████▉                 | 386/560 [04:28<02:05,  1.39tickers/s]

pbct: HTTP Error 404: Not Found



Download progress::  70%|██████████████████████████████████████▎                | 390/560 [04:31<01:37,  1.74tickers/s]

pcl: HTTP Error 404: Not Found



Download progress::  70%|██████████████████████████████████████▍                | 391/560 [04:33<03:02,  1.08s/tickers]

pcln: HTTP Error 404: Not Found



Download progress::  70%|██████████████████████████████████████▌                | 392/560 [04:35<04:01,  1.44s/tickers]

pcp: HTTP Error 404: Not Found



Download progress::  71%|██████████████████████████████████████▉                | 396/560 [04:38<01:59,  1.38tickers/s]

petm: HTTP Error 404: Not Found



Download progress::  73%|████████████████████████████████████████▎              | 411/560 [04:44<00:38,  3.91tickers/s]

pom: HTTP Error 404: Not Found



Download progress::  75%|█████████████████████████████████████████              | 418/560 [04:48<00:44,  3.20tickers/s]

pvtb: HTTP Error 404: Not Found



Download progress::  76%|█████████████████████████████████████████▋             | 424/560 [04:51<00:47,  2.86tickers/s]

qep: HTTP Error 404: Not Found



Download progress::  76%|█████████████████████████████████████████▊             | 426/560 [04:54<01:37,  1.37tickers/s]

rai: HTTP Error 404: Not Found



Download progress::  76%|█████████████████████████████████████████▉             | 427/560 [04:56<02:35,  1.17s/tickers]

rdc: HTTP Error 404: Not Found



Download progress::  77%|██████████████████████████████████████████▏            | 430/560 [04:59<01:52,  1.16tickers/s]

rht: HTTP Error 404: Not Found



Download progress::  78%|██████████████████████████████████████████▉            | 437/560 [05:03<00:47,  2.57tickers/s]

rsh: HTTP Error 404: Not Found



Download progress::  78%|███████████████████████████████████████████            | 438/560 [05:05<01:54,  1.07tickers/s]

rtn: HTTP Error 404: Not Found



Download progress::  79%|███████████████████████████████████████████▍           | 442/560 [05:08<01:08,  1.72tickers/s]

scg: HTTP Error 404: Not Found



Download progress::  80%|████████████████████████████████████████████           | 448/560 [05:12<00:49,  2.26tickers/s]

sfly: HTTP Error 404: Not Found



Download progress::  80%|████████████████████████████████████████████           | 449/560 [05:14<01:47,  1.03tickers/s]

shld: HTTP Error 404: Not Found



Download progress::  81%|████████████████████████████████████████████▎          | 451/560 [05:16<01:50,  1.01s/tickers]

sial: HTTP Error 404: Not Found



Download progress::  81%|████████████████████████████████████████████▌          | 454/560 [05:19<01:29,  1.18tickers/s]

sks: HTTP Error 404: Not Found



Download progress::  82%|████████████████████████████████████████████▉          | 458/560 [05:22<01:04,  1.58tickers/s]

sndk: HTTP Error 404: Not Found



Download progress::  82%|█████████████████████████████████████████████          | 459/560 [05:25<01:50,  1.10s/tickers]

sne: HTTP Error 404: Not Found



Download progress::  82%|█████████████████████████████████████████████▏         | 460/560 [05:27<02:22,  1.42s/tickers]

sni: HTTP Error 404: Not Found



Download progress::  82%|█████████████████████████████████████████████▎         | 461/560 [05:29<02:44,  1.66s/tickers]

snts: HTTP Error 404: Not Found



Download progress::  83%|█████████████████████████████████████████████▌         | 464/560 [05:32<01:39,  1.04s/tickers]

spls: HTTP Error 404: Not Found



Download progress::  83%|█████████████████████████████████████████████▊         | 467/560 [05:35<01:17,  1.19tickers/s]

sti: HTTP Error 404: Not Found



Download progress::  84%|█████████████████████████████████████████████▉         | 468/560 [05:37<01:56,  1.27s/tickers]

stj: HTTP Error 404: Not Found



Download progress::  85%|██████████████████████████████████████████████▌        | 474/560 [05:40<00:39,  2.17tickers/s]

swy: HTTP Error 404: Not Found



Download progress::  85%|██████████████████████████████████████████████▊        | 476/560 [05:43<01:04,  1.30tickers/s]

symc: HTTP Error 404: Not Found



Download progress::  86%|███████████████████████████████████████████████▏       | 481/560 [05:46<00:39,  1.98tickers/s]

te: HTTP Error 404: Not Found



Download progress::  86%|███████████████████████████████████████████████▎       | 482/560 [05:48<01:20,  1.03s/tickers]

teg: HTTP Error 404: Not Found



Download progress::  87%|███████████████████████████████████████████████▊       | 487/560 [05:52<00:38,  1.91tickers/s]

tibx: HTTP Error 404: Not Found



Download progress::  87%|███████████████████████████████████████████████▉       | 488/560 [05:54<01:13,  1.02s/tickers]

tif: HTTP Error 404: Not Found



Download progress::  88%|████████████████████████████████████████████████▏      | 491/560 [05:57<00:57,  1.21tickers/s]

tmk: HTTP Error 404: Not Found



Download progress::  89%|████████████████████████████████████████████████▉      | 498/560 [06:00<00:23,  2.69tickers/s]

tso: HTTP Error 404: Not Found



Download progress::  89%|█████████████████████████████████████████████████      | 499/560 [06:03<00:57,  1.06tickers/s]

tss: HTTP Error 404: Not Found



Download progress::  89%|█████████████████████████████████████████████████      | 500/560 [06:05<01:20,  1.35s/tickers]

twc: HTTP Error 404: Not Found



Download progress::  89%|█████████████████████████████████████████████████▏     | 501/560 [06:07<01:34,  1.61s/tickers]

twx: HTTP Error 404: Not Found



Download progress::  90%|█████████████████████████████████████████████████▌     | 504/560 [06:10<00:56,  1.01s/tickers]

tyc: HTTP Error 404: Not Found



Download progress::  91%|██████████████████████████████████████████████████▎    | 512/560 [06:14<00:16,  2.94tickers/s]

utx: HTTP Error 404: Not Found



Download progress::  92%|██████████████████████████████████████████████████▌    | 515/560 [06:17<00:26,  1.68tickers/s]

var: HTTP Error 404: Not Found



Download progress::  92%|██████████████████████████████████████████████████▊    | 517/560 [06:19<00:35,  1.23tickers/s]

viab: HTTP Error 404: Not Found



Download progress::  92%|██████████████████████████████████████████████████▉    | 518/560 [06:21<00:52,  1.25s/tickers]

vitc: HTTP Error 404: Not Found



Download progress::  93%|███████████████████████████████████████████████████▎   | 522/560 [06:24<00:26,  1.42tickers/s]

vprt: HTTP Error 404: Not Found



Download progress::  94%|███████████████████████████████████████████████████▋   | 526/560 [06:27<00:18,  1.85tickers/s]

wag: HTTP Error 404: Not Found



Download progress::  95%|████████████████████████████████████████████████████▏  | 531/560 [06:31<00:13,  2.16tickers/s]

wfm: HTTP Error 404: Not Found



Download progress::  95%|████████████████████████████████████████████████████▍  | 534/560 [06:33<00:15,  1.66tickers/s]

win: HTTP Error 404: Not Found



Download progress::  96%|████████████████████████████████████████████████████▌  | 535/560 [06:36<00:27,  1.09s/tickers]

wlp: HTTP Error 404: Not Found



Download progress::  96%|████████████████████████████████████████████████████▉  | 539/560 [06:39<00:14,  1.47tickers/s]

wpo: HTTP Error 404: Not Found



Download progress::  96%|█████████████████████████████████████████████████████  | 540/560 [06:41<00:23,  1.17s/tickers]

wpx: HTTP Error 404: Not Found



Download progress::  97%|█████████████████████████████████████████████████████▍ | 544/560 [06:44<00:10,  1.47tickers/s]

wyn: HTTP Error 404: Not Found



Download progress::  98%|█████████████████████████████████████████████████████▉ | 549/560 [06:47<00:05,  2.06tickers/s]

xlnx: HTTP Error 404: Not Found



Download progress::  99%|██████████████████████████████████████████████████████▍| 554/560 [06:51<00:02,  2.12tickers/s]

yhoo: HTTP Error 404: Not Found



Download progress::  99%|██████████████████████████████████████████████████████▋| 557/560 [06:54<00:01,  1.54tickers/s]

zlc: HTTP Error 404: Not Found



Download progress:: 100%|██████████████████████████████████████████████████████▊| 558/560 [06:56<00:02,  1.10s/tickers]

zmh: HTTP Error 404: Not Found



Download progress:: 100%|██████████████████████████████████████████████████████▉| 559/560 [06:58<00:01,  1.44s/tickers]

znga: HTTP Error 404: Not Found



Download progress:: 100%|███████████████████████████████████████████████████████| 560/560 [07:00<00:00,  1.33tickers/s]


In [24]:
gather=['debtToEquity',
            'Trailing P/E', ## Added a custom calculation, look down
            'priceToBook',
            'profitMargins',
            'operatingMargins',
            'returnOnAssets',
            'returnOnEquity',
            'revenuePerShare',
            'Market Cap', ## Leaving this here to avoid changing all the numbering for the list
            'enterpriseValue',
            'forwardPE',
            'pegRatio',
            'enterpriseToRevenue',
            'enterpriseToEbitda',
            'totalRevenue',
            'grossProfit',
            'ebitda',
            'netIncomeToCommon',
            'trailingEps',
            'earningsGrowth',
            'revenueGrowth',
            'totalCash',
            'totalCashPerShare',
            'totalDebt',
            'currentRatio',
            'bookValue',
            'operatingCashflow',
            'beta',
            'heldPercentInsiders',
            'heldPercentInstitutions',
            'sharesShort',
            'shortRatio',
            'shortPercentOfFloat',
            'sharesShortPriorMonth',
			'currentPrice',
			'sharesOutstanding']

In [25]:
df = pd.DataFrame(columns = ['Date',
'Unix',
'Ticker',
'Price',
'stock_p_change',
'SP500',
'SP500_p_change',
##############
'DE Ratio',
'Trailing P/E',
'Price/Book',
'Profit Margin',
'Operating Margin',
'Return on Assets',
'Return on Equity',
'Revenue Per Share',
'Market Cap',
'Enterprise Value',
'Forward P/E',
'PEG Ratio',
'Enterprise Value/Revenue',
'Enterprise Value/EBITDA',
'Revenue',
'Gross Profit',
'EBITDA',
'Net Income Avl to Common ',
'Diluted EPS',
'Earnings Growth',
'Revenue Growth',
'Total Cash',
'Total Cash Per Share',
'Total Debt',
'Current Ratio',
'Book Value Per Share',
'Cash Flow',
'Beta',
'Held by Insiders',
'Held by Institutions',
'Shares Short (as of',
'Short Ratio',
'Short % of Float',
'Shares Short (prior ',
'Current Price',
'Shares Outstanding',                              
])

In [26]:
tickerfile_list = os.listdir("forward_json/")

In [27]:
 for tickerfile in tqdm(tickerfile_list, desc="Parsing progress:", unit="tickers"):
        ticker = tickerfile.split(".json")[0].upper()
        source = open(f"forward_json/{tickerfile}").read()
        # Remove commas from the html to make parsing easier.
        source = source.replace(",", "")

        # Regex search for the different variables in the html file, then append to value_list
        value_list = []
        for variable in gather:
            try:    

                regex = re.escape(variable) + r'.*?"(-?\d{1,8}\.\d{1,8}M?B?K?|N/A)%?'
                value = re.search(regex, source)
                value = value.group(1)

                if "B" in value:
                    value = float(value.replace("B",'')) * 1000000000

                elif "M" in value:
                    value = float(value.replace("M",'')) * 1000000

                elif "K" in value:
                    value = float(value.replace("K",'')) * 1000

                value_list.append(value)

            except Exception as e:
                value = "N/A"
                value_list.append(value)

        if value_list.count("N/A") > 15:
		        pass

        else:

            df = df.append({'Date':0,
		                        'Unix':0,
		                        'Ticker':ticker, ## Getting Only The Stock Name, not 'json'
		                        'Price':0,
		                        'stock_p_change':0,
		                        'SP500':0,
                                'SP500_p_change':0,
		                        'DE Ratio':value_list[0],
		                        'Trailing P/E':float(value_list[34]) / float(value_list[18]) ,
		                        'Price/Book':value_list[2],
		                        'Profit Margin':value_list[3],
		                        'Operating Margin':value_list[4],
		                        'Return on Assets':value_list[5],
		                        'Return on Equity':value_list[6],
		                        'Revenue Per Share':value_list[7],
		                        'Market Cap': float(value_list[35]) * float(value_list[34]) , #Multiplying Shares Outstanding * Current Price to determine Market Cap
		                        'Enterprise Value':value_list[9],
		                        'Forward P/E':value_list[10],
		                        'PEG Ratio':value_list[11],
		                        'Enterprise Value/Revenue':value_list[12],
		                        'Enterprise Value/EBITDA':value_list[13],
		                        'Revenue':value_list[14],
		                        'Gross Profit':value_list[15],
		                        'EBITDA':value_list[16],
		                        'Net Income Avl to Common ':value_list[17],
		                        'Diluted EPS':value_list[18],
		                        'Earnings Growth':value_list[19],
		                        'Revenue Growth':value_list[20],
		                        'Total Cash':value_list[21],
		                        'Total Cash Per Share':value_list[22],
		                        'Total Debt':value_list[23],
		                        'Current Ratio':value_list[24],
		                        'Book Value Per Share':value_list[25],
		                        'Cash Flow':value_list[26],
		                        'Beta':value_list[27],
		                        'Held by Insiders':value_list[28],
		                        'Held by Institutions':value_list[29],
		                        'Shares Short (as of':value_list[30],
		                        'Short Ratio':value_list[31],
		                        'Short % of Float':value_list[32],
		                        'Shares Short (prior ':value_list[33],
                                'Current Price': value_list[34],
                                'Shares Outstanding': value_list[35]}, ignore_index = True)
df.to_csv("forward_sample.csv", index=False)

Parsing progress:: 100%|███████████████████████████████████████████████████████| 423/423 [00:03<00:00, 107.68tickers/s]


In [28]:
# The percentage by which a stock has to beat the S&P500 to be considered a 'buy'
OUTPERFORMANCE = 10


def build_data_set():
    """
    Reads the keystats.csv file and prepares it for scikit-learn
    :return: X_train and y_train numpy arrays
    """
    data_df = pd.read_csv("keystats.csv")
    data_df.drop_duplicates()
    data_df.dropna(inplace=True)
    data_df = data_df.reindex(np.random.permutation(data_df.index))
    features = data_df.columns[7:]
    X = np.array(data_df[features].values)
    X_train = data_df[features].values
    # Generate the labels: '1' if a stock beats the S&P500 by more than 10%, else '0'.
    y_train = list(
        status_calc(
            data_df["stock_p_change"],
            data_df["SP500_p_change"],
            OUTPERFORMANCE,
        )
    )

    return X_train, y_train

In [29]:
X_train, y_train = build_data_set()

clf = RandomForestClassifier(n_estimators=100, random_state=0, criterion = "log_loss")
clf.fit(X_train, y_train)

    # Now we get the actual data from which we want to generate predictions.
data = pd.read_csv("forward_sample.csv")
data.dropna(inplace=True)
data.drop_duplicates()
data= data.reindex(np.random.permutation(data.index))
features = data.columns[7:]
X_test = data[features].values
z = data["Ticker"].values

    # Get the predicted tickers
y_pred = clf.predict(X_test)
if sum(y_pred) == 0:
    print("No stocks predicted!")
else:
    invest_list = z[y_pred].tolist()
    pList = [*set(invest_list)]
    print(
        f"{len(pList)} stocks predicted to outperform the S&P500 by more than {OUTPERFORMANCE}%:"
    )
    print(" ".join(pList))

102 stocks predicted to outperform the S&P500 by more than 10%:
S AZO BBBY IGT ORLY DKS NUE BEAM FOSL NLY AEO HCP PBI GES WHR PLL SWN LLL BLK GPS GCI WU ALTR GME RL AIV BA PX TRIP APA HTZ HUM MAC BIG SAI LRCX WYNN ADT M CB OI BWA NILE CLDX CRM WTW RHI BTU NOK R WGO SD CNX HOV X GRPN PETS MAS CAMP AA GNW QDEL AN GS YUM CAH AIZ MON CLF HPQ DELL JWN CMI XRX CHK DDS CCL LH LIFE NE NBR TDC KSS INTU MTB EXPR ABC SCHL MSI SLM FTI BIIB GT MOS DO ANF PH UA AMP XL DLX SEE


In [30]:
OUTPERFORMANCE = 10
data_df = pd.read_csv("keystats.csv")
data_df.drop_duplicates()
data_df.dropna(inplace=True)
data_df = data_df.reindex(np.random.permutation(data_df.index))
features = data_df.columns[7:]
X = np.array(data_df[features].values)
sc = MinMaxScaler(feature_range=(0,1))
X = sc.fit_transform(X)
y_train = np.array(
    status_calc(
        data_df["stock_p_change"],
        data_df["SP500_p_change"],
        OUTPERFORMANCE,
    )
)
y_train = y_train.astype(int)
model = Sequential()
model.add(LSTM(units=50,return_sequences=True,input_shape=(X.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam',loss='mean_squared_error')
model.fit(X,y_train,epochs=100,batch_size=32)
y_pred = model.predict(X)
display(y_pred)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 99/100
Epoch 100/100


array([[0.33777282],
       [0.44410145],
       [0.3812386 ],
       ...,
       [0.08561456],
       [0.6341475 ],
       [0.34800845]], dtype=float32)