## This notebook is for pulling the data to make a prediction for a particular stock ticker

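Note: Before a prediction can be made, the raw price data must be passed through `add_columns` to compute the derived feature columns (`HighLowVolatility`, `HighOpenVolatility`, and the `AvgHighLowVolatility_N` averages) that the model takes as input.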

In [1]:
# Turn off Jedi-based tab completion for this IPython session.
# Editor convenience only; it has no effect on the analysis below.
%config Completer.use_jedi = False

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from joblib import load

In [3]:
def HighLowVolatility(df):
    """
    Add a 'HighLowVolatility' column: the day's high-low range as a
    fraction of the high, i.e. (High - Low) / High.

    The value is a proportion (0.05 means 5%); it is NOT multiplied by 100.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain numeric 'High' and 'Low' columns.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, modified in place, with the new float column.
    """
    # Cast defensively: a frame that was assembled row by row can carry
    # object-dtype columns, and the result should always be a float column.
    high = df['High'].astype(float)
    low = df['Low'].astype(float)

    # Computed column-wise rather than cell by cell. This avoids writing
    # floats into an integer placeholder column (a dtype upcast that recent
    # pandas versions deprecate) and does not depend on the index being
    # 0..n-1.
    df['HighLowVolatility'] = (high - low) / high

    return df

def HighOpenVolatility(df):
    """
    Add a 'HighOpenVolatility' column holding (High - Open) / Open:
    how far the day's high sits above the open, as a proportion of
    the open.

    The input frame is modified in place and is also returned.
    """
    open_price = df['Open']
    rise_to_high = df['High'] - open_price
    df['HighOpenVolatility'] = rise_to_high / open_price

    return df
        

def AvgHighLowVolatility(df, num_days=10):
    """
    Add an 'AvgHighLowVolatility_{num_days}' column: a trailing mean of
    the 'HighLowVolatility' column.

    For each row position i >= num_days the value is the mean of rows
    i - num_days through i, inclusive. The window therefore covers the
    current row plus the num_days rows before it (num_days + 1 values).
    The first num_days rows have no full window and are left at 0.0.

    NOTE(review): the window is num_days + 1 rows wide, not num_days.
    It is kept as-is so feature values do not change; the saved model
    presumably saw features computed this way. Confirm against the
    training code before altering it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must already contain a 'HighLowVolatility' column.
    num_days : int, default 10
        Number of preceding rows included alongside the current row.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, modified in place, with the new float column.
    """
    column = f'AvgHighLowVolatility_{num_days}'
    volatility = df['HighLowVolatility']
    n_rows = len(df)

    # Start from float zeros so the column is float from the outset and no
    # float is ever written into an integer placeholder column.
    averages = [0.0] * n_rows

    for i in range(num_days, n_rows):
        # Positional slice, end-exclusive, hence i + 1. It selects the same
        # rows as the label slice i-num_days..i on a 0..n-1 index, and it
        # also works when the index is not 0..n-1.
        averages[i] = volatility.iloc[i - num_days:i + 1].mean()

    df[column] = averages
    return df

def AvgHighLowVolatilityRecursion(df, list_of_num_days):
    """
    Apply AvgHighLowVolatility once per entry of list_of_num_days,
    feeding each call the frame returned by the previous one, and
    return the final frame.
    """
    result = df
    for window in list_of_num_days:
        result = AvgHighLowVolatility(result, window)
    return result



# Window sizes for which an AvgHighLowVolatility_<n> column is generated.
list_of_num_days = [3, 7, 10]

def add_columns(df):
    """
    Run the full feature pipeline on a price frame: HighLowVolatility,
    then HighOpenVolatility, then one AvgHighLowVolatility column per
    entry of the module-level list_of_num_days. Returns the resulting frame.
    """
    return AvgHighLowVolatilityRecursion(
        HighOpenVolatility(HighLowVolatility(df)),
        list_of_num_days,
    )

In [4]:
# Pull recent daily price history for a ticker from Yahoo Finance

def get_ticker_data(ticker):
    """
    Scrape the first 11 rows of a ticker's Yahoo Finance history table.

    Parameters
    ----------
    ticker : str
        Symbol interpolated into the Yahoo Finance history URL.

    Returns
    -------
    pandas.DataFrame or None
        A frame with columns Date, Open, High, Low, Close, Volume and a
        0..n-1 index, rows in the same order as on the page. Rows whose
        volume cell is not an integer are skipped. Returns None when the
        page yields fewer than 11 matching rows, or when any of those rows
        contains a '-' cell or does not have exactly 7 cells.

    Raises
    ------
    ValueError
        If an Open/High/Low/Close cell is not numeric.
    requests.exceptions.RequestException
        If the HTTP request fails or times out.

    NOTE(review): the returned frame is in page order (tables[0] first).
    If the page lists the newest day first, the last row, which is the
    one combine_for_results feeds to the model, is the oldest of the 11
    days. Confirm this matches how the model was trained.
    """
    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
    rows_needed = 11  # table rows 0..10 are read

    url = f'https://finance.yahoo.com/quote/{ticker}/history?p={ticker}'

    # requests waits indefinitely unless a timeout is given; bound the wait
    # so one unresponsive request cannot stall a scan over many tickers.
    html = requests.get(url, timeout=30).text
    soup = BeautifulSoup(html, 'lxml')
    tables = soup.find_all('tr', attrs={'class': "BdT Bdc($seperatorColor) Ta(end) Fz(s) Whs(nw)"})

    # Too few rows (unknown ticker, changed markup, short history): report
    # "no data" like the other unusable-page cases, instead of letting an
    # IndexError escape to a caller that does not catch it.
    if len(tables) < rows_needed:
        return None

    records = []
    # Rows are visited last-to-first, then the list is reversed, so both
    # the order in which problems are detected and the final row order
    # stay the same as before.
    for i in range(rows_needed - 1, -1, -1):
        curr_data = [x.text for x in tables[i]]
        if '-' in curr_data or len(curr_data) != 7:
            return None

        # Drop the sixth cell, which has no counterpart in `columns`
        # (presumably Yahoo's adjusted close; confirm).
        curr_data.pop(5)

        # Open, High, Low, Close: strip thousands separators, parse floats.
        for j in range(1, 5):
            curr_data[j] = float(curr_data[j].replace(',', ''))

        # Volume: rows whose volume is not an integer are skipped.
        try:
            curr_data[5] = int(curr_data[5].replace(',', ''))
        except ValueError:
            continue

        records.append(curr_data)

    records.reverse()

    # Build the frame once instead of growing it one row at a time.
    return pd.DataFrame(records, columns=columns)



In [5]:
def combine_for_results(ticker, prob=0.535):
    """
    Fetch a ticker's recent data, build the feature columns, and ask the
    loaded model whether the ticker is a buy.

    Because of how the model works, it is advised to not use a
    probability threshold exceeding 0.55, as there are not many tickers
    that output this probability.

    Relies on the module-level `model`, which must be loaded before this
    function is called.

    Parameters
    ----------
    ticker : str
        Symbol passed to get_ticker_data.
    prob : float, default 0.535
        Minimum class-1 probability for a 'Yes' prediction.

    Returns
    -------
    tuple
        ('Yes' or 'No', predicted probability) on success, or (0, 0) when
        no usable data is available for the ticker.
    """
    df = get_ticker_data(ticker)  # Got the ticker data

    # Handle "no data" explicitly. None means the page was unusable; an
    # empty frame (every row skipped) has no last row to predict from and
    # would otherwise raise a KeyError that nothing catches.
    if df is None or df.empty:
        return 0, 0

    try:
        # Add the feature columns and drop the date, which is not a feature.
        new_df = add_columns(df).drop('Date', axis=1)

        # Last row by position; same row as label shape[0]-1 on a 0..n-1 index.
        data_to_plug_in = new_df.iloc[-1]

        pred_prob = model.predict_proba([data_to_plug_in])[0][1]

        pred = 'No' if pred_prob < prob else 'Yes'

        return pred, pred_prob
    except (TypeError, AttributeError):
        # Best-effort contract kept from the original: a ticker whose data
        # cannot be processed is reported as "no result", not as a crash.
        return 0, 0
        

In [6]:
# Read the ticker universe; the symbols live in the CSV column named '0'.
health_care_tickers = pd.read_csv('HealthCareTickers.csv')

list_of_health_care_tickers = health_care_tickers.loc[:, '0'].tolist()

In [7]:
# Load the saved model that combine_for_results calls predict_proba on.
# NOTE: joblib.load unpickles the file, and unpickling can execute
# arbitrary code, so only load model files that come from a trusted source.
model = load('FinalStockModelHealth.joblib')

In [8]:
# Score every health-care ticker and keep the ones predicted 'Yes'.
list_of_ticks_to_buy = []

for tick in list_of_health_care_tickers:
    pred, prob_yes = combine_for_results(tick)
    if pred != 'Yes':
        continue
    list_of_ticks_to_buy.append((tick, prob_yes))

# Highest predicted probability first.
sorted_list = sorted(
    list_of_ticks_to_buy,
    key=lambda entry: entry[1],
    reverse=True,
)
sorted_list_series = pd.Series(sorted_list)

sorted_list_series.to_csv('March3BuysHealthCare.csv')