In [15]:
import yfinance as yf
import YFnews_scraper

In [16]:
def downloader(ticker, time):
    """Fetch daily price history for ``ticker`` through ``yf.download``.

    Args:
        ticker: Symbol (or list of symbols) forwarded as ``tickers``.
        time: Look-back period string forwarded as ``period`` (e.g. "1y").

    Returns:
        The object returned by ``yf.download`` for the request below.
    """
    request = {
        "tickers": ticker,
        "period": time,
        "interval": "1d",   # one bar per trading day
        "prepost": False,   # leave out pre/post-market hours data
        "repair": True,     # enable yfinance's `repair` option
    }
    return yf.download(**request)

def SMA(df):
    """Append simple moving averages of ``Close`` to ``df`` in place.

    Adds the columns ``sma200``, ``sma90`` and ``sma30`` (in that order).
    Each one is the rolling mean of ``Close`` over that many rows, so a
    column stays NaN until a full window of rows is available.

    Returns:
        The same DataFrame object that was passed in.
    """
    closing = df['Close']
    # (column name, window length) pairs, listed in insertion order.
    for column, window in (('sma200', 200), ('sma90', 90), ('sma30', 30)):
        df[column] = closing.rolling(window).mean()
    return df

def MACD(df, fast=12, slow=26, signal=9):
    """Append MACD indicator columns computed from ``Close`` to ``df`` in place.

    Columns added (in this order):
        macd   -- fast EMA of ``Close`` minus slow EMA of ``Close``
        macd_h -- ``macd`` minus ``macd_s`` (the histogram)
        macd_s -- EMA of ``macd`` (the signal / trigger line)

    Args:
        df: DataFrame with a ``Close`` column.
        fast: Span of the fast EMA (default 12).
        slow: Span of the slow EMA (default 26).
        signal: Span of the EMA applied to the MACD line (default 9).

    Returns:
        The same DataFrame object that was passed in.

    Each EMA uses ``adjust=False`` and ``min_periods`` equal to its span, so
    the leading rows of every column are NaN until enough observations exist.
    """
    close = df['Close']
    # Fast EMA of the closing price (12 rows by default).
    ema_fast = close.ewm(span=fast, adjust=False, min_periods=fast).mean()
    # Slow EMA of the closing price (26 rows by default).
    ema_slow = close.ewm(span=slow, adjust=False, min_periods=slow).mean()
    # MACD line: fast EMA minus slow EMA.
    macd = ema_fast - ema_slow
    # Signal line: EMA of the MACD line (9 rows by default).
    macd_s = macd.ewm(span=signal, adjust=False, min_periods=signal).mean()
    # Histogram: distance between the MACD line and its signal line.
    macd_h = macd - macd_s

    # The three Series already carry df's own index, so they can be assigned
    # directly. Routing them through df.index.map(series) performs a label
    # lookup that is unnecessary and raises when the index holds duplicates.
    df['macd'] = macd
    df['macd_h'] = macd_h
    df['macd_s'] = macd_s

    return df

def RSI(df):
    """Append an ``rsi`` column derived from ``Close`` to ``df`` in place.

    The row-to-row change in ``Close`` is split into gains and losses, each
    side is smoothed with an exponential moving average (``com=13``, i.e.
    alpha = 1/14, ``adjust=False``), and the ratio of the two averages is
    mapped to ``100 - 100 / (1 + ratio)``.

    The first row is NaN because it has no previous close to diff against.

    Returns:
        The same DataFrame object that was passed in.
    """
    change = df['Close'].diff()
    gains = change.clip(lower=0)
    losses = -1 * change.clip(upper=0)

    def smooth(series):
        # Exponential average shared by the gain and loss sides.
        return series.ewm(com=13, adjust=False).mean()

    relative_strength = smooth(gains) / smooth(losses)
    df['rsi'] = 100 - (100 / (1 + relative_strength))
    return df

def SMA_VOLUME(df):
    """Append ``volume90``, the 90-row rolling mean of ``Volume``, to ``df`` in place.

    The column is NaN until a full 90-row window is available.

    Returns:
        The same DataFrame object that was passed in.
    """
    window = 90
    averaged = df['Volume'].rolling(window).mean()
    df['volume90'] = averaged
    return df

def load_dataset(ticker, time):
    """Download price history for ``ticker`` and attach the indicator columns.

    Args:
        ticker: Symbol passed to ``downloader`` and stored in the ``ticker`` column.
        time: Look-back period string passed to ``downloader`` (e.g. "1y").

    Returns:
        A DataFrame sorted by ``Date`` containing the downloaded columns,
        a ``ticker`` column, and the columns added by ``SMA``, ``MACD``,
        ``RSI`` and ``SMA_VOLUME`` (in that order).
    """
    df = downloader(ticker, time)
    # Sort chronologically BEFORE computing anything: the rolling means and
    # exponential averages below depend on row order, so sorting afterwards
    # (as was done previously) could not protect them from unordered input.
    df = df.sort_values(by='Date')
    # apply naming
    df['ticker'] = ticker
    # Each helper adds its columns to df in place.
    SMA(df)          # simple moving averages of Close
    MACD(df)         # MACD line, histogram and signal line
    RSI(df)          # relative strength index
    SMA_VOLUME(df)   # moving average of Volume

    return df

In [17]:
import re

import numpy as np
import requests
from bs4 import BeautifulSoup
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

def _extract_headline(item_markup):
    """Return "title summary" from one serialized news-stream item, or "" if it has no title."""
    marker = '<u class="StretchedBox"></u>'
    marker_at = item_markup.find(marker)
    if marker_at == -1:
        return ""

    # The title is the text between the marker and the next closing </a>.
    after_marker = item_markup[marker_at + len(marker):]
    title_end = after_marker.find('</a>')
    if title_end == -1:
        return ""
    title = after_marker[:title_end]
    if not title:
        return ""

    # Skip the closing </a> plus the 8 characters of markup that follow it,
    # the same fixed offset the slicing has always used.
    # NOTE(review): the offset is tied to Yahoo's current markup; confirm if the page changes.
    rest = after_marker[title_end + len('</a>') + 8:]

    # The summary is the text between the next '">' and the next '</p>'.
    summary = ""
    open_at = rest.find('">')
    if open_at != -1:
        body = rest[open_at + 2:]
        close_at = body.find('</p>')
        if close_at != -1:
            summary = body[:close_at]

    return title + " " + summary


def scrapeNews(timeout=10):
    '''
    Description: Function that gathers the items currently listed on the
        Yahoo Finance news page.
    Inputs:
        - timeout: seconds to wait for the HTTP response before giving up
          (default 10). Without a timeout the request can block forever.
    Outputs:
        - List of strings, one per news item that has a title, each in the
          form "title summary" (the summary part is empty when none is found):
            ["title summary", "title2 summary2"]
    Raises:
        - requests.exceptions.RequestException when the request fails, times
          out, or the server answers with an HTTP error status.
    '''

    # Page that hosts the news stream
    url = "https://finance.yahoo.com/news/"

    # Fetch the page; fail loudly on HTTP errors instead of parsing an error page
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Parse the html content
    soup = BeautifulSoup(response.text, "lxml")
    # List of static news & ad results
    all_results = soup.find_all("li", attrs={"class": "js-stream-content Pos(r)"})

    # Serialize each item once and keep only those that yielded a title
    headlines = (_extract_headline(str(item)) for item in all_results)
    return [text for text in headlines if text]

def get_sequences(texts, tokenizer=None, maxlen=53):
    """Convert texts to fixed-length integer sequences for the sentiment model.

    Args:
        texts: List of strings to encode.
        tokenizer: Optional, already-fitted Keras ``Tokenizer``. When given it
            is used as-is (it is NOT re-fitted), so the word indices are the
            ones that tokenizer was fitted with. When omitted, a new tokenizer
            is fitted on ``texts`` themselves, which keeps the previous
            behaviour.
        maxlen: Length every sequence is padded/truncated to. Defaults to 53,
            the length the model was trained on.

    Returns:
        The array produced by ``pad_sequences`` with ``padding='post'``.

    NOTE(review): with ``tokenizer=None`` the word-to-index mapping is derived
    only from the batch being scored, so it presumably differs from the
    mapping used when the model was trained. Pass the training tokenizer to
    get indices that mean the same thing to the model; confirm where that
    tokenizer is persisted.
    """
    if tokenizer is None:
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(texts)

    sequences = tokenizer.texts_to_sequences(texts)

    # Pad (at the end) to the fixed length the model expects.
    return pad_sequences(sequences, maxlen=maxlen, padding='post')

In [18]:
def trackNews(ticker, name, model=None):
    """Score the current news items that mention a stock.

    Args:
        ticker: Stock symbol, matched case-insensitively as a whole token
            (not as a substring, so "T" does not match every word with a "t").
        name: Company name, matched as a case-insensitive substring.
        model: Optional already-loaded sentiment model exposing ``predict``.
            When omitted, 'news_sentiment.h5' is loaded on every call.

    Returns:
        (positives, mentions):
            positives -- net score: +1 for each mentioning item whose predicted
                class index is 0, -1 for every other mentioning item.
            mentions  -- number of items that mention the ticker or the name.
        Returns (0, 0) without touching the model when no news was scraped.

    NOTE(review): class index 0 is treated as the positive class; confirm
    against the label order used to train the model.
    """
    tokens = scrapeNews()
    if not tokens:
        # Nothing to score; avoid feeding an empty batch to the model.
        return 0, 0

    if model is None:
        model = keras.models.load_model('news_sentiment.h5')
    sequences = get_sequences(tokens)
    sentiments = model.predict(sequences)

    # An empty search term would match every item, so empty terms are ignored.
    ticker_pattern = None
    if ticker:
        # Lookarounds instead of \b so symbols that start or end with a
        # non-word character are still matched as a whole token.
        ticker_pattern = re.compile(
            r'(?<!\w)' + re.escape(ticker) + r'(?!\w)', re.IGNORECASE
        )
    name_key = name.lower() if name else ""

    positives = 0
    mentions = 0
    for text, scores in zip(tokens, sentiments):
        # Check for stock mentions
        by_name = bool(name_key) and name_key in text.lower()
        by_ticker = ticker_pattern is not None and ticker_pattern.search(text) is not None
        if not (by_name or by_ticker):
            continue

        positives += 1 if np.argmax(scores) == 0 else -1
        mentions += 1

    return positives, mentions

In [19]:
# Build the one-year AAPL dataset with every indicator column, then display it.
aapl = load_dataset(ticker="AAPL", time="1y")
aapl

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,ticker,sma200,sma90,sma30,macd,macd_h,macd_s,rsi,volume90
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-05-09,154.929993,155.830002,151.490005,152.059998,151.366486,131577900,AAPL,,,,,,,,
2022-05-10,155.520004,156.740005,152.929993,154.509995,153.805313,115366700,AAPL,,,,,,,100.000000,
2022-05-11,153.500000,155.449997,145.809998,146.500000,145.831848,142689800,AAPL,,,,,,,79.904657,
2022-05-12,142.770004,146.199997,138.800003,142.559998,141.909821,182602000,AAPL,,,,,,,72.217163,
2022-05-13,144.589996,148.100006,143.110001,147.110001,146.439056,113990900,AAPL,,,,,,,75.186141,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-02,170.089996,170.350006,167.539993,168.539993,168.539993,48425700,AAPL,151.093700,150.414666,163.808333,2.647390,-0.007723,2.655113,62.434704,6.529450e+07
2023-05-03,169.500000,170.919998,167.160004,167.449997,167.449997,65136000,AAPL,151.195599,150.770222,164.080666,2.511362,-0.115001,2.626363,59.018937,6.506348e+07
2023-05-04,164.889999,167.039993,164.309998,165.789993,165.789993,81235400,AAPL,151.269549,151.143111,164.345999,2.243745,-0.306094,2.549839,54.159299,6.510107e+07
2023-05-05,170.979996,174.300003,170.759995,173.570007,173.570007,113316400,AAPL,151.372200,151.606555,164.834000,2.629133,0.063435,2.565698,67.617322,6.565109e+07
