In [1]:
# Initial imports
import panel as pn
import os
import requests
import pandas as pd
import datetime as dt
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from yahoofinancials import YahooFinancials
import json
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import numpy as np
pn.extension("plotly")
from panel.interact import interact
import hvplot.pandas
import plotly.express as px


In [34]:
# Load environment variables from the local .env file
load_dotenv()

# Create the News API client; os.environ[...] raises KeyError if NEWS_API is not set
newsapi = NewsApiClient(api_key=os.environ["NEWS_API"])
#api_newsdata = NewsDataApiClient(apikey="newsdata")

# Read the Alpaca API key and secret (os.getenv returns None when a variable is missing)
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Create the Alpaca REST client for API version v2
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

In [3]:
# Loading Functions
# Query the News API for English articles about a keyword within a date window
def get_headlines(keyword,fdate,edate):
    """Fetch the first page of relevancy-sorted English articles about `keyword`.

    Uses the module-level `newsapi` client.

    Args:
        keyword: Search phrase sent to the News API as `q`.
        fdate: Start of the window; must provide `strftime`.
        edate: End of the window; must provide `strftime`.

    Returns:
        A tuple `(all_headlines, all_dates, all_contents)`. `all_headlines`
        and `all_contents` each contain one inner list (the titles and the
        contents of the returned articles); `all_dates` contains `fdate`.
    """
    # The API is given plain YYYY-MM-DD strings
    window_start = fdate.strftime("%Y-%m-%d")
    window_end = edate.strftime("%Y-%m-%d")

    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    print(f"retrieving news from: {fdate} - {edate}")

    response = newsapi.get_everything(
        q=keyword,
        from_param=window_start,
        to=window_end,
        language="en",
        sort_by="relevancy",
        page=1,
    )
    found = response["articles"]
    titles = [article["title"] for article in found]
    bodies = [article["content"] for article in found]

    # Each result is wrapped in an outer list holding this single window
    return [titles], [fdate], [bodies]

# Average the VADER compound / positive / negative scores of each group of texts
def headline_sentiment_summarizer_avg(data,sdate):
    """Compute mean sentiment scores for each group of texts in `data`.

    Uses the module-level `sid` analyzer (`sid.polarity_scores`).

    Args:
        data: Iterable of groups; each group is an iterable of texts. `None`
            entries inside a group are skipped.
        sdate: Date used to label every output row; must provide `strftime`.

    Returns:
        A DataFrame indexed by "Date" (`sdate` formatted as YYYY-MM-DD) with
        one row per group and the columns "c0" (mean compound score), "p0"
        (mean positive score) and "n0" (mean negative score). A group with no
        usable text yields NaN in all three columns instead of raising
        ZeroDivisionError.
    """
    missing = float("nan")
    compound_avgs = []
    positive_avgs = []
    negative_avgs = []
    for day in data:
        # Score every text exactly once and reuse the result for all three averages
        scores = [sid.polarity_scores(text) for text in day if text is not None]
        if scores:
            count = len(scores)
            compound_avgs.append(sum(s["compound"] for s in scores) / count)
            positive_avgs.append(sum(s["pos"] for s in scores) / count)
            negative_avgs.append(sum(s["neg"] for s in scores) / count)
        else:
            # Nothing to average for this group
            compound_avgs.append(missing)
            positive_avgs.append(missing)
            negative_avgs.append(missing)
    d = {
        "c0": compound_avgs,
        "p0": positive_avgs,
        "n0": negative_avgs,
        "Date": sdate.strftime("%Y-%m-%d"),
    }
    sentiment_df = pd.DataFrame(data=d).set_index("Date")
    return sentiment_df

In [4]:
# Request weekly price history for crude oil (ticker CL=F) from 2020-06-01 to 2022-06-01
yahoo_financials = YahooFinancials('CL=F')
crude_prices=(yahoo_financials.get_historical_price_data("2020-06-01", "2022-06-01", "weekly"))

In [5]:
# Serialize the crude price records to JSON text and write them to sample.json
# (the same file name is reused, and therefore overwritten, for every asset)
json_object= json.dumps(crude_prices['CL=F']['prices'], indent = 4)
with open("sample.json", "w") as outfile:
    outfile.write(json_object)

In [6]:
# Read the crude price records back from sample.json; note this rebinds crude_prices
crude_prices = pd.read_json('sample.json')

In [7]:
# Create the crude prices DataFrame that the following cells reshape
crude_prices_df = pd.DataFrame(crude_prices)
#crude_prices_df.head()

In [8]:
# Remove the columns that are not used downstream
crude_prices_df = crude_prices_df.drop(
    columns=['date', 'high', 'low', 'open', 'adjclose']
)

In [9]:
# Give the remaining columns crude-specific names and index the frame by Date
crude_prices_df = crude_prices_df.rename(
    columns={'close': 'Crude Close', 'volume': 'Crude Volume', 'formatted_date': 'Date'}
).set_index('Date')

In [10]:
# Request weekly price history for gold (ticker GC=F) from 2020-06-01 to 2022-06-01
yahoo_financials = YahooFinancials('GC=F')
gold_prices=(yahoo_financials.get_historical_price_data("2020-06-01", "2022-06-01", "weekly"))
#print(gold_prices)

In [11]:
# Serialize the gold price records to JSON text and write them to sample.json
# (this overwrites whatever the file held before)
json_object= json.dumps(gold_prices['GC=F']['prices'], indent = 4)
with open("sample.json", "w") as outfile:
    outfile.write(json_object)

In [12]:
# Read the gold price records back from sample.json; note this rebinds gold_prices
gold_prices = pd.read_json('sample.json')

In [13]:
# Create the gold prices DataFrame that the following cells reshape
gold_prices_df = pd.DataFrame(gold_prices)
#gold_prices_df.head()

In [14]:
# Remove the columns that are not used downstream
gold_prices_df = gold_prices_df.drop(
    columns=['date', 'high', 'low', 'open', 'adjclose']
)

In [15]:
# Give the remaining columns gold-specific names and index the frame by Date
gold_prices_df = gold_prices_df.rename(
    columns={'close': 'Gold Close', 'volume': 'Gold Volume', 'formatted_date': 'Date'}
).set_index('Date')

In [16]:
# Request weekly price history for the S&P 500 ETF (ticker SPY) from 2020-06-01 to 2022-06-01
yahoo_financials = YahooFinancials('SPY')
SPY_prices=(yahoo_financials.get_historical_price_data("2020-06-01", "2022-06-01", "weekly"))
#print(SPY_prices)

In [17]:
# Serialize the SPY price records to JSON text and write them to sample.json
# (this overwrites whatever the file held before)
json_object= json.dumps(SPY_prices['SPY']['prices'], indent = 4)
with open("sample.json", "w") as outfile:
    outfile.write(json_object)

In [18]:
# Read the SPY price records back from sample.json; note this rebinds SPY_prices
SPY_prices = pd.read_json('sample.json')

In [19]:
# Create the SPY prices DataFrame that the following cell reshapes
SPY_prices_df = pd.DataFrame(SPY_prices)
#SPY_prices_df.head()

In [20]:
# Drop the unneeded columns, give the rest SPY-specific names and index the frame by Date
SPY_prices_df = (
    SPY_prices_df
    .drop(columns=['date', 'high', 'low', 'open', 'adjclose'])
    .rename(columns={'close': 'SPY Close', 'formatted_date': 'Date', 'volume': 'SPY Volume'})
    .set_index('Date')
)

In [21]:
# Request weekly price history for Bitcoin (ticker BTC-USD) from 2020-06-01 to 2022-06-01
yahoo_financials = YahooFinancials('BTC-USD')
BTC_prices=(yahoo_financials.get_historical_price_data("2020-06-01", "2022-06-01", "weekly"))
#print(BTC_prices)

In [22]:
# Serialize the Bitcoin price records to JSON text and write them to sample.json
# (this overwrites whatever the file held before)
json_object= json.dumps(BTC_prices['BTC-USD']['prices'], indent = 4)
with open("sample.json", "w") as outfile:
    outfile.write(json_object)

In [23]:
# Read the Bitcoin price records back from sample.json; note this rebinds BTC_prices
BTC_prices = pd.read_json('sample.json')

In [24]:
# Create the BTC prices DataFrame that the following cell reshapes
BTC_prices_df = pd.DataFrame(BTC_prices)
#BTC_prices_df.head()

In [25]:
# Drop the unneeded columns, give the rest BTC-specific names and index the frame by Date
BTC_prices_df = (
    BTC_prices_df
    .drop(columns=['date', 'high', 'low', 'open', 'adjclose'])
    .rename(columns={'close': 'BTC Close', 'volume': 'BTC Volume', 'formatted_date': 'Date'})
    .set_index('Date')
)

In [26]:
# Request weekly price history for Ethereum (ticker ETH-USD) from 2020-06-01 to 2022-06-01
yahoo_financials = YahooFinancials('ETH-USD')
ETH_prices=(yahoo_financials.get_historical_price_data("2020-06-01", "2022-06-01", "weekly"))
#print(ETH_prices)

In [27]:
# Serialize the Ethereum price records to JSON text and write them to sample.json
# (this overwrites whatever the file held before)
json_object= json.dumps(ETH_prices['ETH-USD']['prices'], indent = 4)
with open("sample.json", "w") as outfile:
    outfile.write(json_object)

In [28]:
# Read the Ethereum price records back from sample.json; note this rebinds ETH_prices
ETH_prices = pd.read_json('sample.json')

In [29]:
# Create the ETH prices DataFrame that the following cell reshapes
ETH_prices_df = pd.DataFrame(ETH_prices)
#ETH_prices_df.head()

In [30]:
# Drop the unneeded columns, give the rest ETH-specific names and index the frame by Date
ETH_prices_df = (
    ETH_prices_df
    .drop(columns=['date', 'high', 'low', 'open', 'adjclose'])
    .rename(columns={'close': 'ETH Close', 'volume': 'ETH Volume', 'formatted_date': 'Date'})
    .set_index('Date')
)


In [31]:
# Concatenate the per-asset frames column-wise, keeping only dates present in all of them
combined_asset_data = pd.concat(
    [crude_prices_df, gold_prices_df, BTC_prices_df, ETH_prices_df, SPY_prices_df],
    axis="columns",
    join="inner",
)

# Flag whether each asset closed higher than it did two periods earlier:
# 1 = close rose, 0 = close fell or was unchanged. The previous `x <= 0`
# test produced the opposite labelling from what the "UP" column names say.
# The first two rows have no two-period lookback (diff is NaN) and stay 0.
_up_flag_sources = {
    'SPY UP(2)': 'SPY Close',
    'CRUDE UP(2)': 'Crude Close',
    'GOLD UP(2)': 'Gold Close',
    'BTC UP(2)': 'BTC Close',
    'ETH UP(2)': 'ETH Close',
}
for _flag_col, _close_col in _up_flag_sources.items():
    combined_asset_data[_flag_col] = (
        combined_asset_data[_close_col].diff(periods=2) > 0
    ).astype("int64")

# Setting Index to datetime to assist with merging with sentiments
combined_asset_data.index = pd.to_datetime(combined_asset_data.index)

In [32]:
# Show the index value (date) of the last row of the combined data
combined_asset_data.tail(1).index.values

array(['2022-05-30T00:00:00.000000000'], dtype='datetime64[ns]')

In [37]:
# Module-level VADER analyzer; headline_sentiment_summarizer_avg reads it as the global `sid`
sid = SentimentIntensityAnalyzer()

In [37]:
# sp_sentiment_df=pd.DataFrame()
# gold_sentiment_df=pd.DataFrame()
# crude_sentiment_df=pd.DataFrame()
# btc_sentiment_df=pd.DataFrame()
# eth_sentiment_df=pd.DataFrame()
# for fdate in pd.to_datetime(pd.Series(list(combined_asset_data.index.values))):
#     edate = fdate
#     fdate = fdate - timedelta(days=6)
#     #S&P 500
#     sp_headlines, sp_dates, sp_contents = get_headlines("S&P 500",fdate,edate)
#     temp_sp_df = headline_sentiment_summarizer_avg(sp_contents,fdate)
#     sp_sentiment_df = pd.concat([sp_sentiment_df,temp_sp_df])
#     # Gold
#     gold_headlines, gold_dates, gold_contents = get_headlines("Gold",fdate,edate)
#     temp_gold_df = headline_sentiment_summarizer_avg(gold_contents,fdate)
#     gold_sentiment_df = pd.concat([gold_sentiment_df,temp_gold_df])
#     # Crude Oil
#     crude_headlines, crude_dates, crude_contents = get_headlines("Crude Oil",fdate,edate)
#     temp_crude_df = headline_sentiment_summarizer_avg(crude_contents,fdate)
#     crude_sentiment_df = pd.concat([crude_sentiment_df,temp_crude_df])
#     # Bitcoin
#     btc_headlines, btc_dates, btc_contents = get_headlines("Bitcoin",fdate,edate)
#     temp_btc_df = headline_sentiment_summarizer_avg(btc_contents,fdate)
#     btc_sentiment_df = pd.concat([btc_sentiment_df,temp_btc_df])
#     # Ethereum
#     eth_headlines, eth_dates, eth_contents = get_headlines("Ethereum",fdate,edate)
#     temp_eth_df = headline_sentiment_summarizer_avg(eth_contents,fdate)
#     eth_sentiment_df = pd.concat([eth_sentiment_df,temp_eth_df])

# #Save sentiment score dataframes to disk to use later if needed (Api calls run out)
# sp_sentiment_df.to_csv('./sp_sentiment.csv')
# gold_sentiment_df.to_csv('./gold_sentiment.csv')
# crude_sentiment_df.to_csv('./crude_sentiment.csv')
# btc_sentiment_df.to_csv('./btc_sentiment.csv')
# eth_sentiment_df.to_csv('./eth_sentiment.csv')
    
    
    
    

Fetching news about 'S&P 500'
******************************
retrieving news from: 2020-05-26 00:00:00 - 2020-06-01 00:00:00
Fetching news about 'Gold'
******************************
retrieving news from: 2020-05-26 00:00:00 - 2020-06-01 00:00:00
Fetching news about 'Crude Oil'
******************************
retrieving news from: 2020-05-26 00:00:00 - 2020-06-01 00:00:00
Fetching news about 'Bitcoin'
******************************
retrieving news from: 2020-05-26 00:00:00 - 2020-06-01 00:00:00
Fetching news about 'Ethereum'
******************************
retrieving news from: 2020-05-26 00:00:00 - 2020-06-01 00:00:00
Fetching news about 'S&P 500'
******************************
retrieving news from: 2020-06-02 00:00:00 - 2020-06-08 00:00:00
Fetching news about 'Gold'
******************************
retrieving news from: 2020-06-02 00:00:00 - 2020-06-08 00:00:00
Fetching news about 'Crude Oil'
******************************
retrieving news from: 2020-06-02 00:00:00 - 2020-06-08 00:00:00
F

NameError: name 'sp' is not defined

In [38]:
# #Save sentiment score dataframes to disk to use later if needed (Api calls run out)
# sp_sentiment_df.to_csv('./sp_sentiment.csv')
# gold_sentiment_df.to_csv('./gold_sentiment.csv')
# crude_sentiment_df.to_csv('./crude_sentiment.csv')
# btc_sentiment_df.to_csv('./btc_sentiment.csv')
# eth_sentiment_df.to_csv('./eth_sentiment.csv')


In [54]:
# Load the saved sentiment scores.
# parse_dates=True converts the Date index to datetimes so it matches the
# datetime index of combined_asset_data; infer_datetime_format on its own
# has no effect because it only applies when date parsing is enabled.
sp_sentiment_df = pd.read_csv('./sp_sentiment.csv', index_col='Date', parse_dates=True)
gold_sentiment_df = pd.read_csv('./gold_sentiment.csv', index_col='Date', parse_dates=True)
crude_sentiment_df = pd.read_csv('./crude_sentiment.csv', index_col='Date', parse_dates=True)
btc_sentiment_df = pd.read_csv('./btc_sentiment.csv', index_col='Date', parse_dates=True)
eth_sentiment_df = pd.read_csv('./eth_sentiment.csv', index_col='Date', parse_dates=True)

# Prefix the compound (c0), positive (p0) and negative (n0) columns with the
# asset name so the frames can later sit side by side without name clashes
sp_sentiment_df.columns=['sp500 c0','sp500 p0', 'sp500 n0']
gold_sentiment_df.columns=['gold c0','gold p0', 'gold n0']
crude_sentiment_df.columns=['crude c0','crude p0', 'crude n0']
btc_sentiment_df.columns=['btc c0','btc p0', 'btc n0']
eth_sentiment_df.columns=['eth c0','eth p0', 'eth n0']


In [55]:
# Preview the first rows of the Ethereum sentiment scores
eth_sentiment_df.head()

Unnamed: 0_level_0,eth c0,eth p0,eth n0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-05-26,0.200015,0.07395,0.02607
2020-06-02,0.222429,0.08383,0.03457
2020-06-09,0.121122,0.06716,0.03901
2020-06-16,0.287277,0.09089,0.02719
2020-06-23,0.199303,0.08053,0.0365


In [None]:
# Initial imports
import numpy as np
import pandas as pd
import hvplot.pandas

In [None]:
# Seed NumPy's global random generator (1) and TensorFlow's random generator (2)
from numpy.random import seed

seed(1)
# NOTE: this binds the name `random` to tensorflow.random for the rest of the notebook
from tensorflow import random
random.set_seed(2)

In [None]:
# # This function accepts the column number for the features (X) and the target (y)
# # It chunks the data up with a rolling window of Xt-n to predict Xt
# # It returns a numpy array of X any y
# def window_data(combined_asset_data, window, feature_col_number, target_col_number):
#     X = []
#     y = []
#     for i in range(len(combined_asset_data) - window - 1):
#         features = combined_asset_data.iloc[i:(i + window), feature_col_number]
#         target = combined_asset_data.iloc[(i + window), target_col_number]
#         X.append(features)
#         y.append(target)
#     return np.array(X), np.array(y).reshape(-1, 1)


# Build the feature matrix X and the target y.
# Columns are selected by name instead of by position: the positional
# `iloc[:, 9]` picked 'SPY Volume' (the column after 'SPY Close'), while the
# rest of the notebook treats the target as SPY prices.
feature_columns = [
    'Crude Close', 'Crude Volume',
    'Gold Close', 'Gold Volume',
    'BTC Close', 'BTC Volume',
    'ETH Close', 'ETH Volume',
]
X = combined_asset_data[feature_columns]
y = combined_asset_data['SPY Close']

In [None]:
# # Predict Closing Prices using a 10 day window of previous closing prices

# feature_column = 1
# target_column = 1
# X, y = window_data(combined_asset_data, window_size, feature_column, target_column)

# Import the scikit-learn scalers (MinMaxScaler is imported but not used in this cell)
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# Create a StandardScaler and fit it on the full feature set X.
# NOTE(review): the scaler is fit again on X_train in a later cell, which replaces this fit.
scaler = StandardScaler().fit(X)

#from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler().fit(X)
# X = scaler.transform(X)

In [None]:
# Use the first 70% of the rows for training and the remainder for testing
split = int(0.7 * len(X))

X_train, X_test = X.iloc[:split], X.iloc[split:]

# Wrap the target slices in DataFrames so they are two-dimensional
y_train = pd.DataFrame(y.iloc[:split])
y_test = pd.DataFrame(y.iloc[split:])

In [None]:
# Fit the scaler on the training features, then scale both feature sets with it
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Re-fit the same scaler on the training target and scale both target sets.
# From here on `scaler` holds the target statistics, not the feature ones.
y_train = scaler.fit_transform(y_train)
y_test = scaler.transform(y_test)

In [None]:
# Import the Keras model and layer classes
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Small fully connected network: two hidden ReLU layers of 2 units each over
# 8 input features, followed by a single linear output unit
nn = Sequential([
    Dense(units=2, input_dim=8, activation="relu"),
    Dense(units=2, activation="relu"),
    Dense(units=1, activation="linear"),
])

In [None]:
# Compile the model with mean squared error as both the loss and the reported metric
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# Fit on the scaled training split prepared above (not the raw, unscaled full
# X / y, which would also let test rows into training); Keras holds out the
# last 30% of the training rows for validation. `model_1` is the History object.
model_1 = nn.fit(X_train, y_train, validation_split=0.3, epochs=10)

In [None]:
# Plot the training loss per epoch for the dense model
import matplotlib.pyplot as plt
plt.plot(model_1.history["loss"])
plt.title("loss_function - Training - 2 hidden layers")
plt.xlabel("epoch")
plt.ylabel("loss")
# Only one curve is drawn, so the legend carries exactly one label
plt.legend(["2 hidden layers"])
plt.show()

In [None]:
# Reshape the features to (samples, time steps, 1): the LSTM below declares
# input_shape=(X_train.shape[1], 1), so the 2-D scaled arrays cannot be fed
# to it as they are. Re-running this cell leaves the shapes unchanged.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [None]:
# Stacked LSTM regressor.
# - return_sequences=True is set on every LSTM layer that feeds another LSTM
#   layer; the last LSTM layer does not need it.
# - A Dropout layer follows each LSTM layer to help prevent overfitting.
# - input_shape is (number of time steps, number of indicators); batched
#   inputs therefore have the shape Samples/TimeSteps/Features.

# Initial model setup
number_units = 8
dropout_fraction = 0.2

model = Sequential([
    # Layer 1
    LSTM(
        units=number_units,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    ),
    Dropout(dropout_fraction),
    # Layer 2
    LSTM(units=number_units, return_sequences=True),
    Dropout(dropout_fraction),
    # Layer 3
    LSTM(units=number_units),
    Dropout(dropout_fraction),
    # Output layer
    Dense(1),
])

In [None]:
# Compile the LSTM model with the Adam optimizer and mean squared error loss
model.compile(optimizer="adam", loss="mean_squared_error")

In [None]:
# Print a summary of the LSTM model
model.summary()

In [None]:
# Train the LSTM model on the training split
# Use at least 10 epochs
# Do not shuffle the data
# Experiment with the batch size, but a smaller batch size is recommended
model.fit(X_train, y_train, epochs=10, shuffle=False, batch_size=3, verbose=1)

In [None]:
# Evaluate the LSTM model on the test split (the model was compiled with mean squared error loss)
model.evaluate(X_test, y_test, verbose=0)

In [None]:
# Predict the scaled target for every test sample
predicted = model.predict(X_test)

In [None]:
# Recover the original prices instead of the scaled version.
# `scaler` was last fit on y_train, so inverse_transform maps scaled target
# values back to the target's original units.
predicted_prices = scaler.inverse_transform(predicted)
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

In [None]:
# Create a DataFrame of real and predicted values, indexed by the last
# len(real_prices) dates of combined_asset_data, and preview its first rows
assets = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
}, index = combined_asset_data.index[-len(real_prices): ]) 
assets.head()

In [None]:
# Plot the real and predicted values together as a line chart
assets.plot(title="Actual Vs. Predicted SPY Prices")