In [1]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
from wordcloud import WordCloud
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
from newsapi import NewsApiClient
load_dotenv()
import alpaca_trade_api as tradeapi

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\bfode\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Read your api key environment variable
# YOUR CODE HERE!
api_key = os.getenv("news_api")

In [3]:
# Create a newsapi client
# YOUR CODE HERE!
newsapi = NewsApiClient(api_key=api_key)

In [4]:
# Fetch the Bitcoin news articles
# YOUR CODE HERE!
WallStreet_Bets = newsapi.get_everything(q="bank of america and tweets")
WallStreet_Bets["totalResults"]


# Print total articles
print(f"Total WallStreet Bets news articles: {WallStreet_Bets['totalResults']}")

WallStreet_Bets["articles"][0]

Total WallStreet Bets news articles: 68


{'source': {'id': 'reuters', 'name': 'Reuters'},
 'author': 'Tom Arnold,Karin Strohecker',
 'title': "El Salvador president's bitcoin push casts shadow over IMF efforts - Reuters",
 'description': "A push by El Salvador's President Nayib Bukele to make his country the first in the world to formally adopt bitcoin as legal tender has sparked concerns about the outlook for its programme with the International Monetary Fund (IMF).",
 'url': 'https://www.reuters.com/technology/el-salvador-presidents-bitcoin-push-casts-shadow-over-imf-efforts-2021-06-07/',
 'urlToImage': 'https://www.reuters.com/resizer/dSYmuZSBq_k7OCq3KvByRj7BPAM=/1200x628/smart/filters:quality(80)/cloudfront-us-east-2.images.arcpublishing.com/reuters/AEOA4OXURRJQPGYGYJPYNFQRQU.jpg',
 'publishedAt': '2021-06-07T20:44:00Z',
 'content': "A representation of the virtual cryptocurrency Bitcoin is seen in this picture illustration taken June 7, 2021. REUTERS/Edgar Su/IllustrationA push by El Salvador's President Nayib Bukele to 

In [12]:
# Create the  sentiment scores DataFrame
# YOUR CODE HERE!
WallStreet_Bets_sentiments = []

for article in WallStreet_Bets["articles"]:
    try:
        text = article["content"]
        date = article["publishedAt"][:10]
        sentiment = analyzer.polarity_scores(text)
        compound = sentiment["compound"]
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        
        WallStreet_Bets_sentiments.append({
            "compound": compound,
            "positive": pos,
            "negative": neg,
            "neutral": neu,
            "text": text,
            
        })
        
    except AttributeError:
        pass
    

WallStreet_Bets_df = pd.DataFrame(WallStreet_Bets_sentiments)
WallStreet_Bets_df.head()

Unnamed: 0,compound,positive,negative,neutral,text
0,0.0,0.0,0.0,1.0,A representation of the virtual cryptocurrency...
1,0.3612,0.075,0.0,0.925,"From humble beginnings, Jeff Sine built a care..."
2,0.4201,0.085,0.0,0.915,Published June 2021\r\nMost asset classes have...
3,0.3612,0.118,0.055,0.827,NEW DELHI: Major cryptocurrencies traded lower...
4,-0.0772,0.059,0.066,0.875,Ill tell you how old I am without telling you ...


In [13]:
# Describe the Bitcoin Sentiment
# YOUR CODE HERE!
WallStreet_Bets_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,20.0,20.0,20.0,20.0
mean,0.140235,0.06885,0.03155,0.8996
std,0.310765,0.057922,0.039771,0.066357
min,-0.5267,0.0,0.0,0.758
25%,-0.0193,0.02775,0.0,0.8715
50%,0.0898,0.066,0.0,0.909
75%,0.3612,0.0895,0.05525,0.93425
max,0.6908,0.193,0.118,1.0


In [14]:
### Natural Language Processing

In [15]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import re

In [16]:
# Instantiate the lemmatizer
# YOUR CODE HERE!
lemmatizer = WordNetLemmatizer()

# Create a list of stopwords
# YOUR CODE HERE!
stopWords = set(stopwords.words('english'))
print(len(stopWords))
print(stopWords)

# Expand the default stopwords list if necessary
# YOUR CODE HERE!

179
{'theirs', 'did', 'themselves', 'him', 'above', "aren't", "should've", 'hadn', 'some', 'up', 're', 'have', "shan't", "wouldn't", "don't", 'each', 'do', 'further', 'its', 'which', "you've", 'own', 'during', 'the', "couldn't", 'doesn', "you'd", "isn't", "hadn't", 'yourself', 'itself', 'all', "she's", "didn't", 'such', 'i', 'ours', 'there', 'yourselves', 'it', 'now', 'your', 'at', "won't", 'shan', 'to', 'no', "that'll", 'both', 've', 'in', 'd', 'for', 'ourselves', 'aren', 'my', 'of', 'here', 'ain', 'hers', "you're", 'once', 'where', 'o', 'under', 'can', 'am', 'she', 'on', 'between', 'y', 'what', 'over', 't', 'm', 'just', 'before', 'nor', 'if', 'and', 'few', 'this', 'an', 'ma', 'then', 'through', 'you', "it's", 'while', 'll', 'myself', 'again', 'will', "mightn't", "weren't", 'mustn', "you'll", 'is', 'more', 'won', 'until', 'out', 'me', 'wasn', "needn't", 'how', 'other', 'their', 'haven', "wasn't", 'that', 'our', 'had', 'were', 'too', 'these', 'only', 'didn', 'into', 'against', 'those',

In [17]:
# Complete the tokenizer function
def tokenizer(text):
    """Tokenizes text."""
    
    # Remove the punctuation from text
    regex = re.compile("[^a-zA-Z ]")
    re_clean = regex.sub('', text)
   
    # Create a tokenized list of the words
    words = word_tokenize(re_clean)
    
    # Lemmatize words into root words
    
    lem = [lemmatizer.lemmatize(word) for word in words]
   
    # Convert the words to lowercase
    # Remove the stop words
    output = [word.lower() for word in lem if word.lower() not in stopWords]
    
    return output
    

In [18]:
# Create a new tokens column for WallStreet_Bets
# YOUR CODE HERE!
WallStreet_Bets_df['tokens'] = WallStreet_Bets_df.text.apply(tokenizer)
WallStreet_Bets_df.head()

Unnamed: 0,compound,positive,negative,neutral,text,tokens
0,0.0,0.0,0.0,1.0,A representation of the virtual cryptocurrency...,"[representation, virtual, cryptocurrency, bitc..."
1,0.3612,0.075,0.0,0.925,"From humble beginnings, Jeff Sine built a care...","[humble, beginning, jeff, sine, built, career,..."
2,0.4201,0.085,0.0,0.915,Published June 2021\r\nMost asset classes have...,"[published, june, asset, class, degree, contro..."
3,0.3612,0.118,0.055,0.827,NEW DELHI: Major cryptocurrencies traded lower...,"[new, delhi, major, cryptocurrencies, traded, ..."
4,-0.0772,0.059,0.066,0.875,Ill tell you how old I am without telling you ...,"[ill, tell, old, without, telling, old, first,..."


In [19]:
# Set Alpaca API key and secret
# YOUR CODE HERE!
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Create the Alpaca API object
# YOUR CODE HERE!
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2")

In [20]:
# Format current date as ISO format
# YOUR CODE HERE!

start_date = pd.Timestamp('2015-08-07', tz='America/New_York').isoformat()
end_date = pd.Timestamp('2020-08-07', tz='America/New_York').isoformat()
#today = pd.Timestamp("2021-04-12", tz="America/New_York").isoformat()

# Set the tickers
tickers = ["WFC", "GS","MS"]

# Set timeframe to '1D' for Alpaca API
timeframe = "1D"

# Get current closing prices for SPY and AGG
# YOUR CODE HERE!
df_portfolio = alpaca.get_barset(
    tickers,
    timeframe,
    start = start_date,
    end = end_date
).df

# Preview DataFrame
# YOUR CODE HERE!
df_portfolio

Unnamed: 0_level_0,GS,GS,GS,GS,GS,MS,MS,MS,MS,MS,WFC,WFC,WFC,WFC,WFC
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume
2020-03-18 00:00:00-04:00,150.00,155.9700,135.410,140.18,7229403,31.34,32.8400,27.20,30.75,21759598,27.70,28.89,26.9000,28.14,39482878
2020-03-19 00:00:00-04:00,137.38,152.4900,130.850,149.48,6033664,30.35,32.9000,28.74,30.93,20873167,27.66,29.11,26.2875,28.29,49865465
2020-03-20 00:00:00-04:00,148.99,149.1900,138.290,138.41,7490015,31.25,31.4500,29.19,29.66,25426071,28.68,28.75,26.1800,26.47,57765815
2020-03-23 00:00:00-04:00,136.03,141.9350,133.260,134.97,4574057,29.15,29.7899,27.76,27.82,23517468,26.26,26.87,25.1050,25.23,40784032
2020-03-24 00:00:00-04:00,144.32,153.9000,143.630,153.39,5692174,30.09,33.3750,29.40,33.21,23506795,26.96,29.17,26.3300,28.91,41672870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-03 00:00:00-04:00,198.49,199.8800,196.530,199.39,3200674,49.22,49.9050,48.72,49.47,8618979,24.25,24.50,23.9000,24.30,42432609
2020-08-04 00:00:00-04:00,199.35,201.7200,198.475,201.65,3688146,49.50,49.7700,49.08,49.26,11431220,24.20,24.29,24.0300,24.22,53890753
2020-08-05 00:00:00-04:00,203.17,204.8075,203.010,204.48,4081429,49.59,49.9600,49.44,49.78,11876099,24.40,24.75,24.3400,24.39,60804684
2020-08-06 00:00:00-04:00,205.00,206.2900,203.830,204.25,3310894,49.83,49.9800,49.24,49.46,10097493,24.20,24.50,24.1200,24.25,75647696


In [21]:
# Format current date as ISO format
# YOUR CODE HERE!

start_date = pd.Timestamp('2015-08-07', tz='America/New_York').isoformat()
end_date = pd.Timestamp('2020-08-07', tz='America/New_York').isoformat()

In [22]:
# Get 5 years' worth of historical data for SPY and AGG
#Wells Fargo Co. (WFC), Goldman Sachs Group Inc. (GS), and Morgan Stanley (MS).

tickers = ["WFC", "GS","MS"]

df_stock_data = alpaca.get_barset(
    tickers,
    timeframe,
    start=start_date,
    end=end_date,
    limit=1000,
).df

# Display sample data
df_stock_data.head()

Unnamed: 0_level_0,GS,GS,GS,GS,GS,MS,MS,MS,MS,MS,WFC,WFC,WFC,WFC,WFC
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume
2016-08-18 00:00:00-04:00,165.34,166.56,164.63,166.0401,1343523,30.27,30.74,30.195,30.54,11530304,48.5,48.66,48.33,48.52,9905086
2016-08-19 00:00:00-04:00,165.36,166.9,164.5,166.18,1370721,30.4,30.715,30.26,30.55,8158949,48.44,48.73,48.2,48.64,11208944
2016-08-22 00:00:00-04:00,165.87,166.91,164.89,166.26,1074287,30.44,30.7,30.39,30.6,5601966,48.64,48.74,48.5,48.65,7631001
2016-08-23 00:00:00-04:00,167.0,167.85,165.98,166.01,1578192,30.75,30.97,30.67,30.72,7331515,48.64,48.75,48.39,48.41,13706691
2016-08-24 00:00:00-04:00,165.78,166.7,164.85,165.34,1575529,30.81,31.16,30.72,30.92,11429566,48.5,48.77,48.35,48.64,14356901


In [23]:
closing_price_gs = pd.DataFrame(df_stock_data["GS"]["close"])
closing_price_ms = pd.DataFrame(df_stock_data["MS"]["close"])
closing_price_wfc = pd.DataFrame(df_stock_data["WFC"]["close"])
all_closing_price = pd.concat([closing_price_gs, closing_price_ms, closing_price_wfc], axis="columns", join="inner")
all_closing_price.index = all_closing_price.index.date
all_closing_price.columns = ["GS","MS","WFC"]
all_closing_price_returns = all_closing_price.pct_change().dropna()
all_closing_price_returns.head(10)

Unnamed: 0,GS,MS,WFC
2016-08-19,0.000843,0.000327,0.002473
2016-08-22,0.000481,0.001637,0.000206
2016-08-23,-0.001504,0.003922,-0.004933
2016-08-24,-0.004036,0.00651,0.004751
2016-08-25,0.00381,0.001617,-0.004934
2016-08-26,0.000241,0.006458,0.002479
2016-08-29,0.005301,0.0077,0.021641
2016-08-30,0.015399,0.024709,0.021384
2016-08-31,-0.000295,-0.004229,0.00316
2016-09-01,-0.005549,-0.004368,-0.007482


In [334]:
# Create a series using "Close" price percentage returns, drop any NaNs, and check the results:
# (Make sure to multiply the pct_change() results by *100)
df_stock_data['Return'] = df_stock_data.Close.pct_change() * 100
df_stock_data['Lagged_Return'] = df_stock_data['Return'].shift()
df_stock_data = df_stock_data.dropna()
df_stock_data.tail()

AttributeError: 'DataFrame' object has no attribute 'Close'