In [1]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
from wordcloud import WordCloud
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
from newsapi import NewsApiClient
load_dotenv()
import alpaca_trade_api as tradeapi

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\bfode\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Read your api key environment variable
# YOUR CODE HERE!
api_key = os.getenv("news_api")

In [3]:
# Create a newsapi client
# YOUR CODE HERE!
newsapi = NewsApiClient(api_key=api_key)

In [4]:
# Fetch the Bitcoin news articles
# YOUR CODE HERE!
WallStreet_Bets = newsapi.get_everything(q="bank of america and tweets")
WallStreet_Bets["totalResults"]


# Print total articles
print(f"Total WallStreet Bets news articles: {WallStreet_Bets['totalResults']}")

WallStreet_Bets["articles"][0]

Total WallStreet Bets news articles: 68


{'source': {'id': 'reuters', 'name': 'Reuters'},
 'author': 'Tom Arnold,Karin Strohecker',
 'title': "El Salvador president's bitcoin push casts shadow over IMF efforts - Reuters",
 'description': "A push by El Salvador's President Nayib Bukele to make his country the first in the world to formally adopt bitcoin as legal tender has sparked concerns about the outlook for its programme with the International Monetary Fund (IMF).",
 'url': 'https://www.reuters.com/technology/el-salvador-presidents-bitcoin-push-casts-shadow-over-imf-efforts-2021-06-07/',
 'urlToImage': 'https://www.reuters.com/resizer/dSYmuZSBq_k7OCq3KvByRj7BPAM=/1200x628/smart/filters:quality(80)/cloudfront-us-east-2.images.arcpublishing.com/reuters/AEOA4OXURRJQPGYGYJPYNFQRQU.jpg',
 'publishedAt': '2021-06-07T20:44:00Z',
 'content': "A representation of the virtual cryptocurrency Bitcoin is seen in this picture illustration taken June 7, 2021. REUTERS/Edgar Su/IllustrationA push by El Salvador's President Nayib Bukele to 

In [8]:
# Create the  sentiment scores DataFrame
# YOUR CODE HERE!
WallStreet_Bets_sentiments = []

for article in WallStreet_Bets["articles"]:
    try:
        text = article["content"]
        date = article["publishedAt"][:10]
        sentiment = analyzer.polarity_scores(text)
        compound = sentiment["compound"]
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        
        WallStreet_Bets_sentiments.append({
            "compound": compound,
            "positive": pos,
            "negative": neg,
            "neutral": neu,
            "text": text,
            
        })
        
    except AttributeError:
        pass
    



WallStreet_Bets_df = pd.DataFrame(WallStreet_Bets_sentiments)
WallStreet_Bets_df.head()

Unnamed: 0,compound,positive,negative,neutral,text
0,0.0,0.0,0.0,1.0,A representation of the virtual cryptocurrency...
1,0.3612,0.075,0.0,0.925,"From humble beginnings, Jeff Sine built a care..."
2,0.4201,0.085,0.0,0.915,Published June 2021\r\nMost asset classes have...
3,0.3612,0.118,0.055,0.827,NEW DELHI: Major cryptocurrencies traded lower...
4,-0.0772,0.059,0.066,0.875,Ill tell you how old I am without telling you ...


In [9]:
# Describe the Bitcoin Sentiment
# YOUR CODE HERE!
WallStreet_Bets_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,20.0,20.0,20.0,20.0
mean,0.140235,0.06885,0.03155,0.8996
std,0.310765,0.057922,0.039771,0.066357
min,-0.5267,0.0,0.0,0.758
25%,-0.0193,0.02775,0.0,0.8715
50%,0.0898,0.066,0.0,0.909
75%,0.3612,0.0895,0.05525,0.93425
max,0.6908,0.193,0.118,1.0


In [10]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import re

In [11]:
# Instantiate the lemmatizer
# YOUR CODE HERE!
lemmatizer = WordNetLemmatizer()

# Create a list of stopwords
# YOUR CODE HERE!
stopWords = set(stopwords.words('english'))
print(len(stopWords))
print(stopWords)

# Expand the default stopwords list if necessary
# YOUR CODE HERE!

179
{'can', 'hasn', 'my', 'that', 'yours', 'were', 'yourself', 'with', 'are', 'hadn', 'myself', 'me', 'themselves', "didn't", 'after', "needn't", 'haven', 'not', "mustn't", 'between', 'shan', 'an', 'how', 'when', 'out', "wouldn't", 'shouldn', 'their', 'll', 'a', 'than', 'wouldn', 'has', 'theirs', "couldn't", 'its', 'isn', 'you', 'by', 'some', 'for', 'ours', 'from', 'and', 'ain', 'did', 'who', 'himself', 'the', 'both', "you'll", 'just', 'why', 'our', 'under', 'her', 'his', 'at', 'do', 'to', 'hers', 'most', 'mustn', 'any', 'will', 'doesn', 'wasn', 'but', 'all', "you've", 'such', 'on', "should've", 'very', "shan't", 'off', 'before', 'here', 'what', 'couldn', 'as', 'so', "hadn't", 're', 'below', 'down', 'is', 'nor', 'was', 'up', "won't", 'more', 'them', 'over', "mightn't", 's', 'during', 'few', 'i', 'him', 't', "hasn't", 'no', 'of', 'until', 'herself', 'mightn', 'once', "that'll", 'have', 'o', 'this', 'ourselves', 'needn', 'too', 'those', 'now', 'itself', 'd', 'into', 'we', 'does', "aren't

In [12]:
# Complete the tokenizer function
def tokenizer(text):
    """Tokenizes text."""
    
    # Remove the punctuation from text
    regex = re.compile("[^a-zA-Z ]")
    re_clean = regex.sub('', text)
   
    # Create a tokenized list of the words
    words = word_tokenize(re_clean)
    
    # Lemmatize words into root words
    
    lem = [lemmatizer.lemmatize(word) for word in words]
   
    # Convert the words to lowercase
    # Remove the stop words
    output = [word.lower() for word in lem if word.lower() not in stopWords]
    
    return output
    

In [13]:
# Create a new tokens column for WallStreet_Bets
# YOUR CODE HERE!
WallStreet_Bets_df['tokens'] = WallStreet_Bets_df.text.apply(tokenizer)
WallStreet_Bets_df.head()

Unnamed: 0,compound,positive,negative,neutral,text,tokens
0,0.0,0.0,0.0,1.0,A representation of the virtual cryptocurrency...,"[representation, virtual, cryptocurrency, bitc..."
1,0.3612,0.075,0.0,0.925,"From humble beginnings, Jeff Sine built a care...","[humble, beginning, jeff, sine, built, career,..."
2,0.4201,0.085,0.0,0.915,Published June 2021\r\nMost asset classes have...,"[published, june, asset, class, degree, contro..."
3,0.3612,0.118,0.055,0.827,NEW DELHI: Major cryptocurrencies traded lower...,"[new, delhi, major, cryptocurrencies, traded, ..."
4,-0.0772,0.059,0.066,0.875,Ill tell you how old I am without telling you ...,"[ill, tell, old, without, telling, old, first,..."


In [14]:
# Create a new tokens column for WallStreet_Bets
# YOUR CODE HERE!
WallStreet_Bets_df['tokens'] = WallStreet_Bets_df.text.apply(tokenizer)
WallStreet_Bets_df.head()

Unnamed: 0,compound,positive,negative,neutral,text,tokens
0,0.0,0.0,0.0,1.0,A representation of the virtual cryptocurrency...,"[representation, virtual, cryptocurrency, bitc..."
1,0.3612,0.075,0.0,0.925,"From humble beginnings, Jeff Sine built a care...","[humble, beginning, jeff, sine, built, career,..."
2,0.4201,0.085,0.0,0.915,Published June 2021\r\nMost asset classes have...,"[published, june, asset, class, degree, contro..."
3,0.3612,0.118,0.055,0.827,NEW DELHI: Major cryptocurrencies traded lower...,"[new, delhi, major, cryptocurrencies, traded, ..."
4,-0.0772,0.059,0.066,0.875,Ill tell you how old I am without telling you ...,"[ill, tell, old, without, telling, old, first,..."


In [15]:
# Format current date as ISO format
# YOUR CODE HERE!

start_date = pd.Timestamp('2015-08-07', tz='America/New_York').isoformat()
end_date = pd.Timestamp('2020-08-07', tz='America/New_York').isoformat()
#today = pd.Timestamp("2021-04-12", tz="America/New_York").isoformat()

# Set the tickers
tickers = ["WFC", "GS","MS"]

# Set timeframe to '1D' for Alpaca API
timeframe = "1D"

# Get current closing prices for SPY and AGG
# YOUR CODE HERE!
df_portfolio = alpaca.get_barset(
    tickers,
    timeframe,
    start = start_date,
    end = end_date
).df

# Preview DataFrame
# YOUR CODE HERE!
df_portfolio

NameError: name 'alpaca' is not defined

In [14]:
# Format current date as ISO format
# YOUR CODE HERE!

start_date = pd.Timestamp('2015-08-07', tz='America/New_York').isoformat()
end_date = pd.Timestamp('2020-08-07', tz='America/New_York').isoformat()

In [1]:
# Get 5 years' worth of historical data for SPY and AGG
#Wells Fargo Co. (WFC), Goldman Sachs Group Inc. (GS), and Morgan Stanley (MS).

tickers = ["WFC", "GS","MS"]

df_stock_data = alpaca.get_barset(
    tickers,
    timeframe,
    start=start_date,
    end=end_date,
    limit=1000,
).df

# Display sample data
df_stock_data.head()

NameError: name 'alpaca' is not defined

In [16]:
closing_price_gs = pd.DataFrame(df_stock_data["GS"]["close"])
closing_price_ms = pd.DataFrame(df_stock_data["MS"]["close"])
closing_price_wfc = pd.DataFrame(df_stock_data["WFC"]["close"])
all_closing_price = pd.concat([closing_price_gs, closing_price_ms, closing_price_wfc], axis="columns", join="inner")
all_closing_price.index = all_closing_price.index.date
all_closing_price.columns = ["GS","MS","WFC"]
all_closing_price_returns = all_closing_price.pct_change().dropna()
all_closing_price_returns.head(10)

NameError: name 'df_stock_data' is not defined

In [None]:
# Create a series using "Close" price percentage returns, drop any NaNs, and check the results:
# (Make sure to multiply the pct_change() results by *100)
df_stock_data['Return'] = df_stock_data.Close.pct_change() * 100
df_stock_data['Lagged_Return'] = df_stock_data['Return'].shift()
df_stock_data = df_stock_data.dropna()
df_stock_data.tail()