In [32]:
import os
import pandas as pd
from pathlib import Path
import requests
import json
from newsapi import NewsApiClient
#from theguardian import theguardian_content
from dotenv import load_dotenv
load_dotenv()
from collections import Counter
from nltk.corpus import reuters, stopwords
from nltk.util import ngrams
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import nltk
nltk.download('reuters')
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
from pmaw import PushshiftAPI
import praw

[nltk_data] Downloading package reuters to
[nltk_data]     C:\Users\krist\AppData\Roaming\nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\krist\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\krist\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\krist\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [33]:
api_key = os.getenv("NEWSAPI_KEY")

In [34]:
newsapi = NewsApiClient(api_key=api_key)

In [35]:
#Get news articles on certain topic based on keywords
def get_news(keywords):  
    news_article = newsapi.get_everything(
            q = keywords, language='en', sort_by= 'relevancy', page=5
    )
    return news_article

In [36]:
#Creates dataframe of the articles chosen 
def form_df(keywords):
    news = get_news(keywords)['articles']

    articles = []
    for article in news:
        try:
            title = article['title']
            description = article['description']
            text = article['content']
            date = article['publishedAt'][:10]

            articles.append({
                'text' : text,
                'date' : date,
                'language' : 'en'
            })
        except AttributeError:
            pass
    
    return pd.DataFrame(articles)

In [7]:
#Test to see if works
get_news("Microsoft")

{'status': 'ok',
 'totalResults': 15154,
 'articles': [{'source': {'id': None, 'name': 'Android Central'},
   'author': 'andrew.myrick@futurenet.com (Andrew Myrick)',
   'title': 'Android apps for Windows get their most significant update yet',
   'description': 'A new update to the Windows 11 Dev Channel brings an update to the Windows Subsystem for Android.',
   'url': 'https://www.androidcentral.com/apps-software/windows-11-android-12l-apps',
   'urlToImage': 'https://cdn.mos.cms.futurecdn.net/thimyQpk6bpV82doUaSJ4T-1200-80.jpg',
   'publishedAt': '2022-05-23T14:38:22Z',
   'content': '<ul><li>Microsoft has updated the Windows Subsystem for Android to Android 12L.\xa0</li><li>When the Subsystem was originally launched, it relied on Android 11.\xa0</li><li>The update is currently availabl… [+3377 chars]'},
  {'source': {'id': None, 'name': 'Digital Trends'},
   'author': 'Fionna Agomuoh',
   'title': 'Microsoft has finally brought Teams to its own app store',
   'description': 'Micro

In [8]:
get_news("Microsoft")['articles']

[{'source': {'id': None, 'name': 'Android Central'},
  'author': 'andrew.myrick@futurenet.com (Andrew Myrick)',
  'title': 'Android apps for Windows get their most significant update yet',
  'description': 'A new update to the Windows 11 Dev Channel brings an update to the Windows Subsystem for Android.',
  'url': 'https://www.androidcentral.com/apps-software/windows-11-android-12l-apps',
  'urlToImage': 'https://cdn.mos.cms.futurecdn.net/thimyQpk6bpV82doUaSJ4T-1200-80.jpg',
  'publishedAt': '2022-05-23T14:38:22Z',
  'content': '<ul><li>Microsoft has updated the Windows Subsystem for Android to Android 12L.\xa0</li><li>When the Subsystem was originally launched, it relied on Android 11.\xa0</li><li>The update is currently availabl… [+3377 chars]'},
 {'source': {'id': None, 'name': 'Digital Trends'},
  'author': 'Fionna Agomuoh',
  'title': 'Microsoft has finally brought Teams to its own app store',
  'description': 'Microsoft Teams is now available for download in the Microsoft Store a

In [9]:
#Test to see if works
form_df("Microsoft")

Unnamed: 0,title,description,text,date,language
0,Android apps for Windows get their most signif...,A new update to the Windows 11 Dev Channel bri...,<ul><li>Microsoft has updated the Windows Subs...,2022-05-23,en
1,Microsoft has finally brought Teams to its own...,Microsoft Teams is now available for download ...,Microsoft has now brought the Microsoft Teams ...,2022-05-17,en
2,JPM says Meta will become Broadcom's next bill...,"Facebook parent Meta Platforms Inc <a href=""ht...",May 31 (Reuters) - Facebook parent Meta Platfo...,2022-05-31,en
3,Xbox plans to launch a streaming puck and Sams...,Xbox cloud streaming is going to grow beyond t...,Did you miss a session from GamesBeat Summit 2...,2022-05-06,en
4,"This Week In Security: Follina, Open Redirect ...","Depending on who you ask, there’s either 2 vul...","Depending on who you ask, there’s either 2 vul...",2022-06-03,en
5,This is What It's Actually Like to Work a 4-Da...,We talked to founders of three companies that ...,Image Credit: Nicolás Ortega\r\nAt first it so...,2022-05-11,en
6,"Fuck yeah, passwordless logins! Google, Apple,...",The FIDO Alliance has been at the forefront of...,The FIDO Alliance has been at the forefront of...,2022-05-06,en
7,The metaverse could lead to an urban exodus,"Several companies, including Apple and Microso...","Several companies, including Apple and Microso...",2022-05-24,en
8,Microsoft Edge just got a new way to protect y...,Microsoft has officially announced a new way f...,Microsoft Edge just got even more secure. Afte...,2022-05-13,en
9,Microsoft has new tools to encourage the trans...,"At Microsoft Build 2022, Microsoft detailed Pr...",The transition to ARM chips on Windows has bee...,2022-05-24,en


In [40]:
apple = form_df('apple')
netflix = form_df('netflix')
facebook = form_df('facebook')
uber = form_df('uber')
microchip = form_df('microchip technology')
airbnb = form_df('airbnb')
diamondback = form_df('diamond back energy')
marathon = form_df('marathon oil corp')
devon = form_df('devon energy corp')
sunpower = form_df('sunpower corp')
renewable = form_df('renewable energy group inc')
mckinsey = form_df('mckinsey & company')
blackrock = form_df('blackrock')
paypal = form_df('paypal')
mercadolibre = form_df('mercadolibre inc')
sofi = form_df('sofi')
all_df = pd.concat([apple, netflix,facebook,uber,microchip, airbnb,diamondback,marathon, devon, sunpower, renewable, mckinsey, blackrock, paypal, mercadolibre,sofi], 
                    keys=['apple', 'netflix','facebook','uber','microchip', 'airbnb','diamondback','marathon', 'devon', 'sunpower', 'renewable', 'mckinsey', 'blackrock', 'paypal', 'mercadolibre','sofi'], 
                    names=['Company'], axis=0)
all_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,text,date,language
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
apple,0,Enlarge/ 2021's iPhone 13 still uses Apple's p...,2022-05-13,en
apple,1,Apple today previewed a range of new accessibi...,2022-05-17,en
apple,2,"Apple in February announced a new ""Tap to Pay ...",2022-05-16,en
apple,3,Apple today seeded the release candidate versi...,2022-05-12,en
apple,4,A mans suspected case of painful eczema on his...,2022-06-01,en


In [66]:
all_df.to_csv('all_stocks_articles.csv')

In [78]:
all_df = all_df.reset_index()


In [81]:
all_df.tail()

Unnamed: 0,Company,level_1,text,date,language,compound,neg,neu,pos
235,sofi,15,"Now that the 2022 schedule has been announced,...",2022-05-13,en,0.0,0.0,1.0,0.0
236,sofi,16,Premarket trading on Monday was not looking go...,2022-05-09,en,-0.7274,0.149,0.851,0.0
237,sofi,17,"In the second half of May 2022, video game mak...",2022-06-04,en,0.5267,0.0,0.891,0.109
238,sofi,18,SoFi Technologies (NASDAQ:SOFI) shares fell 12...,2022-05-13,en,-0.1779,0.153,0.712,0.134
239,sofi,19,Justin Sullivan/Getty Images News,2022-05-09,en,0.0,0.0,1.0,0.0


In [82]:
all_df.to_csv('all_stocks_sentiment.csv')

In [74]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download('vader_lexicon')
analyzer = SentimentIntensityAnalyzer()

def vader_analyzer(df):
    analyzer = SentimentIntensityAnalyzer()
    df['compound'] = [analyzer.polarity_scores(x)['compound'] for x in df['text']]
    df['neg'] = [analyzer.polarity_scores(x)['neg'] for x in df['text']]
    df['neu'] = [analyzer.polarity_scores(x)['neu'] for x in df['text']]
    df['pos'] = [analyzer.polarity_scores(x)['pos'] for x in df['text']]

    df['date'] = pd.to_datetime(
    df['date'],
    infer_datetime_format = True,
    utc = True    
    )
    df['date'] = df['date'].dt.date
    
    return df

def daily_sentiment(df):
    vader_df = vader_analyzer(df)
    vader_df = vader_df.groupby(['Company','date'])['Company','pos','neg','neu','compound'].mean().reset_index()
    vader_df = vader_df[['date','ticker','pos','neg','neu','compound']]
    return vader_df


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\krist\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [75]:
daily_sentiment(all_df)

  vader_df = vader_df.groupby(['Company','date'])['Company','pos','neg','neu','compound'].mean().reset_index()


KeyError: "Columns not found: 'Company'"

In [11]:
lemmatizer = WordNetLemmatizer()

In [12]:
def process_text(article):
    sw = set(stopwords.words('english'))
    regex = re.compile("[^a-zA-Z ]")
    re_clean = regex.sub('', article)
    words = word_tokenize(re_clean)
    lem = [lemmatizer.lemmatize(word) for word in words]
    output = [word.lower() for word in lem if word.lower() not in sw]
    return output

In [16]:
#Use pmaw for reddit apis
api = PushshiftAPI()

In [17]:
import datetime as dt
before = int(dt.datetime(2022,2,1,0,0).timestamp())
after = int(dt.datetime(2021,12,1,0,0).timestamp())

In [18]:
#Retrieve comments 
comments = api.search_comments(subreddit= 'StockMarket', limit = 10000, before=before, after=after)

In [None]:
comments_df = pd.DataFrame(comments)
comments_df.dropna()
comments_df.head()


Unnamed: 0,all_awardings,archived,associated_award,author,author_flair_background_color,author_flair_css_class,author_flair_richtext,author_flair_template_id,author_flair_text,author_flair_text_color,...,subreddit_id,subreddit_name_prefixed,subreddit_type,top_awarded_type,total_awards_received,treatment_tags,unrepliable_reason,awarders,retrieved_on,author_cakeday
0,[],False,,Mentalilnes,,,[],,,,...,t5_2qjuv,r/StockMarket,public,,0,[],,,,
1,[],False,,aurora4000,,,[],,,,...,t5_2qjuv,r/StockMarket,public,,0,[],,,,
2,[],False,,3p1cBm4n9669,,,[],,,,...,t5_2qjuv,r/StockMarket,public,,0,[],,,,
3,[],False,,Banabak,,,[],,,,...,t5_2qjuv,r/StockMarket,public,,0,[],,,,
4,[],False,,Sergent1969,,,[],,,,...,t5_2qjuv,r/StockMarket,public,,0,[],,,,


In [None]:
reddit = praw.Reddit(client_id='uyIJlACcp5Qux9ubYsb2vA', client_secret='R7a--uqyrVelMGrfsPqA_qIcZX2Ugg', user_agent = 'Trade app bu u/SignificantRange8761')

In [None]:
def get_posts(sub):
    posts = []
    subreddit = reddit.subreddit(sub)
    for post in subreddit.hot(limit=1000):
        posts.append([post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created])
    posts = pd.DataFrame(posts,columns=['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created'])
    return posts

In [None]:
stock_market = get_posts('StockMarket')

In [None]:
stock_market.head()

Unnamed: 0,title,score,id,subreddit,url,num_comments,body,created
0,Interesting.,236,v4o24m,StockMarket,https://i.redd.it/jojcfokqnl391.png,36,,1654346000.0
1,Interesting.,2101,v41hv2,StockMarket,https://i.redd.it/hgfyuinj9f391.png,513,,1654269000.0
2,Most Anticipated Earnings Releases for the wee...,26,v4o3eb,StockMarket,https://i.redd.it/n9qasqm5ol391.png,4,,1654347000.0
3,"Market close - Friday, June 3 2022 🟥🩸",128,v47rxg,StockMarket,https://i.redd.it/tz0zlfj9qg391.png,55,,1654287000.0
4,Elon Musk wants to cut Tesla 10% of jobs and h...,627,v3wmin,StockMarket,https://www.msn.com/en-us/autos/news/elon-musk...,267,,1654253000.0


In [None]:
stock_market.to_csv('test.csv')

In [47]:
#tokenizer
def tokenizer(df):
    """Tokenizes text."""
    
    # Remove the punctuation from text
    for rows in df['text']:
        regex = re.compile("[^a-zA-Z ]")
        re_clean = regex.sub('', rows)

    # Create a tokenized list of the words
        words = word_tokenize(re_clean)
    
    # Lemmatize words into root words
        lemmatizer = WordNetLemmatizer()
        lem = [lemmatizer.lemmatize(word) for word in words]

    # Remove the stop words
        sw = set(stopwords.words('english'))
    
    # Convert the words to lowercase
        tokens = [word.lower() for word in lem if word.lower() not in sw]
    
    
    return tokens

In [None]:
#Binning
def binner(df):
    bins = [-1,-0.1,0.1,1]
    labels = ['negative','neutral', 'positive']
    df['Vader Sentiment'] = pd.cut(df["compound"], bins, labels=labels)
    return df

In [None]:
news_df = pd.read_csv("../Notebooks/Data/Cleaned_Data/news_sentiment.csv", index_col=[0])
news_df.head()

Unnamed: 0,date,ticker,pos,neg,neu,compound
0,2022-05-06,ABNB,0.0,0.0,1.0,0.0
1,2022-05-11,ABNB,0.075167,0.026,0.898667,0.199033
2,2022-05-12,ABNB,0.21,0.159,0.631,0.4912
3,2022-05-14,ABNB,0.169,0.0,0.831,0.70995
4,2022-05-16,ABNB,0.0,0.0,1.0,0.0


In [None]:
binner(news_df)

Unnamed: 0,date,ticker,pos,neg,neu,compound,Vader Sentiment
0,2022-05-06,ABNB,0.000000,0.000,1.000000,0.000000,neutral
1,2022-05-11,ABNB,0.075167,0.026,0.898667,0.199033,positive
2,2022-05-12,ABNB,0.210000,0.159,0.631000,0.491200,positive
3,2022-05-14,ABNB,0.169000,0.000,0.831000,0.709950,positive
4,2022-05-16,ABNB,0.000000,0.000,1.000000,0.000000,neutral
...,...,...,...,...,...,...,...
185,2022-05-13,UBER,0.000000,0.167,0.833000,-0.636900,negative
186,2022-05-14,UBER,0.081000,0.062,0.857000,0.202300,positive
187,2022-05-16,UBER,0.042667,0.000,0.957333,0.195600,positive
188,2022-05-17,UBER,0.000000,0.048,0.952000,-0.241100,negative


In [38]:
df_wall = pd.read_csv('../Notebooks/Data/Cleaned_Data/wallstreetbets_comments.csv')

In [39]:
df_wall.tail()

Unnamed: 0,datetime,text
995,2021-05-16 14:21:57,Nice
996,2021-05-16 14:21:50,I have July 195 and I'm not selling. Straight ...
997,2021-05-16 14:21:46,This sub is honestly worse than qanon i think....
998,2021-05-16 14:21:43,Im holding too but investors will only look at...
999,2021-05-16 14:21:39,I get my news from they guy at the bowling all...


In [11]:
#Takes text column and turn into list of words to iterate and analyze
def text_splitter(df):
    df["text splitted"] = df.text.str.lower().str.replace('[^\w\s]','').str.split()
    df["text splitted"].transform(lambda x: Counter(x)).sum()
    return df

In [94]:
split_df = text_splitter(df_wall)
split_df.head()

  df["text splitted"] = df.text.str.lower().str.replace('[^\w\s]','').str.split()


Unnamed: 0,datetime,text,text splitted
0,2013-01-15 00:50:40,[deleted],[deleted]
1,2013-01-15 00:48:12,\r\n[**@wallstreetbets**](http://twitter.com/w...,"[wallstreetbetshttptwittercomwallstreetbets, g..."
2,2013-01-10 11:08:19,"Simple, you haven't been on irc in a while...o...","[simple, you, havent, been, on, irc, in, a, wh..."
3,2013-01-10 08:32:49,*Nice.*,[nice]
4,2013-01-10 03:39:18,haha! yea talon!,"[haha, yea, talon]"


In [10]:
#Counts occurence of certain words in text to see how much a certain stock is talked about
def word_occurence(df, word):
    df = text_splitter(df)
    count = 0 
    for rows in df['text splitted']:
        for words in rows:
            if words == word:
                count += 1
    return count

In [96]:
word_occurence(df_wall,'nflx')

  df["text splitted"] = df.text.str.lower().str.replace('[^\w\s]','').str.split()


7

In [97]:
#CSV with text put into list of words ready to be analyzed
split_df.to_csv('wallstreetbets_comments_splitted.csv')

In [99]:
#Test code with apple articles
apple_df = pd.read_csv('../Notebooks/Data/Cleaned_Data/apple_articles.csv', index_col=[0])
apple_df.head()

Unnamed: 0,datetime,text
19,2022-06-06 20:11:28,"As part of iOS 16, Apple is introducing Safety..."
18,2022-06-06 20:11:33,The new iOS 16 developer beta was released on ...
17,2022-06-06 20:12:11,US stocks lost steam throughout Monday's tradi...
16,2022-06-06 20:12:16,The Fine Print: The following comments are own...
15,2022-06-06 20:12:50,The Fine Print: The following comments are own...


In [100]:
text_splitter(apple_df)

  df["text splitted"] = df.text.str.lower().str.replace('[^\w\s]','').str.split()


Unnamed: 0,datetime,text,text splitted
19,2022-06-06 20:11:28,"As part of iOS 16, Apple is introducing Safety...","[as, part, of, ios, 16, apple, is, introducing..."
18,2022-06-06 20:11:33,The new iOS 16 developer beta was released on ...,"[the, new, ios, 16, developer, beta, was, rele..."
17,2022-06-06 20:12:11,US stocks lost steam throughout Monday's tradi...,"[us, stocks, lost, steam, throughout, mondays,..."
16,2022-06-06 20:12:16,The Fine Print: The following comments are own...,"[the, fine, print, the, following, comments, a..."
15,2022-06-06 20:12:50,The Fine Print: The following comments are own...,"[the, fine, print, the, following, comments, a..."
14,2022-06-06 20:13:45,"As is par for the course, Apple announced new ...","[as, is, par, for, the, course, apple, announc..."
13,2022-06-06 20:14:32,"This story is part of WWDC 2022, CNET's comple...","[this, story, is, part, of, wwdc, 2022, cnets,..."
12,2022-06-06 20:15:10,Synch your Quest headset to Apple Health for a...,"[synch, your, quest, headset, to, apple, healt..."
11,2022-06-06 20:15:15,In a product demo at its Worldwide Developers ...,"[in, a, product, demo, at, its, worldwide, dev..."
10,2022-06-06 20:15:38,iOS 16 was announced today at WWDC 2022 with s...,"[ios, 16, was, announced, today, at, wwdc, 202..."


In [101]:
word_occurence(apple_df, 'major')

  df["text splitted"] = df.text.str.lower().str.replace('[^\w\s]','').str.split()


2

In [7]:
import requests
def pull_articles(keyword):

	url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/search/NewsSearchAPI"

	querystring = {"q":keyword,"pageNumber":"1","pageSize":"50","autoCorrect":"true","fromPublishedDate":"2012-06-01","toPublishedDate":"2022-06-01"}

	headers = {
	"X-RapidAPI-Key": os.getenv('RapidAPI_Key'),
	"X-RapidAPI-Host": os.getenv('RapidAPI_Host')
	}

	response = requests.request("GET", url, headers=headers, params=querystring)
	while 'pageNumber' <= 'pageSize':

		print(response.text)

In [8]:
pull_articles('netflix')

In [6]:
wallstreet_df = pd.read_csv('../Notebooks/Data/Cleaned_Data/wallstreetbets_comments.csv')
wallstreet_df.tail()

Unnamed: 0,datetime,text
995,2021-05-16 14:21:57,Nice
996,2021-05-16 14:21:50,I have July 195 and I'm not selling. Straight ...
997,2021-05-16 14:21:46,This sub is honestly worse than qanon i think....
998,2021-05-16 14:21:43,Im holding too but investors will only look at...
999,2021-05-16 14:21:39,I get my news from they guy at the bowling all...


In [65]:
def keyword_filter(df, keywords):

    filtered_list = []
    try:
        for keyword in keywords:
            for text in df['text']:
                if keyword in str(text):
                    filtered_list.append(text)

    except ValueError:
        pass
    filtered_df = pd.concat([df['datetime'], pd.DataFrame(filtered_list).rename(columns = {0: 'text'})], axis = 1).dropna()

    return filtered_df

In [13]:
filtered = word_occurence(wallstreet_df,'uber')
filtered

  df["text splitted"] = df.text.str.lower().str.replace('[^\w\s]','').str.split()


3

In [20]:
def pull_comments(df, word):
    text_splitter(df)
    comments = []
    for rows in df['text splitted']:
        for words in rows:
            if words == word:
                return rows

In [23]:
apple_com = pull_comments(wallstreet_df, 'apple')
apple_com

  df["text splitted"] = df.text.str.lower().str.replace('[^\w\s]','').str.split()


['samsung',
 'seeing',
 'apple',
 'and',
 'ibms',
 'partnership',
 'could',
 'view',
 'a',
 'partnership',
 'with',
 'blackberry',
 'for',
 'enterprise',
 'as',
 'a',
 'next',
 'major',
 'step']

In [29]:
security_df = pd.read_csv('../Notebooks/Data/Cleaned_Data/securityanalysis_comments.csv')
security_df.head()

Unnamed: 0,datetime,text
0,2015-02-15 01:08:48,I'm surprised anyone else has heard about EIF....
1,2015-02-14 21:06:46,I recommend Apple's financial statements. I a...
2,2015-02-14 20:31:32,"IMO, one of the better value blogs out there. ..."
3,2015-02-14 00:07:56,Wal-mart has their old annual reports from ear...
4,2015-02-13 18:29:30,"mediums a great platform, i think they already..."


In [42]:
def articles_vader_analyzer(df):
    
    analyzer = SentimentIntensityAnalyzer()
    df['articles_compound_sentiment'] = [analyzer.polarity_scores(x)['compound'] for x in df['text']]
    df['articles_positive_sentiment'] = [analyzer.polarity_scores(x)['pos'] for x in df['text']]
    df['articles_neutral_sentiment'] = [analyzer.polarity_scores(x)['neu'] for x in df['text']]
    df['articles_negative_sentiment'] = [analyzer.polarity_scores(x)['neg'] for x in df['text']]
    
    return df

In [43]:
def reddit_vader_analyzer(subreddit, df):
    
    analyzer = SentimentIntensityAnalyzer()
    df[f'{subreddit}_compound_sentiment'] = [analyzer.polarity_scores(x)['compound'] for x in df['text']]
    df[f'{subreddit}_positive_sentiment'] = [analyzer.polarity_scores(x)['pos'] for x in df['text']]
    df[f'{subreddit}_neutral_sentiment'] = [analyzer.polarity_scores(x)['neu'] for x in df['text']]
    df[f'{subreddit}_negative_sentiment'] = [analyzer.polarity_scores(x)['neg'] for x in df['text']]
    
    return df

In [44]:
def daily_mean(df):

    df['datetime'] = pd.to_datetime(df['datetime'])
    daily_mean_df = df.set_index('datetime').groupby(pd.Grouper(freq='d')).mean()

    return daily_mean_df

In [45]:
def articles_pull(keywords):

    newsapi_response = newsapi.get_everything(q = keywords, language = 'en', sort_by = 'publishedAt')['articles']
    articles_list = []
    for article in newsapi_response:
        try:
            title = article['title']
            description = article['description']
            text = article['content']
            date = article['publishedAt']
            articles_list.append({'date' : date, 'text' : text})
            articles = pd.DataFrame(articles_list).rename(columns = {'date': 'datetime'}).sort_values('datetime')
            articles['datetime'] = pd.to_datetime(articles['datetime'], infer_datetime_format = True, errors = 'coerce')
        except AttributeError:
            pass
    
    return articles

In [83]:
uber_articles = articles_pull('UBER OR Uber OR uber')
uber_articles.to_csv('./Data/Cleaned_Data/uber_articles.csv')

In [84]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

uber_articles_sentiment = daily_mean(articles_vader_analyzer(uber_articles))

In [85]:
#stockmarket_comments = pd.read_csv('../Notebooks/Data/Cleaned_Data/stockmarket_comments.csv')
securityanalysis_comments = pd.read_csv('../Notebooks/Data/Cleaned_Data/securityanalysis_comments.csv')
algotrading_comments = pd.read_csv('../Notebooks/Data/Cleaned_Data/algotrading_comments.csv')
wallstreetbets_comments = pd.read_csv('../Notebooks/Data/Cleaned_Data/wallstreetbets_comments.csv')

In [87]:
securityanalysis_uber_comments = keyword_filter(securityanalysis_comments, ['UBER', 'Uber', 'uber'])
algotrading_uber_comments = keyword_filter(algotrading_comments, ['UBER', 'Uber', 'uber'])
wallstreetbets_uber_comments = keyword_filter(wallstreetbets_comments, ['UBER', 'Uber', 'uber'])

In [89]:
securityanalysis_uber_daily_sentiment = daily_mean(reddit_vader_analyzer('securityanalysis', securityanalysis_uber_comments))
algotrading_uber_sentiment = daily_mean(reddit_vader_analyzer('algotrading', algotrading_uber_comments))
wallstreetbets_uber_sentiment = daily_mean(reddit_vader_analyzer('wallstreetbets', wallstreetbets_uber_comments))

In [92]:
uber_sentiment = pd.concat([uber_articles_sentiment, securityanalysis_uber_daily_sentiment, algotrading_uber_sentiment, wallstreetbets_uber_sentiment], axis = 1)
uber_sentiment.to_csv('./Data/Cleaned_Data/uber_sentiment.csv')
uber_sentiment

Unnamed: 0_level_0,articles_compound_sentiment,articles_positive_sentiment,articles_neutral_sentiment,articles_negative_sentiment,securityanalysis_compound_sentiment,securityanalysis_positive_sentiment,securityanalysis_neutral_sentiment,securityanalysis_negative_sentiment,algotrading_compound_sentiment,algotrading_positive_sentiment,algotrading_neutral_sentiment,algotrading_negative_sentiment,wallstreetbets_compound_sentiment,wallstreetbets_positive_sentiment,wallstreetbets_neutral_sentiment,wallstreetbets_negative_sentiment
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2013-01-10,,,,,,,,,,,,,0.272,0.156,0.709,0.136
2013-01-11,,,,,,,,,,,,,,,,
2013-01-12,,,,,,,,,,,,,,,,
2013-01-13,,,,,,,,,,,,,,,,
2013-01-14,,,,,,,,,0.13385,0.058,0.8805,0.061,,,,
2013-01-15,,,,,,,,,,,,,-0.1907,0.029,0.9325,0.0385
2015-02-12,,,,,0.475,0.101,0.8152,0.084,,,,,,,,
2015-02-13,,,,,0.469543,0.112714,0.816714,0.070429,,,,,,,,
2015-02-14,,,,,0.209367,0.086667,0.840333,0.073,,,,,,,,
2015-02-15,,,,,0.9997,0.211,0.754,0.035,,,,,,,,


In [None]:
uber_concat = pd.concat([uber_se])