# Twitter (SNSCRAPE), News, and Google Search Data Pre-Processing

## Verify GPU, mount drive, import libraries

In [None]:
# Line magic that selects the TensorFlow 2.x runtime; it must run before tensorflow is imported.
%tensorflow_version 2.x
import tensorflow as tf
# Name of the GPU device TensorFlow reports; compared against GPU 0 below.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  # Stop the notebook early when the expected GPU device is not reported.
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [None]:
%tensorflow_version 2.x
import tensorflow as tf
import timeit

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

def cpu():
  """Convolve a random image batch on the CPU and return the summed output.

  Builds a (100, 100, 100, 3) normally-distributed tensor, applies a fresh
  Conv2D layer with 32 filters and kernel size 7, and reduces the result to
  a scalar tensor. Everything is placed on '/cpu:0'.
  """
  with tf.device('/cpu:0'):
    images = tf.random.normal((100, 100, 100, 3))
    conv_layer = tf.keras.layers.Conv2D(32, 7)
    feature_maps = conv_layer(images)
    total = tf.math.reduce_sum(feature_maps)
    return total

def gpu():
  """Convolve a random image batch on GPU 0 and return the summed output.

  Same workload as the CPU benchmark: a (100, 100, 100, 3) random-normal
  tensor passed through a new Conv2D layer (32 filters, kernel size 7),
  then summed to a scalar tensor, all placed on '/device:GPU:0'.
  """
  with tf.device('/device:GPU:0'):
    batch = tf.random.normal((100, 100, 100, 3))
    convolved = tf.keras.layers.Conv2D(32, 7)(batch)
    summed = tf.math.reduce_sum(convolved)
  return summed
  
# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu()
gpu()

# Run the op several times.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)
print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
3.265947783999991
GPU (s):
0.05093340400003399
GPU speedup over CPU: 64x


In [None]:
!pip install snscrape
!pip install langdetect
!pip install googletrans

Collecting snscrape
  Downloading https://files.pythonhosted.org/packages/81/dd/4a4ec9eedd8cc85ced7c5a6a23853965195203aec825ef3f7778a0c3b69e/snscrape-0.3.4-py3-none-any.whl
Installing collected packages: snscrape
Successfully installed snscrape-0.3.4
Collecting langdetect
[?25l  Downloading https://files.pythonhosted.org/packages/56/a3/8407c1e62d5980188b4acc45ef3d94b933d14a2ebc9ef3505f22cf772570/langdetect-1.0.8.tar.gz (981kB)
[K     |████████████████████████████████| 983kB 6.0MB/s 
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
  Created wheel for langdetect: filename=langdetect-1.0.8-cp36-none-any.whl size=993195 sha256=9ea16ab35a18cb7dd8525f680150eb5424ae297b532e47a3636f444247628ffe
  Stored in directory: /root/.cache/pip/wheels/8d/b3/aa/6d99de9f3841d7d3d40a60ea06e6d669e8e5012e6c8b947a57
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.8
Collecting googl

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import snscrape.modules.twitter as sntwitter
import csv
import pandas as pd
import re
import numpy as np
import string

import nltk
nltk.download('stopwords')
stopwords = nltk.corpus.stopwords.words('english')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger') 

from nltk.tokenize import word_tokenize

from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

import langdetect
from langdetect import detect
from googletrans import Translator 

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...




In [None]:
cd /content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Raw_Extracts

/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts


## News Sentiment Analysis 

In [None]:
# Read in raw headlines data 
pd.set_option('display.max_colwidth', 90)
news = pd.read_csv(r'/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/headlines_keywords.csv')
news.head()

Unnamed: 0,headlines,num of comments,date
0,Roku adds Apple AirPlay support (and another HBO Max workaround),47,2020-11-11
1,"Apple launches MacBook Air and Pro, $699 Mac Mini with in-house silicon as Intel break...",188,2020-11-10
2,Apple's Mac with in-house silicon presents $15B sales opportunity - JPMorgan,34,2020-11-09
3,Apple suspends iPhone supplier Pegatron for labor violations,16,2020-11-09
4,"Sony, Apple linked to talks in key deal for podcasting's Wondery",23,2020-11-07


In [None]:
news.shape

(5202, 3)

In [None]:
# Convert the date column to pandas datetimes
news['date'] = pd.to_datetime(news['date'])

# Ensure all rows are populated: treat empty strings as missing values,
# then drop every row that has no headline text
nan_value = float("NaN")
news = news.replace("", nan_value)
news = news.dropna(subset=["headlines"])

# Delete duplicates. drop_duplicates() returns a NEW frame and leaves the
# original untouched, so the result has to be assigned back to take effect.
news = news.drop_duplicates()
news.shape

(5202, 3)

In [None]:
# Score every headline with VADER; each cell holds the dict returned by
# polarity_scores (keys 'neg', 'neu', 'pos', 'compound').
news['sentiment_vader'] = news['headlines'].apply(sid.polarity_scores)
news.head()

Unnamed: 0,headlines,num of comments,date,sentiment_vader
0,Roku adds Apple AirPlay support (and another HBO Max workaround),47,2020-11-11,"{'neg': 0.0, 'neu': 0.769, 'pos': 0.231, 'compound': 0.4019}"
1,"Apple launches MacBook Air and Pro, $699 Mac Mini with in-house silicon as Intel break...",188,2020-11-10,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}"
2,Apple's Mac with in-house silicon presents $15B sales opportunity - JPMorgan,34,2020-11-09,"{'neg': 0.0, 'neu': 0.763, 'pos': 0.237, 'compound': 0.4215}"
3,Apple suspends iPhone supplier Pegatron for labor violations,16,2020-11-09,"{'neg': 0.327, 'neu': 0.673, 'pos': 0.0, 'compound': -0.5267}"
4,"Sony, Apple linked to talks in key deal for podcasting's Wondery",23,2020-11-07,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}"


In [None]:
# Replace the per-headline VADER dict with just its 'compound' value:
# pop() removes the dict column and hands it over for the lookup.
news['compound_sentiment'] = news.pop('sentiment_vader').map(lambda scores: scores['compound'])
news.head()

Unnamed: 0,headlines,num of comments,date,compound_sentiment
0,Roku adds Apple AirPlay support (and another HBO Max workaround),47,2020-11-11,0.4019
1,"Apple launches MacBook Air and Pro, $699 Mac Mini with in-house silicon as Intel break...",188,2020-11-10,0.0
2,Apple's Mac with in-house silicon presents $15B sales opportunity - JPMorgan,34,2020-11-09,0.4215
3,Apple suspends iPhone supplier Pegatron for labor violations,16,2020-11-09,-0.5267
4,"Sony, Apple linked to talks in key deal for podcasting's Wondery",23,2020-11-07,0.0


In [None]:
# news.to_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Headlines_Sentiment.csv')

## Analyzing Processed headlines

In [None]:
# Read in processed headlines file 
headlines_sent = pd.read_csv(r'/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Headlines_Sentiment.csv')

del headlines_sent['Unnamed: 0']
headlines_sent.shape

(5202, 4)

In [None]:
headlines_sent.head()

Unnamed: 0,headlines,num of comments,date,compound_sentiment
0,Roku adds Apple AirPlay support (and another H...,47,2020-11-11,0.4019
1,"Apple launches MacBook Air and Pro, $699 Mac M...",188,2020-11-10,0.0
2,Apple's Mac with in-house silicon presents $15...,34,2020-11-09,0.4215
3,Apple suspends iPhone supplier Pegatron for la...,16,2020-11-09,-0.5267
4,"Sony, Apple linked to talks in key deal for po...",23,2020-11-07,0.0


In [None]:
# Check to see how Sentiment Vader is working: show the headline(s) that
# received the highest compound score
pd.set_option('display.max_rows', None)
# Series.max() skips NaN. The builtin max() compares element by element, so a
# NaN in the column can make it return NaN, and the equality filter below
# would then match no rows at all.
highest_compound = headlines_sent['compound_sentiment'].max()
max_sent = headlines_sent[headlines_sent['compound_sentiment'] == highest_compound]
max_sent

Unnamed: 0,headlines,num of comments,date,compound_sentiment
4916,We believe that the market is set up for anoth...,4,2010-03-14,0.9668


In [None]:
# Build a daily series from the scored headlines:
#   1. keep only the date, comment count and compound score
#   2. make sure the date column is a datetime and use it as the index
#   3. average all headlines that fall on the same day (there are more
#      headlines than days under study)
#   4. days without any headline get 0 for both columns
headlines_sent = (
    headlines_sent[['date', 'num of comments', 'compound_sentiment']]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
    .resample('D')
    .mean()
    .fillna(0)
)

# Restrict to the study window: strictly after 2008-12-31 and strictly
# before 2020-11-11
after_start = headlines_sent.index > pd.to_datetime('2008-12-31')
before_end = headlines_sent.index < pd.to_datetime('2020-11-11')
headlines_sent = headlines_sent.loc[after_start & before_end]
headlines_sent.head()

Unnamed: 0_level_0,num of comments,compound_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-01-01,0.0,0.0
2009-01-02,0.0,-0.0644
2009-01-03,0.0,-0.3818
2009-01-04,0.0,0.0
2009-01-05,0.666667,0.049483


In [None]:
headlines_sent.shape

(4332, 2)

## Twitter Analysis

### Scraping and Cleaning Tweets

Reference used while debugging the scraper: https://github.com/Mottl/GetOldTweets3/issues/98

In [None]:
import snscrape.modules.twitter as sntwitter
import csv

keyword = '$AAPL'
# keyword = '$APPL'
# keyword = '#AAPL'
# keyword = '#APPL'

# Upper bound on the number of tweets written for each monthly query
maxTweets = 3000000000

years = list(range(2009, 2020))
months = list(range(1, 13))

# Open/create a file to append data to. The with-block guarantees the file is
# flushed and closed even if a scrape request raises part-way through.
with open('cashaapl.csv', 'a', newline='', encoding='utf8') as csvFile:
    # One csv writer is enough for the whole run
    csvWriter = csv.writer(csvFile)
    # csvWriter.writerow(['id','date','tweet'])

    for year in years:
        for month in months:
            s_y, s_m = year, month
            # The query window ends on the first day of the following month;
            # December rolls over into January of the next year.
            if month != 12:
                e_y, e_m = s_y, s_m + 1
            else:
                e_y, e_m = s_y + 1, 1

            # Search operators appended to the keyword: date window, no links, no replies
            query_suffix = f' since:{str(s_y)}-{str(s_m).zfill(2)}-01 until:{str(e_y)}-{str(e_m).zfill(2)}-01 -filter:links -filter:replies'
            print(query_suffix)

            scraper = sntwitter.TwitterSearchScraper(keyword + query_suffix)
            for i, tweet in enumerate(scraper.get_items()):
                # i is zero-based: stop as soon as maxTweets rows have been written
                if i >= maxTweets:
                    break
                csvWriter.writerow([tweet.username, tweet.date, tweet.content])


      
      
      

# after 2020 

# for i,tweet in enumerate(sntwitter.TwitterSearchScraper(keyword + ' since:2020-10-01 until:2020-11-01 -filter:links -filter:replies').get_items()) :
#         if i > maxTweets :
#             break      
#         csvWriter.writerow([tweet.username, tweet.date, tweet.content])
# csvFile.close()









Read in the scraped files for the AAPL cashtag (`$AAPL`, "cash") and hashtag (`#AAPL`, "hash") searches.

In [None]:
# twitterdata_cash1 = pd.read_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash_aapl/cashaapl.csv')
# twitterdata_cash2 = pd.read_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash_aapl/cashaapl_2020.csv')
# frames = [twitterdata_cash1, twitterdata_cash2]
# twitterdata_cash = pd.concat(frames)

# twitterdata_hash1 = pd.read_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash_aapl/hashaapl20092013.csv')
# twitterdata_hash2 = pd.read_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash_aapl/hashaapl20132016.csv')
# twitterdata_hash3 = pd.read_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash_aapl/hashaapl20162019.csv')
# twitterdata_hash4 = pd.read_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash_aapl/hashaapl20192020.csv')
# frames = [twitterdata_hash1, twitterdata_hash2, twitterdata_hash3, twitterdata_hash4]
# twitterdata_hash = pd.concat(frames)

# twitterdata_cash.to_csv('/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/twitterdata_cash_id.csv')
# twitterdata_hash.to_csv('/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/twitterdata_hash_id.csv')



Read in the scraped files for the APPL cashtag (`$APPL`, "cash") and hashtag (`#APPL`, "hash") searches.

In [None]:
# twitterdata_cash2009 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2009.csv")
# twitterdata_cash2010 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2010.csv")
# twitterdata_cash2011 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2011.csv")
# twitterdata_cash2012 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2012.csv")
# twitterdata_cash2013 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2013.csv")
# twitterdata_cash2014 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2014.csv")
# twitterdata_cash2015 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2015.csv")
# twitterdata_cash2016 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2016.csv")
# twitterdata_cash2017 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2017.csv")
# twitterdata_cash2018 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash2018.csv")
# twitterdata_cash20192020 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/cash/cash20192020.csv")



twitterdata_hash2009 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2009.csv")
# twitterdata_hash2010 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2010.csv")
# twitterdata_hash2011 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2011.csv")
# twitterdata_hash2012 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2012.csv")
# twitterdata_hash2013 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2013.csv")
# twitterdata_hash2014 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2014.csv")
# twitterdata_hash2015 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2015.csv")
# twitterdata_hash2016 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2016.csv")
# twitterdata_hash2017 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2017.csv")
# twitterdata_hash2018 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash2018.csv")
# twitterdata_hash20192020 = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/hash/hash20192020.csv")



# frames = [twitterdata_cash2009, twitterdata_cash2010, twitterdata_cash2011, twitterdata_cash2012, twitterdata_cash2013, twitterdata_cash2014, twitterdata_cash2015, twitterdata_cash2016, twitterdata_cash2017, twitterdata_cash2018, twitterdata_cash20192020]
# twitterdata_cash = pd.concat(frames)


# frames = [twitterdata_hash2009, twitterdata_hash2010, twitterdata_hash2011, twitterdata_hash2012, twitterdata_hash2013, twitterdata_hash2014, twitterdata_hash2015, twitterdata_hash2016, twitterdata_hash2017, twitterdata_hash2018, twitterdata_hash20192020]
# twitterdata_hash = pd.concat(frames)



# twitterdata_cash.to_csv('/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/twitterdata_cash.csv')
# twitterdata_hash.to_csv('/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/twitterdata_hash.csv')


### Processing Raw data 
---

In [None]:
# twitterdata_cash_long = pd.read_csv(r'/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/twitterdata_cash_id.csv')

# twitterdata_cash = twitterdata_cash_long[: int(len(twitterdata_cash_long)/4)]
# twitterdata_cash = twitterdata_cash_long[int(len(twitterdata_cash_long)/4): 2*int(len(twitterdata_cash_long)/4)]

# twitterdata_cash = twitterdata_cash_long[2*int(len(twitterdata_cash_long)/4): 3*int(len(twitterdata_cash_long)/4)]
# twitterdata_cash = twitterdata_cash_long[3*int(len(twitterdata_cash_long)/4):]

# twitterdata_hash = pd.read_csv(r"/content/drive/MyDrive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Raw_Extracts/twitterdata_hash_id.csv")


  interactivity=interactivity, compiler=compiler, result=result)


In [None]:
# frames = [twitterdata_cash, twitterdata_hash]
# twitterdata = pd.concat(frames)
twitterdata = twitterdata_hash2009
twitterdata.shape

(123, 3)

In [None]:
twitterdata.head()

Unnamed: 0,id,date,tweet
0,Urvaksh,2009-12-29 21:43:40+00:00,"BTW, are all these TSA measures approved already? Or is this like the iSlate? #APPL"
1,geoffreydgraham,2009-12-28 16:37:16+00:00,The great ppl @BlueIon gave me an impromptu tour of their temp ofc this AM. They r mvg...
2,denisdubois,2009-12-27 17:22:42+00:00,RT @applespotlight: News: Apple stock closed at record high of $209.04 on Thursday. Me...
3,applespotlight,2009-12-26 19:51:36+00:00,News: Apple stock closed at record high of $209.04 on Thursday. Merry Christmas #APPL
4,joelfeder,2009-12-24 16:42:27+00:00,REALLY happy I own #APPL stock right now...... #Win


In [None]:
#twitterdata_cash.shape
#twitterdata_hash.shape
twitterdata = twitterdata.drop_duplicates()
twitterdata.shape


(122, 3)

In [None]:
pd.set_option('display.max_rows', 10)

# Normalise the date column in one pass:
#   - discard rows whose 'date' field is the literal text 'date'
#     (presumably header rows repeated inside the scraped CSV -- verify)
#   - parse the timestamps and keep only the calendar date
#   - drop any row that still has a missing value
twitterdata = (
    twitterdata[twitterdata['date'] != 'date']
    .assign(date=lambda rows: pd.to_datetime(rows['date']).dt.date)
    .dropna()
)
twitterdata.shape


(122, 3)

In [None]:
def remove_punct_num(text):
  """Return the purely alphabetic tokens of `text`, lower-cased.

  `text` is iterated item by item; any item for which `isalpha()` is false
  (punctuation, numbers, mixed tokens such as 'a1') is left out.
  """
  kept = []
  for token in text:
    if not token.isalpha():
      continue
    kept.append(token.lower())
  return kept


def join_clean(tokens):
  """Glue the given tokens back into one string separated by single spaces."""
  separator = " "
  return separator.join(tokens)


In [None]:
# twitterdata['language'] = twitterdata['clean'].apply(lambda x: detect(x) if len(x)>15 else 'NA')
# Build the cleaned text in a single pass per tweet: tokenize, keep only the
# alphabetic tokens in lower case, then re-join them with spaces. This yields
# the same 'joint' column without creating and deleting temporary columns.
twitterdata['joint'] = twitterdata['tweet'].apply(
    lambda tweet: join_clean(remove_punct_num(word_tokenize(tweet))))
# dropna() returns a new frame, so the result must be assigned back;
# calling it without assignment has no effect.
twitterdata = twitterdata.dropna()



In [None]:
# langdetect's algorithm is randomised: without a fixed seed the same text can
# be assigned different languages on different calls. Seeding the factory
# makes the labels reproducible across rows and across notebook runs.
langdetect.DetectorFactory.seed = 0

# Cleaned texts of 6 characters or fewer are labelled 'NA' instead of being
# passed to detect()
twitterdata['language'] = twitterdata['joint'].apply(lambda x: detect(x) if len(x)>6 else 'NA')
twitterdata.head()

Unnamed: 0,id,date,tweet,joint,language
0,Urvaksh,2009-12-29,"BTW, are all these TSA measures approved already? Or is this like the iSlate? #APPL",btw are all these tsa measures approved already or is this like the islate appl,en
1,geoffreydgraham,2009-12-28,The great ppl @BlueIon gave me an impromptu tour of their temp ofc this AM. They r mvg...,the great ppl blueion gave me an impromptu tour of their temp ofc this am they r mvg a...,en
2,denisdubois,2009-12-27,RT @applespotlight: News: Apple stock closed at record high of $209.04 on Thursday. Me...,rt applespotlight news apple stock closed at record high of on thursday merry christma...,en
3,applespotlight,2009-12-26,News: Apple stock closed at record high of $209.04 on Thursday. Merry Christmas #APPL,news apple stock closed at record high of on thursday merry christmas appl,en
4,joelfeder,2009-12-24,REALLY happy I own #APPL stock right now...... #Win,really happy i own appl stock right now win,en


In [None]:
(twitterdata['language']=='NA').sum()

0

In [None]:
translator = Translator()

In [None]:
# Exclude tweets labelled 'NA' (no language was detected for them)
twitterdata = twitterdata[twitterdata['language']!='NA']
# Non-English tweets. .copy() makes this an independent frame, so adding
# columns to it later does not trigger pandas' SettingWithCopyWarning.
twitterdata_nonen = twitterdata[twitterdata['language']!= 'en'].copy()

In [None]:
# Find the translated tweet for the Non English Tweets 
twitterdata_nonen['trans'] = twitterdata_nonen['joint'].apply(lambda x: translator.translate(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
pd.set_option('display.max_rows', 15)
twitterdata_nonen

Unnamed: 0,id,date,tweet,joint,language,trans
6,FernandoGaloooo,2009-12-13,só RT :: #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPhone #appl,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,ca,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
7,FernandoGaloooo,2009-12-13,só RT : #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPhone #appl,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,ca,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
8,FernandoGaloooo,2009-12-13,só RT : #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPhone #appl,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,it,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
9,FernandoGaloooo,2009-12-13,só RT : #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPho...,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,ca,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
10,FernandoGaloooo,2009-12-13,só RT : #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPhone ...,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,ca,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
...,...,...,...,...,...,...
75,ppenzini,2009-09-04,voy a dar la accion que se beneficiara por #appl. por venta china de iphone,voy a dar la accion que se beneficiara por appl por venta china de iphone,es,"Translated(src=es, dest=en, text=I will give the action that will benefit by appl by c..."
81,imhassan,2009-08-25,"New ""Surprise"" Get a Mac ad? #FAIL #APPL",new surprise get a mac ad fail appl,fr,"Translated(src=en, dest=en, text=new surprise get a mac ad fail apple, pronunciation=n..."
82,econstantino,2009-08-10,"Meu Iphone ta falecendo... não ouço nada, nem tonton, fazendo ou recebendo ligações. A...",meu iphone ta falecendo não ouço nada nem tonton fazendo ou recebendo ligações amanhã ...,pt,"Translated(src=pt, dest=en, text=my iphone is dying i don’t hear anything or tonton ma..."
86,EeePcCanada,2009-07-22,By @Lunack le poids de mon #-netbook par rapport au #macbook ! pas photo #eeepc est bc...,by lunack le poids de mon par rapport au macbook pas photo eeepc est bcp plus pratique...,fr,"Translated(src=fr, dest=en, text=by lunack the weight of my compared to the macbook no..."


In [None]:
twitterdata_nonen = twitterdata_nonen.drop_duplicates()
twitterdata_nonen.head()

Unnamed: 0,id,date,tweet,joint,language,trans
6,FernandoGaloooo,2009-12-13,só RT :: #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPhone #appl,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,ca,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
7,FernandoGaloooo,2009-12-13,só RT : #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPhone #appl,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,ca,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
8,FernandoGaloooo,2009-12-13,só RT : #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPhone #appl,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,it,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
9,FernandoGaloooo,2009-12-13,só RT : #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPho...,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,ca,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."
10,FernandoGaloooo,2009-12-13,só RT : #bravovirtual Siga @bravovirtual para concorrer a UM iPhone 3GS #iPhone ...,só rt bravovirtual siga bravovirtual para concorrer a um iphone iphone appl,ca,"Translated(src=pt, dest=en, text=just rt bravovirtual follow bravovirtual to compete f..."


In [None]:
twitterdata_en = twitterdata[twitterdata['language']== 'en']

In [None]:
twitterdata_nonen['language'].unique()

array(['ca', 'it', 'pt', 'de', 'nl', 'sv', 'es', 'fr'], dtype=object)

In [None]:
# get just the translated sentences 
twitterdata_nonen['trans'] = twitterdata_nonen['trans'].apply(lambda x: x.text)

In [None]:
# Drop the helper columns that are no longer needed. For the translated frame
# the original-language 'tweet' text is removed as well, because the English
# translation in 'trans' takes its place below.
for column in ('tweet', 'joint', 'language'):
    del twitterdata_nonen[column]
for column in ('joint', 'language'):
    del twitterdata_en[column]

pd.set_option('display.max_colwidth', 100)

# Give the translated frame the same column layout as the English one
# (id, date, tweet) so the two can be stacked later.
twitterdata_nonen = twitterdata_nonen.rename(columns={'trans': 'tweet'})

twitterdata_nonen

Unnamed: 0,id,date,tweet
6,FernandoGaloooo,2009-12-13,just rt bravovirtual follow bravovirtual to compete for an iphone iphone appl
7,FernandoGaloooo,2009-12-13,just rt bravovirtual follow bravovirtual to compete for an iphone iphone appl
8,FernandoGaloooo,2009-12-13,just rt bravovirtual follow bravovirtual to compete for an iphone iphone appl
9,FernandoGaloooo,2009-12-13,just rt bravovirtual follow bravovirtual to compete for an iphone iphone appl
10,FernandoGaloooo,2009-12-13,just rt bravovirtual follow bravovirtual to compete for an iphone iphone appl
...,...,...,...
75,ppenzini,2009-09-04,I will give the action that will benefit by appl by china sale of iphone
81,imhassan,2009-08-25,new surprise get a mac ad fail apple
82,econstantino,2009-08-10,my iphone is dying i don’t hear anything or tonton making or receiving calls tomorrow i will tes...
86,EeePcCanada,2009-07-22,by lunack the weight of my compared to the macbook not photo eeepc is bcp more practical for a n...


In [None]:
# Remove rows that are exact repeats (same id, date and translated text).
# The trailing .copy() detaches the de-duplicated result from the frame it was
# filtered from, so adding columns to it afterwards does not raise pandas'
# SettingWithCopyWarning.
twitterdata_nonen = twitterdata_nonen.drop_duplicates().copy()
twitterdata_nonen.shape

(15, 3)

In [None]:
# Score every translated tweet; each result is a dict with the keys
# 'neg', 'neu', 'pos' and 'compound'.
# NOTE(review): `sid` is presumably the VADER analyzer created earlier in the
# notebook - confirm.
twitterdata_nonen['sentiment_vader'] = twitterdata_nonen['tweet'].map(sid.polarity_scores)
twitterdata_nonen.head(n=5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,id,date,tweet,sentiment_vader
6,FernandoGaloooo,2009-12-13,just rt bravovirtual follow bravovirtual to compete for an iphone iphone appl,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}"
24,FernandoGaloooo,2009-12-13,rt bravovirtual follow bravovirtual to compete for an iphone iphone appl,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}"
35,FernandoGaloooo,2009-12-13,rt bravo virtual follow bravo virtual to compete for iphone iphone apple,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}"
42,FernandoGaloooo,2009-12-12,rt bravovirtual follow bravovirtual to compete for an iphone iphone appl,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}"
53,Starchild80,2009-11-18,the wap page offers me push messages like the iphone appl,"{'neg': 0.0, 'neu': 0.8, 'pos': 0.2, 'compound': 0.3612}"


In [None]:
# Score every English tweet; each result is a dict with the keys
# 'neg', 'neu', 'pos' and 'compound'.
# NOTE(review): `sid` is presumably the VADER analyzer created earlier in the
# notebook - confirm.
twitterdata_en['sentiment_vader'] = twitterdata_en['tweet'].map(sid.polarity_scores)
twitterdata_en.head(n=5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,id,date,tweet,sentiment_vader
0,Urvaksh,2009-12-29,"BTW, are all these TSA measures approved already? Or is this like the iSlate? #APPL","{'neg': 0.0, 'neu': 0.683, 'pos': 0.317, 'compound': 0.7214}"
1,geoffreydgraham,2009-12-28,The great ppl @BlueIon gave me an impromptu tour of their temp ofc this AM. They r mvg 2 an amaz...,"{'neg': 0.0, 'neu': 0.752, 'pos': 0.248, 'compound': 0.836}"
2,denisdubois,2009-12-27,RT @applespotlight: News: Apple stock closed at record high of $209.04 on Thursday. Merry Christ...,"{'neg': 0.0, 'neu': 0.811, 'pos': 0.189, 'compound': 0.5423}"
3,applespotlight,2009-12-26,News: Apple stock closed at record high of $209.04 on Thursday. Merry Christmas #APPL,"{'neg': 0.0, 'neu': 0.788, 'pos': 0.212, 'compound': 0.5423}"
4,joelfeder,2009-12-24,REALLY happy I own #APPL stock right now...... #Win,"{'neg': 0.0, 'neu': 0.597, 'pos': 0.403, 'compound': 0.6933}"


In [None]:
# Reduce each score dict to its single 'compound' value and discard the dict
# column, for the English frame first and then the translated one.
for frame in (twitterdata_en, twitterdata_nonen):
    frame['compound_sentiment'] = frame['sentiment_vader'].map(lambda scores: scores['compound'])
for frame in (twitterdata_nonen, twitterdata_en):
    del frame['sentiment_vader']
twitterdata_nonen.head(n=5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,id,date,tweet,compound_sentiment
6,FernandoGaloooo,2009-12-13,just rt bravovirtual follow bravovirtual to compete for an iphone iphone appl,0.0
24,FernandoGaloooo,2009-12-13,rt bravovirtual follow bravovirtual to compete for an iphone iphone appl,0.0
35,FernandoGaloooo,2009-12-13,rt bravo virtual follow bravo virtual to compete for iphone iphone apple,0.0
42,FernandoGaloooo,2009-12-12,rt bravovirtual follow bravovirtual to compete for an iphone iphone appl,0.0
53,Starchild80,2009-11-18,the wap page offers me push messages like the iphone appl,0.3612


In [None]:
# Preview the English frame after the score dict was reduced to one number.
twitterdata_en.head(n=5)

Unnamed: 0,id,date,tweet,compound_sentiment
0,Urvaksh,2009-12-29,"BTW, are all these TSA measures approved already? Or is this like the iSlate? #APPL",0.7214
1,geoffreydgraham,2009-12-28,The great ppl @BlueIon gave me an impromptu tour of their temp ofc this AM. They r mvg 2 an amaz...,0.836
2,denisdubois,2009-12-27,RT @applespotlight: News: Apple stock closed at record high of $209.04 on Thursday. Merry Christ...,0.5423
3,applespotlight,2009-12-26,News: Apple stock closed at record high of $209.04 on Thursday. Merry Christmas #APPL,0.5423
4,joelfeder,2009-12-24,REALLY happy I own #APPL stock right now...... #Win,0.6933


Save files 

In [None]:
# Stack the English and the translated tweets row-wise into one frame; both
# share the columns id, date, tweet and compound_sentiment at this point.
processed_dfs = [twitterdata_en, twitterdata_nonen]
twitter_df = pd.concat(processed_dfs, axis=0)
twitter_df.head(n=5)
# twitter_df.to_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent.csv')


Unnamed: 0,id,date,tweet,compound_sentiment
0,Urvaksh,2009-12-29,"BTW, are all these TSA measures approved already? Or is this like the iSlate? #APPL",0.7214
1,geoffreydgraham,2009-12-28,The great ppl @BlueIon gave me an impromptu tour of their temp ofc this AM. They r mvg 2 an amaz...,0.836
2,denisdubois,2009-12-27,RT @applespotlight: News: Apple stock closed at record high of $209.04 on Thursday. Merry Christ...,0.5423
3,applespotlight,2009-12-26,News: Apple stock closed at record high of $209.04 on Thursday. Merry Christmas #APPL,0.5423
4,joelfeder,2009-12-24,REALLY happy I own #APPL stock right now...... #Win,0.6933


In [None]:
# twitterdata_en.to_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/English_Tweet_Sent.csv')
# twitterdata_nonen.to_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Non_English_Tweet_Sent.csv')

# twitterdata_en.to_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/English_Tweet_Sent_hashid.csv')
# twitterdata_nonen.to_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Non_English_Tweet_Sent_hashid.csv')

In [None]:
# twitter_df.to_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent_mag.csv')
# twitter_df.to_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent_ani.csv')
# twitter_df.to_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent_bip.csv')
# twitter_df.to_csv('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent_hamz.csv')


### Using Processed Tweets to create Weighted Sentiment

In [None]:
def _load_sentiment_csv(csv_path):
    """Read one saved tweet-sentiment CSV and return it without 'Unnamed: 0'.

    The 'Unnamed: 0' column is presumably the index written out when the file
    was saved. The remaining column names are printed so the four inputs can
    be checked for an identical layout before they are concatenated.
    """
    frame = pd.read_csv(csv_path)
    del frame['Unnamed: 0']
    print(frame.columns)
    return frame


# Read in files
appl = _load_sentiment_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent_appl.csv')
aapl1 = _load_sentiment_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent_bip.csv')
aapl2 = _load_sentiment_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent_mag.csv')
aapl3 = _load_sentiment_csv('/content/drive/My Drive/MENG_AI/EE8225-IoT/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent_hamz_actual.csv')

# Stack the four partial scrapes into one frame.
dfs = [appl, aapl1, aapl2, aapl3]
twitterdf = pd.concat(dfs, axis=0)
twitterdf.shape



Index(['id', 'date', 'tweet', 'compound_sentiment'], dtype='object')
Index(['id', 'date', 'tweet', 'compound_sentiment'], dtype='object')
Index(['id', 'date', 'tweet', 'compound_sentiment'], dtype='object')
Index(['id', 'date', 'tweet', 'compound_sentiment'], dtype='object')


(655233, 4)

In [None]:
# The partial scrapes overlap, so drop rows that are exact repeats (the first
# occurrence of each is kept) and report the remaining size.
twitterdf = twitterdf.drop_duplicates(keep='first')
twitterdf.shape

(639392, 4)

In [None]:
# Preview the combined, de-duplicated tweets.
twitterdf.head(n=5)

Unnamed: 0,id,date,tweet,compound_sentiment
0,TzTrader83,2009-12-31,I think $APPL will be swing up & down until the earnings before breaking the 213 resistance #mkt $$,0.0
1,TheToddNewberg,2009-12-30,"Michael Dell once said the day $APPL would be bigger than $DELL ""hell would freeze over"". $APPL...",-0.6597
2,daytradnanimal,2009-12-30,back in $APPL love this stock got in earlier,0.6369
3,brokerz_gold,2009-12-30,"RT @theback9 - Short , $GS, $BAC, $STT $JPM $APPL $SINA $SOHU $SPY and Long $VIX $QID, $GLD(adde...",0.0
4,theback9,2009-12-30,"Short , $GS, $BAC, $STT $JPM $APPL $SINA $SOHU $SPY and Long $VIX $QID, $GLD(added today) $$",0.0


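Sources consulted to choose the influential stock-market Twitter accounts ("influencers") that are given a higher weight below: https://traderlife.co.uk/features/lunch-break-reads/10-trading-twitter-accounts-to-follow-in-2020/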

https://www.investopedia.com/financial-edge/0712/10-twitter-feeds-investors-should-follow.aspx


https://blog.mywallst.com/best-investing-twitter-accounts/

In [None]:
# assign higher weight for influencers 

In [None]:
# Only the author, the day and the score are needed from here on. .copy()
# detaches the selection so the assignment below writes to an independent frame.
twitterdf = twitterdf[['id', 'date', 'compound_sentiment']].copy()

# Accounts treated as market influencers (taken from the articles linked above).
# Every entry needs its own trailing comma: two adjacent string literals are
# silently concatenated by Python, which previously fused 'Benzinga' and
# 'BreakoutStocks' into one name that matched neither account.
# NOTE(review): matching is exact and case-sensitive - confirm the handles'
# capitalisation against the scraped ids.
influencers = [
    'Stocktwits', 'PeterLBrandt', 'CNBC', 'SJosephBurns', 'elerianm', 'IBDinvestors',
    'TheStalwart', 'jimcramer', 'bespokeinvest', 'steve_hanke', 'MarketWatch', 'Benzinga',
    'BreakoutStocks', 'WSJmarkets', 'Stephanie_Link', 'nytimesbusiness', 'WSJDealJournal',
    'LizAnnSonders', 'morganhousel', 'charliebilello', 'eWhispers', 'profgalloway',
    'emmetlsavage', 'TMFJMo', 'awealthofcs', 'stratechery',
]

# Influencer tweets count double, everyone else counts once.
# Caution: the weighted score overwrites 'compound_sentiment', so running this
# cell a second time doubles the influencer scores again.
weight = twitterdf['id'].isin(influencers).map({True: 2, False: 1})
twitterdf['compound_sentiment'] = twitterdf['compound_sentiment'] * weight
twitterdf.head()

Unnamed: 0,id,date,compound_sentiment
0,TzTrader83,2009-12-31,0.0
1,TheToddNewberg,2009-12-30,-0.6597
2,daytradnanimal,2009-12-30,0.6369
3,brokerz_gold,2009-12-30,0.0
4,theback9,2009-12-30,0.0


In [None]:
# Count the tweets whose (weighted) compound score is different from zero.
has_nonzero_score = twitterdf['compound_sentiment'] != 0
non_0 = twitterdf[has_nonzero_score]
non_0.shape

(380100, 3)

In [None]:
# Build one sentiment value per calendar day: parse the dates, index by them,
# average all scores that fall on the same day, and use 0 for days that have
# no tweets at all (their daily mean would otherwise be NaN).
testset = (
    twitterdf[['date', 'compound_sentiment']]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
    .resample('D')
    .mean()
    .fillna(0)
)
testset

Unnamed: 0_level_0,compound_sentiment
date,Unnamed: 1_level_1
2009-01-01,0.275800
2009-01-02,0.180712
2009-01-03,-0.230400
2009-01-04,0.000000
2009-01-05,0.080633
...,...
2020-11-15,0.000000
2020-11-16,0.263350
2020-11-17,0.180600
2020-11-18,0.458800


In [None]:
# Persist the daily mean sentiment series (the date index is written as the
# first column).
testset.to_csv(
    '/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Combined_Tweet_Sent.csv'
)


## Google Searches

In [None]:
# Load the Google Trends export (one row per 'Month') and expand it to a daily
# series: after resampling to days only the days present in the export carry a
# value, and the forward fill repeats that value until the next one appears.
aapl_trend = (
    pd.read_excel('/content/drive/Shareddrives/IoTAnalytics/GROUP-EE8225-IoT/Project_Data/Google Trend data/Excel/AAPL_Trend.xlsx')
    .assign(date=lambda frame: pd.to_datetime(frame['Month']))
    .loc[:, ['date', 'AAPL: (Worldwide)']]
    .set_index('date')
    .resample('D')
    .mean()
    .ffill()
)

# Keep long frames compact when they are displayed.
pd.set_option('display.max_rows', 15)


In [None]:
# Leftover experiment, kept for reference only. It would not work as written:
# 'date' is the index of aapl_trend at this point (not a column), and
# '2010-02-31' is not a valid calendar date.
#test = aapl_trend[aapl_trend['date'] > '2010-02-31']

# Display the daily, forward-filled Google Trends series.
aapl_trend

Unnamed: 0_level_0,AAPL: (Worldwide)
date,Unnamed: 1_level_1
2009-01-01,38.0
2009-01-02,38.0
2009-01-03,38.0
2009-01-04,38.0
2009-01-05,38.0
...,...
2020-10-28,68.0
2020-10-29,68.0
2020-10-30,68.0
2020-10-31,68.0


# Unused Methods

In [None]:
# Toy example: build a one-step lagged copy of a column with Series.shift.
a = [1, 2, 3, 4]
df = pd.DataFrame(a)

# A frame built from a plain list has a single column labelled 0 - there is no
# 'Close' column - so the lag is derived from column 0. The first row has no
# predecessor and therefore becomes NaN.
df['lag'] = df[0].shift(1)
df.head()

Unnamed: 0,0,lag
0,1,
1,2,1.0
2,3,2.0
3,4,3.0


## Scrape tweets with Tweepy 

In [None]:
%tensorflow_version 2.x
import tensorflow as tf
# Abort early unless TensorFlow reports the first GPU under its usual name.
expected_gpu = '/device:GPU:0'
device_name = tf.test.gpu_device_name()
if device_name == expected_gpu:
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [None]:
%tensorflow_version 2.x
import tensorflow as tf
import timeit

# Hint shown before aborting when TensorFlow does not report the expected GPU.
gpu_missing_hint = (
    '\n\nThis error most likely means that this notebook is not '
    'configured to use a GPU.  Change this in Notebook Settings via the '
    'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')

device_name = tf.test.gpu_device_name()
gpu_available = device_name == '/device:GPU:0'
if not gpu_available:
  print(gpu_missing_hint)
  raise SystemError('GPU device not found')

def cpu():
  """Convolve a random (100, 100, 100, 3) batch on the CPU and return the sum.

  A new Conv2D layer (32 filters, kernel size 7) is created on every call, and
  the sum of all of its outputs is returned as a scalar tensor.
  """
  with tf.device('/cpu:0'):
    images = tf.random.normal((100, 100, 100, 3))
    conv_layer = tf.keras.layers.Conv2D(32, 7)
    return tf.math.reduce_sum(conv_layer(images))

def gpu():
  """Convolve a random (100, 100, 100, 3) batch on the GPU and return the sum.

  A new Conv2D layer (32 filters, kernel size 7) is created on every call, and
  the sum of all of its outputs is returned as a scalar tensor.
  """
  with tf.device('/device:GPU:0'):
    images = tf.random.normal((100, 100, 100, 3))
    conv_layer = tf.keras.layers.Conv2D(32, 7)
    return tf.math.reduce_sum(conv_layer(images))
  
# Warm-up: run each op once before timing so one-off start-up cost is not
# measured; see: https://stackoverflow.com/a/45067900
for warm_up in (cpu, gpu):
  warm_up()

# Time ten executions of each op and report the totals.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')

print('CPU (s):')
cpu_time = timeit.timeit('cpu()', setup="from __main__ import cpu", number=10)
print(cpu_time)

print('GPU (s):')
gpu_time = timeit.timeit('gpu()', setup="from __main__ import gpu", number=10)
print(gpu_time)

# Whole-number ratio of the two totals (truncated, not rounded).
speedup = int(cpu_time / gpu_time)
print('GPU speedup over CPU: {}x'.format(speedup))

In [None]:
!pip install tweepy

In [None]:
# Imported here because this cell runs before the notebook's tweepy import cell.
import os

import tweepy


def _require_env(name):
    """Return the value of environment variable `name`, or raise a clear error if it is unset."""
    try:
        return os.environ[name]
    except KeyError:
        raise RuntimeError(
            'Missing Twitter credential: set the environment variable {} '
            'before running this cell.'.format(name)) from None


# Credentials are read from the environment and must never be written into the
# notebook (set them with os.environ[...] or %env in a cell that is not saved
# or shared). The key/token values that used to be hard-coded here were
# published with the notebook and should be treated as compromised: revoke and
# regenerate them in the Twitter developer portal.
consumer_key = _require_env('TWITTER_CONSUMER_KEY')
consumer_secret = _require_env('TWITTER_CONSUMER_SECRET')
access_token = _require_env('TWITTER_ACCESS_TOKEN')
access_token_secret = _require_env('TWITTER_ACCESS_TOKEN_SECRET')

# Authenticate and build the API client; wait_on_rate_limit makes the client
# pause instead of failing when Twitter's rate limit is reached.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

In [None]:
import tweepy
import pandas as pd
import time

In [None]:
# Fetch a small sample of tweets matching the query and load them into a frame.
text_query = '$APPL'
count = 2
try:
    # Build a paginated search for the query, limited to `count` results.
    # NOTE(review): `api.search` is the Tweepy 3.x name; Tweepy 4 renamed it to
    # `search_tweets` - confirm against the installed version.
    tweets = tweepy.Cursor(api.search, q=text_query).items(count)

    # Pull the fields of interest out of each returned tweet.
    tweets_list = [[tweet.created_at, tweet.id, tweet.text] for tweet in tweets]

    # Create the dataframe from the tweets list; add or remove fields above to
    # change which tweet information is kept.
    tweets_df = pd.DataFrame(tweets_list)

# Catch Exception rather than BaseException so that KeyboardInterrupt and
# SystemExit still stop the cell instead of being reported as a failed request.
except Exception as e:
    print('failed on_status,', str(e))
    time.sleep(3)

## Scrape Tweets with GetOldTweets3

In [None]:
!pip install GetOldTweets3

In [None]:
import GetOldTweets3 as got

In [None]:
# Search parameters: hashtag, date window and maximum number of tweets.
text_query = '#apple'
since_date = '2019-01-01'
until_date = '2019-01-31'
count = 20

# Describe the search with the library's chained setters.
tweetCriteria = (
    got.manager.TweetCriteria()
    .setQuerySearch(text_query)
    .setSince(since_date)
    .setUntil(until_date)
    .setMaxTweets(count)
)

# Run the search, keep the date and text of every tweet returned, and load the
# pairs into a dataframe.
tweets = got.manager.TweetManager.getTweets(tweetCriteria)
text_tweets = [[found.date, found.text] for found in tweets]
tweets_df = pd.DataFrame(text_tweets)