### The program runs on a cloud server, where it is executed automatically every day at 00:00. The server's IP address is 20.81.153.226.

In [2]:
'''
Author: Shayan Hodai shayan.hodai@gmail.com
Date: 4 March 2023
Purpose: Scrape tweets from chosen accounts, run sentiment analysis on threads and replies, and build a REST API with 6 endpoints on a cloud server to serve requests
'''
import scrapper
import sentiment
import api
import snscrape.modules.twitter as sntwitter
import numpy as np
import pandas as pd
import re
from transformers import pipeline
import warnings
# Suppress every Python warning for the rest of the kernel session
# (presumably to keep the cell outputs readable).
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the imported libraries, so problems may go unnoticed.
warnings.filterwarnings("ignore")

In [3]:
# Twitter handles (without the leading "@") that are handed to
# scrapper.tweets, scrapper.threads and scrapper.replies.
accounts = ['elonmusk', 'ylecun', 'BarackObama'] # accounts whose tweets we want to scrape

### Step 1. Scrape tweets

In [4]:
# Scrape tweets for the handles in `accounts` via the project-local scrapper module.
# Per the saved preview, the result is a DataFrame with columns Id, Date, Username and Tweet.
tweets_df = scrapper.tweets(accounts)

In [5]:
tweets_df.head()

Unnamed: 0,Id,Date,Username,Tweet
0,1633985037375877120,2023-03-10 00:16:05+00:00,elonmusk,To be alerted about new tweets from any accoun...
1,1633963976945008643,2023-03-09 22:52:24+00:00,elonmusk,https://t.co/IYlRo3fd59
2,1633876365651918848,2023-03-09 17:04:16+00:00,elonmusk,Concerning
3,1633767312565125120,2023-03-09 09:50:55+00:00,elonmusk,Sometimes we miss here at Starbase https://t.c...
4,1633597893860020224,2023-03-08 22:37:43+00:00,elonmusk,🤬 😂 https://t.co/zPXgj1cN5v


In [6]:
# Collect threads for the scraped tweets.
# Per the saved preview, the result has columns Id and Thread, where Thread
# appears to be a list of tweet texts belonging to the same thread.
threads_df = scrapper.threads(accounts, tweets_df)

In [7]:
threads_df.head()

Unnamed: 0,Id,Thread
0,1632681788941139972,[(real article from organization calling itsel...
1,1631725881378082816,[Stage 1 to stage 2 mass ratio is too high on ...
2,1631055228089794560,[Detailed whitepaper with calculations &amp; a...
3,1629901954234105857,"[But, all things considered with regard to AGI..."
4,1629633384338149378,"[https://t.co/nBq4YHkaK4, Disturbingly accurate]"


In [8]:
# Collect replies to the scraped tweets.
# Per the saved preview, the result has columns Id, Replied_User (list of
# replying usernames) and Reply (list of reply texts).
replies_df = scrapper.replies(accounts, tweets_df)

In [9]:
replies_df.head()

Unnamed: 0,Id,Replied_User,Reply
0,1633985037375877120,"[DaoKwonDo, ignacy_aw, ahmeedy199, krishnaeyee...",[@elonmusk Dude that’s what I told people last...
1,1633963976945008643,"[Luckyjs37, CryptoVam, deepfilip, I_G_DEFI, op...","[@elonmusk $BBIG 👨‍🚀, @elonmusk Sauron, is dat..."
2,1633876365651918848,"[NovakGOATovic, KrisPersson, JasonFyk, ImNotTh...",[@elonmusk Any thoughts on how the vaccine hyp...
3,1633767312565125120,"[Michell81918935, GayRepublicSwag, Cooper_Hime...",[@elonmusk Should there be a sprinkler on righ...
4,1633597893860020224,"[ethan_harsell, Gritty20202, SanteSuzie, willi...",[@elonmusk Yay pizza party https://t.co/yZ1mSP...


In [10]:
# Presumably removes replies that duplicate tweets already present in a thread
# (inferred from the name rm_dup and its arguments) — TODO confirm in scrapper.rm_dup.
# Note: this rebinds replies_df, so the unfiltered replies frame is no longer
# available after this cell runs.
replies_df = scrapper.rm_dup(threads_df['Thread'], replies_df)

In [11]:
replies_df.head()

Unnamed: 0,Id,Replied_User,Reply
0,1633985037375877120,"[DaoKwonDo, ignacy_aw, ahmeedy199, krishnaeyee...",[@elonmusk Dude that’s what I told people last...
1,1633963976945008643,"[Luckyjs37, CryptoVam, deepfilip, I_G_DEFI, op...","[@elonmusk $BBIG 👨‍🚀, @elonmusk Sauron, is dat..."
2,1633876365651918848,"[NovakGOATovic, KrisPersson, JasonFyk, ImNotTh...",[@elonmusk Any thoughts on how the vaccine hyp...
3,1633767312565125120,"[Michell81918935, GayRepublicSwag, Cooper_Hime...",[@elonmusk Should there be a sprinkler on righ...
4,1633597893860020224,"[ethan_harsell, Gritty20202, SanteSuzie, willi...",[@elonmusk Yay pizza party https://t.co/yZ1mSP...


In [12]:
# Combine tweets, threads and replies into a single frame.
# Per the saved preview, the result has columns Id, Date, Username, Thread,
# Replied_User and Reply.
all_tweets = scrapper.merge_dfs(tweets_df, threads_df, replies_df)

In [13]:
all_tweets.head()

Unnamed: 0,Id,Date,Username,Thread,Replied_User,Reply
0,1633985037375877120,2023-03-10 00:16:05+00:00,elonmusk,[To be alerted about new tweets from any accou...,"[DaoKwonDo, ignacy_aw, ahmeedy199, krishnaeyee...",[@elonmusk Dude that’s what I told people last...
1,1633963976945008643,2023-03-09 22:52:24+00:00,elonmusk,[https://t.co/IYlRo3fd59],"[Luckyjs37, CryptoVam, deepfilip, I_G_DEFI, op...","[@elonmusk $BBIG 👨‍🚀, @elonmusk Sauron, is dat..."
2,1633876365651918848,2023-03-09 17:04:16+00:00,elonmusk,[Concerning],"[NovakGOATovic, KrisPersson, JasonFyk, ImNotTh...",[@elonmusk Any thoughts on how the vaccine hyp...
3,1633767312565125120,2023-03-09 09:50:55+00:00,elonmusk,[Sometimes we miss here at Starbase https://t....,"[Michell81918935, GayRepublicSwag, Cooper_Hime...",[@elonmusk Should there be a sprinkler on righ...
4,1633597893860020224,2023-03-08 22:37:43+00:00,elonmusk,[🤬 😂 https://t.co/zPXgj1cN5v],"[ethan_harsell, Gritty20202, SanteSuzie, willi...",[@elonmusk Yay pizza party https://t.co/yZ1mSP...


In [14]:
# Clean the merged frame before sentiment analysis.
# Judging by the saved previews (TODO confirm in scrapper.preprocess): text is
# lower-cased, URLs and @mentions are stripped, Username/Replied_User are renamed
# to Account/Audience, and the tweet whose thread was only a URL no longer appears.
# Note: this rebinds all_tweets, so the merged, unprocessed frame is replaced.
all_tweets = scrapper.preprocess(all_tweets)

In [15]:
all_tweets.head()

Unnamed: 0,Id,Date,Account,Thread,Audience,Reply
0,1633985037375877120,2023-03-10 00:16:05+00:00,elonmusk,[to be alerted about new tweets from any accou...,"[DaoKwonDo, ignacy_aw, ahmeedy199, krishnaeyee...","[ dude thats what i told people last week, co..."
1,1633876365651918848,2023-03-09 17:04:16+00:00,elonmusk,[concerning],"[NovakGOATovic, KrisPersson, JasonFyk, ImNotTh...",[ any thoughts on how the vaccine hypocrisy ha...
2,1633767312565125120,2023-03-09 09:50:55+00:00,elonmusk,[sometimes we miss here at starbase ],"[Michell81918935, GayRepublicSwag, Cooper_Hime...",[ should there be a sprinkler on right as well...
3,1633597893860020224,2023-03-08 22:37:43+00:00,elonmusk,[🤬 😂 ],"[ethan_harsell, Gritty20202, SanteSuzie, willi...","[ yay pizza party , shitting on your entire s..."
4,1633587625092169730,2023-03-08 21:56:55+00:00,elonmusk,[&lt;insert cartridge&gt; &lt;run program&gt; ],"[reggiewatts, mrfarhadiii, lyssasphere, _Bongz...",[ it should read elon musk has become the enem...


### Step 2. Sentiment analysis using Transformers from the Hugging Face API

In [27]:
# Run sentiment analysis on the preprocessed threads and replies.
# Per the saved output, the first run downloads the model files (about 540 MB of
# weights) and the result gains Thread_Sentiment and Reply_Sentiment columns,
# each a dict with 'NEG', 'NEU' and 'POS' scores stored as strings.
# NOTE(review): the saved scores do not sum to 1 (e.g. 0.00 / 0.95 / 0.00) —
# worth checking how sentiment.do aggregates or rounds them.
tweets_and_sentiments = sentiment.do(all_tweets)

Downloading (…)lve/main/config.json: 100%|██████████| 949/949 [00:00<00:00, 159kB/s]
Downloading pytorch_model.bin: 100%|██████████| 540M/540M [00:05<00:00, 97.9MB/s] 
Downloading (…)okenizer_config.json: 100%|██████████| 338/338 [00:00<00:00, 37.7kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 843k/843k [00:00<00:00, 8.19MB/s]
Downloading (…)solve/main/bpe.codes: 100%|██████████| 1.08M/1.08M [00:00<00:00, 10.2MB/s]
Downloading (…)in/added_tokens.json: 100%|██████████| 22.0/22.0 [00:00<00:00, 2.21kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 167/167 [00:00<00:00, 18.6kB/s]


In [28]:
tweets_and_sentiments.head()

Unnamed: 0,Id,Date,Account,Thread,Audience,Reply,Thread_Sentiment,Reply_Sentiment
0,1633985037375877120,2023-03-10 00:16:05+00:00,elonmusk,[to be alerted about new tweets from any accou...,"[DaoKwonDo, ignacy_aw, ahmeedy199, krishnaeyee...","[ dude thats what i told people last week, co...","{'NEG': '0.00', 'NEU': '0.95', 'POS': '0.00'}","{'NEG': '0.20', 'NEU': '0.49', 'POS': '0.14'}"
1,1633876365651918848,2023-03-09 17:04:16+00:00,elonmusk,[concerning],"[NovakGOATovic, KrisPersson, JasonFyk, ImNotTh...",[ any thoughts on how the vaccine hypocrisy ha...,"{'NEG': '0.00', 'NEU': '0.90', 'POS': '0.00'}","{'NEG': '0.51', 'NEU': '0.26', 'POS': '0.07'}"
2,1633767312565125120,2023-03-09 09:50:55+00:00,elonmusk,[sometimes we miss here at starbase ],"[Michell81918935, GayRepublicSwag, Cooper_Hime...",[ should there be a sprinkler on right as well...,"{'NEG': '0.00', 'NEU': '0.71', 'POS': '0.00'}","{'NEG': '0.12', 'NEU': '0.49', 'POS': '0.24'}"
3,1633597893860020224,2023-03-08 22:37:43+00:00,elonmusk,[🤬 😂 ],"[ethan_harsell, Gritty20202, SanteSuzie, willi...","[ yay pizza party , shitting on your entire s...","{'NEG': '0.00', 'NEU': '0.91', 'POS': '0.00'}","{'NEG': '0.23', 'NEU': '0.47', 'POS': '0.15'}"
4,1633587625092169730,2023-03-08 21:56:55+00:00,elonmusk,[&lt;insert cartridge&gt; &lt;run program&gt; ],"[reggiewatts, mrfarhadiii, lyssasphere, _Bongz...",[ it should read elon musk has become the enem...,"{'NEG': '0.00', 'NEU': '0.97', 'POS': '0.00'}","{'NEG': '0.37', 'NEU': '0.37', 'POS': '0.11'}"


### Step 3. Initialize the REST API with 6 endpoints using Flask

In [18]:
# Start the Flask REST API that serves the data in tweets_and_sentiments; the
# endpoints are listed in the markdown below (port 5000).
# NOTE(review): presumably this call blocks the kernel while the server is
# running — confirm in api.initial.
api.initial(tweets_and_sentiments)

### Done!

http://127.0.0.1:5000/accounts -> returns json file of the accounts

http://127.0.0.1:5000/threads -> returns json file of the threads

http://127.0.0.1:5000/replies -> returns json file of the replies

http://127.0.0.1:5000/audience -> returns json file of the audience (the accounts who replied to the tweet)

http://127.0.0.1:5000/th -> returns json file of the replies

http://127.0.0.1:5000/threads_sentiment -> returns json file of the threads sentiment

http://127.0.0.1:5000/replies_sentiment -> returns json file of the replies sentiment

Alternatively, you can access the API from your local machine through my virtual machine on a cloud server at 20.81.153.226:5000/, using the same endpoints as above.