# Twitter scraper

In [2]:
#imports
import snscrape.modules.twitter as sntwitter
import pandas as pd

In [3]:
# Search query passed to the scraper and the maximum number of tweets to keep
SEARCH_QUERY = "TSLA since:2023-01-01"
MAX_TWEETS = 500

# Rows of [date, id, text, username], one per scraped tweet
tweets_list2 = []

# Using TwitterSearchScraper to scrape data and append tweets to list
for i, tweet in enumerate(
    sntwitter.TwitterSearchScraper(SEARCH_QUERY).get_items()
):
    # enumerate() starts at 0, so `>=` stops after exactly MAX_TWEETS rows
    # (the previous `i > 500` check let a 501st tweet through).
    if i >= MAX_TWEETS:
        break
    # NOTE(review): some snscrape releases appear to warn that `content` is
    # deprecated in favour of `rawContent` — confirm against the installed version.
    tweets_list2.append(
        [tweet.date, tweet.id, tweet.content, tweet.user.username]
    )


  [tweet.date, tweet.id, tweet.content, tweet.user.username]


In [4]:
# Turn the collected rows into a table, naming one column per scraped field
column_names = ["Datetime", "Tweet Id", "Text", "Username"]
tweets_df2 = pd.DataFrame(data=tweets_list2, columns=column_names)
tweets_df2

Unnamed: 0,Datetime,Tweet Id,Text,Username
0,2023-04-11 22:50:21+00:00,1645922260782579714,4.11.23 TUE\n\n++-\n\n$kmx $ww $mrna $aci $dhi...,TityoSatya
1,2023-04-11 22:50:19+00:00,1645922253367054339,This Tesla energy ecosystem home gives us a gl...,StockMarketPete
2,2023-04-11 22:50:17+00:00,1645922245787697152,"#Tesla ($TSLA) price at close, 2023-04-11, is ...",tslashare
3,2023-04-11 22:48:53+00:00,1645921892451381248,@robertlufkinmd You are seeing it in Asia as s...,Duka_Tsla
4,2023-04-11 22:48:53+00:00,1645921890819801089,"Tesla hits 45,000 Superchargers globally\n\n$T...",StockMarketPete
...,...,...,...,...
496,2023-04-11 20:37:40+00:00,1645888871564025861,"#1 Chatroom interms of \n\nalert,calls,Analysi...",RAJURAY48184
497,2023-04-11 20:37:40+00:00,1645888868317827073,"#Tesla $TSLA Inc. stock rises Tuesday, outperf...",Stock_Market_Pr
498,2023-04-11 20:37:20+00:00,1645888787900248065,$AUPH a 5+5=10 watchlist play to perfection\n\...,tenDAYswings
499,2023-04-11 20:36:35+00:00,1645888598271721472,"@PriapusIQ @dunnde Nah, we're at Club Med righ...",BradMunchen


# Sentiment Analysis

In [5]:
#more imports
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer 

In [6]:
# Download the VADER lexicon.
# quiet=True keeps the downloader's log out of the saved notebook output; that
# log prints the local nltk_data directory, which includes the OS user name.
nltk.download('vader_lexicon', quiet=True)

# Load the sentiment analyzer (shared instance read by calculate_sentiment_score)
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\adity\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [7]:
# Scoring helper: one tweet text in, one VADER compound score out
def calculate_sentiment_score(tweet):
    """
    Return the 'compound' value that the VADER analyzer reports for a tweet.

    The text is passed to the module-level analyzer ``sia``; only the
    'compound' entry of the resulting polarity scores is returned.
    """
    polarity = sia.polarity_scores(tweet)
    return polarity['compound']

# Score every tweet's text and store the result in a new 'sentiment_score' column
text_column = tweets_df2['Text']
tweets_df2['sentiment_score'] = text_column.apply(calculate_sentiment_score)

# Show the frame with the new column
tweets_df2

Unnamed: 0,Datetime,Tweet Id,Text,Username,sentiment_score
0,2023-04-11 22:50:21+00:00,1645922260782579714,4.11.23 TUE\n\n++-\n\n$kmx $ww $mrna $aci $dhi...,TityoSatya,0.0000
1,2023-04-11 22:50:19+00:00,1645922253367054339,This Tesla energy ecosystem home gives us a gl...,StockMarketPete,0.4939
2,2023-04-11 22:50:17+00:00,1645922245787697152,"#Tesla ($TSLA) price at close, 2023-04-11, is ...",tslashare,0.0000
3,2023-04-11 22:48:53+00:00,1645921892451381248,@robertlufkinmd You are seeing it in Asia as s...,Duka_Tsla,-0.2500
4,2023-04-11 22:48:53+00:00,1645921890819801089,"Tesla hits 45,000 Superchargers globally\n\n$T...",StockMarketPete,0.0000
...,...,...,...,...,...
496,2023-04-11 20:37:40+00:00,1645888871564025861,"#1 Chatroom interms of \n\nalert,calls,Analysi...",RAJURAY48184,0.0000
497,2023-04-11 20:37:40+00:00,1645888868317827073,"#Tesla $TSLA Inc. stock rises Tuesday, outperf...",Stock_Market_Pr,0.0000
498,2023-04-11 20:37:20+00:00,1645888787900248065,$AUPH a 5+5=10 watchlist play to perfection\n\...,tenDAYswings,0.9246
499,2023-04-11 20:36:35+00:00,1645888598271721472,"@PriapusIQ @dunnde Nah, we're at Club Med righ...",BradMunchen,0.5423


In [8]:
# Labelling helper: turn a numeric sentiment score into a category word
def map_sentiment_score(sentiment_score):
    """
    Translate a sentiment score into 'positive', 'negative', or 'neutral'.

    Scores above zero are 'positive', scores below zero are 'negative',
    and anything else is 'neutral'.
    """
    if sentiment_score > 0:
        label = 'positive'
    else:
        label = 'negative' if sentiment_score < 0 else 'neutral'
    return label

In [9]:
# Label each row by running its sentiment score through map_sentiment_score
score_column = tweets_df2['sentiment_score']
tweets_df2['sentiment'] = score_column.apply(map_sentiment_score)

tweets_df2

Unnamed: 0,Datetime,Tweet Id,Text,Username,sentiment_score,sentiment
0,2023-04-11 22:50:21+00:00,1645922260782579714,4.11.23 TUE\n\n++-\n\n$kmx $ww $mrna $aci $dhi...,TityoSatya,0.0000,neutral
1,2023-04-11 22:50:19+00:00,1645922253367054339,This Tesla energy ecosystem home gives us a gl...,StockMarketPete,0.4939,positive
2,2023-04-11 22:50:17+00:00,1645922245787697152,"#Tesla ($TSLA) price at close, 2023-04-11, is ...",tslashare,0.0000,neutral
3,2023-04-11 22:48:53+00:00,1645921892451381248,@robertlufkinmd You are seeing it in Asia as s...,Duka_Tsla,-0.2500,negative
4,2023-04-11 22:48:53+00:00,1645921890819801089,"Tesla hits 45,000 Superchargers globally\n\n$T...",StockMarketPete,0.0000,neutral
...,...,...,...,...,...,...
496,2023-04-11 20:37:40+00:00,1645888871564025861,"#1 Chatroom interms of \n\nalert,calls,Analysi...",RAJURAY48184,0.0000,neutral
497,2023-04-11 20:37:40+00:00,1645888868317827073,"#Tesla $TSLA Inc. stock rises Tuesday, outperf...",Stock_Market_Pr,0.0000,neutral
498,2023-04-11 20:37:20+00:00,1645888787900248065,$AUPH a 5+5=10 watchlist play to perfection\n\...,tenDAYswings,0.9246,positive
499,2023-04-11 20:36:35+00:00,1645888598271721472,"@PriapusIQ @dunnde Nah, we're at Club Med righ...",BradMunchen,0.5423,positive


## Work to be done
- clean the Twitter data
    - remove unnecessary symbols
- make the sentiment model more accurate
    - reduce the number of tweets classified as neutral as far as possible
    - train it to be more accurate on stock-related text
- integrate with stock data (other model)
- add support for scraping any stock symbol dynamically from user input
- create a way to merge and display both data sets.