In [11]:
import re
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize VADER analyzer
analyzer = SentimentIntensityAnalyzer()

# Load the CSV file
file_path = 'election_day_tweets_data/election_day_tweets.csv' 
elections_2016 = pd.read_csv(file_path)

elections_2016.sort_values(by='created_at').info()


# Preprocessing function for tweets
def preprocess_tweet(tweet):
    tweet = re.sub(r"http\S", "", tweet)  # Remove URLs, mentions, hashtags
    tweet = re.sub(r"[^a-zA-Z\s]", "", tweet)       # Remove non-alphanumeric characters
    tweet = tweet.lower().strip()
    return tweet


# Sentiment analysis function for specific keywords
def entity_sentiment(text, keywords):
    """
    Perform sentiment analysis on text specific to one or more keywords.

    Parameters:
        text (str): The input text to analyze.
        keywords (list): A list of keywords to look for in the text.

    Returns:
        str: Sentiment specific to the keywords ('positive', 'negative', 'neutral', or 'not mentioned').
    """
    # Check if any of the keywords exist in the text
    if any(keyword.lower() in text for keyword in keywords):
        # Find sentences mentioning any of the keywords
        sentences = text.split('.')
        relevant_sentences = [s for s in sentences if any(keyword.lower() in s for keyword in keywords)]
        # Compute sentiment for relevant sentences
        keyword_sentiment = []
        for sentence in relevant_sentences:
            sentiment_scores = analyzer.polarity_scores(sentence)
            keyword_sentiment.append(sentiment_scores['compound'])  # Use compound score
        # Aggregate scores (average sentiment for the keywords)
        if keyword_sentiment:
            avg_sentiment = sum(keyword_sentiment) / len(keyword_sentiment)
            if avg_sentiment >= 0.05:
                return 'positive'
            elif avg_sentiment <= -0.05:
                return 'negative'
            else:
                return 'neutral'
    return 'not mentioned'




<class 'pandas.core.frame.DataFrame'>
Index: 397629 entries, 162840 to 92778
Data columns (total 34 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   text                           397629 non-null  object 
 1   created_at                     397629 non-null  object 
 2   geo                            2564 non-null    object 
 3   lang                           397629 non-null  object 
 4   place                          30832 non-null   object 
 5   coordinates                    2564 non-null    object 
 6   user.favourites_count          397629 non-null  int64  
 7   user.statuses_count            397629 non-null  int64  
 8   user.description               353403 non-null  object 
 9   user.location                  302677 non-null  object 
 10  user.id                        397629 non-null  int64  
 11  user.created_at                397629 non-null  object 
 12  user.verified                  

In [8]:
# Load the CSV file
file_path = 'election_day_tweets_data/election_day_tweets.csv' 
elections_2016 = pd.read_csv(file_path)

elections_2016.sort_values(by='created_at').info()


<class 'pandas.core.frame.DataFrame'>
Index: 397629 entries, 162840 to 92778
Data columns (total 34 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   text                           397629 non-null  object 
 1   created_at                     397629 non-null  object 
 2   geo                            2564 non-null    object 
 3   lang                           397629 non-null  object 
 4   place                          30832 non-null   object 
 5   coordinates                    2564 non-null    object 
 6   user.favourites_count          397629 non-null  int64  
 7   user.statuses_count            397629 non-null  int64  
 8   user.description               353403 non-null  object 
 9   user.location                  302677 non-null  object 
 10  user.id                        397629 non-null  int64  
 11  user.created_at                397629 non-null  object 
 12  user.verified                  

In [9]:
print(elections_2016['text'].head())
# Apply the cleaning function to the 'text' column
elections_2016['cleaned_text'] = elections_2016['text'].apply(preprocess_tweet)
print(elections_2016[['cleaned_text']].head())
# Define specific words to search for
specific_words = ['hillary', 'clinton']

# Combine the words into a single regular expression pattern
pattern = '|'.join(rf'\b{word}\b' for word in specific_words)  # Matches whole words only

# Filter rows where the cleaned text contains any of the specific words
trump_tweets = elections_2016[elections_2016['cleaned_text'].str.contains('trump', case=False, na=False)].copy()
hillary_tweets = elections_2016[elections_2016['cleaned_text'].str.contains(pattern, flags=re.IGNORECASE, na=False)].copy()


# Save the filtered results
#filtered_elections_2016.to_csv('filtered_words2.csv', index=False)

# Display example Hillary Tweets
print("Example Hillary Tweets:")
print(hillary_tweets[['cleaned_text']].head())  # Show the first 5 tweets related to Hillary

# Display example Trump Tweets
print("\nExample Trump Tweets:")
print(trump_tweets[['cleaned_text']].head())  # Show the first 5 tweets related to Trump



0      .@Lawrence @HillaryClinton Two first  @SenSchumer tomorrow. @TheLastWord #brooklyn  TheRealAmerica #Vote #Democrats #NastyWomenVote #Senate
1    My @latimesopinion op-ed on historic #California #Senate race. First time an elected woman senator succeeds another.\nhttps://t.co/cbjQTK0Q1V
2                                        #Senate Wisconsin Senate Preview: Johnson vs. Feingold, The Sequel https://t.co/XHq4p0V4El @SenRonJohnson
3           If Rubio Wins and #Trump Loses in #Florida... #HillaryClinton #Senate #RepublicanPrimary #Senaterace #Miami... https://t.co/zIeNEcVnMO
4                                #Senate Wisconsin Senate Preview: Johnson vs. Feingold, The Sequel https://t.co/VSd6arFMe5 SenRonJohnson NTA•NEWS
Name: text, dtype: object
                                                                                                                       cleaned_text
0  lawrence hillaryclinton two first  senschumer tomorrow thelastword brooklyn  therealamerica vote democra

In [15]:
hillary_keywords = ["hillary", "clinton"]

trump_tweets = trump_tweets.sample(10)  # Random 10 samples
hillary_tweets = hillary_tweets.sample(10)

# Apply entity-specific sentiment analysis
trump_tweets['trump_sentiment'] = trump_tweets['cleaned_text'].apply(lambda x: entity_sentiment(x, "Trump"))
trump_tweets['hillary_sentiment'] = trump_tweets['cleaned_text'].apply(lambda x: entity_sentiment(x, hillary_keywords))

hillary_tweets['trump_sentiment'] = hillary_tweets['cleaned_text'].apply(lambda x: entity_sentiment(x, "Trump"))
hillary_tweets['hillary_sentiment'] = hillary_tweets['cleaned_text'].apply(lambda x: entity_sentiment(x, hillary_keywords))

# Display results
pd.set_option('display.max_colwidth', None)  # Ensure full text is displayed

print("\nExample Trump Tweets with Sentiment:")
print(trump_tweets[['cleaned_text', 'trump_sentiment', 'hillary_sentiment']])

print("\nExample Hillary Tweets with Sentiment:")
print(hillary_tweets[['cleaned_text', 'trump_sentiment', 'hillary_sentiment']])



Example Trump Tweets with Sentiment:
                                                                                                                       cleaned_text  \
361322                     i now see why trump and his supporters hate the mediai mean they have the ability to make sense election   
341501                         the true meaning of makeamericagreatagain \n\ndontbestupid\ndontvotefortrump\nelection tcocxbbtqpzha   
309404                                                      trump and hillys battle of twittersburg tcobwiwwiua election tcopuorcsm   
63187   me too\n\nthis doesnt change my view of trumps fitness for office though i hope now that congress reasserts its tcoejavllhh   
184711                             trump winning in new hampshire \n\nelection \nelectionnight \n\ntrumppencelandslide tcouppxofwtf   
383451     reuters did eric trump just break the law with a ballot photo find out and follow the latest election news  tcoxjhtpnmwn   
315434           