In [7]:
# Import necessary libraries
!pip install python-dotenv
from dotenv import load_dotenv
load_dotenv()
import os
!pip install tweepy
import tweepy
import pandas as pd
import re
import string
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('corpus')
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Define Twitter API credentials
access_token = os.environ.get("access_token")
access_token_secret = os.environ.get("access_token_secret")
consumer_key = os.environ.get("consumer_key")
consumer_secret = os.environ.get("consumer_secret")

# Authenticate with Twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Define emotions and corresponding hashtags to search for
emotions = {'happy': ['#happy', '#joy', '#love'], 
            'sad': ['#sad', '#depressed', '#heartbroken'], 
            'angry': ['#angry', '#frustrated', '#mad']}

# Define function to preprocess tweets
def preprocess_tweet(text):
    # Remove URLs and mentions
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'@\S+', '', text)
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Convert to lowercase
    text = text.lower()
    # Tokenize text
    tokens = word_tokenize(text)
    # Remove stop words
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    # Lemmatize tokens
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    # Join tokens back into text
    text = ' '.join(tokens)
    return text

# Define function to extract tweets and their emotions
def extract_tweets(emotions, num_tweets=100):
    tweets = []
    for emotion, hashtags in emotions.items():
        for hashtag in hashtags:
            query = hashtag + ' -filter:retweets'
            searched_tweets = api.search_tweets(q=query, lang='en', count=num_tweets)
            for tweet in searched_tweets:
                text = preprocess_tweet(tweet.text)
                tweets.append({'text': text, 'emotion': emotion})
    return pd.DataFrame(tweets)

# Extract tweets and emotions
tweets_df = extract_tweets(emotions)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(tweets_df['text'], tweets_df['emotion'], test_size=0.2, random_state=42)

# Define vectorizer to convert text to bag-of-words features
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train Naive Bayes classifier
clf = MultinomialNB()
clf.fit(X_train_vec, y_train)

# Evaluate model on test set
accuracy = clf.score(X_test_vec, y_test)
print('Accuracy:', accuracy)

# Predict emotions of new tweets
new_tweets = ['I am so happy today!', 'Feeling sad and lonely', 'I am really angry right now']
new_tweets_vec = vectorizer.transform(new_tweets)
predicted_emotions = clf.predict(new_tweets_vec)
print('Predicted emotions:', predicted_emotions)


Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0


[nltk_data] Downloading package punkt to C:\Users\Vanshika
[nltk_data]     Patel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Vanshika
[nltk_data]     Patel\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Error loading corpus: Package 'corpus' not found in index
[nltk_data] Downloading package wordnet to C:\Users\Vanshika
[nltk_data]     Patel\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\Vanshika
[nltk_data]     Patel\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Accuracy: 0.8579545454545454
Predicted emotions: ['happy' 'sad' 'angry']
