In [1]:
pip install tweepy pandas nltk scikit-learn



In [2]:
import tweepy
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import nltk

In [3]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [4]:
# Replace with your own credentials
consumer_key = 'your_consumer_key'
consumer_secret = 'your_consumer_secret'
access_token = 'your_access_token'
access_token_secret = 'your_access_token_secret'

auth = tweepy.OAuth1UserHandler(consumer_key, consumer_secret, access_token, access_token_secret)
api = tweepy.API(auth)

In [6]:
def preprocess_text(text):
    # Tokenize the text
    tokens = word_tokenize(text.lower())

    # Remove stopwords and punctuation
    stop_words = set(stopwords.words('english'))
    words = [word for word in tokens if word.isalpha() and word not in stop_words]

    # Lemmatize the words
    lemmatizer = WordNetLemmatizer()
    lemmas = [lemmatizer.lemmatize(word) for word in words]

    return ' '.join(lemmas)

# Create or load your DataFrame here
# Example:
tweets_df = pd.DataFrame({'text': ['This is a sample tweet.', 'Another tweet for preprocessing.']})

tweets_df['cleaned_text'] = tweets_df['text'].apply(preprocess_text)
print(tweets_df)

                               text                 cleaned_text
0           This is a sample tweet.                 sample tweet
1  Another tweet for preprocessing.  another tweet preprocessing


In [7]:
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(tweets_df['cleaned_text'])

In [14]:
# Assuming we have a labeled dataset in a CSV file
labeled_data = pd.read_csv('sentimentdataset.csv')  # This CSV should have 'text' and 'sentiment' columns
labeled_data['cleaned_text'] = labeled_data['Sentiment'].apply(preprocess_text)

X_labeled = vectorizer.fit_transform(labeled_data['cleaned_text'])
y = labeled_data['Sentiment']

X_train, X_test, y_train, y_test = train_test_split(X_labeled, y, test_size=0.2, random_state=42)

model = MultinomialNB()
model.fit(X_train, y_train)


In [15]:
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(classification_report(y_test, y_pred))


Accuracy: 0.23809523809523808
                        precision    recall  f1-score   support

         Acceptance          0.00      0.00      0.00         2
           Admiration        0.00      0.00      0.00         1
        Admiration           0.00      0.00      0.00         1
         Affection           0.00      0.00      0.00         1
      Ambivalence            0.00      0.00      0.00         1
         Anger               0.00      0.00      0.00         1
        Anticipation         0.00      0.00      0.00         1
        Arousal              0.00      0.00      0.00         3
                  Awe        0.00      0.00      0.00         1
         Awe                 0.00      0.00      0.00         1
                  Bad        0.00      0.00      0.00         1
             Betrayal        0.00      0.00      0.00         2
        Betrayal             0.00      0.00      0.00         1
         Bitter              0.00      0.00      0.00         1
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
def classify_tweets(tweets_df):
    tweets_df['cleaned_text'] = tweets_df['text'].apply(preprocess_text)
    X_new = vectorizer.transform(tweets_df['cleaned_text'])
    tweets_df['sentiment'] = model.predict(X_new)
    return tweets_df

classified_tweets = classify_tweets(tweets_df)
print(classified_tweets[['text', 'sentiment']])


                               text    sentiment
0           This is a sample tweet.   Positive  
1  Another tweet for preprocessing.   Positive  
