In [1]:


import pandas as pd

data = pd.read_csv('tweet_emotions.csv')  

print(data.head(10))


     tweet_id   sentiment                                            content
0  1956967341       empty  @tiffanylue i know  i was listenin to bad habi...
1  1956967666     sadness  Layin n bed with a headache  ughhhh...waitin o...
2  1956967696     sadness                Funeral ceremony...gloomy friday...
3  1956967789  enthusiasm               wants to hang out with friends SOON!
4  1956968416     neutral  @dannycastillo We want to trade with someone w...
5  1956968477       worry  Re-pinging @ghostridah14: why didn't you go to...
6  1956968487     sadness  I should be sleep, but im not! thinking about ...
7  1956968636       worry               Hmmm. http://www.djhero.com/ is down
8  1956969035     sadness            @charviray Charlene my love. I miss you
9  1956969172     sadness         @kelcouch I'm sorry  at least it's Friday?


In [2]:
# Module 2: Data Preprocessing

import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')

def clean_text(text):
    text = text.lower()  
    text = re.sub(r'[^a-zA-Z\s]', '', text)  
    text = ' '.join([word for word in word_tokenize(text) if word not in stopwords.words('english')])  
    return text


data['cleaned_text'] = data['content'].apply(clean_text)


print(data['cleaned_text'].head())


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


0    tiffanylue know listenin bad habit earlier sta...
1               layin n bed headache ughhhhwaitin call
2                        funeral ceremonygloomy friday
3                              wants hang friends soon
4    dannycastillo want trade someone houston ticke...
Name: cleaned_text, dtype: object


In [3]:
# Module 3: Model Building

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


X = data['cleaned_text']
y = data['sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


svm_classifier = SVC(kernel='linear', C=1.0)
svm_classifier.fit(X_train_tfidf, y_train)


y_pred = svm_classifier.predict(X_test_tfidf)


accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))


Accuracy: 0.344875
              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        19
     boredom       0.00      0.00      0.00        31
       empty       0.00      0.00      0.00       162
  enthusiasm       0.00      0.00      0.00       163
         fun       0.12      0.02      0.03       338
   happiness       0.31      0.38      0.34      1028
        hate       0.42      0.19      0.27       268
        love       0.48      0.38      0.43       762
     neutral       0.33      0.56      0.42      1740
      relief       0.35      0.02      0.04       352
     sadness       0.35      0.23      0.28      1046
    surprise       0.36      0.04      0.08       425
       worry       0.34      0.46      0.39      1666

    accuracy                           0.34      8000
   macro avg       0.24      0.18      0.17      8000
weighted avg       0.33      0.34      0.31      8000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
# Module 4: Inference

def analyze_emotion(text, model, vectorizer):
    cleaned_text = clean_text(text)
    tfidf_text = vectorizer.transform([cleaned_text])
    emotion = model.predict(tfidf_text)[0]
    return emotion


text_to_analyze = "I feel happy today"
predicted_emotion = analyze_emotion(text_to_analyze, svm_classifier, tfidf_vectorizer)
print(f'Predicted Emotion: {predicted_emotion}')


Predicted Emotion: worry


In [7]:
from langdetect import detect

def analyze_emotion(text, model, vectorizer):
    # Detect the language of the input text
    detected_language = detect(text)
    
    if detected_language != 'en':
        return "Unsupported Language: Currently, the emotional analyzer only supports English text."

    cleaned_text = clean_text(text)
    tfidf_text = vectorizer.transform([cleaned_text])
    emotion = model.predict(tfidf_text)[0]
    return emotion

# Example usage
text_to_analyze = "I am sad today"  # Example non-English input
predicted_emotion = analyze_emotion(text_to_analyze, svm_classifier, tfidf_vectorizer)
print(f'Predicted Emotion: {predicted_emotion}')

Predicted Emotion: Unsupported Language: Currently, the emotional analyzer only supports English text.
