In [7]:
# Step 1: Install and Import Libraries
import pandas as pd
import numpy as np
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Step 2: Load Dataset
data = pd.read_csv("/tweet_emotions.csv")
print(data.head())

# Step 3: Preprocessing
nltk.download('stopwords')
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))

def preprocess(text):
    text = re.sub(r'[^a-zA-Z]', ' ', text)    # Remove punctuation/numbers
    text = text.lower()
    words = text.split()
    words = [stemmer.stem(word) for word in words if word not in stop_words]
    return ' '.join(words)

data['clean_text'] = data['content'].apply(preprocess)  # assuming 'content' column has text

# Step 4: Train/Test Split
X = data['clean_text']
y = data['sentiment']   # assuming 'sentiment' column has emotion labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Vectorization
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Step 6: Model Training
model = LogisticRegression()
model.fit(X_train_vec, y_train)

# Step 7: Prediction and Evaluation
y_pred = model.predict(X_test_vec)

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))


     tweet_id   sentiment                                            content
0  1956967341       empty  @tiffanylue i know  i was listenin to bad habi...
1  1956967666     sadness  Layin n bed with a headache  ughhhh...waitin o...
2  1956967696     sadness                Funeral ceremony...gloomy friday...
3  1956967789  enthusiasm               wants to hang out with friends SOON!
4  1956968416     neutral  @dannycastillo We want to trade with someone w...


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Classification Report:
               precision    recall  f1-score   support

       anger       0.00      0.00      0.00        19
     boredom       0.00      0.00      0.00        31
       empty       0.33      0.01      0.01       162
  enthusiasm       0.00      0.00      0.00       163
         fun       0.04      0.01      0.01       338
   happiness       0.32      0.34      0.33      1028
        hate       0.49      0.18      0.26       268
        love       0.49      0.38      0.43       762
     neutral       0.34      0.56      0.42      1740
      relief       0.32      0.02      0.04       352
     sadness       0.35      0.25      0.29      1046
    surprise       0.30      0.04      0.07       425
       worry       0.32      0.47      0.38      1666

    accuracy                           0.34      8000
   macro avg       0.25      0.17      0.17      8000
weighted avg       0.33      0.34      0.31      8000

Accuracy Score: 0.341625


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
# Function to predict emotion for custom input
def predict_emotion(text):
    # Preprocess the input text
    text_clean = preprocess(text)
    # Vectorize the input text
    text_vec = vectorizer.transform([text_clean])
    # Predict the emotion
    prediction = model.predict(text_vec)
    return prediction[0]

# Example Testing
sample_texts = [
    "I am feeling so wonderful and happy today!",
    "This is the worst day of my life. I'm devastated.",
    "I'm so excited about the concert tonight!",
    "Why does everything make me so angry lately?",
    "I feel lonely and sad sometimes.",
    "That was hilarious! I can't stop laughing."
]

for text in sample_texts:
    emotion = predict_emotion(text)
    print(f"Text: {text}\nPredicted Emotion: {emotion}\n")


Text: I am feeling so wonderful and happy today!
Predicted Emotion: happiness

Text: This is the worst day of my life. I'm devastated.
Predicted Emotion: worry

Text: I'm so excited about the concert tonight!
Predicted Emotion: happiness

Text: Why does everything make me so angry lately?
Predicted Emotion: worry

Text: I feel lonely and sad sometimes.
Predicted Emotion: sadness

Text: That was hilarious! I can't stop laughing.
Predicted Emotion: happiness



In [12]:
import pickle

# Save the trained model
with open('emotion_detection_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Save the TF-IDF vectorizer
with open('tfidf_vectorizer.pkl', 'wb') as vec_file:
    pickle.dump(vectorizer, vec_file)

print("Model and vectorizer have been saved successfully.")


Model and vectorizer have been saved successfully.
