In [26]:
!pip install neattext



In [27]:
import re
import os
import joblib
import nltk
import pandas as pd
import neattext.functions as nfx
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [28]:
nltk.download('stopwords')
nltk.download('wordnet')
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [29]:
# === Load Data ===
csv_file = "/content/Emotion_dataset.csv"  # Ensure your CSV has 'text' and 'label' columns
if not os.path.exists(csv_file):
    raise FileNotFoundError(f"CSV file '{csv_file}' not found.")

# Read the CSV
# Try reading with different encodings if utf-8 fails
try:
    df = pd.read_csv(csv_file, encoding='utf-8')
except UnicodeDecodeError:
    try:
        df = pd.read_csv(csv_file, encoding='latin-1')
    except UnicodeDecodeError:
        df = pd.read_csv(csv_file, encoding='cp1252')


if 'text' not in df.columns or 'label' not in df.columns:
    raise ValueError("CSV must contain 'text' and 'label' columns.")

In [41]:
df.shape

(50, 3)

In [42]:
display(df)

Unnamed: 0,text,label,clean_text
0,I'm feeling great today!,Happy,im feeling great today
1,What a wonderful experience!,Happy,wonderful experience
2,Life is beautiful and I love it.,Happy,life beautiful love
3,Yay! I got the job!,Happy,yay got job
4,That made me smile so much!,Happy,made smile much
5,Everything is amazing!,Happy,everything amazing
6,I'm grateful for everything.,Happy,im grateful everything
7,I feel so happy and alive!,Happy,feel happy alive
8,I'm thrilled to start this journey.,Happy,im thrilled start journey
9,My heart is full of joy!,Happy,heart full joy


In [30]:
# === Preprocessing Function ===
def preprocess_text(text):
    # Ensure text is a string before processing
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = nfx.remove_userhandles(text)
    text = nfx.remove_punctuations(text)
    text = re.sub(r'\d+', '', text)
    words = text.split()
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return " ".join(words)


In [31]:
# Apply preprocessing
df['clean_text'] = df['text'].apply(preprocess_text)

# === Feature Extraction ===
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['clean_text'])
y = df['label']

# === Train/Test Split ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

In [32]:
# === Model Training ===
# Change the model here to Support Vector Classifier (SVC)
model = SVC()
model.fit(X_train, y_train)

# Save the trained SVC model
joblib.dump(model, "emotion_model_svc.pkl")

# === Evaluation ===
y_pred = model.predict(X_test)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred, zero_division=0))

accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy Score: {accuracy}")


Classification Report:

              precision    recall  f1-score   support

       Angry       0.00      0.00      0.00         3
       Happy       0.44      1.00      0.62         4
         Sad       1.00      0.33      0.50         3

    accuracy                           0.50        10
   macro avg       0.48      0.44      0.37        10
weighted avg       0.48      0.50      0.40        10


Accuracy Score: 0.5


In [33]:
# === Model Training ===
# Change the model here to Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the trained SVC model
joblib.dump(model, "emotion_model_rf.pkl")

# === Evaluation ===
y_pred = model.predict(X_test)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred, zero_division=0))

accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy Score: {accuracy}")


Classification Report:

              precision    recall  f1-score   support

       Angry       1.00      0.33      0.50         3
       Happy       0.50      1.00      0.67         4
         Sad       1.00      0.33      0.50         3

    accuracy                           0.60        10
   macro avg       0.83      0.56      0.56        10
weighted avg       0.80      0.60      0.57        10


Accuracy Score: 0.6


In [34]:
# === Prediction Function ===
def predict_emotion(text):
    clean_text = preprocess_text(text)
    vect = vectorizer.transform([clean_text])
    return model.predict(vect)[0]

In [35]:
# === Sample Predictions ===
samples = [
    "I just won the game!",
    "I'm so sad and depressed.",
    "I can't tolerate this anymore!"
]

print("\nSample Predictions:")
for s in samples:
    print(f"Text: '{s}' → {predict_emotion(s)}")


Sample Predictions:
Text: 'I just won the game!' → Happy
Text: 'I'm so sad and depressed.' → Sad
Text: 'I can't tolerate this anymore!' → Angry


In [36]:
text_input=input("Enter the text:")
print(predict_emotion(text_input))

Enter the text:Celebrate this Amazing win!
Happy


In [38]:
text_input=input("Enter the text:")
print(predict_emotion(text_input))

Enter the text:I'm not happy today.
Sad


In [39]:
text_input=input("Enter the text:")
print(predict_emotion(text_input))

Enter the text:I feel like punching the wall!
Angry


In [None]:
#                                                             End of the Project