In [1]:
# pip install pandas scikit-learn nltk
!pip install pandas scikit-learn nltk



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import nltk
import string
nltk.download('stopwords')
from nltk.corpus import stopwords

# 1. Sample dataset
data = {
    'text': [
        "I am so happy today!", "Feeling great and joyful",
        "This is really sad...", "I feel very depressed",
        "I am angry at you", "You made me furious",
        "What a surprise!", "Wow! I didn't expect this",
        "I am scared of the dark", "Feeling terrified right now"
    ],
    'emotion': ['happy', 'happy', 'sad', 'sad', 'angry', 'angry', 'surprise', 'surprise', 'fear', 'fear']
}


df = pd.DataFrame(data)

# 2. Preprocessing function
stop_words = set(stopwords.words('english'))

def preprocess(text):
    text = text.lower()
    text = ''.join([c for c in text if c not in string.punctuation])
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

df['text'] = df['text'].apply(preprocess)

# 3. Split dataset
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['emotion'], test_size=0.2, random_state=42)

# 4. Feature extraction
tfidf = TfidfVectorizer()
X_train_vec = tfidf.fit_transform(X_train)
X_test_vec = tfidf.transform(X_test)

# 5. Model training
model = LogisticRegression()
model.fit(X_train_vec, y_train)

# 6. Prediction
y_pred = model.predict(X_test_vec)

# 7. Evaluation
print(classification_report(y_test, y_pred))

# 8. Predicting new text
new_text = "I feel very joyful today!"
new_text = preprocess(new_text)
new_vec = tfidf.transform([new_text])
prediction = model.predict(new_vec)
print("Emotion:", prediction[0])

              precision    recall  f1-score   support

        fear       0.00      0.00      0.00       1.0
       happy       0.00      0.00      0.00       1.0
         sad       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Emotion: sad


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
