<a href="https://colab.research.google.com/github/AI-Enthusiast-ATK/My-AI-Playground/blob/main/Text_Emotion_Classifiaction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import string
import spacy
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Setup
# Fetch the NLTK stop-word corpus (skipped by NLTK when it is already up to
# date) and keep the English list as a set for fast membership checks.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
# Small English spaCy pipeline; preprocess() relies on it for tokenization.
nlp = spacy.load('en_core_web_sm')

# Load data
# Directory holding the three CSV splits. The default matches the original
# hard-coded Colab location; change this one constant to run elsewhere.
DATA_DIR = '/content'
train_data = pd.read_csv(DATA_DIR + '/training.csv')
test_data = pd.read_csv(DATA_DIR + '/test.csv')
validation_data = pd.read_csv(DATA_DIR + '/validation.csv')
# Preprocessing
def preprocess(text):
    """Lower-case ``text`` and drop stop words, punctuation and whitespace tokens.

    The text is tokenized with the module-level spaCy pipeline ``nlp``; the
    surviving tokens are lower-cased and joined with single spaces.

    Args:
        text: Raw input string. Non-string values (e.g. ``None`` or the
            ``NaN`` pandas uses for a missing cell) are treated as empty.

    Returns:
        str: The cleaned, space-separated tokens, or ``''`` if none survive.
    """
    # A missing cell is not a string and cannot be tokenized; treat it as
    # empty text instead of failing in the middle of DataFrame.apply.
    if not isinstance(text, str):
        return ''

    punctuation_chars = set(string.punctuation)
    kept = []
    # make_doc runs only the tokenizer. Nothing below reads tags, parses or
    # entities, so running the rest of the pipeline per row is wasted work.
    for token in nlp.make_doc(text):
        if token.is_space or token.is_punct:
            continue
        # `token.text in string.punctuation` is a substring test, so
        # multi-character tokens such as "--" never matched; test each
        # character instead (this also keeps ASCII symbols like "$" filtered).
        if all(ch in punctuation_chars for ch in token.text):
            continue
        lowered = token.text.lower()
        if lowered in stop_words:
            continue
        kept.append(lowered)
    return ' '.join(kept)

# Clean every split: run preprocess() over the raw `text` column and store
# the result alongside it as `cleaned_text`.
for split_frame in (train_data, validation_data, test_data):
    split_frame['cleaned_text'] = split_frame['text'].apply(preprocess)

# TF-IDF Vectorization
# The vectorizer is fitted on the training split only; validation and test
# are merely transformed with what was learned from training.
vectorizer = TfidfVectorizer(max_features=10000)
X_train = vectorizer.fit_transform(train_data['cleaned_text'])
X_val, X_test = (
    vectorizer.transform(split_frame['cleaned_text'])
    for split_frame in (validation_data, test_data)
)

# Labels
y_train, y_val, y_test = (
    train_data['label'],
    validation_data['label'],
    test_data['label'],
)

# Logistic Regression Model
# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Evaluation
val_preds, test_preds = model.predict(X_val), model.predict(X_test)

val_accuracy = accuracy_score(y_val, val_preds)
test_accuracy = accuracy_score(y_test, test_preds)
# The per-class report covers the test split only.
test_report = classification_report(y_test, test_preds)

print("Validation Accuracy:", val_accuracy)
print("Test Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)




[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Validation Accuracy: 0.8775
Test Accuracy: 0.866

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.92      0.91       581
           1       0.85      0.95      0.90       695
           2       0.81      0.60      0.69       159
           3       0.88      0.83      0.86       275
           4       0.87      0.80      0.84       224
           5       0.87      0.50      0.63        66

    accuracy                           0.87      2000
   macro avg       0.86      0.77      0.80      2000
weighted avg       0.87      0.87      0.86      2000



In [None]:
def predict(text):
    """Classify one raw text string with the trained model.

    The text is cleaned by ``preprocess``, converted to a one-row feature
    matrix by the fitted ``vectorizer`` and handed to ``model.predict``.

    Args:
        text: Raw input text.

    Returns:
        The predicted label for ``text`` (the first and only element of
        the prediction array).
    """
    features = vectorizer.transform([preprocess(text)])
    predictions = model.predict(features)
    return predictions[0]

In [None]:
# Try the classifier on one hand-written sentence.
sample_text = " i am feeling grouchy ."
print("Prediction:", predict(sample_text))


Prediction: 3
