In [1]:
!pip install scikit-learn pandas numpy matplotlib




In [1]:
import pandas as pd
import numpy as np
import joblib
import nltk
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE

In [2]:
nltk.download('stopwords')
from nltk.corpus import stopwords

# Function to clean text
def clean_text(text):
    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
train_df = pd.read_csv(r"D:\text_emotion\text_emotion\archive\training.csv")
test_df = pd.read_csv(r"D:\text_emotion\text_emotion\archive\test.csv")
val_df = pd.read_csv(r"D:\text_emotion\text_emotion\archive\validation.csv")

In [4]:
print(train_df.head())


                                                text  label
0                            i didnt feel humiliated      0
1  i can go from feeling so hopeless to so damned...      0
2   im grabbing a minute to post i feel greedy wrong      3
3  i am ever feeling nostalgic about the fireplac...      2
4                               i am feeling grouchy      3


In [5]:
label_mapping = {0: "Happy", 1: "Sad", 2: "Angry", 3: "Neutral", 4: "Excited", 5: "Fear"}
train_df['label'] = train_df['label'].map(label_mapping)
test_df['label'] = test_df['label'].map(label_mapping)
val_df['label'] = val_df['label'].map(label_mapping)


In [6]:
train_df['text'] = train_df['text'].apply(clean_text)
test_df['text'] = test_df['text'].apply(clean_text)
val_df['text'] = val_df['text'].apply(clean_text)

In [7]:
label_encoder = LabelEncoder()
train_df['encoded_label'] = label_encoder.fit_transform(train_df['label'])
test_df['encoded_label'] = label_encoder.transform(test_df['label'])
val_df['encoded_label'] = label_encoder.transform(val_df['label'])

In [8]:
joblib.dump(label_encoder, 'label_encoder_text.pkl')



['label_encoder_text.pkl']

In [9]:
vectorizer = TfidfVectorizer(max_features=5000, stop_words=stopwords.words('english'))
X_train = vectorizer.fit_transform(train_df['text']).toarray()
X_test = vectorizer.transform(test_df['text']).toarray()
X_val = vectorizer.transform(val_df['text']).toarray()

In [10]:
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']

In [11]:

y_train = train_df['encoded_label']
y_test = test_df['encoded_label']
y_val = val_df['encoded_label']

In [12]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [13]:
scaler = StandardScaler()
X_train_resampled = scaler.fit_transform(X_train_resampled)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)


In [14]:
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [15]:
knn_model = KNeighborsClassifier(n_neighbors=7, weights='distance')
knn_model.fit(X_train_resampled, y_train_resampled)



In [16]:
y_pred = knn_model.predict(X_test)
print("KNN Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

KNN Accuracy: 0.3215

Classification Report:
               precision    recall  f1-score   support

           0       0.17      0.34      0.23       159
           1       0.18      0.44      0.25       224
           2       0.05      0.26      0.09        66
           3       0.70      0.33      0.45       581
           4       0.29      0.31      0.30       275
           5       0.80      0.28      0.42       695

    accuracy                           0.32      2000
   macro avg       0.37      0.33      0.29      2000
weighted avg       0.56      0.32      0.37      2000



In [17]:
joblib.dump(knn_model, 'knn_text_emotion.pkl')

['knn_text_emotion.pkl']

In [18]:
def predict_text_emotion(text):
    knn_model = joblib.load('knn_text_emotion.pkl')
    vectorizer = joblib.load('tfidf_vectorizer.pkl')
    scaler = joblib.load('scaler.pkl')
    label_encoder = joblib.load('label_encoder_text.pkl')

    text_cleaned = clean_text(text)
    text_vectorized = vectorizer.transform([text_cleaned]).toarray()
    text_vectorized = scaler.transform(text_vectorized)

    emotion_index = knn_model.predict(text_vectorized)[0]
    emotion = label_encoder.inverse_transform([emotion_index])[0]

    print("\n Predicted Emotion:", emotion)
    return emotion

# Test again
predict_text_emotion("I am very proud")



 Predicted Emotion: Sad


'Sad'