In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import nltk
from nltk.corpus import stopwords
import string

In [2]:
# Download necessary NLTK data
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\jisma\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
df=pd.read_csv('nlp_dataset.csv')

In [4]:
df.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5937 entries, 0 to 5936
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Comment  5937 non-null   object
 1   Emotion  5937 non-null   object
dtypes: object(2)
memory usage: 92.9+ KB


In [6]:
df.Emotion.value_counts()

Emotion
anger    2000
joy      2000
fear     1937
Name: count, dtype: int64

In [7]:
# Preprocess the data
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    text = text.lower()  # Lowercase text
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    text = ' '.join(word for word in text.split() if word not in stop_words)  # Remove stopwords
    return text

In [8]:
df['Comment'] = df['Comment'].apply(preprocess_text)

In [9]:
df.head()

Unnamed: 0,Comment,Emotion
0,seriously hate one subject death feel reluctan...,fear
1,im full life feel appalled,anger
2,sit write start dig feelings think afraid acce...,fear
3,ive really angry r feel like idiot trusting fi...,joy
4,feel suspicious one outside like rapture happe...,fear


In [10]:
# Feature extraction
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['Comment'])
y = df['Emotion']

In [15]:
# Print the vocabulary
print("\nVocabulary:")
print(vectorizer.get_feature_names_out())

# Get the total number of items in the vocabulary
vocabulary_size = len(vectorizer.get_feature_names_out())
print(f'Total number of items in the vocabulary: {vocabulary_size}')



Vocabulary:
['aa' 'aac' 'aaron' ... 'zonisamide' 'zq' 'zumba']
Total number of items in the vocabulary: 8817


In [11]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [16]:
# Train the model
model = MultinomialNB()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_report(y_test, y_pred))

Accuracy: 0.8956228956228957
Classification Report:
              precision    recall  f1-score   support

       anger       0.88      0.93      0.90       392
        fear       0.89      0.93      0.90       416
         joy       0.93      0.83      0.88       380

    accuracy                           0.90      1188
   macro avg       0.90      0.89      0.89      1188
weighted avg       0.90      0.90      0.90      1188



In [20]:
from sklearn.svm import SVC
# Train the SVM model
svc_model = SVC(kernel='linear')  # You can experiment with different kernels (e.g., 'rbf', 'poly')
svc_model.fit(X_train, y_train)

# Make predictions
svc_pred = svc_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, svc_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_report(y_test, svc_pred))

Accuracy: 0.9553872053872053
Classification Report:
              precision    recall  f1-score   support

       anger       0.94      0.96      0.95       392
        fear       0.97      0.94      0.95       416
         joy       0.95      0.97      0.96       380

    accuracy                           0.96      1188
   macro avg       0.96      0.96      0.96      1188
weighted avg       0.96      0.96      0.96      1188



In [17]:
# Define new unseen comments
new_comments = [
    "I feel so happy and content with my recent achievements!",
    "I can't believe how terrible this situation is, it's driving me mad.",
    "I am unsure and nervous about the upcoming exam."
]

# Preprocess the new comments
new_comments_processed = [preprocess_text(comment) for comment in new_comments]

# Transform the new comments using the same vectorizer
new_comments_vectorized = vectorizer.transform(new_comments_processed)

# Predict the emotions using the trained model
predictions = model.predict(new_comments_vectorized)

# Print the predictions
for comment, prediction in zip(new_comments, predictions):
    print(f"Comment: {comment}")
    print(f"Predicted Emotion: {prediction}")
    print()


Comment: I feel so happy and content with my recent achievements!
Predicted Emotion: joy

Comment: I can't believe how terrible this situation is, it's driving me mad.
Predicted Emotion: anger

Comment: I am unsure and nervous about the upcoming exam.
Predicted Emotion: fear



In [21]:
# Define new unseen comments
new_comments = [
    "I feel so happy and content with my recent achievements!",
    "I can't believe how terrible this situation is, it's driving me mad.",
    "I am unsure and nervous about the upcoming exam."
]

# Preprocess the new comments
new_comments_processed = [preprocess_text(comment) for comment in new_comments]

# Transform the new comments using the same vectorizer
new_comments_vectorized = vectorizer.transform(new_comments_processed)

# Predict the emotions using the trained model
predictions = svc_model.predict(new_comments_vectorized)

# Print the predictions
for comment, prediction in zip(new_comments, predictions):
    print(f"Comment: {comment}")
    print(f"Predicted Emotion: {prediction}")
    print()


Comment: I feel so happy and content with my recent achievements!
Predicted Emotion: joy

Comment: I can't believe how terrible this situation is, it's driving me mad.
Predicted Emotion: anger

Comment: I am unsure and nervous about the upcoming exam.
Predicted Emotion: fear

