# Import necessary libraries

In [2]:
import pandas as pd

In [3]:
import nltk

In [4]:
import re

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
from sklearn.svm import SVC

In [7]:
from nltk.corpus import stopwords

In [8]:
from sklearn.metrics import accuracy_score,f1_score

In [9]:
from sklearn.naive_bayes import MultinomialNB

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Download necessary resources

In [11]:
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [12]:
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\midhu\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\midhu\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# 1. Loading and Preprocessing 

In [18]:
# Load the Dataset
df = pd.read_csv("C:/Users/midhu/Downloads/nlp_dataset.csv")
# check first few row of the dataset
df.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [19]:
df.columns

Index(['Comment', 'Emotion'], dtype='object')

In [22]:
# Text Cleaning Function
def clean_text(text):
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE) # Remove URLs
    text = re.sub(r'\W', ' ',text) # Remove punctuation and numbers
    text = nltk.word_tokenize(text) # Tokenization
    text = [word for word in text if word not in stopwords.words('english')] # Remove stopwords
    text = ' '.join(text) # Convert list back to string
    return text

In [23]:
# Apply text cleanong to the "Comment' column
df['clean_text'] = df['Comment'].apply(clean_text)
df[['Comment', 'clean_text']].head()

Unnamed: 0,Comment,clean_text
0,i seriously hate one subject to death but now ...,seriously hate one subject death feel reluctan...
1,im so full of life i feel appalled,im full life feel appalled
2,i sit here to write i start to dig out my feel...,sit write start dig feelings think afraid acce...
3,ive been really angry with r and i feel like a...,ive really angry r feel like idiot trusting fi...
4,i feel suspicious if there is no one outside l...,feel suspicious one outside like rapture happe...


In [25]:
# Split dataset into training and testing sets using 'clean_text' and 'Emotion'
X_train, X_test, y_train, y_test = train_test_split(df['clean_text'], df['Emotion'],test_size=0.2, random_state=42)

# 2. Feature Extraction

In [43]:
# Initialize TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=5000)

# Fit and transform the training data
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)

# Transform the test data
X_test_tfidf = tfidf_vectorizer.transform(X_test)
print(X_train_tfidf.shape)
print(X_test_tfidf.shape)

(4749, 5000)
(1188, 5000)


# 3. Model Development

# a)Naive Bayes



In [44]:

# Initialize the Naive Bayes classifier
nb_model = MultinomialNB()

# Train the model
nb_model.fit(X_train_tfidf, y_train)

# Make predictions on the test set
y_pred_nb = nb_model.predict(X_test_tfidf)

# Evaluate the model
nb_accuracy = accuracy_score(y_test, y_pred_nb)
nb_f1 = f1_score(y_test, y_pred_nb, average='weighted')

print(f"Naive Bayes Accuracy: {nb_accuracy}")
print(f"Naive Bayes F1-Score: {nb_f1}")

Naive Bayes Accuracy: 0.9082491582491582
Naive Bayes F1-Score: 0.9081403588135297


# b)Support Vector Machine 

In [45]:
# Initialize the Support Vector Machine classifier
svm_model = SVC(kernel='linear')

# Train the model
svm_model.fit(X_train_tfidf, y_train)

# Make predictions on the test set
y_pred_svm = svm_model.predict(X_test_tfidf)

# Evaluate the model
svm_accuracy = accuracy_score(y_test, y_pred_svm)
svm_f1 = f1_score(y_test, y_pred_svm, average='weighted')

print(f"SVM Accuracy: {svm_accuracy}")
print(f"SVM F1-Score: {svm_f1}")

SVM Accuracy: 0.9444444444444444
SVM F1-Score: 0.9443890125616586


# 4. Model Comparison

In [46]:
# Print the results
print(f"Naive Bayes - Accuracy: {nb_accuracy}, F1-Score: {nb_f1}")
print(f"SVM - Accuracy: {svm_accuracy}, F1-Score: {svm_f1}")

Naive Bayes - Accuracy: 0.9082491582491582, F1-Score: 0.9081403588135297
SVM - Accuracy: 0.9444444444444444, F1-Score: 0.9443890125616586
