In [1]:
import pandas as pd
import re
import string
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

In [2]:
# Load the dataset from the CSV file that sits next to the notebook.
DATASET_PATH = 'stress_dataset_550.csv'
df = pd.read_csv(DATASET_PATH)

In [3]:
# Remove punctuation
# Translation table that deletes every ASCII punctuation character.
# Built once here instead of on every call, since it never changes.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def remove_punct(response):
    """Return `response` with all `string.punctuation` characters removed.

    Parameters
    ----------
    response : str
        Text to clean. Missing values (``None`` or a float NaN, which is how
        pandas represents an empty CSV cell) are treated as empty text; any
        other non-string value is converted with ``str()`` first.

    Returns
    -------
    str
        The text without punctuation; ``""`` for missing input.
    """
    # NaN is the only float that is not equal to itself.
    if response is None or (isinstance(response, float) and response != response):
        return ""
    return str(response).translate(_PUNCT_TABLE)

# Strip punctuation from every response, overwriting the column in place.
df["response"] = df["response"].map(remove_punct)

In [4]:
# Remove stopwords
import nltk
nltk.download('stopwords')

# Set of NLTK's English stopwords, used for fast membership tests below.
english_stopwords = stopwords.words('english')
stop = set(english_stopwords)


def remove_stopwords(response):
    """Lower-case `response` and drop every word found in `stop`.

    The text is split on whitespace; each word is lower-cased, kept only if
    its lower-cased form is not in the module-level `stop` set, and the
    surviving words are joined back together with single spaces.
    """
    kept = []
    for token in response.split():
        lowered = token.lower()
        if lowered not in stop:
            kept.append(lowered)
    return " ".join(kept)


# Apply the stopword filter to every response, overwriting the column.
df["response"] = df["response"].map(remove_stopwords)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
# Split the dataset into training and validation sets:
# 20% of the rows are held out, with a fixed seed so the split is repeatable.
responses = df['response']
labels = df['label']
X_train, X_val, y_train, y_val = train_test_split(
    responses,
    labels,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Vectorize the data using CountVectorizer.
# The vocabulary is learned from the training split only; both splits are
# then encoded with that same fitted vectorizer.
count_vectorizer = CountVectorizer().fit(X_train)
X_train_counts = count_vectorizer.transform(X_train)
X_val_counts = count_vectorizer.transform(X_val)

In [7]:
# Transform the count vectors to TF-IDF representation.
# The transformer is fitted on the training counts only and then applied
# to both the training and validation counts.
tfidf_transformer = TfidfTransformer().fit(X_train_counts)
X_train_tfidf = tfidf_transformer.transform(X_train_counts)
X_val_tfidf = tfidf_transformer.transform(X_val_counts)

In [8]:
# Train a Naive Bayes classifier on the TF-IDF features of the training split.
naive_bayes_model = MultinomialNB()
naive_bayes_model.fit(
    X_train_tfidf,
    y_train,
)

In [14]:
# Evaluate the model: score the validation split first, then the training
# split. `accuracy` is reassigned on each pass, so it ends up holding the
# training score.
evaluation_splits = (
    ("Validation Accuracy:", X_val_tfidf, y_val),
    ("Training Accuracy:", X_train_tfidf, y_train),
)
for report_label, split_features, split_labels in evaluation_splits:
    accuracy = naive_bayes_model.score(split_features, split_labels)
    print(report_label, accuracy)

Validation Accuracy: 0.9545454545454546
Training Accuracy: 1.0


In [10]:
import pickle

# Persist the three fitted objects needed at inference time, each to its own
# pickle file: the classifier, the CountVectorizer and the TfidfTransformer.
artifacts = (
    ('naive_bayes_model.pkl', naive_bayes_model),
    ('count_vectorizer.pkl', count_vectorizer),
    ('tfidf_transformer.pkl', tfidf_transformer),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

In [11]:
# Predict using the model
custom_text = "its a beautiful dream i saw"
# Clean the text with the same two steps that were applied to the training
# responses, so the vectorizer sees tokens in the same form.
custom_text = remove_punct(custom_text)
custom_text = remove_stopwords(custom_text)
# The vectorizer expects an iterable of documents, hence the one-element list.
custom_count = count_vectorizer.transform([custom_text])
custom_tfidf = tfidf_transformer.transform(custom_count)
prediction = naive_bayes_model.predict(custom_tfidf)
# predict() returns an array with one label per input row. Pull out the single
# label explicitly instead of comparing the whole array to a string and
# relying on the truthiness of a one-element array.
predicted_label = prediction[0]
if predicted_label == "Stress":
    print("Predicted class: Stress")
else:
    print("Predicted class: Normal")

Predicted class: Normal
