In [1]:
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC

# Load data from JSON file
def load_data(filename):
  """Read labelled messages from a JSON file.

  The file must contain a JSON array of objects, each with a 'message'
  key and a 'stress_level' key.

  Args:
    filename: Path of the JSON file to read.

  Returns:
    A (messages, stress_levels) tuple of two lists in file order, so that
    stress_levels[i] is the label of messages[i].

  Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If the file does not contain valid JSON.
    KeyError: If a record lacks 'message' or 'stress_level'.
  """
  # JSON text is UTF-8 by specification. Pin the encoding so decoding does
  # not depend on the platform's locale default (e.g. cp1252 on Windows).
  with open(filename, 'r', encoding='utf-8') as f:
    data = json.load(f)
  messages = [record['message'] for record in data]
  stress_levels = [record['stress_level'] for record in data]
  return messages, stress_levels



In [2]:
import re
import nltk
from nltk.stem import WordNetLemmatizer
# Shared lemmatizer instance used by preprocess_text below.
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
  """Normalise a message before it is vectorised.

  Lowercases the text, removes every character other than a-z, 0-9 and
  whitespace, then lemmatizes each remaining word.

  Args:
    text: Raw message string.

  Returns:
    The cleaned words joined by single spaces, with no leading or trailing
    whitespace. An input with no surviving words yields ''.
  """
  text = text.lower()  # Lowercase conversion
  text = re.sub(r"[^a-z0-9\s]", "", text)  # Remove punctuation
  # lemmatize() looks up a single word; passing the whole sentence leaves
  # multi-word text unchanged. Lemmatize token by token instead. split()
  # also collapses whitespace runs and trims both ends.
  tokens = text.split()
  return " ".join(lemmatizer.lemmatize(token) for token in tokens)

In [3]:
# Train the SVM model
def train_svm(messages, stress_levels):
  """Fit a linear-kernel SVM on TF-IDF features of the given messages.

  Args:
    messages: Iterable of raw message strings; each one is passed through
      preprocess_text before vectorisation.
    stress_levels: Labels passed to SVC.fit as the targets for messages.

  Returns:
    A (model, vectorizer) tuple: the fitted SVC and the fitted
    TfidfVectorizer that produced its training features.
  """
  # Clean every message first so the vocabulary is built from normalised text.
  cleaned = list(map(preprocess_text, messages))

  # Learn the TF-IDF vocabulary and build the training matrix in one step.
  vectorizer = TfidfVectorizer()
  tfidf_matrix = vectorizer.fit_transform(cleaned)

  # fit() returns the estimator itself, so construction and fitting chain.
  model = SVC(kernel='linear').fit(tfidf_matrix, stress_levels)
  return model, vectorizer


In [4]:

# Load the training messages and their stress-level labels from disk.
messages, stress_levels = load_data('train.json')  # Replace with your actual filename

# Fit the classifier. The fitted vectorizer is kept as well, because new
# messages must be transformed with it before they can be classified.
model, vectorizer = train_svm(messages, stress_levels)



In [25]:
# Example usage: predict stress level for a new message
new_message = "hi jyothika is sleeping"
# Apply the same cleaning that train_svm applies to the training messages.
preprocessed_message = preprocess_text(new_message)
# transform() (not fit_transform()) reuses the vocabulary fitted in training.
new_message_features = vectorizer.transform([preprocessed_message])
# A single message was passed in, so take the first (only) prediction.
predicted_stress_level = model.predict(new_message_features)[0]

# NOTE: print() separates its arguments with spaces, so the message is shown
# with a space on each side inside the quotes.
print("Predicted stress level for '", new_message, "' :", predicted_stress_level)


Predicted stress level for ' hi jyothika is sleeping ' : 4
