In [1]:
!pip install pandas numpy scikit-learn nltk




In [None]:
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Tokenizer data for nltk.word_tokenize, which the text-feature extraction
# in this notebook calls. Both resource names are requested; presumably
# different NLTK releases look for one or the other ('punkt' vs 'punkt_tab')
# -- TODO confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')


In [None]:
# Toy dataset: one free-text message plus three phone-usage signals per row,
# each labelled with a binary stress level.
data = dict(
    text=[
        "I feel tired and overloaded with work",
        "Everything is fine and going smooth",
        "Too many things to finish today",
        "I am relaxed and happy today",
        "I cannot handle this pressure anymore",
        "Feeling okay nothing much",
    ],
    screen_time=[8, 3, 7, 2, 9, 4],
    night_usage=[1, 0, 1, 0, 1, 0],
    unlock_count=[75, 20, 65, 15, 90, 30],
    # Label encoding: 1 = High stress, 0 = Low stress
    stress_level=[1, 0, 1, 0, 1, 0],
)

df = pd.DataFrame(data=data)
df


In [None]:
def extract_text_features(text):
    """Compute simple lexical statistics for one piece of text.

    The text is lower-cased and tokenized with ``nltk.word_tokenize``. Tokens
    that contain no letter or digit (for example ``,`` ``.`` ``!``) are
    discarded so punctuation does not inflate the word count, register as
    repetition, or pull the average word length down.

    Parameters
    ----------
    text : str
        Text to analyse. Must be a string (``.lower()`` is called on it).

    Returns
    -------
    tuple
        ``(word_count, repetition, avg_word_len)`` where ``word_count`` is the
        number of word tokens (int), ``repetition`` is ``word_count`` minus the
        number of distinct word tokens (int), and ``avg_word_len`` is the mean
        token length in characters (float, ``0.0`` when there are no words).
    """
    tokens = nltk.word_tokenize(text.lower())
    # Keep only tokens with at least one alphanumeric character.
    words = [tok for tok in tokens if any(ch.isalnum() for ch in tok)]

    word_count = len(words)
    repetition = word_count - len(set(words))

    # Always produce a plain float so the result type does not depend on
    # whether any words were found.
    if word_count > 0:
        avg_word_len = sum(len(w) for w in words) / word_count
    else:
        avg_word_len = 0.0

    return word_count, repetition, avg_word_len


In [None]:
# Build the text-feature table in one step from a list of tuples so each
# column keeps its own dtype. (Wrapping every row in a pd.Series turns the
# integer counts into floats, because a Series holds a single dtype.)
TEXT_FEATURE_COLUMNS = ['word_count', 'repetition', 'avg_word_len']

text_features = pd.DataFrame(
    [extract_text_features(text) for text in df['text'].astype(str)],
    columns=TEXT_FEATURE_COLUMNS,
    index=df.index,  # align with df even if its index is not 0..n-1
)
df[TEXT_FEATURE_COLUMNS] = text_features

df


In [None]:
# Model inputs: the three usage signals followed by the three text features.
feature_columns = [
    'screen_time',
    'night_usage',
    'unlock_count',
    'word_count',
    'repetition',
    'avg_word_len',
]
X = df[feature_columns]

# Prediction target.
y = df['stress_level']


In [None]:
# Hold out 20% of the rows for evaluation. stratify=y keeps the class mix of
# the target the same in both parts, so neither the training set nor the test
# set can end up containing a single class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Random forest with 100 trees and a fixed random_state of 42.
forest_settings = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_settings)

model.fit(X_train, y_train)


In [None]:
# Rank the model inputs by the importance the fitted forest assigns to them.
features = X.columns
importances = model.feature_importances_

fi_df = pd.DataFrame({
    "Feature": features,
    "Importance": importances
}).sort_values(by="Importance", ascending=False)

# Draw on an explicit Figure/Axes pair rather than the implicit pyplot state.
fig, ax = plt.subplots()
ax.bar(fi_df["Feature"], fi_df["Importance"])
ax.tick_params(axis="x", labelrotation=45)
ax.set_xlabel("Features")
ax.set_ylabel("Importance")
ax.set_title("Feature Importance for Digital Stress Prediction")
fig.tight_layout()  # keep the rotated tick labels inside the figure
plt.show()


In [None]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)
print("✅ Model Accuracy:", accuracy_score(y_test, y_pred))


In [None]:
# Example inputs for a single new user.
new_text = "I am mentally exhausted and cannot manage work"
screen_time = 9
night_usage = 1
unlock_count = 85

wc, rep, avg_len = extract_text_features(new_text)

# Values are listed in the same order as the columns of X.
new_user = [[screen_time, night_usage, unlock_count, wc, rep, avg_len]]

# The model was fitted on a DataFrame, so predict on one carrying the same
# column names; a bare list triggers scikit-learn's feature-name warning.
new_user_df = pd.DataFrame(new_user, columns=X.columns)

result = model.predict(new_user_df)

if result[0] == 1:
    print("🚨 HIGH DIGITAL STRESS DETECTED")
else:
    print("✅ LOW DIGITAL STRESS")
