In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression # The "Brain"

# --- STEP 1: THE DATA (Passive Phenotyping) ---
# Imagine you collected this from users
data = {
    "Dwell_Time": [100, 120, 900, 110, 850, 130, 950, 105], # Evidence
    "Status":     [0,   0,   1,   0,   1,   0,   1,   0]    # 0=Normal, 1=Depressed
}
df = pd.DataFrame(data)

X = df[['Dwell_Time']]  # Features (Must be 2D, hence double brackets)
y = df['Status']        # Target (1D)

# --- STEP 2: THE SPLIT ---
# Hide 20% of data to test the student later
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- STEP 3: THE TRAINING (The "Fit") ---
# We choose a simple brain: Logistic Regression (Good for Yes/No questions)
model = LogisticRegression()

# This is the magic line. The model looks at X_train and learns the pattern.
print("Training the brain...")
model.fit(X_train, y_train) 
print("Training complete! Brain is ready.")

# --- STEP 4: THE PREDICTION ---
# Let's test it on a NEW user who types really slowly (1000ms dwell time)
new_user_data = [[1000]] 
prediction = model.predict(new_user_data)

if prediction[0] == 1:
    print(f"Result: Model thinks this user is DEPRESSED (Code {prediction[0]})")
else:
    print(f"Result: Model thinks this user is NORMAL (Code {prediction[0]})")

Training the brain...
Training complete! Brain is ready.
Result: Model thinks this user is DEPRESSED (Code 1)




In [5]:
from sklearn.metrics import accuracy_score

# 1. The Exam: Ask the model to predict the HIDDEN data
# We only give it the questions (X_test), not the answers
y_pred = model.predict(X_test)

# 2. The Grading: Compare the Guesses (y_pred) to the Real Answers (y_test)
score = accuracy_score(y_test, y_pred)

print("--- The Report Card ---")
print(f"Real Answers:     {y_test.values}") # .values just makes it look cleaner
print(f"Model Predictions:{y_pred}")
print(f"Accuracy Score:   {score * 100}%")

--- The Report Card ---
Real Answers:     [0 0]
Model Predictions:[0 0]
Accuracy Score:   100.0%


In [6]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[2]]




In [8]:
# 1. Import the necessary tools
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# 2. CREATE THE DATA (The "Textbook")
# Each row is [Speed, Jitter]
# Speed: Lower is faster (0.1s is fast, 0.5s is slow)
# Jitter: Higher is messier (0.01 is clean, 0.9 is messy)

X_train = [
    [0.5, 0.01], # Slow & Clean -> Relaxed
    [0.6, 0.02], # Slow & Clean -> Relaxed
    [0.1, 0.80], # Fast & Messy -> Stressed
    [0.1, 0.90], # Fast & Messy -> Stressed
]

# The Labels for the data above (0 = Relaxed, 1 = Stressed)
y_train = [0, 0, 1, 1]

# 3. CREATE THE BRAIN
# We create a new, empty Random Forest
brain = RandomForestClassifier(n_estimators=10)

# 4. TRAIN THE BRAIN (The Learning Phase)
# .fit() means "Look at this data and learn the pattern"
print("Training the model...")
brain.fit(X_train, y_train)

# 5. TEST THE BRAIN (The Prediction Phase)
# Let's give it a NEW keystroke it has never seen before.
# Case: Super fast (0.09) and super messy (0.85)
new_data = [[0.55, 0.03]]

prediction = brain.predict(new_data)

# 6. SHOW THE RESULT
if prediction[0] == 0:
    print(f"Prediction: {prediction[0]} (User is Relaxed)")
else:
    print(f"Prediction: {prediction[0]} (User is Stressed!)")

Training the model...
Prediction: 0 (User is Relaxed)
