# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# Load the dataset CSV from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/90-days-student-screen-time-tracker/data.csv")

# A bare expression is only displayed when it is the last line of a notebook
# cell; anywhere else its result is discarded. Print the preview and the
# per-column missing-value counts explicitly so they are always shown.
print(df.head())
print(df.isnull().sum())

# Drop every row that contains at least one missing value.
df = df.dropna()

def productivity(row):
    """Classify one record as 'High', 'Medium' or 'Low' productivity.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexable by
            'Study_hours' and 'Screen_time_hours'.

    Returns:
        'High' when Study_hours is at least 4 and Screen_time_hours is at
        most 5; otherwise 'Medium' when Study_hours is at least 2;
        otherwise 'Low'.
    """
    study = row['Study_hours']

    # Screen time is consulted only once the study threshold has been met.
    if study >= 4 and row['Screen_time_hours'] <= 5:
        return 'High'

    return 'Medium' if study >= 2 else 'Low'

# Label every row with its rule-based productivity class.
df['Productivity_Level'] = df.apply(productivity, axis=1)

# The classes are ordinal, so plot them Low -> Medium -> High. Without an
# explicit order the string categories are drawn in the order they happen to
# appear in the data, which can differ between datasets and between plots.
level_order = ['Low', 'Medium', 'High']

# How many rows fall into each productivity class.
sns.countplot(x='Productivity_Level', data=df, order=level_order)
plt.show()

# Distribution of screen time within each productivity class.
sns.boxplot(
    x='Productivity_Level',
    y='Screen_time_hours',
    data=df,
    order=level_order,
)
plt.show()

# Feature matrix and target labels.
X = df[['Screen_time_hours', 'Study_hours', 'Sleep_hours', 'Notifications']]
y = df['Productivity_Level']

# Hold out 20% of the rows for evaluation. Stratify on the label so that each
# class keeps roughly the same share in the train and test sets; a purely
# random split can leave a rare class under-represented or absent in the
# test set.
try:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
except ValueError:
    # Stratification is impossible when a class has fewer than two members
    # or the test set is smaller than the number of classes; fall back to
    # the plain random split in that case.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the three classifiers on the scaled training data. ``fit`` returns the
# estimator itself, so construction and training are written as one step.
lr = LogisticRegression().fit(X_train, y_train)
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict labels for the held-out test rows with each fitted model.
lr_pred = lr.predict(X_test)
rf_pred = rf.predict(X_test)
knn_pred = knn.predict(X_test)

# Report test accuracy for every trained model; previously the logistic
# regression and KNN predictions were computed but never evaluated.
print("Logistic Regression Accuracy:", accuracy_score(y_test, lr_pred))
print("KNN Accuracy:", accuracy_score(y_test, knn_pred))
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))

# Per-class precision / recall / F1 for the random forest.
print(classification_report(y_test, rf_pred))

# Sorted union of true and predicted labels: the same label order
# confusion_matrix uses by default, made explicit so the heatmap ticks can
# carry the class names instead of bare indices.
class_labels = sorted(set(y_test) | set(rf_pred))
cm = confusion_matrix(y_test, rf_pred, labels=class_labels)

# Rows of the matrix are the true classes, columns the predicted classes.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
