In [None]:
import warnings

import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split


In [None]:
# Read the survey CSV into a DataFrame. The file name has no directory part,
# so it is resolved against the notebook's current working directory.
dataset_path = "Student Insomnia and Educational Outcomes Dataset.csv"
df = pd.read_csv(dataset_path)


In [None]:
# Lookup tables that turn the survey's answer text into ordinal integer scores.

# Frequency answers: several phrasings share one score, so labels are grouped
# under the score they receive (0 = never ... 5 = every night/day).
freq_map = {
    label: score
    for score, labels in (
        (0, ("Never",)),
        (1, ("Rarely (1-2 times a week)", "Rarely (1-2 times a month)")),
        (2, ("Sometimes (1-2 times a week)", "Sometimes (3-4 times a week)")),
        (3, ("Often (3-4 times a week)",)),
        (4, ("Often (5-6 times a week)",)),
        (5, ("Every night", "Every day")),
    )
    for label in labels
}

# Sleep quality: labels listed from lowest to highest, scored 1..4.
sleep_quality_map = {
    label: score
    for score, label in enumerate(("Poor", "Average", "Good", "Very good"), start=1)
}

# Academic stress: labels listed from lowest to highest, scored 0..3.
stress_map = {
    label: score
    for score, label in enumerate(
        ("No stress", "Low stress", "High stress", "Extremely high stress")
    )
}

# Academic performance: labels listed from lowest to highest, scored 1..5.
performance_map = {
    label: score
    for score, label in enumerate(
        ("Poor", "Below Average", "Average", "Good", "Excellent"), start=1
    )
}



In [None]:
# Questions answered on a frequency scale; each one is encoded with `freq_map`.
# The strings are used as DataFrame column keys, so they must match the column
# names exactly (note the trailing space on the first entry).
freq_columns = [
    '3. How often do you have difficulty falling asleep at night? ',
    '5. How often do you wake up during the night and have trouble falling back asleep?',
    '7. How often do you experience difficulty concentrating during lectures or studying due to lack of sleep?',
    '8. How often do you feel fatigued during the day, affecting your ability to study or attend classes?',
    '9. How often do you miss or skip classes due to sleep-related issues (e.g., insomnia, feeling tired)?',
    '11. How often do you use electronic devices (e.g., phone, computer) before going to sleep?',
    '12. How often do you consume caffeine (coffee, energy drinks) to stay awake or alert?',
    '13. How often do you engage in physical activity or exercise?'
]

for col in freq_columns:
    answers = df[col]
    # Re-run guard: once encoded the column is numeric, and mapping it again
    # would turn every value into NaN because no number is a key of freq_map.
    if pd.api.types.is_numeric_dtype(answers):
        continue
    encoded = answers.map(freq_map)
    # Series.map yields NaN for any label that is not a key of the dict.
    # Report those labels instead of letting them pass as ordinary missing data.
    unmapped = answers[encoded.isna() & answers.notna()].unique()
    if len(unmapped):
        warnings.warn(
            f"{col!r}: {len(unmapped)} answer label(s) missing from freq_map "
            f"were set to NaN: {sorted(map(str, unmapped))}"
        )
    df[col] = encoded


In [None]:
# Encode the sleep-quality, stress and academic-performance answers, each with
# its own score table. One loop replaces three copies of the same statement.
for column, mapping in (
    ('6. How would you rate the overall quality of your sleep?', sleep_quality_map),
    ('14. How would you describe your stress levels related to academic workload?', stress_map),
    ('15. How would you rate your overall academic performance (GPA or grades) in the past semester?', performance_map),
):
    answers = df[column]
    # Re-run guard: an already-encoded (numeric) column would be mapped to all
    # NaN on a second pass, because the tables are keyed by answer text.
    if pd.api.types.is_numeric_dtype(answers):
        continue
    encoded = answers.map(mapping)
    # Series.map yields NaN for labels that are not keys of the table; surface
    # them rather than silently treating them as missing answers.
    unmapped = answers[encoded.isna() & answers.notna()].unique()
    if len(unmapped):
        warnings.warn(
            f"{column!r}: {len(unmapped)} answer label(s) missing from the score "
            f"table were set to NaN: {sorted(map(str, unmapped))}"
        )
    df[column] = encoded


In [None]:
# Sleep-duration bands encoded as hours: the midpoint of each closed range,
# and 8.5 for the open-ended top band.
# NOTE(review): there is no entry for a band below 4 hours or for 5-6 hours.
# If the survey offers such options they become NaN; the warning below will
# list them. Confirm against the actual answer set.
sleep_map = {
    "4-5 hours": 4.5,
    "6-7 hours": 6.5,
    "7-8 hours": 7.5,
    "More than 8 hours": 8.5
}

sleep_column = '4. On average, how many hours of sleep do you get on a typical day?'
sleep_answers = df[sleep_column]

# Re-run guard: once encoded the column is numeric, and mapping it again would
# turn every value into NaN because sleep_map is keyed by answer text.
if not pd.api.types.is_numeric_dtype(sleep_answers):
    sleep_encoded = sleep_answers.map(sleep_map)
    # Series.map yields NaN for labels that are not keys of the dict; report
    # them instead of letting them pass as ordinary missing data.
    sleep_unmapped = sleep_answers[sleep_encoded.isna() & sleep_answers.notna()].unique()
    if len(sleep_unmapped):
        warnings.warn(
            f"{sleep_column!r}: {len(sleep_unmapped)} answer label(s) missing from "
            f"sleep_map were set to NaN: {sorted(map(str, sleep_unmapped))}"
        )
    df[sleep_column] = sleep_encoded


In [None]:
# Feature matrix: the eight encoded survey columns, in this order.
# predict_student_performance() builds its input row in the same order.
feature_columns = [
    '4. On average, how many hours of sleep do you get on a typical day?',
    '6. How would you rate the overall quality of your sleep?',
    '7. How often do you experience difficulty concentrating during lectures or studying due to lack of sleep?',
    '8. How often do you feel fatigued during the day, affecting your ability to study or attend classes?',
    '11. How often do you use electronic devices (e.g., phone, computer) before going to sleep?',
    '12. How often do you consume caffeine (coffee, energy drinks) to stay awake or alert?',
    '13. How often do you engage in physical activity or exercise?',
    '14. How would you describe your stress levels related to academic workload?',
]
X = df[feature_columns]

# Target: the encoded academic-performance answer.
target_column = '15. How would you rate your overall academic performance (GPA or grades) in the past semester?'
y = df[target_column]


In [None]:
from sklearn.impute import SimpleImputer

# Replace each missing feature value with the median of its column.
# NOTE(review): the imputer is fitted on all rows, before the train/test split
# in the next cell, so the medians also reflect rows that end up in the test set.
imputer = SimpleImputer(strategy='median')
imputer.fit(X)

# The transformed result replaces X.
X = imputer.transform(X)


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Linear baseline. `model` is the estimator predict_student_performance()
# calls, so it is scored here as well (previously y_pred was computed but unused).
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("LR MAE:", mean_absolute_error(y_test, y_pred))
print("LR R²:", r2_score(y_test, y_pred))

# Random forest for comparison: 200 trees of depth at most 5, seeded so the
# fit is repeatable.
rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=5,
    random_state=42
)

rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

print("RF MAE:", mean_absolute_error(y_test, y_pred_rf))
print("RF R²:", r2_score(y_test, y_pred_rf))




In [None]:
def _ask_number(prompt, cast, low, high, input_fn):
    """Prompt until the reply parses with `cast` and lies within [low, high]."""
    while True:
        reply = input_fn(prompt)
        try:
            value = cast(reply)
        except ValueError:
            print(f"  Could not read {reply!r} as {cast.__name__}; please try again.")
            continue
        # A NaN reply fails both comparisons and is rejected here as well.
        if low <= value <= high:
            return value
        print(f"  Out of range: enter a value from {low} to {high}.")


def predict_student_performance(estimator=None, input_fn=None):
    """Interactively collect one student's answers and print a predicted score.

    Prompts for the eight features in the order the model was trained on,
    prints the prediction rounded to two decimals, then prints a verdict
    based on the entered sleep hours and stress level.

    Invalid or out-of-range replies are rejected with a message and the same
    question is asked again, so one typo does not discard the whole session.

    Parameters
    ----------
    estimator : object with ``predict(array)``, optional
        Fitted regressor to query. Defaults to the module-level ``model``.
    input_fn : callable(prompt) -> str, optional
        Source of replies. Defaults to the built-in ``input``, looked up at
        call time.

    Returns
    -------
    None
        All results are printed.
    """
    if estimator is None:
        estimator = model
    if input_fn is None:
        input_fn = input

    print("\n🎓 STUDENT PERFORMANCE PREDICTOR\n")

    sleep_hours = _ask_number("Sleep hours (4.5 / 6.5 / 7.5 / 8.5): ", float, 0, 24, input_fn)
    sleep_quality = _ask_number(
        "Sleep quality (1=Poor, 2=Average, 3=Good, 4=Very good): ", int, 1, 4, input_fn
    )
    concentration = _ask_number("Difficulty concentrating (0–5): ", int, 0, 5, input_fn)
    fatigue = _ask_number("Daytime fatigue (0–5): ", int, 0, 5, input_fn)
    screen = _ask_number("Device usage before sleep (0–5): ", int, 0, 5, input_fn)
    caffeine = _ask_number("Caffeine usage (0–5): ", int, 0, 5, input_fn)
    exercise = _ask_number("Exercise frequency (0–5): ", int, 0, 5, input_fn)
    stress = _ask_number(
        "Stress level (0=None, 1=Low, 2=High, 3=Extreme): ", int, 0, 3, input_fn
    )

    # A single row (shape 1 x 8); column order must match the training features.
    user_input = np.array([[sleep_hours, sleep_quality, concentration,
                            fatigue, screen, caffeine,
                            exercise, stress]])

    prediction = estimator.predict(user_input)[0]
    prediction = round(prediction, 2)

    print("\n📊 Predicted Academic Performance Score:", prediction)

    # Verdict depends only on the entered sleep hours and stress level,
    # not on the predicted score.
    if sleep_hours <= 5 and stress >= 2:
        print("❌ Verdict: All-nighters are a MYTH.")
    elif sleep_hours <= 5:
        print("⚠️ Verdict: Short-term gain, long-term damage.")
    else:
        print("✅ Verdict: Sleep + balance beats all-nighters.")


In [None]:
# Interactive cell: the function reads its eight answers via input(), so
# execution waits here until they are typed in.
predict_student_performance()
