<a href="https://colab.research.google.com/github/GerardoMunoz/ML_2025/blob/main/AutoML_University_Mental_Health_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  AutoML Demo: University Mental Health Dataset

This notebook demonstrates how to use AutoML to predict mental health status based on environmental and behavioral data from IoT sensors.

## Step 1: Upload Dataset
1. Go to: https://www.kaggle.com/datasets/ziya07/iot-based-environmental-dataset  
2. Download the file: `university_mental_health_iot_dataset.csv`  
3. Upload it below:

In [1]:
#from google.colab import files
#uploaded = files.upload()

##  Step 2: Load and Preview the Data

In [2]:
import pandas as pd

df = pd.read_csv("university_mental_health_iot_dataset.csv")
df.head()

Unnamed: 0,timestamp,location_id,temperature_celsius,humidity_percent,air_quality_index,noise_level_db,lighting_lux,crowd_density,stress_level,sleep_hours,mood_score,mental_health_status
0,2024-05-01 08:00:00,104,24.328184,62.987529,67,54.429034,323.015836,45,23,7.22,2.3,0
1,2024-05-01 08:15:00,105,26.1773,52.482089,77,62.336451,412.211259,17,54,5.02,1.7,1
2,2024-05-01 08:30:00,103,25.443028,55.736424,120,51.967691,386.972786,35,39,4.96,2.9,0
3,2024-05-01 08:45:00,105,24.671652,71.484457,131,54.658851,339.056874,25,39,8.24,0.0,0
4,2024-05-01 09:00:00,105,21.628577,61.132704,81,60.62344,341.460697,36,18,6.61,3.0,0


##  Step 3: Data Preparation

In [3]:
# Drop timestamp column (not predictive)
df = df.drop(columns=['timestamp'])

# Separate features and target
X = df.drop(columns=['mental_health_status'])
y = df['mental_health_status']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

##  Step 4: Run AutoML (H2O)

In [4]:
#!pip install -f http://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o


In [5]:
import h2o
from h2o.automl import H2OAutoML

# Start H2O server
h2o.init()

# Load your dataset (from CSV or pandas)
import pandas as pd
df = pd.read_csv("university_mental_health_iot_dataset.csv")

# Convert to H2OFrame
hf = h2o.H2OFrame(df)
# Split H2OFrame into train/test
train, test = hf.split_frame(ratios=[0.8], seed=1234)


Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.27" 2025-04-15; OpenJDK Runtime Environment (build 11.0.27+6-post-Ubuntu-0ubuntu122.04); OpenJDK 64-Bit Server VM (build 11.0.27+6-post-Ubuntu-0ubuntu122.04, mixed mode, sharing)
  Starting server from /usr/local/lib/python3.11/dist-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmp1tjnpwgu
  JVM stdout: /tmp/tmp1tjnpwgu/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmp1tjnpwgu/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,02 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.7
H2O_cluster_version_age:,1 month and 25 days
H2O_cluster_name:,H2O_from_python_unknownUser_rjwvfh
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,20.87 Gb
H2O_cluster_total_cores:,12
H2O_cluster_allowed_cores:,12


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [6]:
# Set target and features
target = 'mental_health_status'
features = [col for col in hf.columns if col != target]

# Run H2O AutoML
aml = H2OAutoML(max_models=10, seed=1)
aml.train(x=features, y=target, training_frame=hf)


AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%


key,value
Stacking strategy,cross_validation
Number of base models (used / total),3/5
# GBM base models (used / total),1/1
# XGBoost base models (used / total),1/1
# DRF base models (used / total),1/2
# GLM base models (used / total),0/1
Metalearner algorithm,GLM
Metalearner fold assignment scheme,Random
Metalearner nfolds,5
Metalearner fold_column,

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
aic,-604.05475,83.617874,-640.18854,-565.37115,-681.3259,-657.25684,-476.13123
loglikelihood,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mae,0.0361055,0.0041119,0.0354431,0.0343156,0.0323022,0.0353631,0.0431036
mean_residual_deviance,0.0028874,0.0012051,0.0024174,0.0030626,0.0019241,0.0021313,0.0049019
mse,0.0028874,0.0012051,0.0024174,0.0030626,0.0019241,0.0021313,0.0049019
null_deviance,52.58296,1.9488599,53.20698,53.291176,50.7526,50.50533,55.15871
r2,0.9890618,0.0039649,0.99064,0.9884424,0.9923032,0.9914715,0.9824517
residual_deviance,0.5744466,0.2296114,0.4931432,0.5972025,0.3906006,0.4305197,0.9607667
rmse,0.0529103,0.010485,0.0491667,0.0553406,0.043865,0.0461659,0.0700134
rmsle,0.0353384,0.0046101,0.0348335,0.0316005,0.0335133,0.0334204,0.0433244


In [7]:
# Leaderboard
lb = aml.leaderboard
lb.head()




model_id,rmse,mse,mae,rmsle,mean_residual_deviance
StackedEnsemble_BestOfFamily_1_AutoML_1_20250522_223859,0.0543705,0.00295616,0.037112,0.0361435,0.00295616
StackedEnsemble_AllModels_1_AutoML_1_20250522_223859,0.0551984,0.00304687,0.0361689,0.0352999,0.00304687
XGBoost_3_AutoML_1_20250522_223859,0.0671478,0.00450883,0.0460693,0.0499574,0.00450883
GBM_2_AutoML_1_20250522_223859,0.0796308,0.00634106,0.0444935,0.0452951,0.00634106
GBM_1_AutoML_1_20250522_223859,0.0838215,0.00702604,0.0336966,0.0409696,0.00702604
GBM_4_AutoML_1_20250522_223859,0.0840459,0.00706371,0.0489104,0.0481978,0.00706371
DRF_1_AutoML_1_20250522_223859,0.0846033,0.00715771,0.0431758,0.0530616,0.00715771
GBM_3_AutoML_1_20250522_223859,0.0885012,0.00783247,0.0541864,0.0528387,0.00783247
XGBoost_1_AutoML_1_20250522_223859,0.0986695,0.00973568,0.0523327,0.0539768,0.00973568
XGBoost_2_AutoML_1_20250522_223859,0.117032,0.0136965,0.0758653,0.0722831,0.0136965


In [8]:
# Predict
preds = aml.leader.predict(test)
preds

stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%


predict
-0.0183733
-0.0144157
1.00955
-0.0188756
-0.0320194
1.02746
-0.000617565
-0.0273952
1.01975
-0.0241023


## **Activity:**

Compare H2O AutoML with Other Machine Learning Models Published on Kaggle Using This Dataset