# Autism Screening Tool — ML Training (Google Colab)

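This notebook runs your **age-specific training pipeline** in Google Colab: you upload the project ZIPs, prepare the datasets, train the models, and download the trained models and results.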


In [None]:
# Step 1: Install dependencies
!pip install -q pandas numpy scikit-learn matplotlib seaborn scipy joblib imbalanced-learn

print('OK: packages installed')


In [None]:
# Step 2: Upload ZIPs
# Upload: ML_TRAINING.zip, Online_Datasets.zip, (optional) SAMPLE_DATASETS.zip
#
# Every uploaded file whose name ends in `.zip` is extracted into /content.
# Other files are skipped, and so are archives that cannot be read as ZIPs.
from google.colab import files
import zipfile
import os

extract_dir = '/content'

print('Upload ML_TRAINING.zip, Online_Datasets.zip, and optionally SAMPLE_DATASETS.zip')
uploaded = files.upload()

# Track what was actually extracted so the final message reflects reality.
extracted_names = []
for name in uploaded.keys():
    if not name.lower().endswith('.zip'):
        print('Skipping non-zip:', name)
        continue
    try:
        with zipfile.ZipFile(name, 'r') as z:
            z.extractall(extract_dir)
    except zipfile.BadZipFile:
        # A truncated upload or a renamed non-zip file should not abort the
        # whole cell; report it and carry on with the remaining archives.
        print('Skipping unreadable zip (corrupt or incomplete upload?):', name)
        continue
    extracted_names.append(name)
    print('Extracted:', name)

if extracted_names:
    print('OK: uploads extracted to /content')
else:
    print('WARNING: no ZIP archive was extracted. Re-run this cell and upload the ZIPs.')


In [None]:
# Step 3: Fix folder names / paths and verify expected files
import os
from pathlib import Path

# Sometimes ZIPs create an extra top-level folder, so the folders are searched
# for recursively instead of being assumed to sit directly under /content.
content_root = Path('/content')


def _find_dirs(root, name):
    """Return all directories called `name` below `root`, shortest path first.

    Only real directories are returned, and anything inside a `__MACOSX`
    folder is ignored: ZIPs created on macOS contain a `__MACOSX/<name>/`
    shadow tree of metadata files that would otherwise look like a valid match
    (and can even win the shortest-path ordering).
    """
    matches = [
        p for p in root.rglob(name)
        if p.is_dir() and '__MACOSX' not in p.parts
    ]
    return sorted(matches, key=lambda p: len(str(p)))


# ML_TRAINING is mandatory: stop here if it cannot be found.
candidates = _find_dirs(content_root, 'ML_TRAINING')
if len(candidates) == 0:
    raise FileNotFoundError('Could not find ML_TRAINING folder under /content. Check your ZIP contents.')

# Prefer the shortest path
ml_training_dir = candidates[0]
print('ML_TRAINING found at:', ml_training_dir)

# Find Online Datasets folder (None when absent)
online_candidates = _find_dirs(content_root, 'Online Datasets')
online_dir = online_candidates[0] if online_candidates else None
print('Online Datasets found at:', online_dir)

# Find SAMPLE_DATASETS folder (optional, None when absent)
sample_candidates = _find_dirs(content_root, 'SAMPLE_DATASETS')
sample_dir = sample_candidates[0] if sample_candidates else None
print('SAMPLE_DATASETS found at:', sample_dir)

# Change directory to ML_TRAINING so the relative script paths used by the
# later steps resolve.
os.chdir(ml_training_dir)
print('CWD:', os.getcwd())

# Files the later steps rely on, relative to the ML_TRAINING folder.
required = [
    'config.py',
    'preprocessing/prepare_age_2_3_5_data.py',
    'training/train_age_2_3_5_model.py',
    'utils/data_augmentation.py',
]

missing = []
for f in required:
    found = Path(f).exists()
    print(('OK ' if found else 'MISSING '), f)
    if not found:
        missing.append(f)

if missing:
    print('WARNING:', len(missing), 'required file(s) missing; the following steps are likely to fail.')


In [None]:
# Step 4: Prepare Age 2–3.5 datasets (creates train + test CSVs)
# If you did NOT upload SAMPLE_DATASETS.zip, the test set may be empty.
#
# The script path is relative, so run this only after Step 3 has changed the
# working directory to the ML_TRAINING folder.
# NOTE(review): a failing `!` command does not raise in the notebook, so check
# the printed output before moving on to training.
!python preprocessing/prepare_age_2_3_5_data.py


In [None]:
# Step 5: Train Age 2–3.5 model (trains + evaluates automatically)
#
# The script path is relative, so run this only after Step 3 has changed the
# working directory to the ML_TRAINING folder.
# NOTE(review): presumably this consumes the CSVs written by Step 4 — confirm
# against the training script.
!python training/train_age_2_3_5_model.py


In [None]:
# Step 6: (Optional) Prepare + Train the other age-group models
# NOTE: These require you to have prepared game CSVs:
# - SAMPLE_DATASETS/prepared/game_age_3_5_5_5_frog_jump.csv
# - SAMPLE_DATASETS/prepared/game_age_5_5_6_9_color_shape.csv
#
# All script paths are relative to the ML_TRAINING folder (the working
# directory set in Step 3). None of these four scripts is covered by the
# Step 3 `required` check, so a missing script only shows up as an error here.

# Prepare auxiliary questionnaire datasets
!python preprocessing/prepare_age_3_5_5_5_data.py
!python preprocessing/prepare_age_5_5_6_9_data.py

# Train (will warn/fail if the game CSVs are not present)
# NOTE(review): the commands run one after another regardless of earlier
# failures — read the output of each before trusting the resulting models.
!python training/train_age_3_5_5_5_model.py
!python training/train_age_5_5_6_9_model.py


In [None]:
# Step 7: Download trained models + results
#
# Packs every file under `models/` and `output/` (relative to the current
# working directory, i.e. the ML_TRAINING folder after Step 3) into one ZIP
# and hands it to the browser for download.
from google.colab import files
import os
import zipfile

zip_name = 'trained_models_and_results.zip'
result_dirs = ('models', 'output')

files_added = 0
# ZIP_DEFLATED compresses the entries; the zipfile default (ZIP_STORED) would
# only store them uncompressed.
with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as z:
    for result_dir in result_dirs:
        if not os.path.isdir(result_dir):
            # os.walk() on a missing folder silently yields nothing, so say so.
            print('WARNING: folder not found, nothing added from:', result_dir)
            continue
        for root, _subdirs, fs in os.walk(result_dir):
            for f in fs:
                z.write(os.path.join(root, f))
                files_added += 1

print('Created:', zip_name)
print('Files added:', files_added)
if files_added == 0:
    print('WARNING: the archive is empty. Did the training steps run from the ML_TRAINING folder?')
files.download(zip_name)


# Autism Screening Tool — ML Training (Google Colab)

This notebook runs your **age-specific training pipeline** in Colab.

## What you will upload
- `ML_TRAINING.zip` (zip of your `ML_TRAINING/` folder)
- `Online_Datasets.zip` (zip of your `Online Datasets/` folder)
- (Optional) `SAMPLE_DATASETS.zip` (zip of your `SAMPLE_DATASETS/` folder, if you want to include hospital test data)

## What this notebook will produce
- Trained models saved under: `ML_TRAINING/models/`
- Training outputs saved under: `ML_TRAINING/output/`
- A downloadable zip: `trained_models_and_results.zip`


In [None]:
# Step 1: Install dependencies
!pip install -q pandas numpy scikit-learn matplotlib seaborn scipy joblib imbalanced-learn

print('OK: packages installed')
