In [1]:
# ==============================
# STEP 0: INSTALL & IMPORTS
# ==============================

import warnings
warnings.filterwarnings("ignore")
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow warnings

import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv1D, MaxPooling1D, Dense,
    Dropout, Flatten, BatchNormalization
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score

In [2]:
# ==============================
# STEP 1: LOAD DATA
# ==============================

FILE_PATH = "ABIDEII_Composite_Phenotypic.csv"

df = pd.read_csv(FILE_PATH, encoding='latin1', engine='python')
print("Dataset shape:", df.shape)
df.head()

Dataset shape: (1114, 348)


Unnamed: 0,SITE_ID,SUB_ID,NDAR_GUID,DX_GROUP,PDD_DSM_IV_TR,ASD_DSM_5,AGE_AT_SCAN,SEX,HANDEDNESS_CATEGORY,HANDEDNESS_SCORES,...,ADI_R_C3_TOTAL,ADI_R_C4_REPETITIVE_USE_OBJECTS,ADI_R_C4_HIGHER,ADI_R_C4_UNUSUAL_SENSORY_INTERESTS,ADI_R_C4_TOTAL,ADI_R_D_AGE_PARENT_NOTICED,ADI_R_D_AGE_FIRST_SINGLE_WORDS,ADI_R_D_AGE_FIRST_PHRASES,ADI_R_D_AGE_WHEN_ABNORMALITY,ADI_R_D_INTERVIEWER_JUDGMENT
0,ABIDEII-BNI_1,29006,,1,,,48.0,1,1.0,,...,,,,,,,,,,
1,ABIDEII-BNI_1,29007,,1,,,41.0,1,1.0,,...,,,,,,,,,,
2,ABIDEII-BNI_1,29008,,1,,,59.0,1,1.0,,...,,,,,,,,,,
3,ABIDEII-BNI_1,29009,,1,,,57.0,1,1.0,,...,,,,,,,,,,
4,ABIDEII-BNI_1,29010,,1,,,45.0,1,1.0,,...,,,,,,,,,,


In [3]:
# ==============================
# STEP 2: TARGET & FEATURE SPLIT
# ==============================

TARGET = "DX_GROUP"   # 1 = ASD, 2 = Control

y = df[TARGET].map({1: 1, 2: 0})

drop_cols = ["DX_GROUP", "SUB_ID", "FILE_ID", "SITE_ID"]
X = df.drop(columns=[c for c in drop_cols if c in df.columns])

print("X shape:", X.shape)
print("y distribution:\n", y.value_counts())

X shape: (1114, 345)
y distribution:
 DX_GROUP
0    593
1    521
Name: count, dtype: int64


In [4]:
# ==============================
# STEP 3: PREPROCESSING
# ==============================

num_cols = X.select_dtypes(include=[np.number]).columns
cat_cols = X.select_dtypes(include=["object"]).columns

num_imputer = SimpleImputer(strategy="median")
cat_imputer = SimpleImputer(strategy="most_frequent")

# --- Impute numerical columns ---
if not num_cols.empty:
    for col in num_cols:
        median_val = X[col].median()
        if pd.isna(median_val):
            X[col] = X[col].fillna(0)
        else:
            X[col] = X[col].fillna(median_val)

# --- Impute categorical columns ---
if not cat_cols.empty:
    for col in cat_cols:
        col_values = X[col].astype(str).values.reshape(-1, 1)
        X.loc[:, col] = pd.Series(cat_imputer.fit_transform(col_values).ravel(), index=X.index)

    for col in cat_cols:
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))

# Scale numerical columns
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

# Reshape for Conv1D (samples, timesteps, channels)
X = X.values.reshape(X.shape[0], X.shape[1], 1)

print("CNN input shape:", X.shape)

CNN input shape: (1114, 345, 1)


In [5]:
# ==============================
# STEP 4: TRAIN / TEST SPLIT
# ==============================

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

print("Train:", X_train.shape)
print("Test:", X_test.shape)

Train: (891, 345, 1)
Test: (223, 345, 1)


In [6]:
# ==============================
# STEP 5: BUILD 1D CNN MODEL
# ==============================

model = Sequential([
    Conv1D(64, kernel_size=3, activation="relu", input_shape=X.shape[1:]),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(128, kernel_size=3, activation="relu"),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(256, kernel_size=3, activation="relu"),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

model.summary()

In [7]:
# ==============================
# STEP 6: SETUP CALLBACKS
# ==============================

callbacks = [
    EarlyStopping(
        monitor="val_loss",
        patience=8,
        restore_best_weights=True
    ),
    ModelCheckpoint(
        "best_1d_cnn_asd_model.keras",
        monitor="val_accuracy",
        save_best_only=True,
        verbose=1
    )
]

print("✅ Callbacks configured!")

✅ Callbacks configured!


In [8]:
# ==============================
# STEP 7: TRAIN MODEL
# ==============================

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/100
[1m22/23[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 46ms/step - accuracy: 0.6761 - loss: 0.8239
Epoch 1: val_accuracy improved from None to 0.44134, saving model to best_1d_cnn_asd_model.keras

Epoch 1: finished saving model to best_1d_cnn_asd_model.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - accuracy: 0.7570 - loss: 0.5815 - val_accuracy: 0.4413 - val_loss: 1.1213
Epoch 2/100
[1m22/23[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 64ms/step - accuracy: 0.8836 - loss: 0.2932
Epoch 2: val_accuracy did not improve from 0.44134
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 69ms/step - accuracy: 0.9031 - loss: 0.2496 - val_accuracy: 0.4413 - val_loss: 0.8314
Epoch 3/100
[1m22/23[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 49ms/step - accuracy: 0.8972 - loss: 0.2255
Epoch 3: val_accuracy did not improve from 0.44134
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - ac

In [9]:
# ==============================
# STEP 8: EVALUATION
# ==============================

y_pred_prob = model.predict(X_test).ravel()
y_pred = (y_pred_prob > 0.5).astype(int)

acc = accuracy_score(y_test, y_pred) * 100
roc = roc_auc_score(y_test, y_pred_prob)

print(f"Accuracy: {acc:.2f}%")
print(f"ROC-AUC: {roc:.4f}")

print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step
Accuracy: 98.21%
ROC-AUC: 0.9989

Classification Report:

              precision    recall  f1-score   support

           0       0.98      0.99      0.98       119
           1       0.99      0.97      0.98       104

    accuracy                           0.98       223
   macro avg       0.98      0.98      0.98       223
weighted avg       0.98      0.98      0.98       223



In [10]:
# ==============================
# STEP 9: SAVE MODEL
# ==============================

model.save("final_1d_cnn_asd_model.keras")
print("✅ Model saved as final_1d_cnn_asd_model.keras")

✅ Model saved as final_1d_cnn_asd_model.keras
