In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [2]:
# Load dataset
file = 'C:/Users/mahes/OneDrive/Desktop/FinalYearProjects/Projects/ASD-Detection-App/app/data/dataset-asd.csv'
data = pd.read_csv(file)

In [3]:
# Drop the 'Case_No' column (not useful for training)
data = data.drop(columns=['Case_No'])

In [4]:
# print out multiple patients at the same time
data.loc[:10]

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,Age_Mons,Qchat-10-Score,Sex,Ethnicity,Jaundice,Family_mem_with_ASD,Who completed the test,Class/ASD Traits
0,0,0,0,0,0,0,1,1,0,1,28,3,f,middle eastern,yes,no,family member,No
1,1,1,0,0,0,1,1,0,0,0,36,4,m,White European,yes,no,family member,Yes
2,1,0,0,0,0,0,1,1,0,1,36,4,m,middle eastern,yes,no,family member,Yes
3,1,1,1,1,1,1,1,1,1,1,24,10,m,Hispanic,no,no,family member,Yes
4,1,1,0,1,1,1,1,1,1,1,20,9,f,White European,no,yes,family member,Yes
5,1,1,0,0,1,1,1,1,1,1,21,8,m,black,no,no,family member,Yes
6,1,0,0,1,1,1,0,0,1,0,33,5,m,asian,yes,no,family member,Yes
7,0,1,0,0,1,0,1,1,1,1,33,6,m,asian,yes,no,family member,Yes
8,0,0,0,0,0,0,1,0,0,1,36,2,m,asian,no,no,family member,No
9,1,1,1,0,1,1,0,1,1,1,22,8,m,south asian,no,no,Health Care Professional,Yes


In [5]:
# print out a description of the dataframe
data.describe()

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,Age_Mons,Qchat-10-Score
count,1055.0,1055.0,1055.0,1055.0,1055.0,1055.0,1055.0,1055.0,1055.0,1055.0,1055.0,1055.0
mean,0.565877,0.451185,0.402844,0.514692,0.524171,0.580095,0.649289,0.458768,0.490995,0.587678,27.977251,5.225592
std,0.501583,0.503532,0.492632,0.505681,0.499652,0.505174,0.477419,0.498533,0.502049,0.494409,8.741244,2.936764
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,3.0
50%,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,30.0,5.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,36.0,8.0
max,3.0,3.0,2.0,3.0,1.0,4.0,1.0,1.0,2.0,2.0,144.0,19.0


In [6]:
# Encode categorical features
categorical_cols = ['Sex', 'Ethnicity', 'Jaundice', 'Family_mem_with_ASD', 'Who completed the test']
data = pd.get_dummies(data, columns=categorical_cols)

In [7]:
data.loc[:10]

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,...,Ethnicity_south asian,Jaundice_no,Jaundice_yes,Family_mem_with_ASD_no,Family_mem_with_ASD_yes,Who completed the test_Health Care Professional,Who completed the test_Health care professional,Who completed the test_Others,Who completed the test_Self,Who completed the test_family member
0,0,0,0,0,0,0,1,1,0,1,...,False,False,True,True,False,False,False,False,False,True
1,1,1,0,0,0,1,1,0,0,0,...,False,False,True,True,False,False,False,False,False,True
2,1,0,0,0,0,0,1,1,0,1,...,False,False,True,True,False,False,False,False,False,True
3,1,1,1,1,1,1,1,1,1,1,...,False,True,False,True,False,False,False,False,False,True
4,1,1,0,1,1,1,1,1,1,1,...,False,True,False,False,True,False,False,False,False,True
5,1,1,0,0,1,1,1,1,1,1,...,False,True,False,True,False,False,False,False,False,True
6,1,0,0,1,1,1,0,0,1,0,...,False,False,True,True,False,False,False,False,False,True
7,0,1,0,0,1,0,1,1,1,1,...,False,False,True,True,False,False,False,False,False,True
8,0,0,0,0,0,0,1,0,0,1,...,False,True,False,True,False,False,False,False,False,True
9,1,1,1,0,1,1,0,1,1,1,...,True,True,False,True,False,True,False,False,False,False


In [8]:
# Encode target variable ('Class/ASD Traits ') → Convert 'Yes' to 1 and 'No' to 0
data['Class/ASD Traits '] = data['Class/ASD Traits '].map({'Yes': 1, 'No': 0})

In [10]:
# Define feature (X) and target (Y) variables
X = data.drop(columns=['Class/ASD Traits '])  # All features except target
Y = data['Class/ASD Traits ']

In [11]:
# print the new categorical column labels
X.columns.values

array(['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10',
       'Age_Mons', 'Qchat-10-Score', 'Sex_f', 'Sex_m',
       'Ethnicity_Hispanic', 'Ethnicity_Latino',
       'Ethnicity_Native Indian', 'Ethnicity_Others',
       'Ethnicity_Pacifica', 'Ethnicity_White European',
       'Ethnicity_asian', 'Ethnicity_black', 'Ethnicity_middle eastern',
       'Ethnicity_mixed', 'Ethnicity_south asian', 'Jaundice_no',
       'Jaundice_yes', 'Family_mem_with_ASD_no',
       'Family_mem_with_ASD_yes',
       'Who completed the test_Health Care Professional',
       'Who completed the test_Health care professional',
       'Who completed the test_Others', 'Who completed the test_Self',
       'Who completed the test_family member'], dtype=object)

In [12]:
# print an example patient from the categorical data
X.loc[1]

A1                                                     1
A2                                                     1
A3                                                     0
A4                                                     0
A5                                                     0
A6                                                     1
A7                                                     1
A8                                                     0
A9                                                     0
A10                                                    0
Age_Mons                                              36
Qchat-10-Score                                         4
Sex_f                                              False
Sex_m                                               True
Ethnicity_Hispanic                                 False
Ethnicity_Latino                                   False
Ethnicity_Native Indian                            False
Ethnicity_Others               

In [13]:
Y.iloc[:10]

0    0
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    0
9    1
Name: Class/ASD Traits , dtype: int64

In [15]:
# Train-test split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [16]:
# Convert to NumPy arrays
X_train = np.array(X_train, dtype=np.float32)
X_test = np.array(X_test, dtype=np.float32)
Y_train = np.array(Y_train, dtype=np.float32)
Y_test = np.array(Y_test, dtype=np.float32)

### Building the Network - Keras


In [17]:
# Define the neural network model
def create_model():
    model = Sequential()
    model.add(Dense(16, input_dim=X_train.shape[1], activation='relu'))  # Adjusted input size
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification (output 1 neuron)
    
    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
    return model

### Training the Network

Now it's time for the fun! Training a Keras model is as simple as calling model.fit().

In [18]:
# Create and train the model
model = create_model()
print(model.summary())

# Train the model
model.fit(X_train, Y_train, epochs=50, batch_size=10, verbose=1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None
Epoch 1/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6513 - loss: 0.6510
Epoch 2/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6748 - loss: 0.5224
Epoch 3/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8639 - loss: 0.3462
Epoch 4/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9419 - loss: 0.2223
Epoch 5/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9529 - loss: 0.1686
Epoch 6/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9580 - loss: 0.1416
Epoch 7/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9664 - loss: 0.1194
Epoch 8/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9575 - loss: 0.1090
Epoch 9/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1fccc254e80>

In [20]:
# Make predictions
predictions = (model.predict(X_test) > 0.5).astype(int)  # Convert probabilities to binary (0/1)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


In [21]:
# Print results
print('Results for Binary Classification Model:')
print('Accuracy:', accuracy_score(Y_test, predictions))
print(classification_report(Y_test, predictions))

Results for Binary Classification Model:
Accuracy: 1.0
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        67
         1.0       1.00      1.00      1.00       144

    accuracy                           1.00       211
   macro avg       1.00      1.00      1.00       211
weighted avg       1.00      1.00      1.00       211

