Load Data:

In [4]:
import numpy as np
import pandas as pd

data = pd.read_csv('./autism/Toddler Autism dataset July 2018.csv')

selected_columns = ['A1', 'A2', 'A3', 'A4', 'Age_Mons', 'Qchat-10-Score' , 'Family_mem_with_ASD', 'Class/ASD Traits ']
df = data[selected_columns]


print(df.head())


   A1  A2  A3  A4  Age_Mons  Qchat-10-Score Family_mem_with_ASD  \
0   0   0   0   0        28               3                  no   
1   1   1   0   0        36               4                  no   
2   1   0   0   0        36               4                  no   
3   1   1   1   1        24              10                  no   
4   1   1   0   1        20               9                 yes   

  Class/ASD Traits   
0                No  
1               Yes  
2               Yes  
3               Yes  
4               Yes  


In [8]:
# Encode categorical variables
data['Family_mem_with_ASD'] = data['Family_mem_with_ASD'].map({'yes': 1, 'no': 0})
data['Class/ASD Traits '] = data['Class/ASD Traits '].map({'Yes': 1, 'No': 0})

# Normalize numerical columns
numerical_cols = ['Age_Mons', 'Qchat-10-Score']
data[numerical_cols] = data[numerical_cols].apply(pd.to_numeric, errors='coerce')
data[numerical_cols] = (data[numerical_cols] - data[numerical_cols].mean()) / data[numerical_cols].std()

# Select features and labels
X = df[selected_columns[:-1]].apply(pd.to_numeric, errors='coerce').fillna(0).values
y = df[selected_columns[-1]].map({'Yes': 1, 'No': 0}).values.reshape(-1, 1)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def log_loss(y, y_hat):
    m = len(y)
    return - (1/m) * np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

def logistic_regression(X, y, learning_rate=0.01, epochs=1000, regularization=0.01):
    m , n = X.shape
    W = np.zeros((n, 1))  
    b = 0                 
    for epoch in range(epochs):
        z = np.dot(X, W) + b
        y_hat = sigmoid(z)
         # Compute loss with L2 regularization
        loss = log_loss(y, y_hat) + (regularization / (2 * m)) * np.sum(W**2)

        dW = (1/m) * np.dot(X.T, (y_hat - y)) + (regularization / m) * W
        db = (1/m) * np.sum(y_hat - y)
        W = W - learning_rate * dW
        b = b - learning_rate * db

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")
    return W, b

# Train logistic regression model
W, b = logistic_regression(X, y)


# Predict new data
def predict(X, W, b):
    z = np.dot(X, W) + b
    y_hat = sigmoid(z)
    return (y_hat >= 0.5).astype(int)

Epoch 0, Loss: 0.6931471805599453
Epoch 100, Loss: 0.2796471806009027
Epoch 200, Loss: 0.23079770533613805
Epoch 300, Loss: 0.20965841168715504
Epoch 400, Loss: 0.19739856255147875
Epoch 500, Loss: 0.18921122283313702
Epoch 600, Loss: 0.18326014775904606
Epoch 700, Loss: 0.17867956158062454
Epoch 800, Loss: 0.17500378313596976
Epoch 900, Loss: 0.1719585089682983


In [9]:
X_new = np.array([[1, 1, 0, 1, 24, 7, 0]])  # New sample
X_new[:, 4:6] = (X_new[:, 4:6] - data[numerical_cols].mean().values) / data[numerical_cols].std().values  # Normalize
prediction = predict(X_new, W, b)
print("Prediction for new data:", "Autistic" if prediction[0] == 1 else "Not Autistic")

Prediction for new data: Autistic
