## **Step 1: Import Libraries**
Import the libraries used throughout the notebook:

pandas for handling data,

numpy for numerical operations,

scikit-learn for preprocessing and evaluation.

In [38]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

# Step 2: Load and Clean the Dataset
Load the CSV file: autism_screening.csv

In [39]:
# Read the screening data into a DataFrame; the path is relative to the notebook's working directory.
df = pd.read_csv("autism_screening.csv")

In [58]:
# NOTE(review): the saved output below carries execution count 58 and already shows the
# cleaned, label-encoded frame (609 rows, 19 columns, categorical fields as int64), not the
# raw CSV. Re-run the notebook top to bottom to see the schema as it is at this point.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 609 entries, 0 to 703
Data columns (total 19 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   A1_Score         609 non-null    int64  
 1   A2_Score         609 non-null    int64  
 2   A3_Score         609 non-null    int64  
 3   A4_Score         609 non-null    int64  
 4   A5_Score         609 non-null    int64  
 5   A6_Score         609 non-null    int64  
 6   A7_Score         609 non-null    int64  
 7   A8_Score         609 non-null    int64  
 8   A9_Score         609 non-null    int64  
 9   A10_Score        609 non-null    int64  
 10  age              609 non-null    float64
 11  gender           609 non-null    int64  
 12  ethnicity        609 non-null    int64  
 13  jundice          609 non-null    int64  
 14  austim           609 non-null    int64  
 15  used_app_before  609 non-null    int64  
 16  result           609 non-null    float64
 17  relation         609 


**Remove irrelevant columns: 'age_desc' and 'contry_of_res'**

**Replace '?' with NaN and drop rows with missing values to clean the dataset.**

In [40]:
# Drop irrelevant columns, then treat '?' as missing and remove incomplete rows.
# The whole step is one chained expression that rebinds `df`, with no in-place mutation.
# errors='ignore' keeps the cell re-runnable: without it, a second run raises KeyError
# because the two columns are already gone from `df`.
df = (
    df.drop(columns=['age_desc', 'contry_of_res'], errors='ignore')
    .replace('?', np.nan)
    .dropna()
)

# **Step 3: Encode Categorical Features**
All categorical columns are label-encoded using LabelEncoder, converting text into numerical values (like Yes → 1, No → 0).

#  Step 4: Define Features and Target
X: All features (input variables)

y: Target column 'Class/ASD' (whether the person has ASD or not)

In [41]:
# Label-encode every object-dtype (text) column, keeping each fitted encoder
# in `label_encoders` under its column name.
text_columns = [name for name in df.columns if df[name].dtype == 'object']
label_encoders = {name: LabelEncoder() for name in text_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

# Separate the feature matrix from the target column.
# NOTE(review): 'result' stays in X. If it is the questionnaire total that the
# 'Class/ASD' label is derived from, it leaks the target -- confirm against the data source.
X = df.drop(columns='Class/ASD').to_numpy()
y = df['Class/ASD'].to_numpy().reshape(-1, 1)  # column vector, matching the MLP's (n, 1) output

# Step 5: Split the Data
We split the dataset into training (80%) and testing (20%) using train_test_split.

In [42]:
# Step 5: hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Standardize the Data
StandardScaler is used to bring all features to a similar scale (mean = 0, std = 1), which helps in faster training of neural networks.

In [43]:
# Step 6: Standardize features
# The scaler is fitted on the training rows only; the same fitted scaler is then
# applied to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Step 7: Define Activation Function
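We use the sigmoid function for both hidden and output layers: $\sigma(x) = \frac{1}{1 + e^{-x}}$. Its derivative can be written in terms of the output itself, $\sigma'(x) = \sigma(x)\,(1 - \sigma(x))$, which is the form the backpropagation code uses.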

In [44]:
# Step 7: Activation function
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise without overflow.

    The direct formula evaluates exp(-x), which overflows for large negative
    inputs and emits a RuntimeWarning. This version only ever exponentiates
    -|x| and picks the algebraically equivalent expression for each sign of x.

    Parameters
    ----------
    x : scalar or array-like of numbers

    Returns
    -------
    Scalar for scalar input, otherwise an ndarray with the shape of ``x``;
    values lie in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) is at most 1, so neither expression below can overflow.
    decay = np.exp(-np.abs(x))
    for_non_negative = 1 / (1 + decay)      # equals 1 / (1 + exp(-x)) when x >= 0
    for_negative = decay / (1 + decay)      # equals exp(x) / (1 + exp(x)) when x < 0
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays untouched.
    return np.where(x >= 0, for_non_negative, for_negative)[()]

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid's slope expressed via its output.

    Despite the name, ``x`` must already be a sigmoid *output* (an activation),
    not the pre-activation value: the training loop passes it the layer outputs.
    """
    complement = 1 - x
    return x * complement

# Step 8: Create the MLP Class
This class handles:

Initialization of weights & biases

Forward pass (input → hidden → output)

Training using backpropagation

Prediction

It’s a simple 2-layer neural network:

Input layer

1 Hidden layer with 5 neurons

Output layer with 1 neuron (for binary classification)



In [45]:
# Step 8: MLP class
class MLP:
    """Multilayer perceptron with one hidden layer and sigmoid activations.

    Weights start as standard-normal draws and biases as zeros. Training is
    full-batch gradient descent on the error ``output - y``.

    Parameters
    ----------
    input_dim : int
        Number of input features (columns of X).
    hidden_dim : int
        Number of hidden units.
    output_dim : int
        Number of output units.
    random_state : int or None, optional
        Seed for the weight initialisation. With the default ``None`` the
        weights are drawn from NumPy's global random state, exactly as before,
        so results vary between runs unless that global state is seeded.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, random_state=None):
        # A private RandomState gives reproducible weights without touching the global RNG.
        rng = np.random if random_state is None else np.random.RandomState(random_state)
        self.weights1 = rng.randn(input_dim, hidden_dim)
        self.bias1 = np.zeros((1, hidden_dim))
        self.weights2 = rng.randn(hidden_dim, output_dim)
        self.bias2 = np.zeros((1, output_dim))

    def forward(self, X):
        """Run input -> hidden -> output and return the output activations.

        The intermediate values (z1, a1, z2, a2) are stored on the instance
        because ``train`` reads ``a1`` during backpropagation.
        """
        self.z1 = X.dot(self.weights1) + self.bias1
        self.a1 = sigmoid(self.z1)
        self.z2 = self.a1.dot(self.weights2) + self.bias2
        self.a2 = sigmoid(self.z2)
        return self.a2

    def train(self, X, y, learning_rate=0.01, epochs=100):
        """Fit the weights in place with full-batch gradient descent.

        Parameters
        ----------
        X : ndarray of shape (n_samples, input_dim)
        y : array-like of shape (n_samples, output_dim); a 1-D array of length
            n_samples is also accepted and treated as a single output column.
        learning_rate : float
            Step size. Gradients are summed (not averaged) over the samples, so
            the effective step grows with the number of rows.
        epochs : int
            Number of passes over the full training set.
        """
        y = np.asarray(y)
        if y.ndim == 1:
            # A flat target would broadcast against the (n, output_dim) output into an
            # (n, n) error matrix and fail later with an opaque shape error.
            y = y.reshape(-1, 1)

        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)

            # Compute error
            error = output - y

            # Backpropagation: both deltas are computed before any weight is updated.
            d_output = error * sigmoid_derivative(output)
            d_hidden = d_output.dot(self.weights2.T) * sigmoid_derivative(self.a1)

            # Update weights and biases
            self.weights2 -= self.a1.T.dot(d_output) * learning_rate
            self.bias2 -= np.sum(d_output, axis=0, keepdims=True) * learning_rate
            self.weights1 -= X.T.dot(d_hidden) * learning_rate
            self.bias1 -= np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

    def predict(self, X):
        """Return 0/1 integer labels: 1 where the output activation exceeds 0.5."""
        output = self.forward(X)
        return (output > 0.5).astype(int)

# Step 9: Train the MLP
The model is trained using gradient descent for 300 epochs with a learning rate of 0.1.

# Step 10: Evaluate the Model
Predictions are made on the test set.

Accuracy is calculated using accuracy_score.

In [57]:
# Steps 9-10: train the MLP, then evaluate it on the held-out test set
SEED = 42  # same seed value as the train/test split
# The MLP draws its initial weights from NumPy's global random state; seeding it here
# makes the reported accuracy reproducible across runs.
np.random.seed(SEED)

input_dim = X_train.shape[1]
hidden_dim = 5
output_dim = 1

mlp_model = MLP(input_dim, hidden_dim, output_dim)
mlp_model.train(X_train, y_train, learning_rate=0.1, epochs=300)

y_pred = mlp_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(" MLP Model Accuracy:", acc)

 MLP Model Accuracy: 0.9918032786885246
