**IMPORTING LIBRARIES**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

**DATA PREPROCESSING**

In [None]:
# Location of the raw CSV; change it here if the file lives elsewhere.
DATA_PATH = "/New_data.csv"

df = pd.read_csv(DATA_PATH)

# '?' is treated as the missing-value marker and converted to a real NA.
df = df.replace('?', pd.NA)

# Categorical columns whose missing entries are filled with the column mode.
categorical_cols = ['ethnicity', 'relation']
for col in categorical_cols:
    # mode() returns a Series (several values can tie); keep the first one.
    mode_val = df[col].mode().iloc[0]
    # Assign the result back to the frame. Calling fillna(inplace=True) on
    # `df[col]` is chained assignment: with pandas Copy-on-Write it only
    # modifies a temporary object and leaves `df` untouched.
    df[col] = df[col].fillna(mode_val)

# Numerical columns: coerce to numbers (unparseable entries become NaN),
# then fill the gaps with the column mean.
numerical_cols = ['age']  # Add all your numerical columns here
df[numerical_cols] = df[numerical_cols].apply(pd.to_numeric, errors='coerce')
for col in numerical_cols:
    mean_val = df[col].mean()
    df[col] = df[col].fillna(mean_val)

# Encode the text labels of these columns as 0/1.
# Values that are not keys of the mapping are left unchanged by replace().
columns_to_replace = ['jundice', 'Class/ASD', 'gender', 'relation']
label_encoding = {
    'Self': 1, 'self': 1, 'Parent': 1, 'Healthcare': 1,
    'Health care professional': 1, 'Health Care Professional': 1,
    'Others': 0, 'Relative': 0, 'family member': 0,
    'f': 0, 'm': 1,
    'YES': 1, 'NO': 0, 'yes': 1, 'no': 0, 'Yes': 1, 'No': 0,
}
df[columns_to_replace] = df[columns_to_replace].replace(label_encoding)

# One-hot encode ethnicity. drop_first=True omits the first category, so a
# row with every ethnicity_* column at 0 belongs to that omitted category.
df = pd.get_dummies(df, columns=['ethnicity'], prefix='ethnicity', drop_first=True)
print(df)


      A1_Score  A2_Score  A3_Score  A4_Score  A5_Score  A6_Score  A7_Score  \
0            1         1         1         1         0         0         1   
1            1         1         0         1         0         0         0   
2            1         1         0         1         1         0         1   
3            1         1         0         1         0         0         1   
4            1         0         0         0         0         0         0   
...        ...       ...       ...       ...       ...       ...       ...   
2149         0         0         0         0         0         0         0   
2150         0         0         1         1         1         0         1   
2151         1         0         1         1         1         1         1   
2152         1         0         0         0         0         0         0   
2153         1         1         0         0         1         1         0   

      A8_Score  A9_Score  A10_Score  ...  ethnicity_South Asian

**DATA STANDARDIZATION**

In [None]:
# Target label and predictor matrix (everything except the label column).
Y = df['Class/ASD']
X = df.drop(columns='Class/ASD')

# Fit the scaler on every row of X, then transform those same rows.
scaler = StandardScaler().fit(X)
standardized_data = scaler.transform(X)
print(standardized_data)

[[ 0.76179885  1.07024218  1.04463493 ... -0.06105625 -0.02155152
  -0.16927284]
 [ 0.76179885  1.07024218 -0.95727222 ... -0.06105625 -0.02155152
  -0.16927284]
 [ 0.76179885  1.07024218 -0.95727222 ... -0.06105625 -0.02155152
  -0.16927284]
 ...
 [ 0.76179885 -0.93436796  1.04463493 ... -0.06105625 -0.02155152
  -0.16927284]
 [ 0.76179885 -0.93436796 -0.95727222 ... -0.06105625 -0.02155152
  -0.16927284]
 [ 0.76179885  1.07024218 -0.95727222 ... -0.06105625 -0.02155152
  -0.16927284]]


**TRAINING ADABOOST MODEL**

In [None]:
# Split the raw (unscaled) features first, so that the hold-out rows never
# contribute to the mean/variance used for standardization.
X = df.drop(columns='Class/ASD')
Y = df['Class/ASD']
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2
)

# Fit the scaler on the training rows only, then apply it to both partitions.
# This rebinds `scaler`, so any later scaler.transform(...) call uses the
# training-only statistics as well.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print(X.shape, X_train.shape, X_test.shape)

ada_clf = AdaBoostClassifier(n_estimators=50, random_state=42)

# Fit the model on the training data
ada_clf.fit(X_train, Y_train)

(2154, 32) (1723, 32) (431, 32)


**MODEL EVALUATION**

In [None]:
# Accuracy on training set
X_pred = ada_clf.predict(X_train)
training_data_accuracy = accuracy_score(X_pred, Y_train)
print('Accuracy of training data : ', training_data_accuracy)

# Accuracy on test set
Y_pred = ada_clf.predict(X_test)
testing_data_accuracy = accuracy_score(Y_test, Y_pred)
print('Accuracy of testing data: ',testing_data_accuracy)

Accuracy of training data :  1.0
Accuracy of testing data:  1.0


**MAKING PREDICTIONS**

In [None]:
def preprocess_input(input_data):
    """Encode the text labels in one raw record as 0/1 codes.

    String values are matched case-insensitively, ignoring surrounding
    whitespace, against a table that covers both the dataset-style labels
    ('f', 'm', 'self', 'Health care professional', 'family member', ...)
    and the long forms ('Female', 'Male', 'Healthcare Professional').

    Parameters
    ----------
    input_data : iterable
        Raw feature values for a single record.

    Returns
    -------
    tuple
        The values in their original order. Recognised labels are replaced
        by their integer code; non-string values and unrecognised strings
        are returned unchanged.
    """
    # Keys are lower-case because lookups are done on the normalised string.
    replacement_mapping = {
        # relation
        'self': 1, 'parent': 1, 'healthcare': 1,
        'healthcare professional': 1, 'health care professional': 1,
        'others': 0, 'relative': 0, 'family member': 0,
        # gender
        'f': 0, 'female': 0, 'm': 1, 'male': 1,
        # yes / no answers
        'yes': 1, 'no': 0,
    }

    def _encode(value):
        # Only strings can be labels; numbers (scores, age) pass straight through.
        if isinstance(value, str):
            return replacement_mapping.get(value.strip().lower(), value)
        return value

    return tuple(_encode(value) for value in input_data)

input_data = (1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 22, 'Female', 'No', 'Self')
# Optional ethnicity label, e.g. 'South Asian'. None leaves every ethnicity_*
# indicator at 0, which is the category omitted by drop_first=True.
input_ethnicity = None

preprocessed_input_data = preprocess_input(input_data)

# The scaler and model were fitted on the full one-hot encoded feature set,
# so the record must be expanded to that width before it can be transformed.
# NOTE(review): assumes input_data lists the non-ethnicity features in the
# same order as the training columns; verify against df.columns.
feature_names = list(scaler.feature_names_in_)
base_features = [name for name in feature_names if not name.startswith('ethnicity_')]
if len(preprocessed_input_data) != len(base_features):
    raise ValueError(
        f'Expected {len(base_features)} values ({base_features}), '
        f'got {len(preprocessed_input_data)}'
    )

# One-row frame with the training column layout; columns not supplied
# (the ethnicity indicators) are filled with 0.
input_row = pd.DataFrame([preprocessed_input_data], columns=base_features).reindex(
    columns=feature_names, fill_value=0
)
if input_ethnicity is not None:
    ethnicity_column = 'ethnicity_' + input_ethnicity
    # A label with no indicator column is either the omitted baseline or
    # unknown; in both cases every indicator stays at 0.
    if ethnicity_column in input_row.columns:
        input_row[ethnicity_column] = 1

# standardize the input data
std_data = scaler.transform(input_row)
print(std_data)

prediction = ada_clf.predict(std_data)
print(prediction)

if prediction[0] == 0:
    print('The person doesn\'t have Autism spectrum disorder')
else:
    print('The person has Autism spectrum disorder')

[[ 0.76179885  1.07024218  1.04463493  0.95816313 -1.1204898  -1.01779999
   0.88326966  0.93436796 -0.90512803 -1.23619133  0.87560129 -1.30875756
  -0.51605613  1.14940014]]
[0]
The person doesn't have Autism spectrum disorder


