# Training the model


In [12]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib
from sklearn.metrics import accuracy_score, classification_report

# Train a random-forest classifier on the child autism screening CSV, persist
# the fitted model / scaler / label encoders with joblib, and print hold-out
# metrics.

# Load the dataset. '?' is treated as the missing-value marker and parsed
# straight to NaN. The earlier `data.replace('?', None)` was version-dependent:
# before pandas 1.4 an explicit `value=None` was ignored and the call
# forward-filled the '?' cells instead of blanking them, so dropna() below
# would not have removed those rows.
data = pd.read_csv('csv_result-Autism-Child-Data.csv', na_values='?')

# Preprocessing: drop every row that has at least one missing value
data = data.dropna()

# Initialize label encoders. Each one keeps its own module-level name because
# it is saved to its own file further down.
le_gender = LabelEncoder()
le_ethnicity = LabelEncoder()
le_jundice = LabelEncoder()
le_austim = LabelEncoder()
le_used_app_before = LabelEncoder()
le_relation = LabelEncoder()
le_class = LabelEncoder()

# Column name -> encoder. Column names are used exactly as spelled here
# (including 'jundice', 'austim'), so they must match the CSV header.
encoders = {
    'gender': le_gender,
    'ethnicity': le_ethnicity,
    'jundice': le_jundice,
    'austim': le_austim,
    'used_app_before': le_used_app_before,
    'relation': le_relation,
    'Class/ASD': le_class,
}

# Encode categorical columns in place as integer codes
for column, encoder in encoders.items():
    data[column] = encoder.fit_transform(data[column])

# Features and target: the target plus 'id', 'result', 'age_desc' and
# 'contry_of_res' are excluded from the feature matrix.
X = data.drop(columns=['id', 'Class/ASD', 'result', 'age_desc', 'contry_of_res'])
y = data['Class/ASD']

# Split the dataset into training (80%) and testing (20%) sets; the fixed
# random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: statistics are fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Random Forest Classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Save the trained model, the scaler and every label encoder.
# File name -> fitted object; written in this order to the working directory.
artifacts = {
    'autism_model.pkl': clf,
    'scaler.pkl': scaler,
    'le_gender.pkl': le_gender,
    'le_ethnicity.pkl': le_ethnicity,
    'le_jundice.pkl': le_jundice,
    'le_austim.pkl': le_austim,
    'le_used_app_before.pkl': le_used_app_before,
    'le_relation.pkl': le_relation,
    'le_class.pkl': le_class,
}
for filename, fitted_object in artifacts.items():
    joblib.dump(fitted_object, filename)

# Evaluate the model on the held-out test split
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(f"Classification Report: \n{classification_report(y_test, y_pred)}")

Accuracy: 0.92
Classification Report: 
              precision    recall  f1-score   support

           0       1.00      0.87      0.93        30
           1       0.83      1.00      0.91        20

    accuracy                           0.92        50
   macro avg       0.92      0.93      0.92        50
weighted avg       0.93      0.92      0.92        50

