In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

In [2]:
# Load the training set, then show its first rows and the full list of column names.
data = pd.read_csv('train.csv')
print("Dataset preview:", data.head(), sep="\n")
print("Columns:", list(data.columns))

Dataset preview:
   ID  A1_Score  A2_Score  A3_Score  A4_Score  A5_Score  A6_Score  A7_Score  \
0   1         1         0         1         0         1         0         1   
1   2         0         0         0         0         0         0         0   
2   3         1         1         1         1         1         1         1   
3   4         0         0         0         0         0         0         0   
4   5         0         0         0         0         0         0         0   

   A8_Score  A9_Score  ...  gender       ethnicity jaundice austim  \
0         0         1  ...       f               ?       no     no   
1         0         0  ...       m               ?       no     no   
2         1         1  ...       m  White-European       no    yes   
3         0         0  ...       f               ?       no     no   
4         0         0  ...       m               ?       no     no   

   contry_of_res used_app_before     result     age_desc  relation Class/ASD  
0       

In [7]:
# Step 2: Preprocessing
# Name of the column the model is trained to predict; the remaining columns
# of `data` are used as features.
# NOTE(review): the dataset preview also lists a 'Class/ASD' column. Confirm
# that 'austim' is the intended label and not a feature -- TODO verify against
# the dataset description.
target_column = 'austim'


In [8]:
# Split the frame into the label series (y) and the feature matrix (X).
# `drop` returns a new frame, so `data` itself is left untouched.
y = data[target_column]
X = data.drop(target_column, axis="columns")

In [9]:
# Columns stored with object dtype are treated as categorical features.
object_typed_features = X.select_dtypes(include='object')
categorical_cols = object_typed_features.columns
# Filled by the encoding loop: column name -> fitted LabelEncoder.
label_encoders = dict()

In [10]:
# Replace every categorical column with integer codes, keeping one fitted
# encoder per column so the same mapping can be re-applied later.
for column_name in categorical_cols:
    column_encoder = LabelEncoder()
    encoded_values = column_encoder.fit_transform(X[column_name])
    X[column_name] = encoded_values
    label_encoders[column_name] = column_encoder

In [11]:
# Standardise every int64/float64 column. This runs after the label-encoding
# step, so encoded categorical columns are scaled too whenever their codes are
# stored as int64 (presumably platform-dependent -- TODO confirm).
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns
scaler = StandardScaler().fit(X[numerical_cols])
X[numerical_cols] = scaler.transform(X[numerical_cols])

In [12]:
# Encode the target only when it holds object (string) labels; otherwise
# `target_encoder` stays None so later cells know no decoding is needed.
target_encoder = LabelEncoder() if y.dtype == 'object' else None
if target_encoder is not None:
    y = target_encoder.fit_transform(y)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [14]:
# Random forest classifier (any other scikit-learn classifier, e.g.
# LogisticRegression or SVM, could be substituted here).
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)


In [15]:
# Score the fitted model on the held-out rows and print the summary metrics.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", report_text)


Accuracy: 0.81875
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.97      0.90       128
           1       0.64      0.22      0.33        32

    accuracy                           0.82       160
   macro avg       0.73      0.59      0.61       160
weighted avg       0.79      0.82      0.78       160



In [16]:
# Write each fitted feature encoder to its own pickle file, named after its column.
for column_name in label_encoders:
    encoder_path = f'{column_name}_label_encoder.pkl'
    with open(encoder_path, 'wb') as encoder_file:
        pickle.dump(label_encoders[column_name], encoder_file)


In [17]:
# Persist the fitted scaler so the same standardisation can be re-applied later.
with open('scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


In [18]:
# The target encoder only exists when the label column needed encoding;
# skip the file entirely otherwise.
if target_encoder:
    with open('target_encoder.pkl', mode='wb') as target_encoder_file:
        pickle.dump(target_encoder, target_encoder_file)

In [19]:
# Persist the trained classifier.
with open('autism_model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

In [20]:
# Confirmation message; printed unconditionally when this cell runs.
print("Model and encoders saved successfully!")


Model and encoders saved successfully!


In [32]:
def predict_autism(new_data):
    """Predict the target label for new samples using the pickled artifacts.

    The preprocessing mirrors the training cells: categorical columns are
    label-encoded first, then the columns in ``numerical_cols`` are scaled,
    and finally the saved model is applied.

    Parameters
    ----------
    new_data : pandas.DataFrame
        Raw samples with the same feature columns the training frame ``X``
        had before encoding and scaling. The frame is copied, so the caller's
        object is left unmodified and can be passed again.

    Returns
    -------
    array-like
        One prediction per row of ``new_data``. When a target encoder was
        fitted (``target_encoder`` is set), predictions are mapped back to the
        original labels; otherwise the model's raw output is returned.

    Raises
    ------
    FileNotFoundError
        If one of the pickle files written by the save cells is missing.
    ValueError
        Raised by ``LabelEncoder.transform`` when a categorical column
        contains a value that was not seen during fitting.

    Notes
    -----
    Relies on the notebook-level names ``categorical_cols``,
    ``numerical_cols`` and ``target_encoder``.

    SECURITY: ``pickle.load`` can execute arbitrary code. Only load artifact
    files that this notebook produced itself.
    """
    # Work on a copy: the original version overwrote the caller's columns in
    # place, so a second call on the same frame saw already-encoded values.
    features = new_data.copy()

    # Re-apply each column's saved label encoding.
    for col in categorical_cols:
        with open(f'{col}_label_encoder.pkl', 'rb') as f:
            encoder = pickle.load(f)
        features[col] = encoder.transform(features[col])

    # The scaler was previously referenced without ever being loaded, which
    # raised NameError; read it back from the file written by the save cell.
    with open('scaler.pkl', 'rb') as f:
        loaded_scaler = pickle.load(f)
    features[numerical_cols] = loaded_scaler.transform(features[numerical_cols])

    with open('autism_model.pkl', 'rb') as f:
        loaded_model = pickle.load(f)

    prediction = loaded_model.predict(features)

    # Decode back to the original labels only when the target was encoded.
    if target_encoder:
        with open('target_encoder.pkl', 'rb') as f:
            loaded_target_encoder = pickle.load(f)
        prediction = loaded_target_encoder.inverse_transform(prediction)

    return prediction
    