In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pickle

# Try to import XGBoost (optional)
try:
    from xgboost import XGBClassifier
    xgb_installed = True
except ImportError:
    xgb_installed = False
    print("XGBoost not installed. Skipping XGBoost model.")

# Load the data
data = pd.read_csv(r'Autism_Adolescent_Preprocessed.csv')

# Drop missing values
data.dropna(inplace=True)

# Extract features and target
data_raw = data['Class']
features_raw = data[['age', 'gender', 'ethnicity', 'jundice', 'autism', 'relation',
                     'contry_of_res', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score',
                     'A5_Score', 'A6_Score', 'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score', 'result']]

# Apply both MinMaxScaler and StandardScaler
minmax_scaler = MinMaxScaler()
standard_scaler = StandardScaler()

num_features = ['age', 'result']
features_transformed = pd.DataFrame(data=features_raw)

# Apply MinMaxScaler
features_transformed[num_features] = minmax_scaler.fit_transform(features_raw[num_features])

# Apply StandardScaler
features_transformed[num_features] = standard_scaler.fit_transform(features_transformed[num_features])

# One-Hot Encoding
features_final = pd.get_dummies(features_transformed)

# Encode target variable
data_classes = data_raw.apply(lambda x: 1 if x == 'YES' else 0)

# Split data
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(features_final, data_classes, test_size=0.2, random_state=1)

# Define classification models
models = {
    "Logistic Regression": LogisticRegression(max_iter=200, random_state=1),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=1),
    "SVM": SVC(kernel='linear', probability=True, random_state=1),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=1),
    "KNN": KNeighborsClassifier(n_neighbors=5)
}

if xgb_installed:
    models["XGBoost"] = XGBClassifier(n_estimators=100, random_state=1, use_label_encoder=False, eval_metric='logloss')

# Train and evaluate models
best_model = None
best_accuracy = 0.0
best_model_name = ""

for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model
        best_model_name = name

# Save the best model
if best_model:
    with open('best_adolescents_model.pkl', 'wb') as f:
        pickle.dump(best_model, f)

# Save the scalers
with open('minmax_scaler(adolescents).pkl', 'wb') as f:
    pickle.dump(minmax_scaler, f)

with open('standard_scaler(adolescents).pkl', 'wb') as f:
    pickle.dump(standard_scaler, f)

print(f"Best Model: {best_model_name} with Accuracy: {best_accuracy:.4f}")
print("Best model and scalers have been saved to pickle files.")


Training Logistic Regression...
Logistic Regression Accuracy: 0.9524
Training Random Forest...
Random Forest Accuracy: 1.0000
Training SVM...
SVM Accuracy: 0.9048
Training Gradient Boosting...
Gradient Boosting Accuracy: 1.0000
Training KNN...
KNN Accuracy: 0.9524
Training XGBoost...
XGBoost Accuracy: 1.0000
Best Model: Random Forest with Accuracy: 1.0000
Best model and scalers have been saved to pickle files.


Parameters: { "use_label_encoder" } are not used.



In [1]:
import pickle
import pandas as pd

# Load the trained model from the pickle file
with open(r'best_adolescents_model.pkl', 'rb') as f:
    best_model = pickle.load(f)

# Load the scalers used for preprocessing
with open(r'minmax_scaler(adolescents).pkl', 'rb') as f:
    minmax_scaler = pickle.load(f)

with open(r'standard_scaler(adolescents).pkl', 'rb') as f:
    standard_scaler = pickle.load(f)

# Load the Autism_Adolescent_Data.xlsx file to get the unique values for string labels
data = pd.read_csv(r'Autism_Adolescent_Preprocessed.csv')

# Get unique values for categorical features
unique_genders = data['gender'].unique()
unique_ethnicities = data['ethnicity'].unique()
unique_jundice = data['jundice'].unique()
unique_austim = data['autism'].unique()
unique_countries = data['contry_of_res'].unique()
unique_relations = data['relation'].unique()

# Define the feature columns
feature_columns = ['age', 'gender', 'ethnicity', 'jundice', 'autism', 'contry_of_res', 'result',
                   'relation', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score', 'A6_Score',
                   'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score']

# Preprocess the training data to get the reference columns
data.dropna(inplace=True)
data_raw = data['Class']
features_raw = data[['age', 'gender', 'ethnicity', 'jundice', 'autism', 'relation',
                     'contry_of_res', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score',
                     'A6_Score', 'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score', 'result']]

features_transform = pd.DataFrame(data=features_raw)

# Apply both MinMaxScaler and StandardScaler
features_transform[['age', 'result']] = minmax_scaler.transform(features_raw[['age', 'result']])
features_transform[['age', 'result']] = standard_scaler.transform(features_transform[['age', 'result']])

features_final = pd.get_dummies(features_transform)
reference_columns = features_final.columns

# Function to preprocess user input
def preprocess_input(user_input):
    # Convert user input to DataFrame
    input_df = pd.DataFrame([user_input], columns=feature_columns)
    
    # Apply the same preprocessing steps as in the training script
    input_df[['age', 'result']] = minmax_scaler.transform(input_df[['age', 'result']])
    input_df[['age', 'result']] = standard_scaler.transform(input_df[['age', 'result']])
    
    input_df = pd.get_dummies(input_df)
    
    # Ensure all columns are present
    for col in reference_columns:
        if col not in input_df.columns:
            input_df[col] = 0
    
    # Reorder columns to match the training data
    input_df = input_df[reference_columns]
    
    return input_df

# Function to get user input and make prediction
def get_user_input_and_predict():
    # Get user input
    user_input = {
        'age': float(input("Enter age (e.g., 12-16): ")),
        'gender': input(f"Enter gender ({'/'.join(unique_genders)}): ").strip().upper(),
        'ethnicity': input(f"Enter ethnicity ({'/'.join(unique_ethnicities)}): ").strip(),
        'jundice': input(f"Had jaundice ({'/'.join(unique_jundice)}): ").strip().lower(),
        'autism': input(f"Family member with autism ({'/'.join(unique_austim)}): ").strip().lower(),
        'contry_of_res': input(f"Enter country of residence ({'/'.join(unique_countries)}): ").strip(),
        'result': float(input("Enter result (e.g., 10.0): ")),
        'relation': input(f"Enter relation ({'/'.join(unique_relations)}): ").strip(),
        'A1_Score': int(input("Enter A1_Score (0 or 1): ")),
        'A2_Score': int(input("Enter A2_Score (0 or 1): ")),
        'A3_Score': int(input("Enter A3_Score (0 or 1): ")),
        'A4_Score': int(input("Enter A4_Score (0 or 1): ")),
        'A5_Score': int(input("Enter A5_Score (0 or 1): ")),
        'A6_Score': int(input("Enter A6_Score (0 or 1): ")),
        'A7_Score': int(input("Enter A7_Score (0 or 1): ")),
        'A8_Score': int(input("Enter A8_Score (0 or 1): ")),
        'A9_Score': int(input("Enter A9_Score (0 or 1): ")),
        'A10_Score': int(input("Enter A10_Score (0 or 1): "))
    }
    
    # Preprocess the input
    input_df = preprocess_input(user_input)
    
    # Make prediction
    prediction = best_model.predict(input_df)
    
    # Print the prediction
    if prediction[0] == 1:
        print("The model predicts that the individual has ASD.")
    else:
        print("The model predicts that the individual does not have ASD.")

# Call the function to get user input and make prediction
get_user_input_and_predict()


ValueError: invalid literal for int() with base 10: ''