In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Generate a synthetic dataset
def generate_dataset(num_samples=1000, num_questions=17, seed=42, threshold=None):
    """Generate synthetic questionnaire responses and binary labels.

    Every response is an integer drawn uniformly from {0, 1, 2, 3}
    (0='never', 1='rarely', 2='sometimes', 3='often'). A sample is labelled
    ``True`` when the sum of its responses is strictly greater than
    ``threshold``.

    Parameters
    ----------
    num_samples : int, default 1000
        Number of simulated respondents (rows).
    num_questions : int, default 17
        Number of questions per respondent (columns).
    seed : int or None, default 42
        Seed for the private random generator. The default reproduces the
        values previously obtained via ``np.random.seed(42)``. Pass ``None``
        for a non-deterministic draw.
    threshold : float or None, default None
        Total-score cut-off for a positive label. ``None`` means
        ``num_questions * 2``, i.e. an average above 2 points per question.
        The expected average of a uniform 0-3 response is 1.5, so positives
        are rare under the default; lower the threshold for a more balanced
        dataset.

    Returns
    -------
    data : numpy.ndarray
        Integer array of shape ``(num_samples, num_questions)``.
    labels : numpy.ndarray
        Boolean array of shape ``(num_samples,)``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.randint, but leaves NumPy's global generator
    # untouched so other code relying on it is not silently reseeded.
    rng = np.random.RandomState(seed)
    data = rng.randint(0, 4, size=(num_samples, num_questions))

    if threshold is None:
        threshold = num_questions * 2

    # Simple labelling rule: a higher total score means a positive label.
    labels = data.sum(axis=1) > threshold
    return data, labels

# Build the synthetic dataset and hold out 20% of it for evaluation
X, y = generate_dataset()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features; the scaling statistics are learned from the
# training split only and then applied unchanged to the held-out split
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the logistic regression classifier (fit() returns the estimator itself)
model = LogisticRegression(max_iter=200).fit(X_train_scaled, y_train)

# Score the classifier on the held-out split
# NOTE(review): the recorded report shows very few positive samples in the
# held-out split, so overall accuracy is dominated by the majority class --
# read the per-class recall, and consider a stratified split.
predictions = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, predictions)
print(f"Model accuracy: {accuracy:.2f}")
print(classification_report(y_test, predictions))


Model accuracy: 0.99
              precision    recall  f1-score   support

       False       0.99      1.00      1.00       197
        True       1.00      0.67      0.80         3

    accuracy                           0.99       200
   macro avg       1.00      0.83      0.90       200
weighted avg       1.00      0.99      0.99       200

