In [1]:
# model_train.ipynb (Training all three models)

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load the dataset
df = pd.read_csv("tod_balanced.csv")

# Fix column name if needed
df.rename(columns={"Class/ASD Traits ": "Class/ASD Traits"}, inplace=True)

# Define features (X) and target (y)
X = df.drop(columns=["Class/ASD Traits"])  # Features
y = df["Class/ASD Traits"]  # Target

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_accuracy = accuracy_score(y_test, rf_model.predict(X_test))
joblib.dump(rf_model, "tod_random_forest.pkl")

# Train Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_accuracy = accuracy_score(y_test, dt_model.predict(X_test))
joblib.dump(dt_model, "tod_decision_tree.pkl")

# Train XGBoost
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
xgb_accuracy = accuracy_score(y_test, xgb_model.predict(X_test))
joblib.dump(xgb_model, "tod_xgboost.pkl")

# Print model accuracies
print(f"Random Forest Accuracy: {rf_accuracy:.2f}")
print(f"Decision Tree Accuracy: {dt_accuracy:.2f}")
print(f"XGBoost Accuracy: {xgb_accuracy:.2f}")


# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Load the dataset
df = pd.read_csv("tod_balanced.csv")

# Fix column name if needed
df.rename(columns={"Class/ASD Traits ": "Class/ASD Traits"}, inplace=True)

# Define features (X) and target (y)
X = df.drop(columns=["Class/ASD Traits"])  # Features
y = df["Class/ASD Traits"]  # Target

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_accuracy = accuracy_score(y_test, rf_model.predict(X_test))
rf_confusion_matrix = confusion_matrix(y_test, rf_model.predict(X_test))
logging.info(f"Random Forest Accuracy: {rf_accuracy:.2f}")
logging.info(f"Random Forest Confusion Matrix:\n{rf_confusion_matrix}")
joblib.dump(rf_model, "random_forest.pkl")

# Train Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_accuracy = accuracy_score(y_test, dt_model.predict(X_test))
dt_confusion_matrix = confusion_matrix(y_test, dt_model.predict(X_test))
logging.info(f"Decision Tree Accuracy: {dt_accuracy:.2f}")
logging.info(f"Decision Tree Confusion Matrix:\n{dt_confusion_matrix}")
joblib.dump(dt_model, "decision_tree.pkl")

# Train XGBoost
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
xgb_accuracy = accuracy_score(y_test, xgb_model.predict(X_test))
xgb_confusion_matrix = confusion_matrix(y_test, xgb_model.predict(X_test))
logging.info(f"XGBoost Accuracy: {xgb_accuracy:.2f}")
logging.info(f"XGBoost Confusion Matrix:\n{xgb_confusion_matrix}")
joblib.dump(xgb_model, "xgboost.pkl")



Parameters: { "use_label_encoder" } are not used.

2025-03-14 20:33:52,392 - INFO - Random Forest Accuracy: 1.00
2025-03-14 20:33:52,392 - INFO - Random Forest Confusion Matrix:
[[66  0]
 [ 0 65]]


Random Forest Accuracy: 1.00
Decision Tree Accuracy: 1.00
XGBoost Accuracy: 1.00


2025-03-14 20:33:52,425 - INFO - Decision Tree Accuracy: 1.00
2025-03-14 20:33:52,429 - INFO - Decision Tree Confusion Matrix:
[[66  0]
 [ 0 65]]
Parameters: { "use_label_encoder" } are not used.

2025-03-14 20:33:52,517 - INFO - XGBoost Accuracy: 1.00
2025-03-14 20:33:52,517 - INFO - XGBoost Confusion Matrix:
[[66  0]
 [ 0 65]]


['xgboost.pkl']