### Starting The End

In [1]:
import pandas as pd
import itertools
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
import joblib


In [2]:
import pandas as pd

def load_and_select(filepath: str, cols: list, id_col: str = "child_id") -> pd.DataFrame:
    """Read a CSV and keep only the needed columns, properly handling boolean and numeric fields."""
    
    # Read CSV
    df = pd.read_csv(filepath, dtype=str)  # read everything as string first
    
    # Strip whitespace
    df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)


    
    # Keep only selected columns
    df = df[cols].copy()
    
    # Ensure child_id is clean
    df[id_col] = df[id_col].astype(str).str.strip()


    
    # Convert TRUE/FALSE strings to boolean
    for col in ["isasd"]:
        if col in df.columns:
            df[col] = df[col].str.upper().map({"TRUE": True, "FALSE": False})

    
    # Convert numeric columns if they exist
    numeric_cols = [c for c in df.columns if "score" in c or c == "average_score"]
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")


    
    return df


In [3]:
game_columns = {
    "dance_doodle_game": [
        "child_id", "isasd", "age", "cool_arms", "crossy_play", "open_wings",
          "shh_fun", "silly_boxer", "happy_stand", "stretch"
    ],
    "gaze_game": [
        "child_id", "isasd", "age", "round1count", "round2count", "round3count"
    ],
    "gesture_game": [
        "child_id", "isasd", "age", "butterfly", "closed_fist", "dua", "heart",
          "open_palm", "pointing_up", "spectacle", "thumbs_down", "thumbs_up", 
          "victory", "iloveyou"
    ],
    "mirror_posture_game": [
        "child_id", "isasd", "age", "kiss", "mouth_open", "showing_teeth", "looking_sideways"
    ],
    "repeat_with_me_game": [
        "child_id", "isasd", "age", "average_score", "round1score", "round2score", 
        "round3score", "round4score", "round5score", "round6score", "round7score", 
        "round8score", "round9score", "round10score", "round11score", "round12score"
    ]
}


In [5]:
# Example filepaths - update with your actual CSV file paths
filepaths = {
    "dance_doodle_game": "C:/everything of neuronurture/NeuroNurture/ALI_Model/dataset/dance_doodle_game.csv",
    "gaze_game": "C:/everything of neuronurture/NeuroNurture/ALI_Model/dataset/gaze_game.csv",
    "gesture_game": "C:/everything of neuronurture/NeuroNurture/ALI_Model/dataset/gesture_game.csv",
    "mirror_posture_game": "C:/everything of neuronurture/NeuroNurture/ALI_Model/dataset/mirror_posture_game.csv",
    "repeat_with_me_game": "C:/everything of neuronurture/NeuroNurture/ALI_Model/dataset/repeat_with_me_game.csv"
}

game_dfs = {
    game: load_and_select(path, game_columns[game])
    for game, path in filepaths.items()
}


In [6]:


import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Example game list in lexicographic order
games = ["dance_doodle_game", "gaze_game", "gesture_game", "mirror_posture_game", "repeat_with_me_game"]

results = []

for i in range(1, 2**len(games)):
    # Convert i to 5-bit binary string
    bitmask = format(i, f"0{len(games)}b")[::-1]  # 00101

    # Determine which games to include based on bitmask
    subset = [games[j] for j in range(len(games)) if bitmask[j] == "1"]

    subset_name = "_".join(subset)
    print(f"\n{'='*60}")
    print(f"Training model on: {subset_name} (bitmask {bitmask})")
    print(f"{'='*60}")

    # Start merging game data based on child_id
    merged = None
    for game in subset:
        df = game_dfs[game].copy()

        if merged is None or merged.empty:
            merged = df
        else:
            cols_to_merge = [c for c in df.columns if c not in merged.columns]
            merged = pd.merge(merged, df[["child_id"] + cols_to_merge], on="child_id", how="inner")
    
    if merged is None or merged.empty:
        continue

    X = merged.drop(columns=["child_id", "isasd"])
    y = merged["isasd"].astype(int)

    # Train-test split (e.g., 80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Model
    model = LogisticRegression(max_iter=1000)

    # Train the model
    model.fit(X_train, y_train)

    # Predict on test set
    y_pred = model.predict(X_test)
    
    # Get prediction probabilities
    y_pred_proba = model.predict_proba(X_test)

    # Compute accuracy
    acc = accuracy_score(y_test, y_pred)

    # Create detailed results DataFrame
    test_results = pd.DataFrame({
        'y_test': y_test.values,
        'y_pred': y_pred,
        'prob_class_0': y_pred_proba[:, 0],
        'prob_class_1': y_pred_proba[:, 1],
        'correct': (y_test.values == y_pred)
    })
    
    # Get child_ids from the original merged data
    test_indices = X_test.index
    test_child_ids = merged.iloc[test_indices]['child_id'].values
    test_results['child_id'] = test_child_ids

    print(f"\nTest Set Results (Accuracy: {acc:.3f}):")
    print("-" * 80)
    print(f"{'Child_ID':<10} {'y_test':<8} {'y_pred':<8} {'Prob_0':<8} {'Prob_1':<8} {'Correct':<8}")
    print("-" * 80)
    
    for _, row in test_results.iterrows():
        print(f"{row['child_id']:<10} {row['y_test']:<8} {row['y_pred']:<8} {row['prob_class_0']:<8.3f} {row['prob_class_1']:<8.3f} {row['correct']:<8}")
    
    print(f"\nSummary:")
    print(f"- Total test samples: {len(y_test)}")
    print(f"- Correct predictions: {test_results['correct'].sum()}")
    print(f"- Incorrect predictions: {(~test_results['correct']).sum()}")
    print(f"- Average probability for class 0: {test_results['prob_class_0'].mean():.3f}")
    print(f"- Average probability for class 1: {test_results['prob_class_1'].mean():.3f}")

    # Store the model and accuracy
    results.append({
        "subset": i,
        "bitmask": bitmask,
        "accuracy": acc,
        "model": model,
        "test_results": test_results
    })
    print(f"\nAccuracy for {i}: {acc:.3f}")

import joblib
joblib.dump(results, "all_game_models.pkl")


Training model on: dance_doodle_game (bitmask 10000)

Test Set Results (Accuracy: 0.650):
--------------------------------------------------------------------------------
Child_ID   y_test   y_pred   Prob_0   Prob_1   Correct 
--------------------------------------------------------------------------------
C815       1        1        0.092    0.908    1       
C588       0        0        0.582    0.418    1       
C436       0        0        0.681    0.319    1       
C110       0        1        0.364    0.636    0       
C229       1        0        0.850    0.150    0       
C965       1        0        0.592    0.408    0       
C373       1        1        0.250    0.750    1       
C793       1        0        0.738    0.262    0       
C176       0        1        0.360    0.640    0       
C044       0        0        0.724    0.276    1       
C709       1        1        0.163    0.837    1       
C179       1        1        0.235    0.765    1       
C751       1       

['all_game_models.pkl']

In [24]:
print(results)

[{'subset': 1, 'bitmask': '10000', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 2, 'bitmask': '01000', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 3, 'bitmask': '11000', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 4, 'bitmask': '00100', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 5, 'bitmask': '10100', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 6, 'bitmask': '01100', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 7, 'bitmask': '11100', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 8, 'bitmask': '00010', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 9, 'bitmask': '10010', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 10, 'bitmask': '01010', 'accuracy': 1.0, 'model': LogisticRegression(max_iter=1000)}, {'subset': 11, 'bitmask': '11010', 'ac

In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Example game list in lexicographic order
games = ["dance_doodle_game", "gaze_game", "gesture_game", "mirror_posture_game", "repeat_with_me_game"]

results = []

for i in range(1, 2**len(games)):
    # Convert i to 5-bit binary string
    bitmask = format(i, f"0{len(games)}b")[::-1]  # 00101

    # Determine which games to include based on bitmask
    subset = [games[j] for j in range(len(games)) if bitmask[j] == "1"]

    subset_name = "_".join(subset)
    print(f"Training model on: {subset_name} (bitmask {bitmask})")

    # Start merging game data based on child_id

    merged = None
    for game in subset:
        df = game_dfs[game].copy()

        if merged is None or merged.empty:
            merged = df
        else:
            cols_to_merge = [c for c in df.columns if c not in merged.columns]
            merged = pd.merge(merged, df[["child_id"] + cols_to_merge], on="child_id", how="inner")
    

    if merged is None or merged.empty:
        continue

    
    X = merged.drop(columns=["child_id", "isasd"])
    y = merged["isasd"].astype(int)

    # Train-test split (e.g., 80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Model
    model = LogisticRegression(penalty="l2", C=0.1, solver="lbfgs", max_iter=1000)


    # Train the model
    model.fit(X_train, y_train)

    # Predict on test set
    y_pred = model.predict(X_test)

    # Compute accuracy
    acc = accuracy_score(y_test, y_pred)

    # Store the model and accuracy
    results.append({
        "subset": i,
        "bitmask": bitmask,
        "accuracy": acc,
        "model": model
    })
    print(f"Accuracy for {i}: {acc}")
    
import joblib
joblib.dump(results, "all_game_models.pkl")
