In [12]:
import numpy as np
from collections import Counter

# Paths
X_PATH = r"C:\Users\user\Desktop\GaitLab\GaitLab\src\keypoints_csv\ts_data\X_seq_len64_stride32.npy"
y_PATH = r"C:\Users\user\Desktop\GaitLab\GaitLab\src\keypoints_csv\ts_data\y_seq_len64_stride32.npy"

# Load sequences and labels
X = np.load(X_PATH, allow_pickle=True)
y = np.load(y_PATH, allow_pickle=True)

# Basic checks
print("Loaded X shape:", X.shape)
print("Loaded y shape:", y.shape)
print("NaNs in X:", np.isnan(X).sum())
print("Infs in X:", np.isinf(X).sum())

# Label distribution
print("\nLabel distribution (counts):")
print(Counter(y))

# Unique classes
class_names = np.unique(y)
print("\nUnique labels (classes):", class_names)


Loaded X shape: (2204, 64, 21)
Loaded y shape: (2204,)
NaNs in X: 141056
Infs in X: 0

Label distribution (counts):
Counter({'KOA_Severe': 634, 'KOA_Mild': 506, 'KOA_Early': 336, 'PD_Early': 213, 'Normal': 208, 'PD_Mild': 170, 'PD_Severe': 57, 'NonAssistive': 55, 'Assistive': 25})

Unique labels (classes): ['Assistive' 'KOA_Early' 'KOA_Mild' 'KOA_Severe' 'NonAssistive' 'Normal'
 'PD_Early' 'PD_Mild' 'PD_Severe']


In [13]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# --- Handle NaNs ---
# Impute NaNs with median per feature across all sequences
num_sequences, seq_len, num_features = X.shape
for feature_idx in range(num_features):
    feature_values = X[:, :, feature_idx]
    median_val = np.nanmedian(feature_values)
    # Replace NaNs
    feature_values[np.isnan(feature_values)] = median_val
    X[:, :, feature_idx] = feature_values

print("✅ NaNs handled.")

# --- Flatten for scaling ---
X_flat = X.reshape(num_sequences * seq_len, num_features)

# --- Scale features ---
scaler = StandardScaler()
X_flat_scaled = scaler.fit_transform(X_flat)

# --- Reshape back ---
X_scaled = X_flat_scaled.reshape(num_sequences, seq_len, num_features)

# --- Train/Val/Test Split ---
X_train, X_temp, y_train, y_temp = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print("Train shape:", X_train.shape, "Val shape:", X_val.shape, "Test shape:", X_test.shape)


✅ NaNs handled.
Train shape: (1763, 64, 21) Val shape: (220, 64, 21) Test shape: (221, 64, 21)


  median_val = np.nanmedian(feature_values)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


In [14]:
import tensorflow as tf
from tensorflow.keras import layers, models

def build_teacher_model(input_shape, num_classes):
    """
    High-capacity TCN teacher model.
    """
    inputs = layers.Input(shape=input_shape, name="input_layer")
    x = inputs
    
    # --- TCN Blocks (6 blocks, wider filters) ---
    for i in range(6):
        x_prev = x
        x = layers.Conv1D(filters=128, kernel_size=3, padding='causal', activation=None, name=f'conv1d_teacher_{i}')(x)
        x = layers.BatchNormalization(name=f'bn_teacher_{i}')(x)
        x = layers.Activation('relu', name=f'act_teacher_{i}')(x)
        x = layers.Dropout(0.2, name=f'drop_teacher_{i}')(x)
        # Residual connection
        if x_prev.shape[-1] == x.shape[-1]:
            x = layers.Add()([x_prev, x])
    
    # Global pooling
    x = layers.GlobalAveragePooling1D(name="gap_teacher")(x)
    
    # Feed-forward for extra capacity
    x = layers.Dense(256, activation='relu', name="ff_teacher")(x)
    
    # Output layer
    outputs = layers.Dense(num_classes, activation='softmax', name="output_teacher")(x)
    
    model = models.Model(inputs=inputs, outputs=outputs, name="teacher_model")
    return model

# --- Build & compile ---
input_shape = (X_train.shape[1], X_train.shape[2])
num_classes = len(np.unique(y_train))

teacher = build_teacher_model(input_shape, num_classes)
teacher.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

teacher.summary()


In [17]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# --- Encode class labels to integers ---
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_val_enc = le.transform(y_val)
y_test_enc = le.transform(y_test)

# Optional: save class names
class_names = le.classes_
print("Class names:", class_names)


Class names: ['Assistive' 'KOA_Early' 'KOA_Mild' 'KOA_Severe' 'NonAssistive' 'Normal'
 'PD_Early' 'PD_Mild' 'PD_Severe']


In [19]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# --- Callbacks ---
early_stop = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_teacher_model.keras', monitor='val_loss', save_best_only=True)

# --- Train teacher model ---
history_teacher = teacher.fit(
    X_train, y_train_enc,
    validation_data=(X_val, y_val_enc),
    epochs=50,           # increase later if needed
    batch_size=16,       # CPU-friendly
    callbacks=[early_stop, checkpoint],
    verbose=2
)

# --- Evaluate on test set ---
test_loss, test_acc = teacher.evaluate(X_test, y_test_enc, verbose=0)
print("Teacher Test Accuracy: {:.2f}%".format(test_acc * 100))

# --- Predictions & classification report ---
y_pred = np.argmax(teacher.predict(X_test), axis=1)
print(classification_report(y_test_enc, y_pred, target_names=class_names))


Epoch 1/50
111/111 - 15s - 133ms/step - accuracy: 0.2836 - loss: 2.1942 - val_accuracy: 0.2864 - val_loss: 2.1913
Epoch 2/50
111/111 - 5s - 41ms/step - accuracy: 0.2876 - loss: 2.1883 - val_accuracy: 0.2864 - val_loss: 2.1856
Epoch 3/50
111/111 - 5s - 44ms/step - accuracy: 0.2876 - loss: 2.1825 - val_accuracy: 0.2864 - val_loss: 2.1799
Epoch 4/50
111/111 - 4s - 39ms/step - accuracy: 0.2876 - loss: 2.1769 - val_accuracy: 0.2864 - val_loss: 2.1745
Epoch 5/50
111/111 - 4s - 38ms/step - accuracy: 0.2876 - loss: 2.1714 - val_accuracy: 0.2864 - val_loss: 2.1691
Epoch 6/50
111/111 - 4s - 37ms/step - accuracy: 0.2876 - loss: 2.1659 - val_accuracy: 0.2864 - val_loss: 2.1638
Epoch 7/50
111/111 - 4s - 40ms/step - accuracy: 0.2876 - loss: 2.1607 - val_accuracy: 0.2864 - val_loss: 2.1587
Epoch 8/50
111/111 - 4s - 36ms/step - accuracy: 0.2876 - loss: 2.1555 - val_accuracy: 0.2864 - val_loss: 2.1537
Epoch 9/50
111/111 - 5s - 49ms/step - accuracy: 0.2876 - loss: 2.1503 - val_accuracy: 0.2864 - val_los

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
