In [1]:
# Re-import after execution state reset
import os

import pandas as pd

# Reload the dataset.
# The default path is specific to the original author's machine; set the
# KINECT_DATASET_CSV environment variable to load the file from elsewhere.
file_path = os.environ.get(
    "KINECT_DATASET_CSV",
    r"D:\Data\NYC\KINZ\KINECT_ACC_dataset_with_qor15_2025-05-27_14-29PM.csv",
)
# NOTE(review): the saved output shows a warning originating from this call
# (likely pandas' mixed-dtype DtypeWarning) — confirm, and consider passing
# explicit dtypes if so.
df = pd.read_csv(file_path)

# Extract relevant columns: every column whose name contains an axis suffix
joint_columns = [col for col in df.columns if any(j in col for j in ['_X', '_Y', '_Z'])]
other_features = ['t_uniform', 'footfall_event_times', 'accel_energy_total', 'QoR_class']

# Check presence
columns_present = {
    "Joint Columns Count": len(joint_columns),
    "Other Columns Present": [col for col in other_features if col in df.columns]
}

# Summarize
summary = {
    "Shape": df.shape,
    "Missing Values": df.isnull().sum().sum(),
    "QoR_class Unique": df['QoR_class'].unique() if 'QoR_class' in df.columns else "Missing",
    "t_uniform Head": df['t_uniform'].head(10).tolist() if 't_uniform' in df.columns else "Missing"
}

# Show the results; previously both dictionaries were built and then discarded,
# so the cell produced no output at all.
for report in (columns_present, summary):
    for key, value in report.items():
        print(f"{key}: {value}")



  df = pd.read_csv(file_path)


In [2]:
import pandas as pd

# `df` was already loaded by the first cell (which also defines `file_path`),
# so the CSV is not parsed a second time here.

# Check structure
print("Shape:", df.shape)
print("Missing Values:", df.isnull().sum().sum())
print("QoR_class:", df['QoR_class'].unique())
print("t_uniform sample:", df['t_uniform'].head(10))


  df = pd.read_csv(file_path)


Shape: (116583, 447)
Missing Values: 15219235
QoR_class: [ 1.  0. nan]
t_uniform sample: 0    0.001646
1    0.021627
2    0.041607
3    0.061588
4    0.081568
5    0.101549
6    0.121529
7    0.141510
8    0.161490
9    0.181470
Name: t_uniform, dtype: float64


In [3]:
import pandas as pd

# The dataset is already in memory as `df` (loaded by the first cell); it is
# reused here instead of parsing the CSV a third time.

# Identify joint columns: any column whose name contains _X, _Y or _Z.
# NOTE(review): this substring match also selects non-joint columns that happen
# to contain an axis suffix (e.g. accelerometer axes, if present), and rows
# missing those values are dropped too — confirm this is intended.
joint_columns = [col for col in df.columns if any(j in col for j in ['_X', '_Y', '_Z'])]

# Drop rows with missing joint data or QoR_class
df_clean = df.dropna(subset=joint_columns + ['QoR_class'])

# Check how much data remains (other columns may still contain NaN)
print("Original Rows:", df.shape[0])
print("Cleaned Rows:", df_clean.shape[0])
print("Rows Removed:", df.shape[0] - df_clean.shape[0])
print("Remaining Missing:", df_clean.isnull().sum().sum())


  df = pd.read_csv(file_path)


Original Rows: 116583
Cleaned Rows: 94961
Rows Removed: 21622
Remaining Missing: 9887603


In [4]:
import torch
import pandas as pd
import plotly.graph_objs as go
from torch_geometric.data import Data



# Define anatomical connections as edges.
# Each pair is [source, target], using positions in the `joints` list below as
# node indices. The final .t() turns the [num_edges, 2] list into the
# [2, num_edges] layout passed as `edge_index`.
# Every bone is listed once (one direction only).
# NOTE(review): PyG layers generally treat edge_index as directed; if undirected
# message passing is intended, the reverse pairs may need to be added — confirm.
edges = torch.tensor([
    [0, 2],  # FOOT_RIGHT to ANKLE_RIGHT
    [1, 3],  # FOOT_LEFT to ANKLE_LEFT
    [2, 4],  # ANKLE_RIGHT to KNEE_RIGHT
    [3, 5],  # ANKLE_LEFT to KNEE_LEFT
    [4, 6],  # KNEE_RIGHT to HIP_RIGHT
    [5, 7],  # KNEE_LEFT to HIP_LEFT
    [6, 8],  # HIP_RIGHT to PELVIS
    [7, 8],  # HIP_LEFT to PELVIS
    [8, 9],  # PELVIS to SPINE_NAVAL
    [9, 10], # SPINE_NAVAL to SPINE_CHEST
    [10, 11],# SPINE_CHEST to CLAVICLE_RIGHT
    [10, 12],# SPINE_CHEST to CLAVICLE_LEFT
    [11, 13],# CLAVICLE_RIGHT to SHOULDER_RIGHT
    [12, 14],# CLAVICLE_LEFT to SHOULDER_LEFT
    [13, 15],# SHOULDER_RIGHT to ELBOW_RIGHT
    [14, 16],# SHOULDER_LEFT to ELBOW_LEFT
    [15, 17],# ELBOW_RIGHT to WRIST_RIGHT
    [16, 18],# ELBOW_LEFT to WRIST_LEFT
    [17, 19],# WRIST_RIGHT to HAND_RIGHT
    [18, 20],# WRIST_LEFT to HAND_LEFT
    [19, 21],# HAND_RIGHT to HANDTIP_RIGHT
    [20, 22],# HAND_LEFT to HANDTIP_LEFT
    [17, 23],# WRIST_RIGHT to THUMB_RIGHT (index 17 is WRIST_RIGHT, not HANDTIP_RIGHT)
    [18, 24],# WRIST_LEFT to THUMB_LEFT (index 18 is WRIST_LEFT, not HANDTIP_LEFT)
    [10, 25],# SPINE_CHEST to NECK
    [25, 26],# NECK to HEAD
    [26, 27],# HEAD to NOSE
    [26, 28],# HEAD to EYE_LEFT
    [26, 29],# HEAD to EAR_LEFT
    [26, 30],# HEAD to EYE_RIGHT
    [26, 31] # HEAD to EAR_RIGHT
]).t().contiguous()

# List of joints. The position of a name in this list is the node index used by
# `edges`, and each name is the prefix of that joint's '<name>_X', '<name>_Y'
# and '<name>_Z' DataFrame columns.
joints = [
    'FOOT_RIGHT', 'FOOT_LEFT', 'ANKLE_RIGHT', 'ANKLE_LEFT', 'KNEE_RIGHT', 'KNEE_LEFT',
    'HIP_RIGHT', 'HIP_LEFT', 'PELVIS', 'SPINE_NAVAL', 'SPINE_CHEST',
    'CLAVICLE_RIGHT', 'CLAVICLE_LEFT', 'SHOULDER_RIGHT', 'SHOULDER_LEFT',
    'ELBOW_RIGHT', 'ELBOW_LEFT', 'WRIST_RIGHT', 'WRIST_LEFT', 'HAND_RIGHT',
    'HAND_LEFT', 'HANDTIP_RIGHT', 'HANDTIP_LEFT', 'THUMB_RIGHT', 'THUMB_LEFT',
    'NECK', 'HEAD', 'NOSE', 'EYE_LEFT', 'EAR_LEFT', 'EYE_RIGHT', 'EAR_RIGHT'
]

# Build a graph object and the raw joint coordinates from one DataFrame row
def create_data_object_vis(row):
    """Convert one frame (a DataFrame row) into a graph plus plottable positions.

    Args:
        row: mapping-like row providing '<joint>_X', '<joint>_Y', '<joint>_Z'
            for every name in the module-level `joints` list, and 'QoR_class'.

    Returns:
        (data, node_positions) where `data` is a `Data` object holding the
        float node-feature tensor (one [x, y, z] row per joint), the shared
        module-level `edges` as `edge_index`, and the row's 'QoR_class' as a
        one-element float tensor `y`; `node_positions` is a list of (x, y, z)
        tuples in `joints` order, kept separately for Plotly.
    """
    # Coordinates are read once per joint and reused for both outputs
    node_positions = [
        (row[f'{joint}_X'], row[f'{joint}_Y'], row[f'{joint}_Z'])
        for joint in joints
    ]
    node_features = torch.tensor(
        [list(position) for position in node_positions], dtype=torch.float
    )

    # The target is the row's QoR_class value
    label = torch.tensor([row['QoR_class']], dtype=torch.float)

    graph = Data(x=node_features, edge_index=edges, y=label)
    return graph, node_positions

# Draw one skeleton frame with Plotly
def plot_graph(node_positions):
    """Show a single skeleton as an interactive 3D Plotly figure.

    Args:
        node_positions: sequence of (x, y, z) positions, one per joint, indexed
            consistently with the module-level `edges` and `joints`.

    The figure is displayed with `fig.show()`; nothing is returned.
    """
    xs, ys, zs = ([position[axis] for position in node_positions] for axis in range(3))

    # One line segment per edge; a trailing None breaks the line between bones
    seg_x, seg_y, seg_z = [], [], []
    for src, dst in edges.t().tolist():
        (x0, y0, z0), (x1, y1, z1) = node_positions[src], node_positions[dst]
        seg_x += [x0, x1, None]
        seg_y += [y0, y1, None]
        seg_z += [z0, z1, None]

    bones = go.Scatter3d(
        x=seg_x, y=seg_y, z=seg_z,
        mode='lines',
        line=dict(color='black', width=2),
        hoverinfo='none'
    )

    # Markers are labelled with the joint names
    markers = go.Scatter3d(
        x=xs, y=ys, z=zs,
        mode='markers+text',
        marker=dict(size=6, color='blue'),
        text=joints,
        hoverinfo='text'
    )

    def _axis_style():
        # All three axes share the same grey background and grid styling
        return dict(
            showbackground=True,
            backgroundcolor="rgb(230, 230, 230)",
            gridcolor="rgb(200, 200, 200)",
            showgrid=True,
            zerolinecolor="rgb(200, 200, 200)",
        )

    layout = go.Layout(
        title='3D Visualization',
        width=2200,
        height=1200,
        showlegend=False,
        scene=dict(xaxis=_axis_style(), yaxis=_axis_style(), zaxis=_axis_style()),
        margin=dict(l=50, r=50, b=50, t=50),
    )

    fig = go.Figure(data=[bones, markers], layout=layout)
    fig.show()



# Visualise a single cleaned frame: positional row 3 of df_clean is converted
# to a graph and its joint positions are plotted.
# (Swap in df.iloc[3] to inspect a row from the uncleaned frame instead.)

single_row = df_clean.iloc[3]
data, node_positions = create_data_object_vis(single_row)

plot_graph(node_positions)


In [5]:
# Group by patientID and count frames per QoR class
patient_stats = (
    df_clean.groupby('patientID')['QoR_class']
    .value_counts()
    .unstack()
    .fillna(0)
    # Select the class columns by label rather than assigning names by position:
    # positional assignment fails (or mislabels) when a class is absent.
    .reindex(columns=[0.0, 1.0], fill_value=0.0)
    .rename(columns={0.0: 'class_0', 1.0: 'class_1'})
    .rename_axis(columns=None)
)
patient_stats['total'] = patient_stats['class_0'] + patient_stats['class_1']

# Display basic stats
print("Total unique patients:", len(patient_stats))
print(patient_stats.head(10))  # Show first 10 patients

# Optionally: see overall class balance
print("\nOverall class balance:")
print(df_clean['QoR_class'].value_counts())

Total unique patients: 79
           class_0  class_1   total
patientID                          
001-LO         0.0   1509.0  1509.0
002-RJ         0.0   1321.0  1321.0
003-RM      1470.0      0.0  1470.0
004-MF      1439.0      0.0  1439.0
005-GP       602.0      0.0   602.0
006-TR         0.0   1109.0  1109.0
007-LI       805.0      0.0   805.0
008-BJ      1605.0      0.0  1605.0
009-TB         0.0   1548.0  1548.0
010-RA       942.0      0.0   942.0

Overall class balance:
QoR_class
0.0    54182
1.0    40779
Name: count, dtype: int64


In [6]:
import numpy as np
from sklearn.model_selection import train_test_split

# STEP 1: One class per patient, taken from the first QoR_class value in each group
# NOTE(review): this assumes every patient has a single class throughout — confirm.
patient_class_map = df_clean.groupby('patientID')['QoR_class'].first()

# STEP 2: Patient IDs belonging to each class
class_0_patients = patient_class_map.loc[patient_class_map.eq(0.0)].index.tolist()
class_1_patients = patient_class_map.loc[patient_class_map.eq(1.0)].index.tolist()

# STEP 3: Split each class into train/val/test (70/15/15 by default)
def split_patients(patient_ids, holdout_size=0.3, random_state=42):
    """Split patient IDs into train / validation / test groups.

    Args:
        patient_ids: list of patient identifiers to split.
        holdout_size: fraction of patients held out of training; the held-out
            part is then halved into validation and test. Defaults to 0.3.
        random_state: seed passed to both `train_test_split` calls so the split
            is reproducible. Defaults to 42.

    Returns:
        (train, val, test) as returned by `train_test_split`.
    """
    train, holdout = train_test_split(
        patient_ids, test_size=holdout_size, random_state=random_state
    )
    val, test = train_test_split(holdout, test_size=0.5, random_state=random_state)
    return train, val, test

# Split patients within each class so both classes appear in every split
train_0, val_0, test_0 = split_patients(class_0_patients)
train_1, val_1, test_1 = split_patients(class_1_patients)

# STEP 4: Merge the per-class patient lists
train_patients = [*train_0, *train_1]
val_patients = [*val_0, *val_1]
test_patients = [*test_0, *test_1]

# STEP 5: Select the rows of each split by patient membership
train_df = df_clean.loc[df_clean['patientID'].isin(train_patients)]
val_df = df_clean.loc[df_clean['patientID'].isin(val_patients)]
test_df = df_clean.loc[df_clean['patientID'].isin(test_patients)]

# STEP 6: Report the number of frames per split
print(f"Train samples: {len(train_df)}")
print(f"Val samples:   {len(val_df)}")
print(f"Test samples:  {len(test_df)}")

# Optional: per-split class balance
print("\nTrain class balance:\n", train_df['QoR_class'].value_counts())
print("\nVal class balance:\n", val_df['QoR_class'].value_counts())
print("\nTest class balance:\n", test_df['QoR_class'].value_counts())


Train samples: 60928
Val samples:   15673
Test samples:  18360

Train class balance:
 QoR_class
0.0    34585
1.0    26343
Name: count, dtype: int64

Val class balance:
 QoR_class
0.0    8731
1.0    6942
Name: count, dtype: int64

Test class balance:
 QoR_class
0.0    10866
1.0     7494
Name: count, dtype: int64


In [7]:
import torch
from torch_geometric.data import Data
from tqdm import tqdm

class KinectTemporalGraphDataset(torch.utils.data.Dataset):
    """Sliding-window dataset of per-frame skeleton graphs.

    Rows are grouped by 'patientID' and sorted by 't_uniform'. Every window of
    `seq_len` consecutive rows becomes one sample: a list of `seq_len` `Data`
    graphs (node features are the joints' [X, Y, Z] coordinates, float32) and
    a one-element float label taken from 'QoR_class' of the window's middle
    row (offset ``seq_len // 2``). Patients with fewer than `seq_len` rows
    contribute no samples.

    Args:
        df: DataFrame with 'patientID', 't_uniform', 'QoR_class' and
            '<joint>_X' / '<joint>_Y' / '<joint>_Z' columns for all 32 joints.
        seq_len: number of frames per window (>= 1).
        stride: number of rows between consecutive window starts (>= 1).

    Raises:
        ValueError: if `seq_len` or `stride` is smaller than 1.
    """

    def __init__(self, df, seq_len=16, stride=1):
        if seq_len < 1 or stride < 1:
            raise ValueError("seq_len and stride must both be >= 1")

        self.graph_sequences = []
        self.labels = []

        # Position in this list is the node index referenced by `edges`
        joints = [
            'FOOT_RIGHT', 'FOOT_LEFT', 'ANKLE_RIGHT', 'ANKLE_LEFT', 'KNEE_RIGHT', 'KNEE_LEFT',
            'HIP_RIGHT', 'HIP_LEFT', 'PELVIS', 'SPINE_NAVAL', 'SPINE_CHEST',
            'CLAVICLE_RIGHT', 'CLAVICLE_LEFT', 'SHOULDER_RIGHT', 'SHOULDER_LEFT',
            'ELBOW_RIGHT', 'ELBOW_LEFT', 'WRIST_RIGHT', 'WRIST_LEFT', 'HAND_RIGHT',
            'HAND_LEFT', 'HANDTIP_RIGHT', 'HANDTIP_LEFT', 'THUMB_RIGHT', 'THUMB_LEFT',
            'NECK', 'HEAD', 'NOSE', 'EYE_LEFT', 'EAR_LEFT', 'EYE_RIGHT', 'EAR_RIGHT'
        ]

        # [source, target] joint-index pairs, transposed to shape [2, num_edges]
        edges = torch.tensor([
            [0, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7], [6, 8], [7, 8], [8, 9],
            [9, 10], [10, 11], [10, 12], [11, 13], [12, 14], [13, 15], [14, 16],
            [15, 17], [16, 18], [17, 19], [18, 20], [19, 21], [20, 22], [17, 23],
            [18, 24], [10, 25], [25, 26], [26, 27], [26, 28], [26, 29], [26, 30], [26, 31]
        ]).t().contiguous()

        # Column order X, Y, Z per joint, so a row reshapes to [num_joints, 3]
        coord_columns = [
            f'{joint}_{axis}' for joint in joints for axis in ('X', 'Y', 'Z')
        ]

        grouped = df.groupby('patientID')

        for _, patient_df in tqdm(grouped, desc="Creating temporal graph dataset"):
            patient_df = patient_df.sort_values('t_uniform').reset_index(drop=True)
            num_frames = len(patient_df)

            # Convert all of this patient's coordinates in one step. Looking up
            # every row with .iloc inside the window loop repeated the same
            # pandas work seq_len times per frame and dominated build time.
            coords = torch.tensor(
                patient_df[coord_columns].to_numpy(dtype='float32')
            ).reshape(num_frames, len(joints), 3)
            frame_labels = patient_df['QoR_class'].tolist()

            for start in range(0, num_frames - seq_len + 1, stride):
                # Each window owns its tensors (clones), as in a per-row build
                sequence = [
                    Data(x=coords[start + i].clone(), edge_index=edges.clone())
                    for i in range(seq_len)
                ]

                # Label comes from the middle frame of the window
                label = torch.tensor(
                    [frame_labels[start + seq_len // 2]], dtype=torch.float
                )

                self.graph_sequences.append(sequence)
                self.labels.append(label)

    def __len__(self):
        """Return the number of windows."""
        return len(self.graph_sequences)

    def __getitem__(self, idx):
        """Return (list of per-frame graphs, one-element label tensor) for window `idx`."""
        return self.graph_sequences[idx], self.labels[idx]


In [None]:
# Number of windows per DataLoader batch
BATCH_SIZE = 16
# Frames per window; passed to KinectTemporalGraphDataset as seq_len
SEQUENCE_LENGTH = 64
# Rows between consecutive window starts; passed as stride
WINDOW_SLIDE = 1

In [None]:
# Build one windowed graph dataset per split. All windows are materialised in
# memory up front; the saved progress output shows this taking several minutes.
train_temporal_dataset = KinectTemporalGraphDataset(train_df, seq_len=SEQUENCE_LENGTH, stride=WINDOW_SLIDE)
val_temporal_dataset   = KinectTemporalGraphDataset(val_df, seq_len=SEQUENCE_LENGTH, stride=WINDOW_SLIDE)
test_temporal_dataset  = KinectTemporalGraphDataset(test_df, seq_len=SEQUENCE_LENGTH, stride=WINDOW_SLIDE)

Creating temporal graph dataset: 100%|██████████| 54/54 [04:51<00:00,  5.40s/it]
Creating temporal graph dataset: 100%|██████████| 12/12 [01:15<00:00,  6.28s/it]
Creating temporal graph dataset: 100%|██████████| 13/13 [01:28<00:00,  6.80s/it]


In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

def plot_sequence_with_slider(sequence, joints, edges):
    """Animate a sequence of skeleton graphs as a 3D Plotly figure.

    Args:
        sequence: iterable of graph objects whose `x` attribute holds one
            (x, y, z) row per joint.
        joints: joint names used as marker labels, in node order.
        edges: tensor of shape [2, num_edges] with source/target joint indices.

    Raises:
        ValueError: if `sequence` contains no frames.

    The figure (Play button plus a per-frame slider) is displayed with
    `fig.show()`; nothing is returned.
    """
    # The edge list is the same for every frame, so convert it once
    edge_pairs = edges.t().tolist()

    frames = []
    for i, data in enumerate(sequence):
        node_pos = data.x.tolist()

        # Node coords
        x_coords = [p[0] for p in node_pos]
        y_coords = [p[1] for p in node_pos]
        z_coords = [p[2] for p in node_pos]

        # Edges: a trailing None breaks the line between consecutive bones
        edge_x, edge_y, edge_z = [], [], []
        for e0, e1 in edge_pairs:
            x0, y0, z0 = node_pos[e0]
            x1, y1, z1 = node_pos[e1]
            edge_x += [x0, x1, None]
            edge_y += [y0, y1, None]
            edge_z += [z0, z1, None]

        # Create frame
        frames.append(go.Frame(
            data=[
                go.Scatter3d(x=edge_x, y=edge_y, z=edge_z, mode='lines', line=dict(color='black', width=2)),
                go.Scatter3d(x=x_coords, y=y_coords, z=z_coords, mode='markers+text',
                             text=joints, marker=dict(size=5, color='blue'))
            ],
            name=f'Frame {i}'
        ))

    if not frames:
        # The first frame seeds the figure's initial traces below
        raise ValueError("sequence must contain at least one frame")

    # Base frame layout
    layout = go.Layout(
        title="3D Skeleton Sequence",
        width=1600,  # Wider
        height=1100,  # Taller
        scene=dict(aspectmode='data'),
        updatemenus=[dict(
            type='buttons', showactive=False,
            buttons=[dict(label='Play',
                          method='animate',
                          args=[None, dict(frame=dict(duration=300, redraw=True),
                                           fromcurrent=True)])]
        )],
        sliders=[dict(steps=[
            dict(method='animate', args=[[f'Frame {i}'], dict(mode='immediate')], label=f'{i}')
            for i in range(len(frames))
        ])])

    fig = go.Figure(frames=frames, layout=layout)
    fig.add_trace(frames[0].data[0])
    fig.add_trace(frames[0].data[1])
    fig.show()
    
    
# Get 1 sequence from temporal dataset (the first training window)
sequence, label = train_temporal_dataset[0]

# `joints` and `edges` are the module-level globals defined in the earlier
# single-frame visualisation cell, so that cell must have been run first.
plot_sequence_with_slider(sequence, joints, edges)



In [12]:
def temporal_collate(batch):
    """Collate (sequence, label) samples without merging the sequences.

    Args:
        batch: non-empty iterable of (sequence, label) pairs.

    Returns:
        (sequences, labels): the sequences as a plain list in batch order, and
        the labels gathered into a single float tensor.
    """
    sequences, labels = map(list, zip(*batch))
    label_tensor = torch.tensor(labels, dtype=torch.float)
    return sequences, label_tensor


In [None]:
from torch.utils.data import DataLoader

# The dataset stores windows patient by patient in time order, so without
# shuffling every training batch is a run of overlapping windows from a single
# patient. Shuffle the training loader (seeded for reproducibility); keep
# validation and test in their stored order.
train_loader = DataLoader(
    train_temporal_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
    collate_fn=temporal_collate,
)
val_loader   = DataLoader(val_temporal_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=temporal_collate)
test_loader  = DataLoader(test_temporal_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=temporal_collate)


In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

class GCN_GRU_QoR(nn.Module):
    """Two GCN layers per frame, mean-pooled, followed by a GRU over time.

    Args:
        gcn_hidden: width of both graph-convolution layers (node inputs have
            3 features).
        gru_hidden: hidden size of the GRU; the final linear layer maps it to a
            single output.
        dropout: dropout probability applied after the first GCN layer during
            training.
    """

    def __init__(self, gcn_hidden=64, gru_hidden=128, dropout=0.3):
        super().__init__()
        self.gcn1 = GCNConv(3, gcn_hidden)
        self.gcn2 = GCNConv(gcn_hidden, gcn_hidden)
        self.gru = nn.GRU(input_size=gcn_hidden, hidden_size=gru_hidden, batch_first=True)
        self.fc = nn.Linear(gru_hidden, 1)
        self.dropout = dropout

    def forward(self, sequence):
        """Score one sequence of frame graphs.

        Args:
            sequence: iterable of graph objects exposing `x`, `edge_index` and
                `.to(device)`, one per frame, in time order.

        Returns:
            The output of the final linear layer with all size-1 dimensions
            squeezed away (a raw logit, not a probability).
        """
        embedded = []
        device = next(self.parameters()).device

        for data in sequence:
            # NOTE(review): PyG's Data.to appears to update the object in place,
            # so dataset samples may remain on the device after the first pass —
            # confirm if GPU memory growth is a concern.
            data = data.to(device)
            x = F.relu(self.gcn1(data.x, data.edge_index))
            x = F.dropout(x, p=self.dropout, training=self.training)
            x = F.relu(self.gcn2(x, data.edge_index))
            # All nodes belong to graph 0. The index is allocated directly on
            # the target device instead of on the CPU followed by a copy.
            batch_index = torch.zeros(x.size(0), dtype=torch.long, device=device)
            embedded.append(global_mean_pool(x, batch_index))

        sequence_tensor = torch.stack(embedded, dim=1)  # shape [1, seq_len, gcn_hidden]
        _, h_n = self.gru(sequence_tensor)
        out = self.fc(h_n.squeeze(0))
        return out.squeeze()


def train_temporal_epoch(model, loader, optimizer, criterion):
    """Run one training epoch and return the mean loss per sample.

    Args:
        model: module that maps one sequence to a scalar logit.
        loader: iterable yielding (sequences, labels) batches, where
            `sequences` is a list of sequences and `labels` holds one target
            per sequence.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(logits, labels)``.

    Returns:
        Sum of batch losses weighted by batch size, divided by the number of
        samples actually processed (0.0 if the loader yields nothing).
    """
    model.train()
    device = next(model.parameters()).device
    total_loss = 0.0
    num_samples = 0

    for sequences, labels in tqdm(loader, desc="Training", leave=False):
        optimizer.zero_grad()

        # The model scores one sequence at a time; outputs are already on its device
        logits = torch.stack([model(sequence) for sequence in sequences])
        # as_tensor accepts tensors and lists alike and avoids re-wrapping an
        # existing tensor with torch.tensor (which triggers a copy warning)
        labels = torch.as_tensor(labels, dtype=torch.float, device=device)

        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * len(labels)
        num_samples += len(labels)

    # Dividing by the processed count stays correct if the loader drops batches
    return total_loss / max(num_samples, 1)


def eval_temporal_epoch(model, loader, criterion):
    """Evaluate the model on a loader without updating it.

    Args:
        model: module that maps one sequence to a scalar logit.
        loader: iterable yielding (sequences, labels) batches.
        criterion: loss called as ``criterion(logits, labels)``.

    Returns:
        (mean_loss, accuracy, precision, recall, f1). Predictions are the
        sigmoid of the logits thresholded at 0.5; the loss is averaged over
        the samples actually processed.
    """
    model.eval()
    device = next(model.parameters()).device
    total_loss = 0.0
    num_samples = 0
    all_probs, all_labels = [], []

    with torch.no_grad():
        for sequences, labels in loader:
            logits = torch.stack([model(sequence) for sequence in sequences])
            # as_tensor avoids re-wrapping an existing tensor with torch.tensor
            labels = torch.as_tensor(labels, dtype=torch.float, device=device)

            loss = criterion(logits, labels)
            total_loss += loss.item() * len(labels)
            num_samples += len(labels)

            all_probs.extend(torch.sigmoid(logits).tolist())
            all_labels.extend(labels.tolist())

    preds_bin = [1 if p > 0.5 else 0 for p in all_probs]
    acc = accuracy_score(all_labels, preds_bin)
    # zero_division=0 keeps the value sklearn already reports for an undefined
    # metric (0) without emitting a warning on every such epoch
    prec = precision_score(all_labels, preds_bin, zero_division=0)
    rec = recall_score(all_labels, preds_bin, zero_division=0)
    f1 = f1_score(all_labels, preds_bin, zero_division=0)
    return total_loss / max(num_samples, 1), acc, prec, rec, f1


In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate model
model = GCN_GRU_QoR().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Compute class weights for BCE loss
labels = [label.item() for _, label in train_temporal_dataset]
class_weights = compute_class_weight('balanced', classes=np.array([0., 1.]), y=labels)
# BCEWithLogitsLoss' pos_weight is a multiplier on the positive class only, so
# balancing needs the negative/positive ratio. With 'balanced' weights that is
# class_weights[1] / class_weights[0]; using class_weights[1] on its own
# under-weights the positive class.
pos_weight_tensor = torch.tensor(
    [class_weights[1] / class_weights[0]], dtype=torch.float
).to(device)
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

# Train model, keeping the checkpoint with the lowest validation loss
# NOTE(review): the original loop was range(1, 50), i.e. 49 epochs; the count is
# preserved here — set NUM_EPOCHS = 50 if 50 epochs were intended.
NUM_EPOCHS = 49
best_val_loss = float('inf')

for epoch in range(1, NUM_EPOCHS + 1):
    train_loss = train_temporal_epoch(model, train_loader, optimizer, criterion)
    val_loss, acc, prec, rec, f1 = eval_temporal_epoch(model, val_loader, criterion)

    print(f"Epoch {epoch:02d} | "
          f"Train Loss: {train_loss:.4f} | "
          f"Val Loss: {val_loss:.4f} | "
          f"Acc: {acc:.4f} | Prec: {prec:.4f} | Rec: {rec:.4f} | F1: {f1:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_temporal_model.pt")



In [None]:
from sklearn.metrics import confusion_matrix

# Step 1: Load best model
# Resolve the device first and map the checkpoint onto it, so weights saved on
# a GPU machine can still be loaded when only a CPU is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.load_state_dict(torch.load("best_temporal_model.pt", map_location=device))
model = model.to(device)
model.eval()

# Step 2: Evaluate on test set
def evaluate_test(model, loader):
    """Print classification metrics and the confusion matrix for a loader.

    Args:
        model: module that maps one sequence to a scalar logit.
        loader: iterable yielding (sequences, labels) batches.

    Predictions are the sigmoid of the logits thresholded at 0.5. The function
    only prints; it returns None. It does not depend on a global `device`:
    the model places its own outputs and labels are only read back as values.
    """
    model.eval()
    all_probs, all_labels = [], []

    with torch.no_grad():
        for sequences, labels in loader:
            logits = torch.stack([model(sequence) for sequence in sequences])
            # as_tensor avoids re-wrapping an existing tensor with torch.tensor
            labels = torch.as_tensor(labels, dtype=torch.float)

            all_probs.extend(torch.sigmoid(logits).tolist())
            all_labels.extend(labels.tolist())

    preds_bin = [1 if p > 0.5 else 0 for p in all_probs]
    acc  = accuracy_score(all_labels, preds_bin)
    # zero_division=0 keeps sklearn's value for undefined metrics, minus the warning
    prec = precision_score(all_labels, preds_bin, zero_division=0)
    rec  = recall_score(all_labels, preds_bin, zero_division=0)
    f1   = f1_score(all_labels, preds_bin, zero_division=0)
    cm   = confusion_matrix(all_labels, preds_bin)

    print(f"Test Accuracy:  {acc:.4f}")
    print(f"Test Precision: {prec:.4f}")
    print(f"Test Recall:    {rec:.4f}")
    print(f"Test F1 Score:  {f1:.4f}")
    print("Confusion Matrix:\n", cm)

# Run it: prints the test-set metrics and confusion matrix for the loaded model
evaluate_test(model, test_loader)
