In [None]:
from google.colab import drive
import zipfile
import os

# Mount Google Drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import glob # Import the glob module
# Define dataset path
dataset_path = "/content/drive/MyDrive/Filtered Data"  # Update this path

# Get all text files inside 12 subfolders
txt_files = glob.glob(os.path.join(dataset_path, "**/*.txt"), recursive=True)

print(f"Found {len(txt_files)} dataset files.")

Found 56 dataset files.


In [None]:
import pandas as pd


In [None]:
import os
import pandas as pd

# Path to your dataset folder in Google Drive
base_path = "/content/drive/MyDrive/Filtered Data"

# List to store file paths
txt_files = []

# Traverse the dataset folders
for folder in sorted(os.listdir(base_path)):
    folder_path = os.path.join(base_path, folder)

    if os.path.isdir(folder_path):  # Ensure it's a folder
        if folder == "008":  # Special handling for folder 8
            for subfolder in ["OFF_1", "OFF_2"]:
                subfolder_path = os.path.join(folder_path, subfolder)
                if os.path.exists(subfolder_path):  # Check if subfolder exists
                    for file in os.listdir(subfolder_path):
                        if file.endswith(".txt"):
                            txt_files.append(os.path.join(subfolder_path, file))
        else:  # All other folders where .txt files are directly present
            for file in os.listdir(folder_path):
                if file.endswith(".txt"):
                    txt_files.append(os.path.join(folder_path, file))

# Verify collected files
print(f"Total .txt files found: {len(txt_files)}")


Total .txt files found: 56


In [None]:
# Columns related to acceleration & gyroscope (32-59)
gait_columns = list(range(32, 59))  # Includes x, y, z accelerometer + gyro
label_column = [60]  # Freezing of Gait (FoG) Label

# Initialize list for storing processed data
all_data = []

# Read all files
for file in txt_files:
    df = pd.read_csv(file, header=None)  # No header in dataset
    gait_data = df[gait_columns + label_column]  # Extract only gait & labels
    all_data.append(gait_data)

# Merge all files
gait_df = pd.concat(all_data, ignore_index=True)

print("Gait Data Shape:", gait_df.shape)


Gait Data Shape: (6211056, 28)


In [None]:
import torch

# Ensure GPU usage
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert gait data to PyTorch tensor and move to GPU
gait_tensor = torch.tensor(gait_df.values, dtype=torch.float32, device=device)

window_size = 1000  # 2 sec (500Hz)
step_size = 500     # 50% overlap

# Batch-wise segmentation for speed
num_samples = (gait_tensor.shape[0] - window_size) // step_size

# Preallocate tensors
X = torch.zeros((num_samples, window_size, gait_tensor.shape[1] - 1), device=device)
y = torch.zeros((num_samples,), dtype=torch.long, device=device)

# Fill tensors
for i in range(num_samples):
    start = i * step_size
    X[i] = gait_tensor[start : start + window_size, :-1]  # All gait features
    y[i] = gait_tensor[start + window_size - 1, -1]  # Last label in window

print("Segmented Data Shape:", X.shape, "Labels Shape:", y.shape)


Segmented Data Shape: torch.Size([12420, 1000, 27]) Labels Shape: torch.Size([12420])


In [None]:
import torch
import numpy as np

# Move data to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert DataFrame to NumPy
gait_data = gait_df.iloc[:, :-1].values  # Features
labels = gait_df.iloc[:, -1].values  # Labels

# Convert to PyTorch tensors and move to GPU
gait_tensor = torch.tensor(gait_data, dtype=torch.float32, device=device)
labels_tensor = torch.tensor(labels, dtype=torch.int64, device=device)

# Segmentation parameters
window_size = 1000  # 2 seconds
step_size = 500

segments = []
labels = []

# GPU batch segmentation
for i in range(0, len(gait_tensor) - window_size, step_size):
    segment = gait_tensor[i:i+window_size]  # Feature segment
    label = labels_tensor[i+window_size-1]  # Label at window end
    segments.append(segment)
    labels.append(label)

# Stack as tensors
X = torch.stack(segments).to(device)
y = torch.tensor(labels, dtype=torch.int64, device=device)

print("Segmented Data Shape:", X.shape)


Segmented Data Shape: torch.Size([12421, 1000, 27])


USING CNN_LSTM

In [24]:
import torch
import torch.nn as nn

class CNN_LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(CNN_LSTM, self).__init__()

        # CNN Layers
        self.conv1 = nn.Conv1d(in_channels=input_size, out_channels=64, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)

        # LSTM Layers
        self.lstm = nn.LSTM(input_size=64, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

        # Fully connected output layer
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # Convert to (batch, channels, time) for CNN
        x = self.pool(self.relu(self.conv1(x)))  # CNN Feature Extraction
        x = x.permute(0, 2, 1)  # Back to (batch, time, features)

        lstm_out, _ = self.lstm(x)
        lstm_features = lstm_out[:, -1, :]  # Extract last LSTM hidden state

        output = self.fc(lstm_features)  # Final classification output
        return output

    # New method to extract feature vectors
    def extract_features(self, x):
        x = x.permute(0, 2, 1)
        x = self.pool(self.relu(self.conv1(x)))
        x = x.permute(0, 2, 1)

        lstm_out, _ = self.lstm(x)
        lstm_features = lstm_out[:, -1, :]  # Extract feature vector before final FC layer
        return lstm_features


In [25]:
import torch.optim as optim

# Define model parameters
input_size = 27  # Number of gait features
hidden_size = 128
num_layers = 2
output_size = 1  # Binary classification (FoG or no FoG)

# Initialize model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN_LSTM(input_size, hidden_size, num_layers, output_size).to(device)

# Define Loss and Optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 5
model.train()

for epoch in range(num_epochs):
    total_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs = model(batch_X).squeeze()
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

print("CNN-LSTM Training Completed ✅")


Epoch 1, Loss: 0.575844870790408
Epoch 2, Loss: 0.5622298310232315
Epoch 3, Loss: 0.5670619021466308
Epoch 4, Loss: 0.5626194989757906
Epoch 5, Loss: 0.5573569317722628
CNN-LSTM Training Completed ✅


In [27]:
from sklearn.metrics import accuracy_score

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X).squeeze()
        preds = torch.round(torch.sigmoid(outputs))  # Convert logits to binary predictions
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch_y.cpu().numpy())

accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy * 100:.2f}% ✅")


Test Accuracy: 77.79% ✅


In [28]:
import numpy as np

# Ensure model is in evaluation mode
model.eval()

feature_vectors = []

with torch.no_grad():
    for batch_X, _ in test_loader:  # No need for labels here
        batch_X = batch_X.to(device)
        features = model.extract_features(batch_X)  # Extract features from LSTM layer
        feature_vectors.append(features.cpu().numpy())

# Convert to numpy array
feature_vectors = np.vstack(feature_vectors)

# Save feature vectors to a file
np.save('gait_feature_vectors.npy', feature_vectors)
print("Feature vectors saved successfully! ✅")


Feature vectors saved successfully! ✅


In [29]:
from google.colab import files
files.download('gait_feature_vectors.npy')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [30]:
print(feature_vectors.shape)  # Should be (num_samples, feature_dim)
print("Example feature vector:", feature_vectors[0][:10])  # Print first 10 values of first vector


(2485, 128)
Example feature vector: [-0.03943942 -0.2911995   0.2068045  -0.04807587  0.02643567  0.2355742
  0.0088343   0.03526265 -0.00297631  0.07361782]


USING RANDOM FOREST

In [31]:
import numpy as np
import pandas as pd
import torch
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [34]:
# ✅ Load the segmented data (assuming it's already in `X` and `y`)
print("Segmented Data Shape:", X.shape)  # Should be (num_samples, window_size, features)

# ✅ Flatten the time-series data for Random Forest (convert 3D to 2D)
X_flat = X.reshape(X.shape[0], -1)  # Flatten time steps into features
print("Flattened Data Shape for RF:", X_flat.shape)  # (samples, window_size * features)

# ✅ Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_flat, y, test_size=0.2, random_state=42)

# ✅ Initialize Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
print("Training Random Forest...")

# ✅ Convert PyTorch tensors to NumPy
X_train = X_train.cpu().numpy()
X_test = X_test.cpu().numpy()
y_train = y_train.cpu().numpy()
y_test = y_test.cpu().numpy()


# ✅ Train the model
rf_model.fit(X_train, y_train)
print("🎯 Random Forest Training Completed!")

Segmented Data Shape: torch.Size([12421, 1000, 27])
Flattened Data Shape for RF: torch.Size([12421, 27000])
Training Random Forest...
🎯 Random Forest Training Completed!


In [35]:
# ✅ Test accuracy
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Test Accuracy: {accuracy:.4f}")

# ✅ Classification Report
print("\n🔹 Classification Report:\n", classification_report(y_test, y_pred))

✅ Test Accuracy: 0.8829

🔹 Classification Report:
               precision    recall  f1-score   support

         0.0       0.89      0.91      0.90      1433
         1.0       0.88      0.84      0.86      1052

    accuracy                           0.88      2485
   macro avg       0.88      0.88      0.88      2485
weighted avg       0.88      0.88      0.88      2485



In [36]:
# ✅ Extract feature vector using RF’s internal feature representation
feature_vectors = rf_model.apply(X_test)  # RF outputs leaf indices as features

# ✅ Save feature vector as .npy for multimodal integration
feature_vectors_path = "/content/drive/MyDrive/gait_feature_vectors_rf.npy"
np.save(feature_vectors_path, feature_vectors)
print(f"✅ Feature vectors saved at: {feature_vectors_path}")

✅ Feature vectors saved at: /content/drive/MyDrive/gait_feature_vectors_rf.npy


In [38]:
files.download("/content/drive/MyDrive/gait_feature_vectors_rf.npy")  # Directly download the file

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [39]:
import numpy as np

# Load the file
feature_vectors = np.load("/content/drive/MyDrive/gait_feature_vectors_rf.npy")

# Check shape
print("Feature Vector Shape:", feature_vectors.shape)

# Preview first few rows
print("First 5 rows:\n", feature_vectors[:5])


Feature Vector Shape: (2485, 100)
First 5 rows:
 [[  38  985 1108  973 1014  921  959 1103  980  970  816  685  948 1068
   996  989  480 1039  970  982 1046 1016 1087  860  916 1034 1154 1088
  1110 1023 1062  757 1095  974 1003  998  708 1072  959 1084 1104 1089
  1099 1141 1029 1011 1091 1061  974 1019  947  956  485  781 1119  702
   917  754 1034 1065  960  971  356  669  882 1139  474 1112 1029 1018
   992  996 1030 1005  993  830  941 1046 1088   79  752 1148 1127 1088
   107  260 1035 1121 1031 1017  619  861 1053  867 1065   27   61 1063
   999  977]
 [1007 1070 1077  927  944  890  875 1020  850  939  554  569  882 1037
   873  905 1142 1015  931  920  936  868  916  518 1050  863 1032 1012
   993  935 1028  500 1009 1055  951  947  809 1000  839  934 1008 1036
  1008 1053  885  845 1031  546  872  940  871  873 1076  845  987  525
   578 1107  938  865  893  932 1116 1081  567 1072  183 1046  862  872
   877  876  943  959  891  250  878  871  907 1132 1102 1068 1061 1003
  

In [40]:
print("Original Test Data Shape:", X_test.shape)  # Check before feature extraction


Original Test Data Shape: (2485, 27000)


In [41]:
print("Any empty feature vectors?", np.all(feature_vectors == 0, axis=1).sum())  # Should be 0


Any empty feature vectors? 0
