# Daphnet Freezing of Gait
- 1 BUSINESS TASK - The goal of this project is to analyze accelerometer data collected from 10 subjects suffering from Parkinson's Disease and to recognize Freezing of Gait (FoG) automatically.

- 2 DATA COLLECTION AND UNDERSTANDING - Data has been collected from the UCI Machine Learning Repository: <https://archive.ics.uci.edu/dataset/245/daphnet+freezing+of+gait>. The dataset includes 237 instances collected from 10 patients over 3 rounds. (Users performed three kinds of tasks: R01- straight line walking, walking with numerous turns, R02- and finally a more realistic activity of daily living (ADL) task, R03- where users went into different rooms while fetching coffee, opening doors, etc.)

- 3 DATA EXPLORATION - In this step, we apply Exploratory Data Analysis (EDA) to extract insights from the data set and learn which features could help us classify a Freezing of Gait episode versus normal gait. The analysis is performed with Pandas and data visualization. Below are the tasks to be performed in EDA:

**Importing Libraries**

**Data Cleaning and EDA**


# **Loading libraries**

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from collections import Counter

# **Load and Clean Data**

In [None]:
from pathlib import Path

# Recording files to load; names follow the "S<subject>R<run>.txt" pattern.
file_paths = [
    "S01R01.txt", "S01R02.txt", "S02R01.txt", "S02R02.txt", "S03R01.txt", "S03R02.txt",
    "S03R03.txt", "S04R01.txt", "S05R01.txt", "S05R02.txt", "S06R01.txt", "S06R02.txt",
    "S07R01.txt", "S07R02.txt", "S08R01.txt", "S09R01.txt", "S10R01.txt"
]

# Read each space-delimited recording and tag its rows with the subject id.
dfs = []
for file in file_paths:
    df = pd.read_csv(file, header=None, delimiter=' ', on_bad_lines='skip')
    # Take the subject id from the "Sxx" prefix of the file name. Numbering by
    # loop position would give every *file* its own id (S01R02 -> "S2"), so
    # several runs of one patient would look like different subjects.
    df['Subject'] = Path(file).stem[:3]
    dfs.append(df)

df_combined = pd.concat(dfs, ignore_index=True)

# Name the 11 data columns (timestamp, 3 axes x 3 sensors, annotation) plus
# the Subject tag added above.
df_combined.columns = ['Time_stamp', 'Ankle_x', 'Ankle_y', 'Ankle_z', 'Thigh_x', 'Thigh_y',
                       'Thigh_z', 'Trunk_x', 'Trunk_y', 'Trunk_z', 'Annotations', 'Subject']

# Discard incomplete rows, then rows annotated 0.
# NOTE(review): presumably 0 marks samples outside the experiment - confirm
# against the dataset documentation.
df_combined.dropna(inplace=True)
df_combined = df_combined[df_combined['Annotations'] != 0]

    0   1   2    3  4  5  6  7  8  9  10 Subject
0  15  70  39 -970  0  0  0  0  0  0   0     S01
1  31  70  39 -970  0  0  0  0  0  0   0     S01
2  46  60  49 -960  0  0  0  0  0  0   0     S01
3  62  60  49 -960  0  0  0  0  0  0   0     S01
4  78  50  39 -960  0  0  0  0  0  0   0     S01


# **Check class distribution**

In [None]:
# Show how many rows carry each annotation value.
print(df_combined.loc[:, 'Annotations'].value_counts())

# **Plot class distribution**

In [None]:
import matplotlib.pyplot as plt

# Bar chart of rows per annotation class; labels and title are handed
# straight to the pandas plotting call.
df_combined['Annotations'].value_counts().plot(
    kind='bar',
    xlabel="Class",
    ylabel="Count",
    title="Class Distribution",
)
plt.show()

# **Finding missing values**

In [None]:
# Per-column count of missing entries.
df_combined.isna().sum()

In [None]:
# Select every row that has at least one missing entry and display it.
has_missing = df_combined.isna().any(axis=1)
missing_values = df_combined[has_missing]
missing_values

# **Label Encoding data**

In [None]:
# Features & Labels
# Only the accelerometer channels are used as model inputs. Time_stamp is left
# out together with the label and the subject tag: it records when a sample
# was taken, not how the patient moved, so keeping it would let the model key
# on the position inside a recording instead of on the gait signal.
X = df_combined.drop(columns=['Annotations', 'Subject', 'Time_stamp'])
y = df_combined['Annotations']

# Encode Labels
# LabelEncoder maps the sorted distinct annotation values to 0..n-1.
# NOTE(review): with annotations 1 and 2 remaining this gives 0 and 1; the
# balancing step treats 1 as "freeze" - confirm against the dataset docs.
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# **Splitting data**

In [None]:
# Hold out 20% of the rows as a test set. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# repeatable.
# NOTE(review): rows are split individually, so samples from the same
# recording end up on both sides of the split - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

# **Data Augmentation**

In [None]:
# Data Augmentation (Jittering)
def augment_data(data, factor=5, noise_std=0.01, rng=None):
    """Return ``factor`` stacked copies of ``data`` with Gaussian jitter added.

    Parameters
    ----------
    data : array-like
        Samples to replicate. For 2-D input the rows are repeated
        block-wise, ``factor`` times.
    factor : int, default 5
        Number of copies to stack. ``0`` yields an empty result.
    noise_std : float, default 0.01
        Standard deviation of the zero-mean Gaussian noise, expressed in the
        same units as ``data``. ``0`` returns exact copies.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, so ``np.random.seed`` keeps controlling reproducibility.

    Returns
    -------
    numpy.ndarray
        The tiled data plus independently drawn noise for every element.
    """
    jittered_data = np.tile(data, (factor, 1))  # Duplicate data
    # Both the np.random module and Generator/RandomState expose .normal().
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, noise_std, jittered_data.shape)  # Add slight noise
    return jittered_data + noise

# **Balancing Dataset**

In [None]:
# Balance Dataset
# Class 1 is treated as "freeze" and class 0 as "no freeze".
freeze_data = X_train[y_train == 1]
no_freeze_data = X_train[y_train == 0]

# Fail with a clear message instead of a ZeroDivisionError (no freeze rows)
# or a silently one-class training set (no non-freeze rows).
if len(freeze_data) == 0 or len(no_freeze_data) == 0:
    raise ValueError("Both classes must be present in the training split to balance it.")

# Number of jittered copies needed to bring the freeze class close to the
# size of the no-freeze class. Clamp to 1 so that the freeze class is never
# dropped entirely when it is already the larger one (integer division -> 0).
oversample_factor = max(1, len(no_freeze_data) // len(freeze_data))

augmented_freeze = augment_data(freeze_data, factor=oversample_factor)
X_train_balanced = np.vstack([no_freeze_data, augmented_freeze])
y_train_balanced = np.hstack([np.zeros(len(no_freeze_data)), np.ones(len(augmented_freeze))])

print("After augmentation:", dict(Counter(y_train_balanced)))

# **Normalizing Data**

In [None]:
# Standardize every feature to zero mean / unit variance. The statistics are
# learned from the balanced training data only and then reused for the test
# set, so no test information enters the scaling.
scaler = StandardScaler()
scaler.fit(X_train_balanced)
X_train_balanced = scaler.transform(X_train_balanced)
X_test = scaler.transform(X_test)

# **Reshaping for LSTM**

In [None]:
# Append a trailing axis of length 1: (samples, features) -> (samples, features, 1).
# NOTE(review): with this layout the LSTM steps over the feature columns of a
# single row rather than over consecutive time samples - confirm intended.
X_train_balanced = np.expand_dims(X_train_balanced, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# One-hot encode the 0/1 labels into two columns.
y_train_balanced, y_test = (
    to_categorical(y_train_balanced, num_classes=2),
    to_categorical(y_test, num_classes=2),
)

# **LSTM**

In [None]:
# Two stacked bidirectional LSTM layers with dropout, followed by a small
# dense head that outputs a 2-way softmax.
model = Sequential([
    # Declare the input shape on the model itself. Passing input_shape to the
    # LSTM wrapped inside Bidirectional is not picked up by Sequential, which
    # leaves the model unbuilt until the first fit call.
    tf.keras.Input(shape=(X_train_balanced.shape[1], 1)),
    Bidirectional(LSTM(32, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(32, return_sequences=False)),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dense(2, activation='softmax')
])

# NOTE(review): class_weights is computed but not passed to model.fit in the
# training cell; kept here in case another cell relies on it.
class_weights = {0: 1, 1: len(no_freeze_data) / len(augmented_freeze)}

# Categorical cross-entropy is the loss that matches a softmax output trained
# on one-hot targets.
model.compile(
    optimizer=AdamW(learning_rate=1e-4),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'])

# Stop when the validation loss has not improved for 5 epochs and roll back
# to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# **Training the model**

In [None]:
# Early stopping (with restore_best_weights) selects the final model from the
# validation loss. Using the test set for that would make the reported test
# metrics optimistic, so a separate validation subset is split off from the
# training data and the test set stays untouched until evaluation.
# The labels are one-hot at this point, hence argmax for stratification.
# NOTE(review): jittered copies of one freeze sample can fall on both sides of
# this split, so val_loss is still somewhat optimistic.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_balanced, y_train_balanced,
    test_size=0.1, random_state=42,
    stratify=np.argmax(y_train_balanced, axis=1))

history = model.fit(
    X_fit, y_fit,
    epochs=30, batch_size=128,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping])

# **Evaluating the model**

In [None]:
# Score the trained model on the held-out test set; evaluate() returns the
# loss followed by the compiled metric (accuracy).
test_scores = model.evaluate(x=X_test, y=y_test)
loss, accuracy = test_scores
print(f"Test Accuracy: {accuracy:.4f}, Test Loss: {loss:.4f}")

# **Classification Report**

In [None]:
from sklearn.metrics import classification_report

# Turn softmax outputs and one-hot targets back into class indices by taking
# the position of the largest entry in each row.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_true_classes = y_test.argmax(axis=1)

# Per-class precision / recall / F1 on the test set.
report = classification_report(y_true_classes, y_pred_classes)
print("\nClassification Report:\n")
print(report)