In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report
import numpy as np
import pandas as pd

# Baseline 1: decision tree trained directly on the raw accelerometer windows.
# NOTE(review): acc_x / acc_y / acc_z are not created in this cell; they must
# already exist in the kernel (presumably one DataFrame of windows per axis —
# confirm) before this cell is run.
axis_windows = [axis_frame.values for axis_frame in (acc_x, acc_y, acc_z)]

# Put the three axes on the last dimension, then flatten every window into a
# single feature row (assumes 128-sample windows -> 384 columns — TODO confirm).
X_raw = np.stack(axis_windows, axis=-1)
X_raw = X_raw.reshape(len(X_raw), -1)

# Activity labels of the training split; column 0 of the header-less file.
y_true = pd.read_csv(
    '/kaggle/input/uci-har-dataset/UCI HAR Dataset/train/y_train.txt',
    header=None,
)[0]

# 80/20 hold-out split with a fixed seed.
raw_split = train_test_split(X_raw, y_true, test_size=0.2, random_state=42)
X_train_raw, X_test_raw, y_train_raw, y_test_raw = raw_split

clf_raw = DecisionTreeClassifier(random_state=42)
clf_raw.fit(X_train_raw, y_train_raw)
y_pred_raw = clf_raw.predict(X_test_raw)

print("Raw Accelerometer Data:")
print("Accuracy:", accuracy_score(y_test_raw, y_pred_raw))
for metric_label, metric_fn in (("Precision:", precision_score), ("Recall:", recall_score)):
    print(metric_label, metric_fn(y_test_raw, y_pred_raw, average='weighted'))
print("Confusion Matrix:\n", confusion_matrix(y_test_raw, y_pred_raw))


In [None]:

# Baseline 2: decision tree trained on the TSFEL feature matrix.
# NOTE(review): Tsfel_Features is not created in this cell; it must already
# exist in the kernel, with rows aligned to y_true.
tsfel_split = train_test_split(Tsfel_Features, y_true, test_size=0.2, random_state=42)
X_train_tsfel, X_test_tsfel, y_train_tsfel, y_test_tsfel = tsfel_split

clf_tsfel = DecisionTreeClassifier(random_state=42)
clf_tsfel.fit(X_train_tsfel, y_train_tsfel)
y_pred_tsfel = clf_tsfel.predict(X_test_tsfel)

print("\n TSFEL Features:")
print("Accuracy:", accuracy_score(y_test_tsfel, y_pred_tsfel))
for metric_label, metric_fn in (("Precision:", precision_score), ("Recall:", recall_score)):
    print(metric_label, metric_fn(y_test_tsfel, y_pred_tsfel, average='weighted'))
print("Confusion Matrix:\n", confusion_matrix(y_test_tsfel, y_pred_tsfel))


In [None]:

# Baseline 3: decision tree on the feature matrix reported as "Provided Features".
# NOTE(review): the model is trained on X_reduced, which is not created in this
# cell. The disabled line below shows how the full provided feature file would
# be loaded into X_provided; confirm how X_reduced relates to it.
# X_provided = pd.read_csv('/kaggle/input/uci-har-dataset/UCI HAR Dataset/train/X_train.txt', delim_whitespace=True, header=None).values
prov_split = train_test_split(X_reduced, y_true, test_size=0.2, random_state=42)
X_train_prov, X_test_prov, y_train_prov, y_test_prov = prov_split

clf_prov = DecisionTreeClassifier(random_state=42)
clf_prov.fit(X_train_prov, y_train_prov)
y_pred_prov = clf_prov.predict(X_test_prov)

print("\n Provided Features :")
print("Accuracy:", accuracy_score(y_test_prov, y_pred_prov))
for metric_label, metric_fn in (("Precision:", precision_score), ("Recall:", recall_score)):
    print(metric_label, metric_fn(y_test_prov, y_pred_prov, average='weighted'))
print("Confusion Matrix:\n", confusion_matrix(y_test_prov, y_pred_prov))


1. Raw data is noisy and high-dimensional, making it harder for the decision tree to identify class boundaries clearly.
2. TSFEL extracts meaningful temporal and statistical patterns (e.g., energy, mean, std), helping the decision tree learn better rules.
3. The provided features are precomputed time-domain and frequency-domain features, possibly less flexible than TSFEL's dynamic selection.

**Best Model: TSFEL-based model**

It slightly outperforms the one trained on provided features and vastly improves over raw signal-based classification.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Names used by the depth sweep below. Every feature set shares the same
# label vector, y_true.
#   X_raw      -> raw accelerometer flattened windows
#   X_tsfel    -> TSFEL features (alias of Tsfel_Features)
#   X_reduced  -> matrix plotted as "Provided Features"; no X_provided
#                 variable is created in the visible cells
X_tsfel = Tsfel_Features
y_raw = y_tsfel = y_provided = y_true

def evaluate_model_depths(X, y, label, depths=None):
    """Return the hold-out accuracy of a decision tree for each maximum depth.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Class labels aligned with the rows of ``X``.
    label : str
        Name of the feature set. Accepted for call compatibility; not used.
    depths : iterable of int, optional
        ``max_depth`` values to evaluate. Defaults to ``range(2, 40, 2)``.

    Returns
    -------
    list of float
        Test-set accuracy for each depth, in iteration order.
    """
    if depths is None:
        depths = range(2, 40, 2)

    # The split is seeded, so it is identical for every depth: compute it once
    # instead of re-splitting inside the loop.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    accuracies = []
    for depth in depths:
        clf = DecisionTreeClassifier(max_depth=depth, random_state=42)
        clf.fit(X_train, y_train)
        accuracies.append(accuracy_score(y_test, clf.predict(X_test)))
    return accuracies

# Depth grid for the x-axis; it has to list the same depths that
# evaluate_model_depths iterates over, since the accuracies are plotted against it.
depths = [*range(2, 40, 2)]

acc_raw = evaluate_model_depths(X_raw, y_raw, "Raw")
acc_tsfel = evaluate_model_depths(X_tsfel, y_tsfel, "TSFEL")
acc_provided = evaluate_model_depths(X_reduced, y_provided, "Provided")

# Plot: one accuracy curve per feature set
fig, ax = plt.subplots(figsize=(10, 6))
curves = (
    (acc_raw, 'o', 'Raw Accelerometer'),
    (acc_tsfel, 's', 'TSFEL Features'),
    (acc_provided, '^', 'Provided Features'),
)
for accuracies_by_depth, marker_style, legend_text in curves:
    ax.plot(depths, accuracies_by_depth, marker=marker_style, label=legend_text)
ax.set_xlabel("Tree Depth")
ax.set_ylabel("Test Accuracy")
ax.set_title("Decision Tree Accuracy vs Depth")
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()


| Feature Type           | Accuracy | Optimal Depth   | Insights                                      |
|------------------------|----------|-----------------|----------------------------------------------|
| Raw Accelerometer      | ~81%     | 20              | Noisy data, less informative                 |
| TSFEL Features         | ~95%     | 10              | Custom extracted features, very effective    |
| Provided Features      | ~95%     | 10              | Clean, domain-specific, very effective       |

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import tsfel
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# ---------- Load signals ----------
def load_signals(folder, signal_name):
    """Load one inertial-signal text file of the UCI HAR dataset with ``np.loadtxt``.

    ``folder`` is the split sub-directory (this notebook passes 'train' or
    'test') and ``signal_name`` is the file name inside that split's
    "Inertial Signals" directory.
    """
    signals_dir = f"/kaggle/input/uci-har-dataset/UCI HAR Dataset/{folder}/Inertial Signals"
    return np.loadtxt(f"{signals_dir}/{signal_name}")

# Total-acceleration windows (one array per axis) and integer labels, train split.
ax_train, ay_train, az_train = (
    load_signals('train', signal_file)
    for signal_file in ('total_acc_x_train.txt', 'total_acc_y_train.txt', 'total_acc_z_train.txt')
)
y_train = np.loadtxt("/kaggle/input/uci-har-dataset/UCI HAR Dataset/train/y_train.txt").astype(int)

# Same signals and labels for the official test split.
ax_test, ay_test, az_test = (
    load_signals('test', signal_file)
    for signal_file in ('total_acc_x_test.txt', 'total_acc_y_test.txt', 'total_acc_z_test.txt')
)
y_test = np.loadtxt("/kaggle/input/uci-har-dataset/UCI HAR Dataset/test/y_test.txt").astype(int)

# ---------- Configure TSFEL ----------
# No domain filter is passed, so the library's default feature configuration is used.
cfg = tsfel.get_features_by_domain()

def extract_features_tsfel(signal_array, fs=50):
    """Compute TSFEL features for every row of ``signal_array``.

    Each row is wrapped in a single-column DataFrame and passed to
    ``tsfel.time_series_features_extractor`` together with the global ``cfg``
    and the sampling frequency ``fs`` (default 50). The first row of the
    extractor output is kept and cleaned with ``np.nan_to_num`` so that no
    NaN/inf values reach the model.

    Returns a NumPy array with one feature row per input row.
    """
    def _features_for(samples):
        extracted = tsfel.time_series_features_extractor(
            cfg, pd.DataFrame(samples), fs=fs, verbose=0
        )
        return np.nan_to_num(extracted.values[0])

    return np.array([_features_for(samples) for samples in signal_array])


# Train features: TSFEL features per axis, concatenated column-wise (x | y | z).
feat_ax, feat_ay, feat_az = map(extract_features_tsfel, (ax_train, ay_train, az_train))
X_train = np.hstack((feat_ax, feat_ay, feat_az))

# Test features: same extraction and column order for the official test split.
feat_ax_test, feat_ay_test, feat_az_test = map(
    extract_features_tsfel, (ax_test, ay_test, az_test)
)
X_test = np.hstack((feat_ax_test, feat_ay_test, feat_az_test))

# ---------- Scale ----------
# Statistics come from the training features only and are reused for the test set.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---------- Train ----------
tree = DecisionTreeClassifier(max_depth=5, random_state=42)
tree.fit(X_train_scaled, y_train)
print("✅ Training completed.")

# ---------- Test accuracy ----------
y_pred = tree.predict(X_test_scaled)
print("Accuracy on official test set:", accuracy_score(y_test, y_pred))

# ---------- New collected data ----------
# One sub-folder per activity; every file inside is one recording.
base_path = '/kaggle/input/d/dinesh168/collected-data/Collected data'
activities = ['Laying', 'Sitting', 'Standing', 'Walking', 'Walking_Downstairs', 'Walking_Upstairs']

# Integer class id predicted by `tree` -> activity (folder) name.
label_map = {
    1: 'Walking',
    2: 'Walking_Upstairs',
    3: 'Walking_Downstairs',
    4: 'Sitting',
    5: 'Standing',
    6: 'Laying'
}

WINDOW_SIZE = 128  # number of leading samples taken from each recording
AXIS_COLUMNS = ['acc_x', 'acc_y', 'acc_z']

feature_list = []
true_labels = []
file_names = []

for activity in activities:
    path = os.path.join(base_path, activity)
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for file in sorted(os.listdir(path)):
        df = pd.read_csv(os.path.join(path, file), header=None)
        # Non-numeric cells (e.g. a header row) become NaN and their rows are dropped.
        df = df.apply(pd.to_numeric, errors='coerce').dropna()

        # Exactly three axis columns are required; anything else would otherwise
        # fail later with an opaque ValueError/KeyError.
        if df.shape[1] != len(AXIS_COLUMNS):
            print(f"Skipping {file}: expected {len(AXIS_COLUMNS)} columns, found {df.shape[1]}")
            continue
        df.columns = AXIS_COLUMNS

        if len(df) < WINDOW_SIZE:
            continue
        window = df.iloc[:WINDOW_SIZE]

        # Use the same helper as for the training features so the TSFEL
        # settings (notably fs) match what `scaler` and `tree` were fitted on.
        # Assumes the recordings share the training sampling rate — TODO confirm.
        tsfel_features_all_axes = [
            extract_features_tsfel(window[axis].to_numpy()[np.newaxis, :])[0]
            for axis in AXIS_COLUMNS
        ]

        final_features = np.concatenate(tsfel_features_all_axes)
        feature_list.append(final_features)
        true_labels.append(activity)
        file_names.append(file)

if not feature_list:
    raise RuntimeError(f"No usable recordings (>= {WINDOW_SIZE} samples, 3 columns) found under {base_path}")

# ---------- Predict ----------
X_new = np.array(feature_list)
X_new_scaled = scaler.transform(X_new)
y_pred_new = tree.predict(X_new_scaled)

# ---------- Show results ----------
for fname, pred, true in zip(file_names, y_pred_new, true_labels):
    pred_label = label_map.get(pred, "Unknown")
    print(f" {fname}: Predicted = {pred_label}, Actual = {true}")


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Translate the integer predictions into activity names so they can be compared
# with the folder-name labels stored in true_labels.
y_pred_str = list(map(lambda pred: label_map.get(pred, "Unknown"), y_pred_new))
y_true_str = true_labels

# Weighted averages; zero_division=0 scores classes that were never predicted as 0.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}
acc = accuracy_score(y_true_str, y_pred_str)
prec = precision_score(y_true_str, y_pred_str, **weighted_kwargs)
rec = recall_score(y_true_str, y_pred_str, **weighted_kwargs)
f1 = f1_score(y_true_str, y_pred_str, **weighted_kwargs)
# Rows/columns follow the order of label_map's values.
conf_mat = confusion_matrix(y_true_str, y_pred_str, labels=[*label_map.values()])

print("✅ Metrics on collected data:")
print(f"Accuracy: {acc:.4f}")
print(f"Precision (weighted): {prec:.4f}")
print(f"Recall (weighted): {rec:.4f}")
print(f"F1-score (weighted): {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_mat)

print("\nDetailed Classification Report:")
print(classification_report(y_true_str, y_pred_str, zero_division=0))


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tsfel

# ---------- Load UCI HAR signals ----------
def load_signals(folder, signal_name):
    """Read one "Inertial Signals" text file of the UCI HAR dataset via ``np.loadtxt``.

    ``folder`` selects the split directory and ``signal_name`` the file in it.
    NOTE(review): an earlier cell defines a function with this same name;
    this definition replaces it when the cell runs.
    """
    dataset_root = "/kaggle/input/uci-har-dataset/UCI HAR Dataset"
    signal_path = "/".join([dataset_root, folder, "Inertial Signals", signal_name])
    return np.loadtxt(signal_path)

# Total-acceleration windows (one array per axis) and integer labels, train split.
ax_train, ay_train, az_train = (
    load_signals('train', signal_file)
    for signal_file in ('total_acc_x_train.txt', 'total_acc_y_train.txt', 'total_acc_z_train.txt')
)
y_train = np.loadtxt("/kaggle/input/uci-har-dataset/UCI HAR Dataset/train/y_train.txt").astype(int)

# ---------- Configure TSFEL ----------
# No domain filter is passed, so the library's default feature configuration is used.
cfg = tsfel.get_features_by_domain()

def extract_features_tsfel(signal_array, fs=50):
    """Return one TSFEL feature vector per row of ``signal_array``.

    Every row is converted to a single-column DataFrame and processed by
    ``tsfel.time_series_features_extractor`` using the global ``cfg`` and the
    sampling frequency ``fs`` (default 50). ``np.nan_to_num`` replaces NaN/inf
    entries in the first output row, which is the row that is kept.
    """
    per_window = []
    for samples in signal_array:
        extracted = tsfel.time_series_features_extractor(
            cfg, pd.DataFrame(samples), fs=fs, verbose=0
        )
        per_window.append(np.nan_to_num(extracted.values[0]))
    return np.array(per_window)

# TSFEL features of the UCI HAR training windows, concatenated as x | y | z.
# NOTE(review): an earlier cell runs the same extraction on the same files to
# build its X_train; reusing that result would avoid repeating this slow step.
feat_ax, feat_ay, feat_az = (
    extract_features_tsfel(axis_signal) for axis_signal in (ax_train, ay_train, az_train)
)
X_har = np.hstack((feat_ax, feat_ay, feat_az))
y_har = y_train

import os

# ---------- Collected data -> TSFEL features ----------
# One sub-folder per activity; every file inside is one recording.
# (This extraction previously appeared twice back to back with identical code;
# a single pass produces the same X_collected / y_collected.)
base_path = '/kaggle/input/d/dinesh168/collected-data/Collected data'
activities = ['Laying', 'Sitting', 'Standing', 'Walking', 'Walking_Downstairs', 'Walking_Upstairs']

# Activity (folder) name -> integer class id used by the UCI HAR labels.
label_reverse_map = {
    'Walking': 1,
    'Walking_Upstairs': 2,
    'Walking_Downstairs': 3,
    'Sitting': 4,
    'Standing': 5,
    'Laying': 6
}

WINDOW_SIZE = 128  # number of leading samples taken from each recording
AXIS_COLUMNS = ['acc_x', 'acc_y', 'acc_z']

feature_list = []
label_list = []

for activity in activities:
    path = os.path.join(base_path, activity)
    # os.listdir returns entries in arbitrary order; sort for reproducible row order.
    for file in sorted(os.listdir(path)):
        df = pd.read_csv(os.path.join(path, file), header=None)
        # Non-numeric cells (e.g. a header row) become NaN and their rows are dropped.
        df = df.apply(pd.to_numeric, errors='coerce').dropna()

        # Exactly three axis columns are required; anything else would otherwise
        # fail later with an opaque ValueError/KeyError.
        if df.shape[1] != len(AXIS_COLUMNS):
            print(f"Skipping {file}: expected {len(AXIS_COLUMNS)} columns, found {df.shape[1]}")
            continue
        df.columns = AXIS_COLUMNS

        if len(df) < WINDOW_SIZE:
            continue
        window = df.iloc[:WINDOW_SIZE]

        # Use the same helper as for X_har so the TSFEL settings (notably fs)
        # are identical for both data sources that get stacked together.
        # Assumes the recordings share the UCI HAR sampling rate — TODO confirm.
        tsfel_features_all_axes = [
            extract_features_tsfel(window[axis].to_numpy()[np.newaxis, :])[0]
            for axis in AXIS_COLUMNS
        ]

        final_features = np.concatenate(tsfel_features_all_axes)
        feature_list.append(final_features)
        label_list.append(label_reverse_map[activity])

if not feature_list:
    raise RuntimeError(f"No usable recordings (>= {WINDOW_SIZE} samples, 3 columns) found under {base_path}")

X_collected = np.array(feature_list)
y_collected = np.array(label_list)

from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle

# Combine the UCI HAR training features with the collected recordings.
X_all = np.vstack([X_har, X_collected])
y_all = np.hstack([y_har, y_collected])

# Shuffle rows and labels together with a fixed seed.
X_all, y_all = shuffle(X_all, y_all, random_state=42)

# Split BEFORE scaling so that the scaler statistics are learned from the
# training rows only; fitting on all rows leaks test-set information.
X_train_unscaled, X_test_unscaled, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42
)

# Scale
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_unscaled)
X_test = scaler.transform(X_test_unscaled)
# Kept so any later cell that reads X_all_scaled still finds it; it now uses
# the training-only statistics.
X_all_scaled = scaler.transform(X_all)

tree = DecisionTreeClassifier(random_state=42, max_depth=5)
print("Training started...")
tree.fit(X_train, y_train)
print(" Training completed.")

y_pred = tree.predict(X_test)

# zero_division=0 matches the classification_report call below and avoids
# warnings when a class is never predicted.
print("\n Metrics after combining & shuffling:")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred, average='weighted', zero_division=0):.4f}")
print(f"Recall: {recall_score(y_test, y_pred, average='weighted', zero_division=0):.4f}")
print("\nDetailed Report:")
print(classification_report(y_test, y_pred, zero_division=0))
