In [1]:
import pandas as pd
import numpy as np

# Loading the boning and slicing datasets
boning_df = pd.read_csv("Boning.csv")
slicing_df = pd.read_csv("Slicing.csv")

# Defining the correct column names based on the file
correct_columns = [
    'Frame',
    'Right Lower Leg x', 'Right Lower Leg y', 'Right Lower Leg z',
    'Left Lower Leg x', 'Left Lower Leg y', 'Left Lower Leg z'
]

# Extract and label
boning_selected = boning_df[correct_columns].copy()
slicing_selected = slicing_df[correct_columns].copy()

boning_selected['class'] = 0
slicing_selected['class'] = 1

# Combining 
combined_df = pd.concat([boning_selected, slicing_selected], ignore_index=True)

combined_df.to_csv("combined_data.csv", index=False)
combined_df.head()


Unnamed: 0,Frame,Right Lower Leg x,Right Lower Leg y,Right Lower Leg z,Left Lower Leg x,Left Lower Leg y,Left Lower Leg z,class
0,0,0.219806,0.236238,0.03205,0.121231,-0.079631,0.02069,0
1,1,0.152939,0.516317,0.034023,0.094984,0.134901,0.073241,0
2,2,0.009686,0.089548,-0.045127,0.028965,0.138562,0.047474,0
3,3,-0.052419,-0.036543,0.108497,-0.063305,0.215892,0.038873,0
4,4,0.065916,-0.276617,0.053206,0.077462,0.142185,0.04401,0


In [2]:
# Step 2: Create Composite Columns
composite_df = combined_df.copy()

# Right Lower Leg acceleration columns
rx = composite_df['Right Lower Leg x']
ry = composite_df['Right Lower Leg y']
rz = composite_df['Right Lower Leg z']

# Left Lower Leg acceleration columns
lx = composite_df['Left Lower Leg x']
ly = composite_df['Left Lower Leg y']
lz = composite_df['Left Lower Leg z']

# Composite features for Right Lower Leg
composite_df['R_rms_xy'] = np.sqrt(rx**2 + ry**2)
composite_df['R_rms_yz'] = np.sqrt(ry**2 + rz**2)
composite_df['R_rms_zx'] = np.sqrt(rz**2 + rx**2)
composite_df['R_rms_xyz'] = np.sqrt(rx**2 + ry**2 + rz**2)
composite_df['R_roll'] = np.degrees(np.arctan2(ry, np.sqrt(rx**2 + rz**2)))
composite_df['R_pitch'] = np.degrees(np.arctan2(rx, np.sqrt(ry**2 + rz**2)))

# Composite features for Left Lower Leg
composite_df['L_rms_xy'] = np.sqrt(lx**2 + ly**2)
composite_df['L_rms_yz'] = np.sqrt(ly**2 + lz**2)
composite_df['L_rms_zx'] = np.sqrt(lz**2 + lx**2)
composite_df['L_rms_xyz'] = np.sqrt(lx**2 + ly**2 + lz**2)
composite_df['L_roll'] = np.degrees(np.arctan2(ly, np.sqrt(lx**2 + lz**2)))
composite_df['L_pitch'] = np.degrees(np.arctan2(lx, np.sqrt(ly**2 + lz**2)))

composite_df.to_csv("composite_data.csv", index=False)
composite_df.head()


Unnamed: 0,Frame,Right Lower Leg x,Right Lower Leg y,Right Lower Leg z,Left Lower Leg x,Left Lower Leg y,Left Lower Leg z,class,R_rms_xy,R_rms_yz,R_rms_zx,R_rms_xyz,R_roll,R_pitch,L_rms_xy,L_rms_yz,L_rms_zx,L_rms_xyz,L_roll,L_pitch
0,0,0.219806,0.236238,0.03205,0.121231,-0.079631,0.02069,0,0.322681,0.238402,0.22213,0.324269,46.762893,42.675967,0.145046,0.082275,0.122984,0.146514,-32.922799,55.836663
1,1,0.152939,0.516317,0.034023,0.094984,0.134901,0.073241,0,0.538492,0.517436,0.156678,0.539565,73.119439,16.466129,0.164985,0.153501,0.119942,0.180511,48.359265,31.748534
2,2,0.009686,0.089548,-0.045127,0.028965,0.138562,0.047474,0,0.090071,0.100276,0.046155,0.100743,62.732545,5.516979,0.141557,0.146469,0.055612,0.149306,68.131727,11.186103
3,3,-0.052419,-0.036543,0.108497,-0.063305,0.215892,0.038873,0,0.0639,0.114486,0.120496,0.125916,-16.871233,-24.601347,0.224982,0.219364,0.074288,0.228316,71.011809,-16.097374
4,4,0.065916,-0.276617,0.053206,0.077462,0.142185,0.04401,0,0.284363,0.281688,0.084711,0.289297,-72.973446,13.170542,0.161917,0.148841,0.089091,0.167791,57.929271,27.494177


In [3]:
from scipy.integrate import simps
from scipy.signal import find_peaks

# Columns to compute features for (excluding Frame and class)
feature_columns = composite_df.columns.difference(['Frame', 'class'])

# Number of frames per minute
frames_per_minute = 60

# List to store computed features for each window
feature_data = []

# Process each 60-frame window
for start in range(0, len(composite_df), frames_per_minute):
    end = start + frames_per_minute
    if end > len(composite_df):
        break  # skip incomplete window at the end

    window = composite_df.iloc[start:end]
    class_label = window['class'].mode()[0]  # Majority label in window

    stats = []
    for col in feature_columns:
        series = window[col]
        stats.append(series.mean())
        stats.append(series.std())
        stats.append(series.min())
        stats.append(series.max())
        stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
        peaks, _ = find_peaks(series)
        stats.append(len(peaks))

    stats.append(class_label)
    feature_data.append(stats)

# Create DataFrame
stat_columns = []
for col in feature_columns:
    stat_columns.extend([
        f'{col}_mean', f'{col}_std', f'{col}_min',
        f'{col}_max', f'{col}_auc', f'{col}_peaks'
    ])
stat_columns.append('class')

final_feature_df = pd.DataFrame(feature_data, columns=stat_columns)

print(final_feature_df)

final_feature_df.to_csv("final_data.csv", index=False)
final_feature_df.head()



  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson’s rule
  stats.append(simps(np.abs(series), dx=1))  # AUC using Simpson

      L_pitch_mean  L_pitch_std  L_pitch_min  L_pitch_max  L_pitch_auc  \
0        -1.971529    40.533459   -85.119584    69.454562  1894.393983   
1        -3.871032    34.651394   -79.312539    59.906913  1782.251509   
2        -3.057594    35.480822   -75.953481    72.361558  1644.820707   
3        -9.059023    34.390404   -73.581337    71.940919  1543.548588   
4        -5.703265    31.753086   -80.769895    79.588358  1396.801606   
...            ...          ...          ...          ...          ...   
1196      8.067096    48.961397   -86.093522    78.550691  2612.527795   
1197     -3.391983    49.448505   -83.144075    88.664941  2530.994416   
1198     -5.634771    57.967050   -87.821547    76.225757  3148.851115   
1199    -17.493565    40.888678   -83.103686    82.524991  2229.164801   
1200     10.520722    47.098525   -71.262013    86.422047  2433.967661   

      L_pitch_peaks  L_rms_xy_mean  L_rms_xy_std  L_rms_xy_min  L_rms_xy_max  \
0                16       0.373

Unnamed: 0,L_pitch_mean,L_pitch_std,L_pitch_min,L_pitch_max,L_pitch_auc,L_pitch_peaks,L_rms_xy_mean,L_rms_xy_std,L_rms_xy_min,L_rms_xy_max,...,Right Lower Leg y_max,Right Lower Leg y_auc,Right Lower Leg y_peaks,Right Lower Leg z_mean,Right Lower Leg z_std,Right Lower Leg z_min,Right Lower Leg z_max,Right Lower Leg z_auc,Right Lower Leg z_peaks,class
0,-1.971529,40.533459,-85.119584,69.454562,1894.393983,16,0.373374,0.497637,0.02384,3.364471,...,0.801465,17.576063,12,0.012193,0.106959,-0.2882,0.334382,4.929254,20,0
1,-3.871032,34.651394,-79.312539,59.906913,1782.251509,13,2.259368,1.73818,0.179505,6.858831,...,10.669277,115.317105,13,0.074931,0.572076,-2.037136,2.34074,22.090863,15,0
2,-3.057594,35.480822,-75.953481,72.361558,1644.820707,14,2.77579,2.350352,0.124959,9.724431,...,5.657845,83.070376,11,-0.173463,0.821534,-2.765553,1.563758,34.308116,13,0
3,-9.059023,34.390404,-73.581337,71.940919,1543.548588,13,1.137234,0.853194,0.15415,3.745403,...,2.811169,42.738119,12,0.06848,0.787934,-1.372357,2.382665,36.960166,11,0
4,-5.703265,31.753086,-80.769895,79.588358,1396.801606,15,1.62628,0.92904,0.330312,3.680735,...,1.814754,40.66036,15,-0.011528,0.812549,-1.596494,1.650904,36.344716,10,0


In [42]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Prepare features and labels
X = final_feature_df.drop(columns=['class'])
y = final_feature_df['class']

# Strategy 1: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Strategy 1: Train basic SVM
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm_basic = SVC()
svm_basic.fit(X_train_scaled, y_train)
y_pred_basic = svm_basic.predict(X_test_scaled)
acc_basic = accuracy_score(y_test, y_pred_basic)

# Strategy 2: 10-Fold Cross-Validation
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
svm_cv = SVC()
scores_cv = cross_val_score(svm_cv, X, y, cv=cv)
acc_cv = scores_cv.mean()

# Strategy 3: Hyperparameter Tuning + CV
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['rbf', 'linear'],
    'gamma': ['scale', 'auto']
}
grid_svm = GridSearchCV(SVC(), param_grid, cv=cv)
grid_svm.fit(X_train_scaled, y_train)
acc_grid = grid_svm.score(X_test_scaled, y_test)

# Initial results
svm_results = {
    "SVM Strategy": ["Train-Test Split", "10-Fold CV", "Hyperparameter Tuning"],
    "Accuracy": [acc_basic, acc_cv, acc_grid]
}

svm_results_df = pd.DataFrame(svm_results)

print(svm_results_df)



            SVM Strategy  Accuracy
0       Train-Test Split  0.925208
1             10-Fold CV  0.861763
2  Hyperparameter Tuning  0.925208


In [43]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA

# Strategy 4: Top 10 Features with Hyperparameter Tuning
# Select top 10 features
selector = SelectKBest(score_func=f_classif, k=10)
X_train_kbest = selector.fit_transform(X_train_scaled, y_train)
X_test_kbest = selector.transform(X_test_scaled)

# Train SVM with best features
grid_svm_kbest = GridSearchCV(SVC(), param_grid, cv=cv)
grid_svm_kbest.fit(X_train_kbest, y_train)
acc_kbest = grid_svm_kbest.score(X_test_kbest, y_test)

# Strategy 5: PCA with 10 Components
pca = PCA(n_components=10)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Train SVM with PCA features
grid_svm_pca = GridSearchCV(SVC(), param_grid, cv=cv)
grid_svm_pca.fit(X_train_pca, y_train)
acc_pca = grid_svm_pca.score(X_test_pca, y_test)

# Update with previous 3 results
svm_results["SVM Strategy"].extend(["Top 10 Features", "PCA (10 components)"])
svm_results["Accuracy"].extend([acc_kbest, acc_pca])

svm_results_df = pd.DataFrame(svm_results)

print(svm_results_df)



            SVM Strategy  Accuracy
0       Train-Test Split  0.925208
1             10-Fold CV  0.861763
2  Hyperparameter Tuning  0.925208
3        Top 10 Features  0.878116
4    PCA (10 components)  0.919668


In [44]:
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Common classifier training with same train-test split and scaled data
# SGD
sgd = SGDClassifier(random_state=42)
sgd.fit(X_train_scaled, y_train)
acc_sgd = sgd.score(X_test_scaled, y_test)

# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_scaled, y_train)
acc_rf = rf.score(X_test_scaled, y_test)

# MLP
mlp = MLPClassifier(max_iter=1000, random_state=42)
mlp.fit(X_train_scaled, y_train)
acc_mlp = mlp.score(X_test_scaled, y_test)

# Prepare results
other_results = {
    "Model": ["SGD Classifier", "Random Forest", "MLP Classifier"],
    "Accuracy": [acc_sgd, acc_rf, acc_mlp]
}

other_results_df = pd.DataFrame(other_results)

print(other_results_df)



            Model  Accuracy
0  SGD Classifier  0.883657
1   Random Forest  0.916898
2  MLP Classifier  0.925208
