Studio 3

Step 1: Data Collection

In [2]:
import pandas as pd  
import numpy as np
 
boning_data = pd.read_csv('./ampc2/Boning.csv') 
slicing_data = pd.read_csv('./ampc2/Slicing.csv') 

columns_to_use = ['Frame', 'Neck x', 'Neck y', 'Neck z', 'Head x', 'Head y', 'Head z'] 

boning_subset = boning_data[columns_to_use].copy() 
boning_subset['class'] = 0 

slicing_subset = slicing_data[columns_to_use].copy() 
slicing_subset['class'] = 1 

combined_data = pd.concat([boning_subset, slicing_subset], ignore_index=True) 

print(combined_data.head()) 
print(combined_data.shape)

   Frame    Neck x    Neck y    Neck z    Head x    Head y    Head z  class
0      0  0.207796  0.127939 -0.175130  0.376399  0.202993 -0.182585      0
1      1 -0.006589  0.356974  0.286768  0.204439  0.521502  0.198235      0
2      2  0.112606  0.043502  0.104975  0.021196  0.197390  0.165812      0
3      3 -0.031866  0.037024  0.131005 -0.157759  0.118886  0.201893      0
4      4  0.135369  0.019024  0.115650  0.011714  0.096737  0.107186      0
(72060, 8)


Step 2: Create Composite columns 

In [7]:
def rms(x, y): 
    return np.sqrt(x**2 + y**2) 

def roll(x, y, z): 
    return np.arctan2(y, np.sqrt(x**2 + z**2)) * 180 / np.pi 

def pitch(x, y, z): 
    return np.arctan2(x, np.sqrt(y**2 + z**2)) * 180 / np.pi  

combined_data['rms_xy_neck'] = rms(combined_data['Neck x'], combined_data['Neck y'])  
combined_data['rms_yz_neck'] = rms(combined_data['Neck y'], combined_data['Neck z'])   
combined_data['rms_zx_neck'] = rms(combined_data['Neck z'], combined_data['Neck x'])   
combined_data['rms_xyz_neck'] = np.sqrt(combined_data['Neck x'] ** 2 + combined_data['Neck y'] ** 2 + combined_data['Neck z'])  
combined_data['roll_neck'] = roll(combined_data['Neck x'], combined_data['Neck y'], combined_data['Neck z']) 
combined_data['pitch_neck'] = pitch(combined_data['Neck x'], combined_data['Neck y'], combined_data['Neck z']) 

combined_data['rms_xy_head'] = rms(combined_data['Head x'], combined_data['Head y'])  
combined_data['rms_yz_head'] = rms(combined_data['Head y'], combined_data['Head z'])   
combined_data['rms_zx_head'] = rms(combined_data['Head z'], combined_data['Head x'])   
combined_data['rms_xyz_head'] = np.sqrt(combined_data['Head x'] ** 2 + combined_data['Head y'] ** 2 + combined_data['Head z'])  
combined_data['roll_head'] = roll(combined_data['Head x'], combined_data['Head y'], combined_data['Head z']) 
combined_data['pitch_head'] = pitch(combined_data['Head x'], combined_data['Head y'], combined_data['Head z'])  

print(combined_data.head()) 
print(combined_data.shape)


   Frame    Neck x    Neck y    Neck z    Head x    Head y    Head z  class  \
0      0  0.207796  0.127939 -0.175130  0.376399  0.202993 -0.182585      0   
1      1 -0.006589  0.356974  0.286768  0.204439  0.521502  0.198235      0   
2      2  0.112606  0.043502  0.104975  0.021196  0.197390  0.165812      0   
3      3 -0.031866  0.037024  0.131005 -0.157759  0.118886  0.201893      0   
4      4  0.135369  0.019024  0.115650  0.011714  0.096737  0.107186      0   

   rms_xy_neck  rms_yz_neck  rms_zx_neck  rms_xyz_neck  roll_neck  pitch_neck  \
0     0.244023     0.216884     0.271753           NaN  25.210615   43.774063   
1     0.357035     0.457894     0.286844      0.643616  51.216647   -0.824380   
2     0.120717     0.113631     0.153948      0.345756  15.778909   44.740429   
3     0.048849     0.136136     0.134825      0.365227  15.355297  -13.174411   
4     0.136699     0.117204     0.178043      0.366519   6.098993   49.113563   

   rms_xy_head  rms_yz_head  rms_zx_he

  result = getattr(ufunc, method)(*inputs, **kwargs)


Step 3: Data pre-processing

In [19]:
import numpy as np
from scipy.signal import find_peaks

def compute_features(group):
    features = {}
    for col in group.columns:
        if col not in ['frame', 'class']:
            features[f'{col}_mean'] = group[col].mean()
            features[f'{col}_std'] = group[col].std()
            features[f'{col}_min'] = group[col].min()
            features[f'{col}_max'] = group[col].max()
            features[f'{col}_auc'] = np.trapz(group[col])  # Using numpy's trapz instead of simps
            features[f'{col}_peaks'] = len(find_peaks(group[col])[0]) 
            
    # Preserve the class label (assuming it's consistent within each group)
    features['class'] = group['class'].iloc[0]
    return pd.Series(features)

# Group by minutes (60 frames) and compute features
feature_data = combined_data.groupby(combined_data.index // 60).apply(compute_features).reset_index(drop=True)
feature_data['class'] = combined_data.groupby(combined_data.index // 60)['class'].first()

print(feature_data.head())
print(feature_data.shape)

   Frame_mean  Frame_std  Frame_min  Frame_max  Frame_auc  Frame_peaks  \
0        29.5  17.464249        0.0       59.0     1740.5          0.0   
1        89.5  17.464249       60.0      119.0     5280.5          0.0   
2       149.5  17.464249      120.0      179.0     8820.5          0.0   
3       209.5  17.464249      180.0      239.0    12360.5          0.0   
4       269.5  17.464249      240.0      299.0    15900.5          0.0   

   Neck x_mean  Neck x_std  Neck x_min  Neck x_max  ...  Head y_max  \
0     0.058447    0.258289   -0.595560    0.653929  ...    2.183209   
1    -0.096646    0.688439   -1.595246    2.575807  ...    1.228834   
2    -0.028674    1.078347   -2.423520    2.001616  ...    1.523143   
3     0.165025    1.015397   -2.141650    2.492493  ...    2.855687   
4    -0.146506    1.349297   -3.680950    3.637092  ...    4.772228   

   Head y_auc  Head y_peaks  Head z_mean  Head z_std  Head z_min  Head z_max  \
0    2.208231          11.0    -0.289862    1.96

Step 4: Training

In [20]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Prepare data
X = feature_data.drop('class', axis=1)
y = feature_data['class']

# Function to train and evaluate SVM
def train_evaluate_svm(X, y, cv=10, param_grid=None, feature_selection=None):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    pipeline_steps = [
        ('scaler', StandardScaler()),
        ('svm', SVC(random_state=42))
    ]
    
    if feature_selection == 'selectk':
        pipeline_steps.insert(1, ('feature_selection', SelectKBest(f_classif, k=10)))
    elif feature_selection == 'pca':
        pipeline_steps.insert(1, ('pca', PCA(n_components=10)))
    
    pipeline = Pipeline(pipeline_steps)
    
    if param_grid:
        grid_search = GridSearchCV(pipeline, param_grid, cv=cv, n_jobs=-1)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
    else:
        best_model = pipeline.fit(X_train, y_train)
    
    train_score = best_model.score(X_train, y_train)
    test_score = best_model.score(X_test, y_test)
    cv_score = cross_val_score(best_model, X, y, cv=cv).mean()
    
    return train_score, test_score, cv_score

# Train different SVM models
svm_results = []

# Basic SVM
svm_results.append(train_evaluate_svm(X, y))

# SVM with hyperparameter tuning
param_grid = {'svm__C': [0.1, 1, 10], 'svm__kernel': ['rbf', 'linear']}
svm_results.append(train_evaluate_svm(X, y, param_grid=param_grid))

# SVM with hyperparameter tuning and 10 best features
svm_results.append(train_evaluate_svm(X, y, param_grid=param_grid, feature_selection='selectk'))

# SVM with hyperparameter tuning and 10 principal components
svm_results.append(train_evaluate_svm(X, y, param_grid=param_grid, feature_selection='pca'))

# Print SVM results
for i, result in enumerate(svm_results):
    print(f"SVM Model {i+1}: Train Score: {result[0]:.3f}, Test Score: {result[1]:.3f}, CV Score: {result[2]:.3f}")

# Train and evaluate other models
def train_evaluate_model(model, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    pipeline = Pipeline([('scaler', StandardScaler()), ('model', model)])
    pipeline.fit(X_train, y_train)
    train_score = pipeline.score(X_train, y_train)
    test_score = pipeline.score(X_test, y_test)
    cv_score = cross_val_score(pipeline, X, y, cv=10).mean()
    return train_score, test_score, cv_score

models = [
    ('SGD', SGDClassifier(random_state=42)),
    ('RandomForest', RandomForestClassifier(random_state=42)),
    ('MLP', MLPClassifier(random_state=42))
]

for name, model in models:
    result = train_evaluate_model(model, X, y)
    print(f"{name}: Train Score: {result[0]:.3f}, Test Score: {result[1]:.3f}, CV Score: {result[2]:.3f}")

  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw


SVM Model 1: Train Score: 0.931, Test Score: 0.881, CV Score: 0.860
SVM Model 2: Train Score: 0.931, Test Score: 0.881, CV Score: 0.860
SVM Model 3: Train Score: 0.911, Test Score: 0.881, CV Score: 0.825
SVM Model 4: Train Score: 0.904, Test Score: 0.881, CV Score: 0.851
SGD: Train Score: 0.890, Test Score: 0.850, CV Score: 0.849
RandomForest: Train Score: 1.000, Test Score: 0.909, CV Score: 0.805




MLP: Train Score: 0.987, Test Score: 0.875, CV Score: 0.864


