In [16]:
import pandas as pd

# Define the CSV path
csv_path = r'C:\Users\ADMIN\Downloads\ampc2\\'

# Load the CSV files
df1 = pd.read_csv(csv_path + 'Boning.csv')
df2 = pd.read_csv(csv_path + 'Slicing.csv')

# Select relevant columns from each DataFrame
df1_selected = df1[['Frame', 'Right Hand x', 'Right Hand y', 'Right Hand z']].copy()
df2_selected = df2[['Frame', 'Left Hand x', 'Left Hand y', 'Left Hand z']].copy()

# Add class column to each selected DataFrame
df1_selected['class'] = 0  # Boning
df2_selected['class'] = 1  # Slicing

# Rename columns to have a common structure, excluding the separate class column
df1_selected.columns = ['Frame', 'Right Hand x', 'Right Hand y', 'Right Hand z', 'class']
df2_selected.columns = ['Frame', 'Left Hand x', 'Left Hand y', 'Left Hand z', 'class']

combined_df =pd.concat([df1_selected,df2_selected], ignore_index=True)

#Move class columns to the end
cols = combined_df.columns.tolist()
cols.append(cols.pop(cols.index('class'))) 
combined_df = combined_df[cols]

#Save the data collection into a new csv
combined_df.to_csv('portfolio3.csv', index=False)



In [2]:
import numpy as np

combined_df['Right Hand RMS x,y'] = np.sqrt((combined_df['Right Hand x']**2) + (combined_df['Right Hand y']**2) )
combined_df['Right Hand RMS y,z'] = np.sqrt((combined_df['Right Hand y']**2) + (combined_df['Right Hand z']**2) )
combined_df['Right Hand RMS z,x'] = np.sqrt((combined_df['Right Hand z']**2) + (combined_df['Right Hand x']**2) )
combined_df['Right Hand RMS x,y,z'] = np.sqrt((combined_df['Right Hand x']**2) + (combined_df['Right Hand y']**2) + (combined_df['Right Hand z']**2))

In [3]:
combined_df['Right Hand Roll'] = 180 * np.arctan2(combined_df['Right Hand y'], np.sqrt(combined_df['Right Hand x']**2 + combined_df['Right Hand z']**2)) / np.pi
combined_df['Right Hand Pitch'] = 180 * np.arctan2(combined_df['Right Hand x'], np.sqrt(combined_df['Right Hand y']**2 + combined_df['Right Hand z']**2)) / np.pi

In [4]:
combined_df['Left Hand RMS x,y'] = np.sqrt((combined_df['Left Hand x']**2) + (combined_df['Left Hand y']**2) )
combined_df['Left Hand RMS y,z'] = np.sqrt((combined_df['Left Hand y']**2) + (combined_df['Left Hand z']**2) )
combined_df['Left Hand RMS z,x'] = np.sqrt((combined_df['Left Hand z']**2) + (combined_df['Left Hand x']**2) )
combined_df['Left Hand RMS x,y,z'] = np.sqrt((combined_df['Left Hand x']**2) + (combined_df['Left Hand y']**2) + (combined_df['Left Hand z']**2))

In [5]:
combined_df['Left Hand Roll'] = 180 * np.arctan2(combined_df['Left Hand y'], np.sqrt(combined_df['Left Hand x']**2 + combined_df['Left Hand z']**2)) / np.pi
combined_df['Left Hand Pitch'] = 180 * np.arctan2(combined_df['Left Hand x'], np.sqrt(combined_df['Left Hand y']**2 + combined_df['Left Hand z']**2)) / np.pi

In [6]:
cols = combined_df.columns.tolist()
cols.append(cols.pop(cols.index('class'))) 
combined_df = combined_df[cols]

combined_df.to_csv('portfolio3.csv', index=False)

In [7]:
import pandas as pd
import numpy as np
from scipy.integrate import simpson
from scipy.signal import find_peaks


In [8]:
combined_df['Minute'] = combined_df.index //60
feature_list = []
columns_to_compute = combined_df.columns[1:19]
for column in columns_to_compute:
    grouped = combined_df.groupby('Minute')[column]
    means = grouped.mean()
    stds = grouped.std()
    mins = grouped.min()
    maxs = grouped.max()
    aucs = grouped.apply(lambda x: simpson(x, dx=1) if len(x) > 1 else 0)
    peaks_counts = grouped.apply(lambda x: len(find_peaks(x)[0]))

    feature_df = pd.DataFrame({
            'Mean': means,
            'Std': stds,
            'Min': mins,
            'Max': maxs,
            'AUC': aucs,
            'Peaks': peaks_counts
        })


    feature_df.columns = [f"{column}_{feature}" for feature in feature_df.columns]  # Rename columns
    feature_list.append(feature_df)

final_features = pd.concat(feature_list, axis=1)

class_column = combined_df.groupby('Minute')['class'].first()

final_features['class'] = class_column.values

final_features.reset_index(drop=True, inplace=True)

final_features.to_csv('portfolio3.csv', index=False)

In [18]:
# Separate the features (X) and the target variable (y)
data=pd.read_csv('portfolio3.csv')

X = data.drop('class', axis=1)  # Features
y = data['class']  # Target variable (class)

# Fill any missing values in X with the mean of the respective columns
X.fillna(X.mean(), inplace=True)

# Import necessary modules
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the data into training and testing sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

results = {
    'Scenario': [],
    'Accuracy': []
}

# Train an SVM model
clf = svm.SVC()
clf.fit(X_train, y_train)

# Predict the target for the test set
y_pred = clf.predict(X_test)

# Calculate the accuracy of the model
accuracy_split = accuracy_score(y_test, y_pred)
results['Scenario'].append('Train-Test split (70/30)')
results['Accuracy'].append(accuracy_split)

summary_df = pd.DataFrame(results)
summary_df


Unnamed: 0,Scenario,Accuracy
0,Train-Test split (70/30),0.746276


In [10]:
from sklearn.model_selection import GridSearchCV

# Define a parameter grid to tune the SVM model
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'gamma': [1, 0.1, 0.01, 0.001],  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
    'kernel': ['rbf', 'linear']  # Kernel type to be used in the algorithm
}

# Create a GridSearchCV object to search for the best parameters
grid_search = GridSearchCV(svm.SVC(), param_grid, refit=True, verbose=1)

# Fit the model with the training data
grid_search.fit(X_train, y_train)

# Print the best parameters found by GridSearchCV
best_params = grid_search.best_params_

# Predict using the best model found
y_pred_best = grid_search.predict(X_test)

# Calculate accuracy of the tuned model
accuracy_best = accuracy_score(y_test, y_pred_best)

best_params, accuracy_best


Fitting 5 folds for each of 32 candidates, totalling 160 fits


({'C': 0.1, 'gamma': 1, 'kernel': 'linear'}, 1.0)

In [11]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score

results = {
    'Scenario': [],
    'Accuracy': []
}

cv_scores = cross_val_score(clf, X, y, cv=10)
cv_accuracy = cv_scores.mean()
results['Scenario'].append('10-fold cross-validation (default)')
results['Accuracy'].append(cv_accuracy)

In [12]:
# Select the top 10 features
selector = SelectKBest(f_classif, k=10)
X_new = selector.fit_transform(X, y)

# Train-test split
X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(X_new, y, test_size=0.3, random_state=1)
clf_new = svm.SVC(C=0.1, gamma=1, kernel='linear')
clf_new.fit(X_train_new, y_train_new)
y_pred_new = clf_new.predict(X_test_new)
accuracy_split_tuned_feat = accuracy_score(y_test_new, y_pred_new)
results['Scenario'].append('Train-Test split with tuning and top 10 features')
results['Accuracy'].append(accuracy_split_tuned_feat)

# 10-fold cross-validation
cv_scores_tuned_feat = cross_val_score(clf_new, X_new, y, cv=10)
cv_accuracy_tuned_feat = cv_scores_tuned_feat.mean()
results['Scenario'].append('10-fold cross-validation with tuning and top 10 features')
results['Accuracy'].append(cv_accuracy_tuned_feat)

In [13]:
# Apply PCA to get the top 10 principal components
pca = PCA(n_components=10)
X_pca = pca.fit_transform(X)

# Train-test split
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(X_pca, y, test_size=0.3, random_state=1)
clf_pca = svm.SVC(C=0.1, gamma=1, kernel='linear')
clf_pca.fit(X_train_pca, y_train_pca)
y_pred_pca = clf_pca.predict(X_test_pca)
accuracy_split_tuned_pca = accuracy_score(y_test_pca, y_pred_pca)
results['Scenario'].append('Train-Test split with tuning and top 10 principal components')
results['Accuracy'].append(accuracy_split_tuned_pca)

# 10-fold cross-validation
cv_scores_tuned_pca = cross_val_score(clf_pca, X_pca, y, cv=10)
cv_accuracy_tuned_pca = cv_scores_tuned_pca.mean()
results['Scenario'].append('10-fold cross-validation with tuning and top 10 principal components')
results['Accuracy'].append(cv_accuracy_tuned_pca)


In [14]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score

summary_df = pd.DataFrame(results)
summary_df

Unnamed: 0,Scenario,Accuracy
0,10-fold cross-validation (default),0.985854
1,Train-Test split with tuning and top 10 features,1.0
2,10-fold cross-validation with tuning and top 1...,1.0
3,Train-Test split with tuning and top 10 princi...,0.797784
4,10-fold cross-validation with tuning and top 1...,0.765999
