In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.metrics import accuracy_score, confusion_matrix
import pickle
# Train and evaluate a two-level stacking classifier:
#   level 0: RandomForest + RBF-SVM, each producing class probabilities
#   level 1: LogisticRegression fitted on the concatenated level-0 probabilities
# The names `scaler`, `rf_model`, `svm_model` and `meta_model` created here are
# reused as globals by the prediction and pickling cells further down.

# Read the CSV file
file_path = '/content/drive/MyDrive/weiss_data/consolidated_wiess_data_final.csv'
df = pd.read_csv(file_path)

# Encode the target column if it's categorical
# The labels are replaced in place by integer category codes; the label -> code
# mapping is not kept anywhere in this cell.
# NOTE(review): consider storing the categories so predicted codes can be decoded.
target_column = 'ACTIVITY'  # Replace with the name of your target column
df[target_column] = df[target_column].astype('category').cat.codes

# Drop the 'class' column from the features
# Features are every column except the target and 'class'.
X = df.drop(columns=[target_column, 'class'])
y = df[target_column]

# Split the data into training and testing sets
# 80/20 split with a fixed seed; no `stratify=` is passed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
# The scaler is fitted on the training split only and then applied to the test split.
# From here on X_train / X_test hold the scaled values, not the original frames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the base models with hyperparameters
rf_model = RandomForestClassifier(n_estimators=200, min_samples_split=2, min_samples_leaf=1, max_depth=20, bootstrap=False, random_state=42)
svm_model = SVC(C=100, gamma='auto', kernel='rbf', probability=True, random_state=42)

# Get out-of-fold predictions for training the meta-model
# 5-fold class-probability predictions over the training split, so the
# meta-model is trained on predictions for rows the base model did not see.
# NOTE(review): the scaler above was fitted on all of X_train before these folds
# are formed, so every fold shares the same scaling statistics.
rf_oof_preds = cross_val_predict(rf_model, X_train, y_train, cv=5, method='predict_proba')
svm_oof_preds = cross_val_predict(svm_model, X_train, y_train, cv=5, method='predict_proba')

# Stack the predictions as new features
# Column-wise concatenation: RF probabilities first, SVM probabilities second.
stacked_features = np.hstack((rf_oof_preds, svm_oof_preds))

# Train the meta-model (Logistic Regression)
meta_model = LogisticRegression(random_state=42)
meta_model.fit(stacked_features, y_train)

# Generate test set predictions using the base models
# Each base model is fitted on the full training split here; these fitted
# instances are the ones later cells call `predict_proba` on.
rf_test_preds = rf_model.fit(X_train, y_train).predict_proba(X_test)
svm_test_preds = svm_model.fit(X_train, y_train).predict_proba(X_test)

# Stack the test set predictions
# Same column order as `stacked_features` (RF, then SVM).
stacked_test_features = np.hstack((rf_test_preds, svm_test_preds))

# Predict using the meta-model
final_predictions = meta_model.predict(stacked_test_features)

# Evaluate the stacking ensemble
accuracy = accuracy_score(y_test, final_predictions)
conf_matrix = confusion_matrix(y_test, final_predictions)
# Only the meta-model is pickled to this file; the scaler and the two base
# models it depends on are not written by this cell.
model_filename = '/content/drive/MyDrive/trained_model.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(meta_model, model_file)
print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{conf_matrix}')

Accuracy: 0.8649464726873456
Confusion Matrix:
[[164   0  17   1   0   0   0   0   0   0   0   0   5   0   0   1   1   3]
 [  0 180   0   0   0   0   0   0   0   1   0   0   0   1   1   0   0   0]
 [  7   2 160   0   0   0   0   2   0   0   0   1  10   3   0   0   0   6]
 [  0   0   1 176   4   9   0   0   3   6   1   3   0   0   0   6   0   1]
 [  0   0   0   2 187   0   0   3   6   0   1   3   1   3   1   2   0   2]
 [  0   0   0   4   1 191   0   0   4   0   1   1   0   0   0   9   0   0]
 [  0   0   1   0   1   0 167   1   5   0   0   1   0   0   0   0   1   1]
 [  0   0   0   0   2   0   4 153   5  19   6   7   0   0   0   2   0   1]
 [  0   0   0   1   1   1   1   7 159  12   7  23   0   0   0   0   0   2]
 [  0   0   0   2   1   2   0   4   7 163   0   7   0   0   0   2   0   1]
 [  0   0   1   4   1   0   0   0   2   7 168  24   0   0   0   0   0   1]
 [  0   0   0   4   1   1   0  11  25  14  15 152   0   0   1   2   0   0]
 [  3   0   7   0   0   0   2   1   1   0   0   1 178

In [None]:
import pandas as pd

# Step 1: Load the samples.
# `skiprows=1` drops the file's first line, so pandas parses the line after it
# as the column header.
# NOTE(review): confirm that the file's second line is not a data sample.
df = pd.read_csv('/content/drive/MyDrive/ACC.csv', skiprows=1)

# Step 2: Re-open the file and take the first field of its first line as the
# recording start time.
with open('/content/drive/MyDrive/ACC.csv', 'r') as f:
    first_line = f.readline().strip().split(',')
starting_timestamp = float(first_line[0])  # Assuming the timestamp is in the first column

# Step 3: Sampling parameters
sampling_frequency = 32  # Hz
resolution = 0.015  # seconds

# One timestamp per row, spaced 1 / sampling_frequency apart from the start time
timestamps = [
    starting_timestamp + sample_index / sampling_frequency
    for sample_index in range(len(df))
]
df['Timestamp'] = timestamps

# Step 4: Name the first three columns (by position) X, Y and Z
first_col = df.columns[0]
second_col = df.columns[1]
third_col = df.columns[2]
df = df.rename(columns={first_col: 'X', second_col: 'Y', third_col: 'Z'})

# Step 5: Write the frame, without the index, to a new CSV file
df.to_csv('/content/drive/MyDrive/ACC_with_time_and_renamed.csv', index=False)


In [None]:
import pandas as pd
import numpy as np

# Read the CSV file produced by the timestamp/rename step above.
# This used to point at '.../MyDrive/angel_data/ACC_with_time_and_renamed.csv',
# a location that step never writes to, so a fresh top-to-bottom run would not
# pick up the file it had just generated.
file_path = '/content/drive/MyDrive/ACC_with_time_and_renamed.csv'
df = pd.read_csv(file_path)

# Define the window size and sampling frequency
window_size = 10  # seconds
sampling_frequency = 32  # Hz
window_length = window_size * sampling_frequency  # rows per window

# Function to calculate features for each window
def calculate_features(window, num_bins=10, peak_threshold=0.015):
    """Summarise one window of X/Y/Z samples as a flat dict of features.

    Parameters
    ----------
    window : pandas.DataFrame
        Rows of a single window; must provide the columns 'X', 'Y' and 'Z'.
    num_bins : int, default 10
        Number of equal-width histogram bins per axis. The bins span that
        axis's minimum to maximum within the window.
    peak_threshold : float, default 0.015
        A row counts towards ``<axis>PEAK`` when its value differs from the
        previous row's value by more than this amount.

    Returns
    -------
    dict
        Keys in insertion order: ``X0..``, ``Y0..``, ``Z0..`` (fraction of the
        window's rows per bin), ``XAVG/YAVG/ZAVG``, ``XPEAK/YPEAK/ZPEAK``,
        ``XABSOLDEV/..``, ``XSTANDDEV/..`` and ``RESULTANT``. The order is
        significant: it becomes the column order of any DataFrame built from
        these dicts.

    Raises
    ------
    ValueError
        If ``window`` has no rows or ``num_bins`` is smaller than 1.
    """
    n_samples = len(window)
    if n_samples == 0:
        # An empty window would otherwise produce NaN for every feature.
        raise ValueError("calculate_features() requires a non-empty window")
    if num_bins < 1:
        raise ValueError("num_bins must be at least 1")

    axes = ('X', 'Y', 'Z')
    features = {}

    # Binned distribution: share of samples in each equal-width bin per axis
    for axis in axes:
        values = window[axis]
        bin_edges = np.linspace(values.min(), values.max(), num=num_bins + 1)
        binned_counts, _ = np.histogram(values, bins=bin_edges)
        binned_distribution = binned_counts / n_samples
        for i in range(num_bins):
            features[f'{axis}{i}'] = binned_distribution[i]

    # Average
    for axis in axes:
        features[f'{axis}AVG'] = window[axis].mean()

    # Peak count placeholder: number of sample-to-sample jumps above the threshold
    # (the original code marks this as to be replaced with real peak detection)
    for axis in axes:
        features[f'{axis}PEAK'] = (window[axis].diff().abs() > peak_threshold).sum()

    # Mean absolute deviation from the axis average
    for axis in axes:
        features[f'{axis}ABSOLDEV'] = np.abs(window[axis] - features[f'{axis}AVG']).mean()

    # Standard deviation (pandas default, i.e. sample standard deviation)
    for axis in axes:
        features[f'{axis}STANDDEV'] = window[axis].std()

    # Resultant: mean Euclidean magnitude of the (X, Y, Z) vector
    resultant = np.sqrt(window['X']**2 + window['Y']**2 + window['Z']**2)
    features['RESULTANT'] = resultant.mean()

    return features

# Walk the recording in back-to-back, non-overlapping slices of `window_length` rows
features_list = []
for start in range(0, len(df), window_length):
    window = df.iloc[start:start + window_length]
    if len(window) != window_length:
        # Only the final slice can come up short; it is left out
        continue
    features = calculate_features(window)
    features_list.append(features)

# One row per full window, one column per feature
features_df = pd.DataFrame(features_list)

# Write the feature table, without the index, to a new CSV file
output_file_path = '/content/drive/MyDrive/output_with_features.csv'
features_df.to_csv(output_file_path, index=False)

print(f"Feature extraction completed. The output is saved to {output_file_path}")


Feature extraction completed. The output is saved to /content/drive/MyDrive/output_with_features.csv


In [None]:
# Function to filter predictions to only valid classes
def filter_valid_classes(probs, valid_classes):
    """Return, for every row of `probs`, the most probable entry of `valid_classes`.

    `valid_classes` is used twice: as column indices into `probs` and as the
    values that are returned. Ties go to the entry listed first.
    NOTE(review): this assumes column j of `probs` belongs to class j - confirm
    against the producing model's `classes_`.

    Parameters:
        probs: 2-D array of per-row class probabilities.
        valid_classes: list of allowed class indices.

    Returns:
        1-D numpy array with one element of `valid_classes` per row.
    """
    allowed_labels = np.array(valid_classes)
    # Restrict each row to the allowed columns, then locate the row-wise maximum
    best_position = np.argmax(probs[:, valid_classes], axis=1)
    # Translate positions within the subset back to the class values
    return allowed_labels[best_position]

# Function to make predictions on new data
def predict_new_data(new_data_path, valid_classes=None):
    """Classify every row of a feature CSV with the stacked ensemble.

    Relies on the notebook globals `scaler`, `rf_model`, `svm_model` and
    `meta_model` having been fitted by the training cell, and on
    `filter_valid_classes` being defined.

    Parameters:
        new_data_path: path of a CSV whose columns are the model features.
            Every column is passed to the scaler; nothing is dropped.
        valid_classes: optional iterable of class codes the prediction is
            restricted to. Defaults to [0, 1, 3, 4], the set this notebook
            has always used.

    Returns:
        1-D numpy array with one class code from `valid_classes` per CSV row.
    """
    if valid_classes is None:
        valid_classes = [0, 1, 3, 4]
    else:
        # Accept any iterable (tuple, set, array) but index with a plain list
        valid_classes = list(valid_classes)

    # Read and preprocess the new data
    new_df = pd.read_csv(new_data_path)
    new_X = scaler.transform(new_df)

    # Generate prediction probabilities using base models
    rf_new_preds = rf_model.predict_proba(new_X)
    svm_new_preds = svm_model.predict_proba(new_X)

    # Stack the new prediction probabilities (RF first, SVM second)
    stacked_new_features = np.hstack((rf_new_preds, svm_new_preds))

    # Predict using the meta-model
    new_predictions_proba = meta_model.predict_proba(stacked_new_features)

    # Keep only the most probable class among the allowed ones
    # NOTE(review): assumes probability column j corresponds to class code j -
    # confirm against meta_model.classes_.
    return filter_valid_classes(new_predictions_proba, valid_classes)

# Example usage
# Classify the feature file written by the feature-extraction cell and print
# one predicted class code per row of that file.
new_data_path = '/content/drive/MyDrive/output_with_features.csv'
new_predictions = predict_new_data(new_data_path)
print(new_predictions)


[1 1 1 1 1 1]


Standing data


In [None]:
import pandas as pd

# Step 1: Load the samples.
# `skiprows=1` drops the file's first line, so pandas parses the line after it
# as the column header.
# NOTE(review): confirm that the file's second line is not a data sample.
df = pd.read_csv('/content/drive/MyDrive/angel_data/ACC_standing.csv', skiprows=1)

# Step 2: Re-open the file and take the first field of its first line as the
# recording start time.
with open('/content/drive/MyDrive/angel_data/ACC_standing.csv', 'r') as f:
    first_line = f.readline().strip().split(',')
starting_timestamp = float(first_line[0])  # Assuming the timestamp is in the first column

# Step 3: Sampling parameters
sampling_frequency = 32  # Hz
resolution = 0.015  # seconds

# One timestamp per row, spaced 1 / sampling_frequency apart from the start time
timestamps = [
    starting_timestamp + sample_index / sampling_frequency
    for sample_index in range(len(df))
]
df['Timestamp'] = timestamps

# Step 4: Name the first three columns (by position) X, Y and Z
first_col = df.columns[0]
second_col = df.columns[1]
third_col = df.columns[2]
df = df.rename(columns={first_col: 'X', second_col: 'Y', third_col: 'Z'})

# Step 5: Write the frame, without the index, to a new CSV file
df.to_csv('/content/drive/MyDrive/angel_data/ACC_standing_with_time_and_renamed.csv', index=False)


In [None]:
import pandas as pd
import numpy as np

# Windowing parameters: each window covers `window_size` seconds of samples
window_size = 10  # seconds
sampling_frequency = 32  # Hz
window_length = window_size * sampling_frequency  # rows per window

# Load the timestamped, renamed standing recording
file_path = '/content/drive/MyDrive/angel_data/ACC_standing_with_time_and_renamed.csv'
df = pd.read_csv(file_path)

# Function to calculate features for each window
def calculate_features(window, num_bins=10, peak_threshold=0.015):
    """Summarise one window of X/Y/Z samples as a flat dict of features.

    Parameters
    ----------
    window : pandas.DataFrame
        Rows of a single window; must provide the columns 'X', 'Y' and 'Z'.
    num_bins : int, default 10
        Number of equal-width histogram bins per axis. The bins span that
        axis's minimum to maximum within the window.
    peak_threshold : float, default 0.015
        A row counts towards ``<axis>PEAK`` when its value differs from the
        previous row's value by more than this amount.

    Returns
    -------
    dict
        Keys in insertion order: ``X0..``, ``Y0..``, ``Z0..`` (fraction of the
        window's rows per bin), ``XAVG/YAVG/ZAVG``, ``XPEAK/YPEAK/ZPEAK``,
        ``XABSOLDEV/..``, ``XSTANDDEV/..`` and ``RESULTANT``. The order is
        significant: it becomes the column order of any DataFrame built from
        these dicts.

    Raises
    ------
    ValueError
        If ``window`` has no rows or ``num_bins`` is smaller than 1.
    """
    n_samples = len(window)
    if n_samples == 0:
        # An empty window would otherwise produce NaN for every feature.
        raise ValueError("calculate_features() requires a non-empty window")
    if num_bins < 1:
        raise ValueError("num_bins must be at least 1")

    axes = ('X', 'Y', 'Z')
    features = {}

    # Binned distribution: share of samples in each equal-width bin per axis
    for axis in axes:
        values = window[axis]
        bin_edges = np.linspace(values.min(), values.max(), num=num_bins + 1)
        binned_counts, _ = np.histogram(values, bins=bin_edges)
        binned_distribution = binned_counts / n_samples
        for i in range(num_bins):
            features[f'{axis}{i}'] = binned_distribution[i]

    # Average
    for axis in axes:
        features[f'{axis}AVG'] = window[axis].mean()

    # Peak count placeholder: number of sample-to-sample jumps above the threshold
    # (the original code marks this as to be replaced with real peak detection)
    for axis in axes:
        features[f'{axis}PEAK'] = (window[axis].diff().abs() > peak_threshold).sum()

    # Mean absolute deviation from the axis average
    for axis in axes:
        features[f'{axis}ABSOLDEV'] = np.abs(window[axis] - features[f'{axis}AVG']).mean()

    # Standard deviation (pandas default, i.e. sample standard deviation)
    for axis in axes:
        features[f'{axis}STANDDEV'] = window[axis].std()

    # Resultant: mean Euclidean magnitude of the (X, Y, Z) vector
    resultant = np.sqrt(window['X']**2 + window['Y']**2 + window['Z']**2)
    features['RESULTANT'] = resultant.mean()

    return features

# Walk the recording in back-to-back, non-overlapping slices of `window_length` rows
features_list = []
for start in range(0, len(df), window_length):
    window = df.iloc[start:start + window_length]
    if len(window) != window_length:
        # Only the final slice can come up short; it is left out
        continue
    features = calculate_features(window)
    features_list.append(features)

# One row per full window, one column per feature
features_df = pd.DataFrame(features_list)

# Write the feature table, without the index, to a new CSV file
output_file_path = '/content/drive/MyDrive/output_standing_with_features.csv'
features_df.to_csv(output_file_path, index=False)

print(f"Feature extraction completed. The output is saved to {output_file_path}")


Feature extraction completed. The output is saved to /content/drive/MyDrive/output_standing_with_features.csv


In [None]:
# Function to filter predictions to only valid classes
def filter_valid_classes(probs, valid_classes):
    """Return, for every row of `probs`, the most probable entry of `valid_classes`.

    `valid_classes` is used twice: as column indices into `probs` and as the
    values that are returned. Ties go to the entry listed first.
    NOTE(review): this assumes column j of `probs` belongs to class j - confirm
    against the producing model's `classes_`.

    Parameters:
        probs: 2-D array of per-row class probabilities.
        valid_classes: list of allowed class indices.

    Returns:
        1-D numpy array with one element of `valid_classes` per row.
    """
    allowed_labels = np.array(valid_classes)
    # Restrict each row to the allowed columns, then locate the row-wise maximum
    best_position = np.argmax(probs[:, valid_classes], axis=1)
    # Translate positions within the subset back to the class values
    return allowed_labels[best_position]

# Function to make predictions on new data
def predict_new_data(new_data_path, valid_classes=None):
    """Classify every row of a feature CSV with the stacked ensemble.

    Relies on the notebook globals `scaler`, `rf_model`, `svm_model` and
    `meta_model` having been fitted by the training cell, and on
    `filter_valid_classes` being defined.

    Parameters:
        new_data_path: path of a CSV whose columns are the model features.
            Every column is passed to the scaler; nothing is dropped.
        valid_classes: optional iterable of class codes the prediction is
            restricted to. Defaults to [0, 1, 3, 4], the set this notebook
            has always used.

    Returns:
        1-D numpy array with one class code from `valid_classes` per CSV row.
    """
    if valid_classes is None:
        valid_classes = [0, 1, 3, 4]
    else:
        # Accept any iterable (tuple, set, array) but index with a plain list
        valid_classes = list(valid_classes)

    # Read and preprocess the new data
    new_df = pd.read_csv(new_data_path)
    new_X = scaler.transform(new_df)

    # Generate prediction probabilities using base models
    rf_new_preds = rf_model.predict_proba(new_X)
    svm_new_preds = svm_model.predict_proba(new_X)

    # Stack the new prediction probabilities (RF first, SVM second)
    stacked_new_features = np.hstack((rf_new_preds, svm_new_preds))

    # Predict using the meta-model
    new_predictions_proba = meta_model.predict_proba(stacked_new_features)

    # Keep only the most probable class among the allowed ones
    # NOTE(review): assumes probability column j corresponds to class code j -
    # confirm against meta_model.classes_.
    return filter_valid_classes(new_predictions_proba, valid_classes)

# Example usage
# Classify the standing-recording feature file written by the feature-extraction
# cell and print one predicted class code per row of that file.
new_data_path = '/content/drive/MyDrive/output_standing_with_features.csv'
new_predictions = predict_new_data(new_data_path)
print(new_predictions)


[3 3 1 1]


In [None]:
import pickle

# Persist every fitted piece needed for inference, one pickle file each:
# the two base models, the meta-model, and the scaler (written in that order).
_artifacts_to_save = [
    ('/content/drive/MyDrive/rf_model.pkl', rf_model),
    ('/content/drive/MyDrive/svm_model.pkl', svm_model),
    ('/content/drive/MyDrive/meta_model.pkl', meta_model),
    ('/content/drive/MyDrive/scaler.pkl', scaler),
]

for _pickle_path, _artifact in _artifacts_to_save:
    with open(_pickle_path, 'wb') as f:
        pickle.dump(_artifact, f)
