In [1]:
# Main import block 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the train/test motion CSVs; both files are expected in the notebook's
# working directory. Only the reads are guarded so an unrelated failure in the
# reporting lines below is never mistaken for a missing file.
try:
    train_df = pd.read_csv('train_motion_data.csv')
    test_df = pd.read_csv('test_motion_data.csv')
except FileNotFoundError:
    print("Error: Make sure 'train_motion_data.csv' and 'test_motion_data.csv' are in the same directory.")
    # Re-raise rather than calling exit(): exit() ends the whole session and
    # discards the traceback, whereas re-raising stops this cell (and a
    # "Run All") while still showing which file could not be found.
    raise

print("Train and test data loaded successfully.")
print(f"Training data shape: {train_df.shape}")
print(f"Testing data shape: {test_df.shape}\n")

print(f"Original shape: {train_df.shape}")

Train and test data loaded successfully.
Training data shape: (3644, 8)
Testing data shape: (3084, 8)

Original shape: (3644, 8)


In [6]:
# New approach: derive rolling-window statistics (mean/std/max/min) from the sensor signals



def create_time_series(df, window_size):
    """
    Build rolling-window summary features from raw accelerometer/gyroscope data.

    The input frame is copied first, so the caller's DataFrame is not modified.
    Two magnitude columns (``Acc_Mag`` and ``Gyro_Mag``) are added, and then for
    each of the six raw axes plus the two magnitudes the rolling mean, std, max
    and min over ``window_size`` rows are appended, named
    ``<col>_<stat>_<window_size>`` (for example ``AccX_mean_150``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``AccX``, ``AccY``, ``AccZ``, ``GyroX``,
        ``GyroY`` and ``GyroZ``. Rows are used in their existing order; no
        sorting is performed here.
    window_size : int
        Number of consecutive rows in each rolling window.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the magnitude and rolling columns added and missing
        values back-filled.

    Notes
    -----
    With pandas' default ``min_periods`` the first ``window_size - 1`` rows of
    every rolling column are NaN; they are filled from the first complete
    window. The back-fill is applied to the whole frame, so NaNs in the
    original columns are filled as well. If the frame has fewer rows than
    ``window_size`` there is nothing to fill from and the rolling columns stay
    NaN.
    """
    df = df.copy()

    # Vector magnitudes for the accelerometer and gyroscope, to capture the
    # overall signal strength independent of axis.
    df['Acc_Mag'] = np.sqrt(df['AccX']**2 + df['AccY']**2 + df['AccZ']**2)
    df['Gyro_Mag'] = np.sqrt(df['GyroX']**2 + df['GyroY']**2 + df['GyroZ']**2)

    feature_cols = ['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ', 'Acc_Mag', 'Gyro_Mag']

    print(f"\n--- Engineering features with window size {window_size} ---")

    for col in feature_cols:
        # Build the rolling window once per column and reuse it for every
        # aggregate instead of re-creating it four times.
        window = df[col].rolling(window=window_size)
        df[f'{col}_mean_{window_size}'] = window.mean()
        df[f'{col}_std_{window_size}'] = window.std()
        df[f'{col}_max_{window_size}'] = window.max()
        df[f'{col}_min_{window_size}'] = window.min()

    # DataFrame.bfill() is the supported spelling of fillna(method='bfill'),
    # which is deprecated (FutureWarning) and removed in newer pandas.
    return df.bfill()



In [None]:

# Rolling-window length (in rows) used for both the train and test features.
WINDOW_SIZE = 150

# Engineer identical rolling features for each split (train first, then test).
df_train, df_test = (
    create_time_series(split, WINDOW_SIZE) for split in (train_df, test_df)
)

# The label column and the timestamp are excluded from the model inputs;
# every other column (raw signals, magnitudes, rolling stats) is a feature.
target_col = 'Class'
feature_cols = [
    name for name in df_train.columns
    if name not in (target_col, 'Timestamp')
]

# The test matrix is selected with the train-derived column list so both
# matrices share the same columns in the same order.
X_train, y_train = df_train[feature_cols], df_train[target_col]
X_test, y_test = df_test[feature_cols], df_test[target_col]

# Fit the scaler on the training features only, then apply it to the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)




--- Engineering features with window size 150 ---

--- Engineering features with window size 150 ---


  df = df.fillna(method='bfill')
  df = df.fillna(method='bfill')
