In [19]:
# Mount Google Drive to access dataset
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
import pandas as pd
import numpy as np
from scipy.signal import medfilt, butter, filtfilt
from scipy.fft import fft
import matplotlib.pyplot as plt

df = pd.read_csv("/content/drive/My Drive/Guru-Dataset.csv", names=['tAcc-X','tAcc-Y','tAcc-Z','tGyro-X','tGyro-Y','tGyro-Z','Activity'])



In [21]:
df

Unnamed: 0,tAcc-X,tAcc-Y,tAcc-Z,tGyro-X,tGyro-Y,tGyro-Z,Activity
0,0.0435,0.1641,-0.6017,-0.09,-0.29,0.07,2
1,0.0492,0.1165,-0.6629,-0.07,-0.28,0.05,2
2,0.0575,0.0752,-0.6598,-0.06,-0.28,0.03,2
3,0.0657,0.0164,-0.6570,-0.05,-0.28,0.00,2
4,0.0477,-0.0507,-0.6023,-0.04,-0.28,-0.02,2
...,...,...,...,...,...,...,...
2990,0.1372,-0.0518,-0.0875,-0.01,0.05,-0.01,5
2991,0.1155,-0.0687,-0.0614,-0.01,0.05,-0.01,5
2992,0.1012,-0.0807,-0.0165,-0.01,0.05,-0.01,5
2993,0.0870,-0.0783,0.0328,-0.01,0.04,-0.01,5


In [22]:
# Convert columns to numeric, forcing errors to NaN (then you can decide how to handle NaNs)
df['tAcc-X'] = pd.to_numeric(df['tAcc-X'], errors='coerce')
df['tAcc-Y'] = pd.to_numeric(df['tAcc-Y'], errors='coerce')
df['tAcc-Z'] = pd.to_numeric(df['tAcc-Z'], errors='coerce')
df['tGyro-X'] = pd.to_numeric(df['tGyro-X'], errors='coerce')
df['tGyro-Y'] = pd.to_numeric(df['tGyro-Y'], errors='coerce')
df['tGyro-Z'] = pd.to_numeric(df['tGyro-Z'], errors='coerce')

# Handle NaNs if any (e.g., by filling with the mean of the column)
df = df.fillna(df.mean())


In [23]:
# Step 3: Apply Median Filter
df['tAcc-X'] = medfilt(df['tAcc-X'], kernel_size=3)
df['tAcc-Y'] = medfilt(df['tAcc-Y'], kernel_size=3)
df['tAcc-Z'] = medfilt(df['tAcc-Z'], kernel_size=3)
df['tGyro-X'] = medfilt(df['tGyro-X'], kernel_size=3)
df['tGyro-Y'] = medfilt(df['tGyro-Y'], kernel_size=3)
df['tGyro-Z'] = medfilt(df['tGyro-Z'], kernel_size=3)

In [24]:
# Step 4: Apply 3rd Order Low Pass Butterworth Filter
def butter_lowpass(cutoff, fs, order=3):
    nyquist = 0.5 * fs
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a

def lowpass_filter(data, cutoff, fs, order=3):
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = filtfilt(b, a, data)
    return y

# Parameters
fs = 50.0  # Sampling frequency (Hz)
cutoff = 20.0  # Desired cutoff frequency (Hz)

# Apply the filter
df['tAcc-X'] = lowpass_filter(df['tAcc-X'], cutoff, fs)
df['tAcc-Y'] = lowpass_filter(df['tAcc-Y'], cutoff, fs)
df['tAcc-Z'] = lowpass_filter(df['tAcc-Z'], cutoff, fs)
df['tGyro-X'] = lowpass_filter(df['tGyro-X'], cutoff, fs)
df['tGyro-Y'] = lowpass_filter(df['tGyro-Y'], cutoff, fs)
df['tGyro-Z'] = lowpass_filter(df['tGyro-Z'], cutoff, fs)


In [25]:
# Step 5: Separate into Body and Gravity Acceleration
gravity_cutoff = 0.3

df['tGravityAcc-X'] = lowpass_filter(df['tAcc-X'], gravity_cutoff, fs)
df['tGravityAcc-Y'] = lowpass_filter(df['tAcc-Y'], gravity_cutoff, fs)
df['tGravityAcc-Z'] = lowpass_filter(df['tAcc-Z'], gravity_cutoff, fs)

df['tBodyAcc-X'] = df['tAcc-X'] - df['tGravityAcc-X']
df['tBodyAcc-Y'] = df['tAcc-Y'] - df['tGravityAcc-Y']
df['tBodyAcc-Z'] = df['tAcc-Z'] - df['tGravityAcc-Z']


In [26]:

# Step 6: Derive Jerk Signals
df['tBodyAccJerk-X'] = np.gradient(df['tBodyAcc-X'], edge_order=2)
df['tBodyAccJerk-Y'] = np.gradient(df['tBodyAcc-Y'], edge_order=2)
df['tBodyAccJerk-Z'] = np.gradient(df['tBodyAcc-Z'], edge_order=2)

df['tBodyGyroJerk-X'] = np.gradient(df['tGyro-X'], edge_order=2)
df['tBodyGyroJerk-Y'] = np.gradient(df['tGyro-Y'], edge_order=2)
df['tBodyGyroJerk-Z'] = np.gradient(df['tGyro-Z'], edge_order=2)


In [27]:

# Step 7: Calculate Magnitudes using Euclidean Norm
df['tBodyAccMag'] = np.sqrt(df['tBodyAcc-X']**2 + df['tBodyAcc-Y']**2 + df['tBodyAcc-Z']**2)
df['tGravityAccMag'] = np.sqrt(df['tGravityAcc-X']**2 + df['tGravityAcc-Y']**2 + df['tGravityAcc-Z']**2)
df['tBodyAccJerkMag'] = np.sqrt(df['tBodyAccJerk-X']**2 + df['tBodyAccJerk-Y']**2 + df['tBodyAccJerk-Z']**2)
df['tBodyGyroMag'] = np.sqrt(df['tGyro-X']**2 + df['tGyro-Y']**2 + df['tGyro-Z']**2)
df['tBodyGyroJerkMag'] = np.sqrt(df['tBodyGyroJerk-X']**2 + df['tBodyGyroJerk-Y']**2 + df['tBodyGyroJerk-Z']**2)


In [28]:

# Step 8: Apply Fast Fourier Transform (FFT)
def compute_fft(signal):
    return np.abs(fft(np.ascontiguousarray(signal)))

df['fBodyAcc-X'] = compute_fft(df['tBodyAcc-X'])
df['fBodyAcc-Y'] = compute_fft(df['tBodyAcc-Y'])
df['fBodyAcc-Z'] = compute_fft(df['tBodyAcc-Z'])

df['fBodyAccJerk-X'] = compute_fft(df['tBodyAccJerk-X'])
df['fBodyAccJerk-Y'] = compute_fft(df['tBodyAccJerk-Y'])
df['fBodyAccJerk-Z'] = compute_fft(df['tBodyAccJerk-Z'])

df['fBodyGyro-X'] = compute_fft(df['tGyro-X'])
df['fBodyGyro-Y'] = compute_fft(df['tGyro-Y'])
df['fBodyGyro-Z'] = compute_fft(df['tGyro-Z'])

df['fBodyAccMag'] = compute_fft(df['tBodyAccMag'])
df['fBodyAccJerkMag'] = compute_fft(df['tBodyAccJerkMag'])
df['fBodyGyroMag'] = compute_fft(df['tBodyGyroMag'])
df['fBodyGyroJerkMag'] = compute_fft(df['tBodyGyroJerkMag'])


In [29]:
# Display the first few rows of the DataFrame to verify the results
print(df.head())


     tAcc-X    tAcc-Y    tAcc-Z   tGyro-X   tGyro-Y   tGyro-Z  Activity  \
0  0.043473  0.116501 -0.601711 -0.069986 -0.280003  0.050003         2   
1  0.049935  0.115399 -0.657823 -0.069963 -0.280030  0.050242         2   
2  0.056169  0.077179 -0.662919 -0.060019 -0.280037  0.029559         2   
3  0.059232  0.013930 -0.653965 -0.050074 -0.279726  0.000521         2   
4  0.045818 -0.048225 -0.604184 -0.039811 -0.280685 -0.020430         2   

   tGravityAcc-X  tGravityAcc-Y  tGravityAcc-Z  ...  fBodyAccJerk-X  \
0      -0.078881       0.127158      -0.591676  ...        0.050630   
1      -0.080664       0.120651      -0.576851  ...        0.050941   
2      -0.082284       0.114191      -0.561966  ...        0.051897   
3      -0.083736       0.107786      -0.547031  ...        0.053547   
4      -0.085014       0.101441      -0.532055  ...        0.056110   

   fBodyAccJerk-Y  fBodyAccJerk-Z  fBodyGyro-X  fBodyGyro-Y  fBodyGyro-Z  \
0        0.058821        0.091159   140.839899

In [36]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
from scipy.signal import find_peaks
from scipy.linalg import toeplitz
from numpy.lib.stride_tricks import sliding_window_view

# Function definitions remain the same
def mean(series):
    return np.mean(series)

def std(series):
    return np.std(series)

def mad(series):
    return np.median(np.abs(series - np.median(series)))

def max_value(series):
    return np.max(series)

def min_value(series):
    return np.min(series)

def sma(series):
    return np.sum(np.abs(series))

def energy(series):
    return np.sum(series ** 2) / len(series)

def iqr(series):
    return np.percentile(series, 75) - np.percentile(series, 25)

def entropy(series):
    prob_distribution, _ = np.histogram(series, bins=256, density=True)
    prob_distribution = prob_distribution[prob_distribution > 0]
    return -np.sum(prob_distribution * np.log2(prob_distribution))

def ar_coeff(series, order=4):
    from statsmodels.tsa.ar_model import AutoReg
    if len(series) > order:
        model = AutoReg(series, lags=order)
        model_fit = model.fit()
        return model_fit.params
    else:
        return [0] * (order + 1)  # Return zeros if the series is too short

def correlation(series1, series2):
    return np.corrcoef(series1, series2)[0, 1]

def max_inds(series):
    return np.argmax(series)

def mean_freq(series):
    fft_vals = np.fft.fft(series)
    freqs = np.fft.fftfreq(len(series))
    return np.sum(np.abs(freqs) * np.abs(fft_vals)) / np.sum(np.abs(fft_vals))

def skewness(series):
    return skew(series)

def kurt(series):
    return kurtosis(series)

def bands_energy(series, num_bins=64):
    fft_vals = np.abs(np.fft.fft(series))[:num_bins]
    band_energy = []
    bin_size = len(fft_vals) // num_bins
    for i in range(0, len(fft_vals), bin_size):
        band_energy.append(np.sum(fft_vals[i:i+bin_size] ** 2) / bin_size)
    return band_energy

def angle(series1, series2):
    dot_product = np.dot(series1, series2)
    norm1 = np.linalg.norm(series1)
    norm2 = np.linalg.norm(series2)
    return np.arccos(dot_product / (norm1 * norm2))

# Function to compute all features for a given signal
def compute_features(df, signal_name):
    features = {}
    signal = df[signal_name]

    features[f'{signal_name}_mean'] = mean(signal)
    features[f'{signal_name}_std'] = std(signal)
    features[f'{signal_name}_mad'] = mad(signal)
    features[f'{signal_name}_max'] = max_value(signal)
    features[f'{signal_name}_min'] = min_value(signal)
    features[f'{signal_name}_sma'] = sma(signal)
    features[f'{signal_name}_energy'] = energy(signal)
    features[f'{signal_name}_iqr'] = iqr(signal)
    features[f'{signal_name}_entropy'] = entropy(signal)
    features[f'{signal_name}_arCoeff'] = ar_coeff(signal)
    features[f'{signal_name}_maxInds'] = max_inds(signal)
    features[f'{signal_name}_meanFreq'] = mean_freq(signal)
    features[f'{signal_name}_skewness'] = skewness(signal)
    features[f'{signal_name}_kurtosis'] = kurt(signal)
    # features[f'{signal_name}_bandsEnergy'] = bands_energy(signal)

    return features

# List of signals to compute features for
signals = [
    'tBodyAcc-X', 'tBodyAcc-Y', 'tBodyAcc-Z',
    'tGravityAcc-X', 'tGravityAcc-Y', 'tGravityAcc-Z',
    'tBodyAccJerk-X', 'tBodyAccJerk-Y', 'tBodyAccJerk-Z',
    'tGyro-X', 'tGyro-Y', 'tGyro-Z',
    'tBodyGyroJerk-X', 'tBodyGyroJerk-Y', 'tBodyGyroJerk-Z',
    'tBodyAccMag', 'tGravityAccMag', 'tBodyAccJerkMag',
    'tBodyGyroMag', 'tBodyGyroJerkMag',
    'fBodyAcc-X', 'fBodyAcc-Y', 'fBodyAcc-Z',
    'fBodyAccJerk-X', 'fBodyAccJerk-Y', 'fBodyAccJerk-Z',
    'fBodyGyro-X', 'fBodyGyro-Y', 'fBodyGyro-Z',
    'fBodyAccMag', 'fBodyAccJerkMag', 'fBodyGyroMag', 'fBodyGyroJerkMag'
]

# Check for missing values in the original DataFrame and fill if necessary
df.fillna(0, inplace=True)

# Initialize a list to store features DataFrame for each row
features_list = []

# Compute features for each row in the original DataFrame
for index, row in df.iterrows():
    row_features = {}
    for signal in signals:
        signal_features = compute_features(pd.DataFrame([row]), signal)
        row_features.update(signal_features)

    # Append the row features to the list
    features_list.append(row_features)

# Create the features DataFrame from the list of dictionaries
features_df = pd.DataFrame(features_list)

# Display the features DataFrame
print(features_df.head())


   tBodyAcc-X_mean  tBodyAcc-X_std  tBodyAcc-X_mad  tBodyAcc-X_max  \
0         0.122354             0.0             0.0        0.122354   
1         0.130599             0.0             0.0        0.130599   
2         0.138454             0.0             0.0        0.138454   
3         0.142968             0.0             0.0        0.142968   
4         0.130832             0.0             0.0        0.130832   

   tBodyAcc-X_min  tBodyAcc-X_sma  tBodyAcc-X_energy  tBodyAcc-X_iqr  \
0        0.122354        0.122354           0.014971             0.0   
1        0.130599        0.130599           0.017056             0.0   
2        0.138454        0.138454           0.019169             0.0   
3        0.142968        0.142968           0.020440             0.0   
4        0.130832        0.130832           0.017117             0.0   

   tBodyAcc-X_entropy tBodyAcc-X_arCoeff  ...  fBodyGyroJerkMag_min  \
0             -2048.0    [0, 0, 0, 0, 0]  ...             53.598675   
1   

In [37]:
features_df.to_csv('RealTimeValidation.csv',index_label=False)

In [38]:
features_df

Unnamed: 0,tBodyAcc-X_mean,tBodyAcc-X_std,tBodyAcc-X_mad,tBodyAcc-X_max,tBodyAcc-X_min,tBodyAcc-X_sma,tBodyAcc-X_energy,tBodyAcc-X_iqr,tBodyAcc-X_entropy,tBodyAcc-X_arCoeff,...,fBodyGyroJerkMag_min,fBodyGyroJerkMag_sma,fBodyGyroJerkMag_energy,fBodyGyroJerkMag_iqr,fBodyGyroJerkMag_entropy,fBodyGyroJerkMag_arCoeff,fBodyGyroJerkMag_maxInds,fBodyGyroJerkMag_meanFreq,fBodyGyroJerkMag_skewness,fBodyGyroJerkMag_kurtosis
0,0.122354,0.0,0.0,0.122354,0.122354,0.122354,0.014971,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,53.598675,53.598675,2872.817994,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
1,0.130599,0.0,0.0,0.130599,0.130599,0.130599,0.017056,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,9.394472,9.394472,88.256110,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
2,0.138454,0.0,0.0,0.138454,0.138454,0.138454,0.019169,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,13.206281,13.206281,174.405871,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
3,0.142968,0.0,0.0,0.142968,0.142968,0.142968,0.020440,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,15.692679,15.692679,246.260159,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
4,0.130832,0.0,0.0,0.130832,0.130832,0.130832,0.017117,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,19.219193,19.219193,369.377394,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2990,0.220348,0.0,0.0,0.220348,0.220348,0.220348,0.048553,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,8.541934,8.541934,72.964642,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
2991,0.203844,0.0,0.0,0.203844,0.203844,0.203844,0.041552,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,19.219193,19.219193,369.377394,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
2992,0.185399,0.0,0.0,0.185399,0.185399,0.185399,0.034373,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,15.692679,15.692679,246.260159,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
2993,0.173845,0.0,0.0,0.173845,0.173845,0.173845,0.030222,0.0,-2048.0,"[0, 0, 0, 0, 0]",...,13.206281,13.206281,174.405871,0.0,-2048.0,"[0, 0, 0, 0, 0]",0,0.0,,
