In [1]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
from scipy.fft import rfft, rfftfreq
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import lightgbm as lgb

In [2]:
# Column names applied positionally to the CSV files read by load_csv.
COLUMNS = [
    "timestamp",
    "ax", "ay", "az",
    "gx", "gy", "gz",
    "temp",
]

def load_csv(path, label):
    """Read one recording and tag every row with its class label.

    Parameters
    ----------
    path : str or path-like
        CSV file to read. Columns are named positionally from ``COLUMNS``,
        so the first line of the file is treated as data, not as a header.
    label : scalar
        Value stored in a new ``"label"`` column for every row.

    Returns
    -------
    pandas.DataFrame
        The ``COLUMNS`` columns followed by ``"label"``.
    """
    recording = pd.read_csv(path, names=COLUMNS)
    return recording.assign(label=label)

# One recording per activity; the position in the list is the class label
# (0 = idle.csv, 1 = walking.csv, 2 = running.csv).
idle_df, walking_df, running_df = (
    load_csv(csv_name, class_id)
    for class_id, csv_name in enumerate(["idle.csv", "walking.csv", "running.csv"])
)

# Stack the three recordings into one frame with a fresh 0..n-1 index.
df = pd.concat([idle_df, walking_df, running_df], ignore_index=True)


In [3]:
WINDOW_SIZE = 200  # rows per window
STEP_SIZE   = 50   # rows between the starts of consecutive windows

def create_windows(df, window_size=None, step_size=None):
    """Cut each label's rows into fixed-length, overlapping windows.

    Rows are grouped by the ``"label"`` column (in order of first
    appearance) and each group is sliced positionally, so a window never
    mixes rows from two labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``"label"`` column.
    window_size : int, optional
        Rows per window. Defaults to the module-level ``WINDOW_SIZE``,
        looked up at call time.
    step_size : int, optional
        Offset between consecutive window starts. Defaults to the
        module-level ``STEP_SIZE``, looked up at call time.

    Returns
    -------
    windows : list of pandas.DataFrame
        Each entry has exactly ``window_size`` rows.
    labels : numpy.ndarray
        Label of each window, aligned with ``windows``.

    Raises
    ------
    ValueError
        If ``window_size`` or ``step_size`` is not positive.
    """
    if window_size is None:
        window_size = WINDOW_SIZE
    if step_size is None:
        step_size = STEP_SIZE
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive")

    windows = []
    labels  = []

    for label in df["label"].unique():
        temp = df[df["label"] == label]

        # The last valid start index is len(temp) - window_size, so the
        # exclusive stop must be one past it; without the +1 the final full
        # window is dropped (and a group of exactly window_size rows yields
        # no window at all).
        for i in range(0, len(temp) - window_size + 1, step_size):
            windows.append(temp.iloc[i:i + window_size])
            labels.append(label)

    return windows, np.array(labels)


In [4]:
def extract_features(window, sampling_rate=50):
    """Summarise one window of sensor rows as a flat feature dictionary.

    Parameters
    ----------
    window : pandas.DataFrame
        Rows with columns ``ax, ay, az, gx, gy, gz, temp``.
    sampling_rate : float, optional
        Samples per second, used to convert FFT bins to Hz. Defaults to 50,
        the rate the original code hard-coded.

    Returns
    -------
    dict
        Per-axis ``mean/std/min/max/skew/kurt/energy`` for the six motion
        columns, ``acc_mag_mean/std/energy``, ``dom_freq`` (Hz) and
        ``temp_mean/std``, inserted in that order.

    Notes
    -----
    Per-axis ``std`` uses ``np.std`` (population, ddof=0), while
    ``acc_mag_std`` and ``temp_std`` use pandas ``Series.std`` (sample,
    ddof=1). This is kept as in the original so feature values stay
    comparable.
    """
    features = {}

    for col in ["ax","ay","az","gx","gy","gz"]:
        # Work in float so squaring cannot overflow if the column was
        # loaded with a narrow integer dtype.
        x = window[col].to_numpy(dtype=float)

        features[f"{col}_mean"] = np.mean(x)
        features[f"{col}_std"]  = np.std(x)
        features[f"{col}_min"]  = np.min(x)
        features[f"{col}_max"]  = np.max(x)
        features[f"{col}_skew"] = skew(x)
        features[f"{col}_kurt"] = kurtosis(x)
        features[f"{col}_energy"] = np.sum(x**2) / len(x)

    # Acceleration magnitude: Euclidean norm of the three accelerometer axes.
    acc_x, acc_y, acc_z = (window[c].astype(float) for c in ("ax", "ay", "az"))
    acc_mag = np.sqrt(acc_x**2 + acc_y**2 + acc_z**2)

    features["acc_mag_mean"] = acc_mag.mean()
    features["acc_mag_std"]  = acc_mag.std()
    features["acc_mag_energy"] = np.sum(acc_mag**2) / len(acc_mag)

    # Dominant frequency (FFT).
    # The magnitude is non-negative with a large mean, so without removing
    # the mean the 0 Hz (DC) bin always has the largest amplitude and
    # dom_freq degenerates to 0. Subtracting the mean leaves only the
    # oscillating part of the signal.
    acc_mag_values = acc_mag.to_numpy()
    fft_vals = np.abs(rfft(acc_mag_values - acc_mag_values.mean()))
    freqs = rfftfreq(len(acc_mag_values), d=1.0 / sampling_rate)
    features["dom_freq"] = freqs[np.argmax(fft_vals)]

    # Temperature
    # NOTE(review): this notebook's correlation output shows temp strongly
    # correlated with label (about -0.77); that may reflect recording
    # conditions rather than the activity itself. Confirm before relying on
    # these two features.
    features["temp_mean"] = window["temp"].mean()
    features["temp_std"]  = window["temp"].std()

    return features


In [5]:
# Display the combined frame (pandas shows a truncated head/tail view for a
# frame this long) as a quick sanity check of the loaded columns and labels.
df

Unnamed: 0,timestamp,ax,ay,az,gx,gy,gz,temp,label
0,1791011,-3607,-4712,208,6554,-1013,-1693,44.06,0
1,1791031,-4335,-4960,676,7664,-984,-2271,43.97,0
2,1791051,-5411,-5180,-84,9600,-1102,-3164,44.01,0
3,1791071,-5399,-5184,-268,12813,-1045,-4171,43.54,0
4,1791091,-5107,-4488,-112,15192,-1169,-5705,44.06,0
...,...,...,...,...,...,...,...,...,...
37536,1483571,-7311,-3552,-7428,2567,-3565,3683,40.72,2
37537,1483591,-5899,-4864,-7736,2198,-3179,5284,40.72,2
37538,1483611,-4075,-5832,-7836,2153,-3143,6447,40.72,2
37539,1483631,-2471,-6156,-7728,2147,-3485,7074,40.67,2


In [6]:
# Per-row acceleration magnitude: Euclidean norm of the ax/ay/az columns.
# Adds the column to df in place.
df["acc_mag"] = np.sqrt(sum(df[axis].pow(2) for axis in ("ax", "ay", "az")))

In [7]:
# Per-row gyroscope magnitude: Euclidean norm of the gx/gy/gz columns.
# Adds the column to df in place.
df["gyro_mag"] = np.sqrt(sum(df[axis].pow(2) for axis in ("gx", "gy", "gz")))

In [8]:
# Display the frame again to confirm the acc_mag and gyro_mag columns were added.
df

Unnamed: 0,timestamp,ax,ay,az,gx,gy,gz,temp,label,acc_mag,gyro_mag
0,1791011,-3607,-4712,208,6554,-1013,-1693,44.06,0,5937.731638,6844.511232
1,1791031,-4335,-4960,676,7664,-984,-2271,43.97,0,6621.993733,8053.731620
2,1791051,-5411,-5180,-84,9600,-1102,-3164,44.01,0,7491.219994,10167.856215
3,1791071,-5399,-5184,-268,12813,-1045,-4171,43.54,0,7489.651594,13515.259339
4,1791091,-5107,-4488,-112,15192,-1169,-5705,44.06,0,6799.715950,16269.924708
...,...,...,...,...,...,...,...,...,...,...,...
37536,1483571,-7311,-3552,-7428,2567,-3565,3683,40.72,2,11011.022160,5732.643631
37537,1483591,-5899,-4864,-7736,2198,-3179,5284,40.72,2,10876.690351,6546.594611
37538,1483611,-4075,-5832,-7836,2153,-3143,6447,40.72,2,10583.985308,7488.502320
37539,1483631,-2471,-6156,-7728,2147,-3485,7074,40.67,2,10184.505928,8172.900954


In [9]:
# Pairwise correlation of every column in df; the "label" column of that
# matrix is then listed from most positive to most negative.
corrmatrix = df.corr()
corrmatrix.loc[:, "label"].sort_values(ascending=False)

label        1.000000
gyro_mag     0.756660
acc_mag      0.556906
az           0.415433
ay           0.258811
gz           0.178350
ax           0.077621
gx          -0.027629
gy          -0.107074
timestamp   -0.547471
temp        -0.771407
Name: label, dtype: float64

In [11]:
# Rows with label 0 (loaded from idle.csv). The variable name is kept as-is
# because the following describe() cell refers to it.
idle_walking = df.loc[df["label"].eq(0)]

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) for the label-0 rows.
idle_walking.describe()

Unnamed: 0,timestamp,ax,ay,az,gx,gy,gz,temp,label,acc_mag,gyro_mag
count,12759.0,12759.0,12759.0,12759.0,12759.0,12759.0,12759.0,12759.0,12759.0,12759.0,12759.0
mean,1918591.0,-3323.49471,692.052355,-5873.925856,1072.908457,679.922016,-426.606004,44.112053,0.0,7407.720112,2499.890102
std,73667.01,675.388226,2938.485056,1155.110195,3043.113952,1159.852337,1285.071591,0.17502,0.0,1256.053767,2793.354348
min,1791011.0,-9987.0,-11700.0,-12712.0,-32048.0,-8188.0,-11274.0,43.49,0.0,2172.631814,103.990384
25%,1854801.0,-3667.0,-1268.0,-6692.0,565.0,426.0,-628.0,44.01,0.0,6610.896838,1237.586159
50%,1918591.0,-3367.0,1844.0,-5756.0,1082.0,651.0,-382.0,44.06,0.0,7236.493695,1563.921354
75%,1982381.0,-2983.0,2888.0,-5204.0,1590.0,861.5,-178.0,44.2,0.0,8285.789803,2503.834154
max,2046171.0,1945.0,10528.0,1192.0,28834.0,16617.0,9459.0,44.81,0.0,14262.686738,32302.549652


In [20]:
# Rows with label 1 (loaded from walking.csv), followed by their summary statistics.
walking_walking = df.loc[df["label"].eq(1)]
walking_walking.describe()

Unnamed: 0,timestamp,ax,ay,az,gx,gy,gz,temp,label,acc_mag,gyro_mag
count,12427.0,12427.0,12427.0,12427.0,12427.0,12427.0,12427.0,12427.0,12427.0,12427.0,12427.0
mean,902951.0,-3813.241249,-1678.194898,-6155.794641,1090.928543,582.347791,-398.331697,43.212018,1.0,7864.21127,8332.149301
std,71750.2,1655.78209,2216.767837,1897.011013,6089.299714,4140.178064,5777.060573,0.61644,0.0,2158.455402,4455.651833
min,778691.0,-11199.0,-13432.0,-13916.0,-31652.0,-14351.0,-16590.0,42.13,1.0,2587.279846,192.942997
25%,840821.0,-4955.0,-2902.0,-7232.0,-3013.5,-2216.0,-4621.0,42.74,1.0,6293.232906,4734.931626
50%,902951.0,-3687.0,-1284.0,-5988.0,1394.0,847.0,-494.0,42.98,1.0,7689.156586,7999.459294
75%,965081.0,-2615.0,-236.0,-4752.0,5343.0,3537.0,3815.5,43.54,1.0,9117.422481,11361.699215
max,1027211.0,1421.0,10504.0,-184.0,32271.0,11835.0,15506.0,45.0,1.0,16602.30734,34256.229054


In [21]:
# Rows with label 2 (loaded from running.csv), followed by their summary statistics.
running_walking = df.loc[df["label"].eq(2)]
running_walking.describe()

Unnamed: 0,timestamp,ax,ay,az,gx,gy,gz,temp,label,acc_mag,gyro_mag
count,12355.0,12355.0,12355.0,12355.0,12355.0,12355.0,12355.0,12355.0,12355.0,12355.0,12355.0
mean,1360111.0,-2188.243545,4316.63966,129.365034,515.897208,-1780.285552,4654.066289,41.90837,2.0,14756.759969,25196.28605
std,71334.51,10095.719408,8067.373902,8632.701082,12534.357799,15591.455051,18855.25318,1.111131,0.0,6869.529004,12079.647361
min,1236571.0,-20843.0,-19228.0,-25208.0,-32687.0,-32767.0,-32755.0,40.25,2.0,404.565199,639.965624
25%,1298341.0,-9613.0,-1894.0,-6748.0,-7237.0,-13319.0,-10038.0,40.91,2.0,9661.873978,15439.899787
50%,1360111.0,-1207.0,3784.0,-380.0,1140.0,-2353.0,5019.0,41.71,2.0,14000.401459,24374.145216
75%,1421881.0,5921.0,9834.0,5950.0,8939.0,10566.0,20319.5,42.6,2.0,19066.077977,34999.841049
max,1483651.0,24389.0,32107.0,22639.0,32712.0,32764.0,32762.0,44.67,2.0,44141.589222,56295.014291
