# Feature Engineering
Feature engineering or feature extraction or feature discovery is the process of using domain knowledge to extract features (characteristics, properties, attributes) from raw data. The motivation is to use these extra features to improve the quality of results from a machine learning process, compared with supplying only the raw data to the machine learning process.

In [1]:
import json
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go



In [47]:
# Load the raw sensor recording. JSON text is UTF-8, so the encoding is set
# explicitly instead of relying on the platform's default text encoding.
with open("recording_exercise.json", "r", encoding="utf-8") as f:
    data = json.load(f)


def read_data(data):
    """Split a flat list of sensor records into one DataFrame per sensor.

    Args:
        data: Iterable of dict records. Every record must have a "sensor"
            key. Records whose sensor is "Microphone", "Accelerometer" or
            "Orientation" must also carry "seconds_elapsed" and the value
            fields listed in ``sensor_fields`` below. Records from any other
            sensor are ignored.

    Returns:
        A tuple ``(readings_microphone, readings_accelerometer,
        readings_orientation)`` of DataFrames whose columns are all cast to
        float: ``times, values`` / ``times, x, y, z`` /
        ``times, qz, qy, qx, qw``.

    Raises:
        KeyError: If a record lacks "sensor" or one of the fields required
            for its sensor.
        ValueError: If a collected value cannot be converted to float.
    """
    # Output column -> key in the raw record, per sensor. Dict order defines
    # the column order of the resulting DataFrames.
    sensor_fields = {
        "Microphone": {"times": "seconds_elapsed", "values": "dBFS"},
        "Accelerometer": {
            "times": "seconds_elapsed",
            "x": "x",
            "y": "y",
            "z": "z",
        },
        "Orientation": {
            "times": "seconds_elapsed",
            "qz": "qz",
            "qy": "qy",
            "qx": "qx",
            "qw": "qw",
        },
    }

    collected = {
        sensor: {column: [] for column in fields}
        for sensor, fields in sensor_fields.items()
    }

    for entry in data:
        sensor = entry["sensor"]
        fields = sensor_fields.get(sensor) if isinstance(sensor, str) else None
        if fields is None:
            continue

        # Read the complete record before appending anything, so the columns
        # of one sensor always have the same length. This replaces the former
        # post-hoc trimming step, which trimmed "qz" twice and never "qx".
        row = {column: entry[key] for column, key in fields.items()}
        for column, value in row.items():
            collected[sensor][column].append(value)

    # Build the frames and convert every column (times and values) to float.
    readings_microphone, readings_accelerometer, readings_orientation = (
        pd.DataFrame(collected[sensor]).astype(float) for sensor in sensor_fields
    )

    return readings_microphone, readings_accelerometer, readings_orientation

# Parse the recording once and keep one DataFrame per sensor.
(
    readings_microphone,
    readings_accelerometer,
    readings_orientation,
) = read_data(data)

### Let's take a look at the acceleration data

In [48]:
# Show the orientation readings returned by read_data; the bare expression at
# the end of the cell is what the notebook renders as the table below.
readings_orientation

Unnamed: 0,times,qz,qy,qx,qw
0,0.319507,0.397245,0.137388,0.221284,0.879974
1,0.337595,0.398098,0.138680,0.222065,0.879189
2,0.355677,0.398651,0.139346,0.223663,0.878428
3,0.373760,0.398949,0.139535,0.225543,0.877781
4,0.391844,0.399171,0.139401,0.227088,0.877303
...,...,...,...,...,...
612,11.385554,0.195094,0.061579,0.524909,0.826207
613,11.403638,0.201087,0.060521,0.501615,0.839216
614,11.421719,0.202807,0.058026,0.478224,0.852528
615,11.439795,0.195938,0.050227,0.457141,0.866087


In [49]:
# Draw one line per accelerometer axis against the recording time.
fig = go.Figure()

for axis in ("x", "y", "z"):
    fig.add_trace(
        go.Scatter(
            x=readings_accelerometer["times"],
            y=readings_accelerometer[axis],
            name=axis,
        )
    )

fig.update_layout(
    title="Accelerometer Data",
    xaxis_title="Time (s)",
    yaxis_title="Acceleration (m/s^2)",
)

fig.show()

### We are seeing how the raw values change. But maybe, there is more information hidden in there...

In [50]:
# Euclidean norm of the (x, y, z) acceleration vector, stored per sample in a
# new "magnitude" column.
readings_accelerometer["magnitude"] = np.sqrt(
    sum(readings_accelerometer[axis] ** 2 for axis in ("x", "y", "z"))
)

In [51]:
# Overlay the magnitude on the three raw axes; the raw axes are drawn faded so
# the magnitude line stands out.
fig = go.Figure()

trace_styles = [
    ("magnitude", {}),
    ("x", {"opacity": 0.3}),
    ("y", {"opacity": 0.3}),
    ("z", {"opacity": 0.3}),
]
for column, extra_style in trace_styles:
    fig.add_trace(
        go.Scatter(
            x=readings_accelerometer["times"],
            y=readings_accelerometer[column],
            name=column,
            **extra_style,
        )
    )

fig.update_layout(
    title="Accelerometer Data",
    xaxis_title="Time (s)",
    yaxis_title="Acceleration (m/s^2)",
)

fig.show()

### This lets us see the magnitude, i.e. how much acceleration is applied overall
### There is even more information that we can extract from the data

In [53]:
# Running sum of each acceleration component.
#
# NOTE: this is not an orientation and it is not measured in metres. The
# orientation is recorded separately (as a quaternion) in
# readings_orientation. A cumulative sum of acceleration samples is, for
# evenly spaced samples, proportional to the change in velocity along that
# axis; the sampling interval is not applied here, so the values are plain
# sums of the samples.
for axis in ("x", "y", "z"):
    readings_accelerometer[axis + "_cumsum"] = readings_accelerometer[axis].cumsum()

# plot the cumulative sums
fig = go.Figure()

for axis in ("x", "y", "z"):
    fig.add_trace(
        go.Scatter(
            x=readings_accelerometer["times"],
            y=readings_accelerometer[axis + "_cumsum"],
            name=axis,
        )
    )

# Title and axis label describe what is actually plotted (previously
# "Orientation Data" / "Orientation (m)").
fig.update_layout(
    title="Cumulative Acceleration",
    xaxis_title="Time (s)",
    yaxis_title="Cumulative acceleration (sum of m/s^2 samples)",
)

fig.show()

In [73]:
# The accelerometer data is noisy, so we smooth it by averaging all samples
# that fall into the same fixed-width time bin.

# Width of one averaging bin in seconds.
bin_width = 0.5

# Bin edges start at 0 and extend one step past the last timestamp. Stopping
# at max() itself would leave the final samples outside every bin, and they
# would silently disappear from the aggregate.
bin_edges = np.arange(
    0, readings_accelerometer["times"].max() + bin_width, bin_width
)

# Mean of every column per time bin. observed=False keeps bins without samples
# as NaN rows and is passed explicitly so the result does not depend on the
# installed pandas version's default.
readings_accelerometer_aggregated = readings_accelerometer.groupby(
    pd.cut(readings_accelerometer["times"], bin_edges), observed=False
).mean()


# Columns to compare before/after smoothing. Add "y", "z" or "magnitude" to
# overlay more signals.
columns_to_plot = ["x"]

# plot the aggregated data on top of the raw data
fig = go.Figure()

for column in columns_to_plot:
    fig.add_trace(
        go.Scatter(
            x=readings_accelerometer_aggregated["times"],
            y=readings_accelerometer_aggregated[column],
            name=column + "_agg",
            opacity=0.5,
        )
    )

for column in columns_to_plot:
    fig.add_trace(
        go.Scatter(
            x=readings_accelerometer["times"],
            y=readings_accelerometer[column],
            name=column,
            opacity=0.3,
        )
    )

fig.update_layout(
    title="Accelerometer Data",
    xaxis_title="Time (s)",
    yaxis_title="Acceleration (m/s^2)",
)

fig.show()

In [74]:
# Running total of the acceleration magnitude.
#
# NOTE: this is not a distance. Summing acceleration magnitudes gives a
# monotonically growing measure of how much acceleration has been applied so
# far, not a length in metres. The column keeps the name "distance" so that
# existing references to it keep working.
readings_accelerometer["distance"] = readings_accelerometer["magnitude"].cumsum()

# plot the cumulative magnitude
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=readings_accelerometer["times"],
        y=readings_accelerometer["distance"],
        name="cumulative magnitude",
    )
)

# Title and axis label describe what is actually plotted (previously
# "Distance Travelled" / "Distance (m)").
fig.update_layout(
    title="Cumulative Acceleration Magnitude",
    xaxis_title="Time (s)",
    yaxis_title="Cumulative magnitude (sum of m/s^2 samples)",
)

fig.show()