In this exercise, we will learn:
    
    1. How to load data
    2. How to combine and synchronise data
    3. How to find some attributes of the data
    4. How to create sliding windows

In [1]:
import pandas as pd

# Read the two sensor logs from CSV. `parse_dates` makes pandas parse the
# "timestamp" column as datetimes, so the streams can be aligned on time later.
imu_df = pd.read_csv(
    "./data_exercise2/imu_data.csv",
    parse_dates=["timestamp"],
)
voltage_df = pd.read_csv(
    "./data_exercise2/voltage_data.csv",
    parse_dates=["timestamp"],
)

# Show both frames, one after the other.
print(imu_df, voltage_df, sep="\n")

                     timestamp     imu_x     imu_y     imu_z
0   2025-02-17 22:37:36.248279 -1.103960  0.444364  0.827972
1   2025-02-17 22:37:36.268279  0.529332  1.318311 -0.892368
2   2025-02-17 22:37:36.288279 -1.088093 -0.143852  1.628061
3   2025-02-17 22:37:36.308279 -2.059210 -0.608279  1.245692
4   2025-02-17 22:37:36.328279  1.204604  1.849850 -0.967165
..                         ...       ...       ...       ...
495 2025-02-17 22:37:46.148279  0.923155  0.478154  0.455544
496 2025-02-17 22:37:46.168279  0.696304 -0.692716  0.433924
497 2025-02-17 22:37:46.188279 -0.842162  0.033667  0.502960
498 2025-02-17 22:37:46.208279  0.168948  0.314619 -2.590918
499 2025-02-17 22:37:46.228279  0.150978  0.272842 -0.152935

[500 rows x 4 columns]
                     timestamp   voltage
0   2025-02-17 22:37:36.248279  0.366984
1   2025-02-17 22:37:36.298279 -1.316928
2   2025-02-17 22:37:36.348279 -0.956157
3   2025-02-17 22:37:36.398279 -1.270197
4   2025-02-17 22:37:36.448279 -0.46193

In [2]:
# Align the voltage readings to the IMU samples with an as-of join.
# https://pandas.pydata.org/docs/reference/api/pandas.merge_asof.html
#
# NOTE: this is neither nearest-neighbour matching nor interpolation.
# direction="backward" (the pandas default, spelled out here) pairs every IMU
# row with the most recent voltage row whose timestamp is <= the IMU timestamp,
# i.e. the last known voltage is carried forward until a newer one arrives.
# Use direction="nearest" instead if closest-in-time matching is wanted.
# merge_asof requires both inputs to be sorted on the key, hence sort_values.

merged_df = pd.merge_asof(
    imu_df.sort_values("timestamp"),      # left side: one output row per IMU sample
    voltage_df.sort_values("timestamp"),  # right side: looked up per IMU timestamp
    on="timestamp",
    direction="backward",
)
print(merged_df)

# Save the merged dataset (without the integer row index)
merged_df.to_csv("merged_data.csv", index=False)

                     timestamp     imu_x     imu_y     imu_z   voltage
0   2025-02-17 22:37:36.248279 -1.103960  0.444364  0.827972  0.366984
1   2025-02-17 22:37:36.268279  0.529332  1.318311 -0.892368  0.366984
2   2025-02-17 22:37:36.288279 -1.088093 -0.143852  1.628061  0.366984
3   2025-02-17 22:37:36.308279 -2.059210 -0.608279  1.245692 -1.316928
4   2025-02-17 22:37:36.328279  1.204604  1.849850 -0.967165 -1.316928
..                         ...       ...       ...       ...       ...
495 2025-02-17 22:37:46.148279  0.923155  0.478154  0.455544  1.961926
496 2025-02-17 22:37:46.168279  0.696304 -0.692716  0.433924  1.961926
497 2025-02-17 22:37:46.188279 -0.842162  0.033667  0.502960  1.961926
498 2025-02-17 22:37:46.208279  0.168948  0.314619 -2.590918 -0.381695
499 2025-02-17 22:37:46.228279  0.150978  0.272842 -0.152935 -0.381695

[500 rows x 5 columns]


In [3]:
# Loading libraries
# Numpy is utilized to manipulate data containers
import numpy as np 

In [11]:
# NumPy indexing basics, demonstrated on a small 3x3 array
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print("array shape:", arr.shape)
print("Array:\n", arr)

# A bare colon selects everything along that axis; it is used a lot in Python.
print(arr[0, :])       # first row, all columns
print(arr[0, 2])       # single element: row 0, column 2
print(arr[:, 0])       # first column, all rows
# ':2' is the half-open range [0, 2): the start index is included and the stop
# index is excluded, so this selects columns 0 and 1.
print(arr[:, :2])
# Negative indices count from the end: -1 is the last row.
print(arr[-1, :])
print(arr[:-1, :])     # every row except the last one
print(arr[[0, 2], :])  # a list of indices picks exactly rows 0 and 2

array shape: (3, 3)
Array:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
[1 2 3]
3
[1 4 7]
[[1 2]
 [4 5]
 [7 8]]
[7 8 9]
[[1 2 3]
 [4 5 6]]
[[1 2 3]
 [7 8 9]]


In [15]:
# 2. Generating Special Arrays
# The shape is given as (rows, columns); zeros/ones produce floating-point arrays.
zeros = np.zeros(shape=(2, 3))
ones = np.ones(shape=(2, 3))
# 3x3 array of uniform random samples from [0, 1). No seed is set, so the
# values are different on every run.
rand_arr = np.random.rand(3, 3)

print("\nZeros:\n", zeros)
print("\nOnes:\n", ones)
print("\nRandom Array:\n", rand_arr)


Zeros:
 [[0. 0. 0.]
 [0. 0. 0.]]

Ones:
 [[1. 1. 1.]
 [1. 1. 1.]]

Random Array:
 [[0.26656439 0.75799467 0.22352716]
 [0.42869287 0.73535536 0.26848615]
 [0.3119297  0.03288169 0.97580765]]


In [7]:
# 3. Array Operations
arr = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

# Arithmetic operators on equally shaped arrays work element by element.
sum_result = arr + arr2
product_result = arr * arr2
# A scalar operand is applied to every element of the array.
sum_1_result = arr + 1
# Matrix multiplication is a different operation from `*` (left disabled here):
# dot_product = np.dot(arr, arr2.T)  # Dot product (matrix multiplication)

print("\nElement-wise Sum:\n", sum_result)
print("\nElement + 1:\n", sum_1_result)
print("\nElement-wise Product:\n", product_result)
# print("\nDot Product:\n", dot_product)


Element-wise Sum:
 [[ 8 10 12]
 [14 16 18]]

Element + 1:
 [[2 3 4]
 [5 6 7]]

Element-wise Product:
 [[ 7 16 27]
 [40 55 72]]


In [8]:
# 4. Aggregation Functions
arr = np.array([[1, 2, 3], [4, 5, 6]])

# With no `axis` argument every reduction collapses the whole array to a
# single scalar.
mean_value = arr.mean()
# mean_value = arr.mean(axis=0)  # reduce over rows    -> one mean per column
# mean_value = arr.mean(axis=1)  # reduce over columns -> one mean per row
sum_value = arr.sum()
standard_deviation_value = arr.std()  # population standard deviation (ddof=0)
max_value = arr.max()
min_value = arr.min()

print(arr)
print("\nMean:", mean_value)
print("Sum:", sum_value)
print("standard deviation:", standard_deviation_value)
print("Max:", max_value)
print("Min:", min_value)

[[1 2 3]
 [4 5 6]]

Mean: 3.5
Sum: 21
standard deviation: 1.707825127659933
Max: 6
Min: 1


In [9]:
# Aggregation along axis 0: the reduction runs down the rows, so each result
# holds one value per column.
arr = np.array([[1, 2, 3], [4, 5, 6]])
mean_value = arr.mean(axis=0)
sum_value = arr.sum(axis=0)
standard_deviation_value = arr.std(axis=0)
max_value = arr.max(axis=0)
min_value = arr.min(axis=0)

print(arr)
print("\nMean:", mean_value)
print("Sum:", sum_value)
print("standard deviation:", standard_deviation_value)
print("Max:", max_value)
print("Min:", min_value)


[[1 2 3]
 [4 5 6]]

Mean: [2.5 3.5 4.5]
Sum: [5 7 9]
standard deviation: [1.5 1.5 1.5]
Max: [4 5 6]
Min: [1 2 3]


In [10]:
# Aggregation on a 3-D array of shape (2, 2, 3): two identical 2x3 blocks.
arr = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])
# Reducing over an axis removes it, so every result below has shape (2, 3).
# NOTE(review): the mean is taken along axis 1 (within each block) while all
# other statistics reduce along axis 0 (across the two blocks) - confirm this
# difference is intentional.
mean_value = arr.mean(axis=1)
sum_value = arr.sum(axis=0)
standard_deviation_value = arr.std(axis=0)  # zero everywhere: both blocks are equal
max_value = arr.max(axis=0)
min_value = arr.min(axis=0)

print(arr)
print(arr.shape)
print("\nMean:", mean_value)
print("Sum:", sum_value)
print("standard deviation:", standard_deviation_value)
print("Max:", max_value)
print("Min:", min_value)

[[[1 2 3]
  [4 5 6]]

 [[1 2 3]
  [4 5 6]]]
(2, 2, 3)

Mean: [[2.5 3.5 4.5]
 [2.5 3.5 4.5]]
Sum: [[ 2  4  6]
 [ 8 10 12]]
standard deviation: [[0. 0. 0.]
 [0. 0. 0.]]
Max: [[1 2 3]
 [4 5 6]]
Min: [[1 2 3]
 [4 5 6]]


In [11]:
# 7. Boolean Masking
arr = np.array([[1, 2, 3], [4, 5, 6]])

# Comparing an array with a scalar yields a boolean array of the same shape.
mask = arr > 2
# Indexing with that mask keeps only the elements where it is True; the
# result is a flat 1-D array.
filtered_elements = arr[mask]

print("\nBoolean Mask:\n", mask)
print("Filtered Elements:", filtered_elements)


Boolean Mask:
 [[False False  True]
 [ True  True  True]]
Filtered Elements: [3 4 5 6]


In [12]:
# 8. Concatenation and Stacking
arr = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

# vstack appends rows (2x3 + 2x3 -> 4x3); hstack appends columns (-> 2x6).
stacked_vertical = np.vstack([arr, arr2])
stacked_horizontal = np.hstack([arr, arr2])

# concatenate along axis 0 gives the same result as vstack for 2-D inputs.
arr_concat = np.concatenate((arr, arr2), axis=0)

print("\nStacked Vertically:\n", stacked_vertical)
print("\nStacked Horizontally:\n", stacked_horizontal)
print("array concat:\n", arr_concat)


Stacked Vertically:
 [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]

Stacked Horizontally:
 [[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]]
array concat:
 [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [13]:
# 9. Linspace and Arange
# linspace: a fixed NUMBER of evenly spaced points, both end points included.
linear_space = np.linspace(0, 10, num=5)
# arange: a fixed STEP, starting at 0 and stopping before 10 (stop is excluded).
arange_values = np.arange(0, 10, 2)

print("\nLinspace:\n", linear_space)
print("\nArange:\n", arange_values)


Linspace:
 [ 0.   2.5  5.   7.5 10. ]

Arange:
 [0 2 4 6 8]


In [15]:
# Loading the data recorded for the class "walking"
# np.loadtxt parses the comma-separated file into a NumPy array for further
# processing or analysis in Python. The result is a two-dimensional "matrix".
from pathlib import Path

# Known locations of walking.csv, tried in order, so the cell runs on different
# machines without editing a hard-coded absolute path. The original path stays
# first, so an existing setup loads exactly the same file as before.
walking_csv_candidates = [
    Path("C:\\Users\\Mattia\\Desktop\\Smart Werables\\code_tutorial2\\walking.csv"),
    Path("/notebooks/smartwearable2025/walking.csv"),
    Path("walking.csv"),  # relative to the current working directory
]
walking_csv = next((p for p in walking_csv_candidates if p.is_file()), None)
if walking_csv is None:
    # Fail with the full list of attempted locations instead of a single path.
    raise FileNotFoundError(
        "walking.csv not found; tried: "
        + ", ".join(str(p) for p in walking_csv_candidates)
    )

data = np.loadtxt(walking_csv, delimiter=",")
print(type(data))

<class 'numpy.ndarray'>


In [16]:
# `data.shape` is (rows, columns).
# First dimension (axis 0): number of samples.
num_samples = data.shape[0]
# Second dimension (axis 1): number of sensor channels.
num_channels = data.shape[1]

print(f"Number of samples: {num_samples}")
print(f"Number of sensor channels: {num_channels} corresponding to ACC X, Y, Z and GYRO X, Y, Z")

Number of samples: 61266
Number of sensor channels: 6 corresponding to ACC X, Y, Z and GYRO X, Y, Z


In [17]:
# Let's have a look at the data: a header line naming the six columns,
# followed by the array itself.
print(
    "     ACC X,       ACC Y,     ACC Z,     GYRO X,     GYRO Y,     GYRO Z",
    data,
    sep="\n",
)

     ACC X,       ACC Y,     ACC Z,     GYRO X,     GYRO Y,     GYRO Z
[[ 6.3906e-02 -6.5013e-02 -1.1267e-01  4.1905e-03  2.7495e-02 -8.9308e-03]
 [ 1.5697e-02  7.7307e-04 -1.1857e-01 -3.7507e-03  3.0604e-03 -8.9308e-03]
 [-1.5182e-03  5.3167e-05 -8.9513e-02 -2.4520e-02 -5.4917e-03  7.5625e-03]
 ...
 [-2.9038e+00  1.7022e+00 -5.0675e-01 -7.1477e-01 -3.5953e-03 -3.5306e-01]
 [ 7.2340e-01 -1.1946e+00  2.9736e-01 -6.5512e-01 -7.7911e-01 -4.1485e-01]
 [ 3.6843e+00 -2.3661e+00  1.7814e-01 -2.0558e-01 -2.2197e-01 -2.1458e+00]]


In [18]:
# Keep every row (':') but only columns 0 and 2, i.e. ACC X and ACC Z according
# to the column header used in this notebook. The result has shape
# (number of samples, 2) and is displayed as the cell's output.
data[:, [0, 2]]

array([[ 6.3906e-02, -1.1267e-01],
       [ 1.5697e-02, -1.1857e-01],
       [-1.5182e-03, -8.9513e-02],
       ...,
       [-2.9038e+00, -5.0675e-01],
       [ 7.2340e-01,  2.9736e-01],
       [ 3.6843e+00,  1.7814e-01]], shape=(61266, 2))

In [19]:
# Segmenting the data into sliding windows with overlap
# Sliding-window segmentation parameters, both measured in samples.
window_length = 50
# A step smaller than the window length makes consecutive windows overlap;
# 25 is half of 50, so neighbouring windows share half of their samples.
window_step_size = 25

In [20]:
# Calculating the number of windows
# A window starts every `window_step_size` samples and must fit completely
# inside the recording, so the last valid start is `num_samples - window_length`.
# Note that double slash // is the "floor division" operator: the result of the
# division is rounded down to the nearest whole number (integer).
# max(..., 0) handles recordings shorter than one window: the raw formula can
# go negative there, while the correct answer is simply "no windows".
num_windows = max((num_samples - window_length) // window_step_size + 1, 0)
print(f'Number of sliding windows: {num_windows}')

Number of sliding windows: 2449


In [21]:
# Creating a 3D container (a tensor) to store the sliding windows, with axes
# (window index, sample within the window, sensor channel)
sliding_windows = np.zeros((num_windows, window_length, num_channels))
print(sliding_windows.shape)
print(data.shape)

# Copy one slice of `data` into each window. Window i covers the rows
# [i * window_step_size, i * window_step_size + window_length):
#   first window:  data[0:50]
#   second window: data[25:25 + 50] = data[25:75]
#   third window:  data[50:100]
# For the run shown here (61266 samples, 2449 windows) the last window starts
# at row (2449 - 1) * 25 = 61200 and ends before row 61250, so the final
# 16 samples do not fill a whole window and are left out.
for i in range(num_windows):
    start = i * window_step_size
    stop = start + window_length
    sliding_windows[i] = data[start:stop]

(2449, 50, 6)
(61266, 6)


In [22]:
# Let's have a look at the second sliding window (index 1, i.e. rows 25-74 of
# the data): column header, a label, then the 50x6 window itself.
print(
    "     ACC X,       ACC Y,     ACC Z,     GYRO X,     GYRO Y,     GYRO Z",
    "Second sliding window",
    sliding_windows[1],
    sep="\n",
)

     ACC X,       ACC Y,     ACC Z,     GYRO X,     GYRO Y,     GYRO Z
Second sliding window
[[ 7.0954e+00  4.0756e+00  2.5848e+00  5.3687e-01  2.3641e-01 -3.1497e-01]
 [-5.8843e-01 -1.1124e+00 -1.0316e+00  3.2123e-01 -7.3425e-01 -1.7264e-01]
 [-2.4835e+00 -3.0426e+00  3.3747e+00 -2.3954e-01 -2.0036e-01  4.8587e-01]
 [-3.1539e-01 -2.3174e+00  3.6266e+00 -6.0484e-01  1.4295e-01  3.8935e-01]
 [ 3.5078e+00  5.2209e-01  2.4613e+00 -6.6059e-02  3.4270e-01  1.0408e-01]
 [ 2.7247e+00  7.1450e-01  9.7705e-02  8.9101e-02 -5.1434e-01  4.5227e-01]
 [-8.4079e-02 -4.8730e-01  1.6587e+00  9.8875e-02 -4.4715e-01  3.2033e-01]
 [-2.3616e+00 -1.4169e+00  1.6972e+00 -5.8729e-02 -1.9303e-01  3.0444e-01]
 [-3.3413e+00  5.1658e-01  2.0486e+00 -5.8729e-02  1.8815e-01  2.7818e-01]
 [-2.7373e+00  1.9631e+00  1.3717e+00  1.2820e-01  2.0587e-01 -1.0423e-01]
 [-1.3753e+00  5.5891e-01  5.6347e-01  5.3076e-01  1.8388e-01 -3.4002e-01]
 [ 8.4465e-01 -5.7681e-01 -1.9308e+00  5.3503e-01 -2.1135e-01 -4.2432e-01]
 [-1.30