In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import h5py

Read the daily CSV files and convert them to fixed-length sequences

In [14]:
def read_csv(file_name):
    """Load ``<file_name>.csv`` into a pandas DataFrame.

    Parameters:
    - file_name: Path or base name of the file, given without the ``.csv`` suffix.

    Returns:
    - pandas.DataFrame: The file contents, parsed with pandas' default options.
    """
    csv_path = "{}.csv".format(file_name)
    frame = pd.read_csv(csv_path)
    return frame

In [15]:
# Base names (without extension) of the five daily recordings: Day_1 ... Day_5
file_names = ["Day_" + str(day_number) for day_number in range(1, 6)]
# One DataFrame per day, in the same order as file_names
data_days = list(map(read_csv, file_names))

In [16]:
# Give each day's DataFrame its own name; data_days is ordered Day_1 ... Day_5
(
    data_day_1,
    data_day_2,
    data_day_3,
    data_day_4,
    data_day_5,
) = data_days[:5]

In [17]:
# Show the first and last entry of column 0 for every day's DataFrame
for day_number, day_frame in enumerate(data_days, start=1):
    print(f"Day_{day_number}:")
    first_cell = day_frame.iloc[0, 0]
    print(f"  First value: {first_cell}")
    last_cell = day_frame.iloc[-1, 0]
    print(f"  Last value: {last_cell}")

Day_1:
  First value: 2024-12-03 10:00:00.000000
  Last value: 2024-12-04 09:59:59.984375
Day_2:
  First value: 2024-12-04 08:00:00.000000
  Last value: 2024-12-05 07:59:59.984375
Day_3:
  First value: 2024-12-05 06:00:00.000000
  Last value: 2024-12-06 05:59:59.984375
Day_4:
  First value: 2024-12-06 02:00:00.000000
  Last value: 2024-12-07 01:59:59.984375
Day_5:
  First value: 2024-12-07 05:00:00.000000
  Last value: 2024-12-08 04:59:59.984375


In [18]:
# Build one NumPy array per day containing every column except 'Timestamp'
sensor_data = []
for day_frame in data_days:
    channels_only = day_frame.drop(columns=['Timestamp'])
    sensor_data.append(channels_only.to_numpy())

In [19]:
# Give each day's sensor array its own name; sensor_data follows the order of data_days
(
    sensor_data_day_1,
    sensor_data_day_2,
    sensor_data_day_3,
    sensor_data_day_4,
    sensor_data_day_5,
) = sensor_data[:5]

In [20]:
# Two independent, initially empty accumulators: per-day sequence data and per-day timestamps
all_data, all_timestamps = [], []
# Number of days to process
days = 5

In [21]:
# Split every day's recording into fixed-length, non-overlapping sequences.

# Windowing parameters are the same for every day, so they are defined once.
sampling_rate = 64  # 64 Hz
sequence_duration = 5  # 5 seconds
sequence_length = sampling_rate * sequence_duration  # 320 samples for 5 seconds

# Start from empty accumulators so that re-running this cell does not append
# the same days a second time.
all_data = []
all_timestamps = []

for day in range(1, days + 1):
    # Index the per-day lists directly instead of resolving variable names
    # through globals(); `day` is 1-based while the lists are 0-based.
    sensor_data_day = sensor_data[day - 1]
    timestamps_day = data_days[day - 1]['Timestamp']

    # Only whole sequences are kept: trailing samples that do not fill a
    # complete sequence are dropped by the truncation below.
    num_samples = sensor_data_day.shape[0]  # Number of samples in the current day
    num_sequences = num_samples // sequence_length  # Number of complete sequences
    print(f"Day {day} - Number of sequences:", num_sequences)

    usable_samples = num_sequences * sequence_length
    sensor_data_truncated = sensor_data_day[:usable_samples]
    timestamps_truncated = timestamps_day.iloc[:usable_samples]

    # (samples, 7) -> (num_sequences, sequence_length, 7) -> (num_sequences, 7, sequence_length)
    data_sequences = sensor_data_truncated.reshape((num_sequences, sequence_length, 7)).transpose((0, 2, 1))

    # Every sequence_length-th timestamp is the first sample of a sequence;
    # one strided slice replaces a separate .iloc lookup per sequence.
    start_timestamps = timestamps_truncated.iloc[::sequence_length].tolist()

    # Append data and timestamps for the current day to the lists
    all_data.append(data_sequences)
    all_timestamps.append(start_timestamps)

Day 1 - Number of sequences: 17280
Day 2 - Number of sequences: 17280
Day 3 - Number of sequences: 17280
Day 4 - Number of sequences: 17280
Day 5 - Number of sequences: 17280


In [22]:
# Stack the per-day results along the first axis into one array each
final_data = np.concatenate(all_data, axis=0)
final_timestamps = np.concatenate(all_timestamps, axis=0)

# Save to a single compressed NPZ file; the arrays are stored under the keys
# 'timestamp' and 'data'
npz_path = 'sensor_data_all_days.npz'
np.savez_compressed(npz_path, timestamp=final_timestamps, data=final_data)

# Report the path that was actually written
print(f"Data for all {days} days saved in '{npz_path}'.")

Data for all 5 days saved in 'sensor_data_seq.npz'.


In [23]:
def save_h5(filepath='sensor_data_seq.h5', dataset_name='data', data=final_data):
    """
    Save data to an .h5 file.

    The file is opened in 'w' mode, so an existing file at ``filepath`` is
    replaced. The dataset is written with gzip compression at level 9.

    Parameters:
    - filepath (str): Path to the .h5 file.
    - dataset_name (str): Name of the dataset to save.
    - data (numpy.ndarray): Data to save. The default is the ``final_data``
      object that existed when this function was defined; after recomputing
      ``final_data``, pass it explicitly or re-run this definition.
    """
    with h5py.File(filepath, 'w') as h5file:
        # Store under the caller-supplied name so the file matches the message below
        h5file.create_dataset(dataset_name, data=data, compression='gzip', compression_opts=9)
    print(f"Data saved to {filepath} under dataset '{dataset_name}'")

In [24]:
# Write using every default of save_h5: file 'sensor_data_seq.h5', dataset name 'data',
# and the final_data array that was bound as the default when save_h5 was defined.
save_h5()

Data saved to sensor_data_seq.h5 under dataset 'data'
