In [5]:
"""
process_sensor_data.py

本脚本用于读取原始传感器数据（CSV文件），以16 GHz的采样率（时间步长62.5 ps）对其重采样，并保存为新的CSV文件。
This script reads raw sensor data (CSV file), resamples it at a 16 GHz sampling rate (62.5 ps time step), and saves it as a new CSV file.

主要功能 / Main Features:
1. 读取原始数据文件（假设有时间和数据两列）。
   Reads the original data file (assumed to have two columns: time and data).
2. 数据清洗和类型转换。
   Cleans the data and converts types.
3. 时间轴归零。
   Resets the time axis to start from zero.
4. 以16 GHz采样率进行线性插值重采样。
   Performs linear interpolation resampling at 16 GHz sampling rate.
5. 保存重采样后的数据到新文件。
   Saves the resampled data to a new file.

适用场景 / Application Scenarios:
信号处理、通信等需要高精度时间对齐的数据分析。
Signal processing, communications, and other data analysis requiring high-precision time alignment.
"""

import pandas as pd
import numpy as np

# Input CSV (a header row followed by a time column and a data column) and output path
input_filename = "../csv/QDPSK_500Mbps_5u.csv"
output_filename = "../csv/data16.csv"

# Target resampling frequency and the corresponding sample spacing
target_frequency_hz = 16 * 1e9  # 16 GHz
resampling_interval_s = 1 / target_frequency_hz  # 62.5 ps


def resample_to_frequency(time_s, values, frequency_hz):
    """Linearly interpolate a series onto a uniform time grid that starts at 0.

    The grid is ``k / frequency_hz`` for
    ``k = 0 .. floor(max(time_s) * frequency_hz)``. It is derived from an
    integer sample count rather than ``np.arange(0, stop, float_step)``,
    whose length can be off by one because of floating-point rounding. The
    last grid point at or before ``max(time_s)`` is therefore always part of
    the result, and a single input sample yields a single output sample.

    Parameters
    ----------
    time_s : array-like of float
        Sample times, expected to be shifted so the earliest one is 0.
        They do not need to be sorted.
    values : array-like of float
        Sample values, same length as ``time_s``.
    frequency_hz : float
        Target sampling frequency; must be positive.

    Returns
    -------
    tuple of numpy.ndarray
        ``(grid_time, grid_values)``, two 1-D arrays of equal length.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in shape, or ``frequency_hz`` is not
        positive.
    """
    time_s = np.asarray(time_s, dtype=float)
    values = np.asarray(values, dtype=float)
    if time_s.size == 0:
        raise ValueError("cannot resample an empty series")
    if time_s.shape != values.shape:
        raise ValueError("time_s and values must have the same shape")
    if not frequency_hz > 0:
        raise ValueError("frequency_hz must be positive")

    # np.interp requires increasing x-coordinates.
    order = np.argsort(time_s, kind="stable")
    time_sorted = time_s[order]
    values_sorted = values[order]

    # Rounding the fractional sample index absorbs representation error such as
    # 79999.99999999999 for a duration that is meant to be exactly 80000 steps.
    n_intervals = int(np.floor(np.round(time_sorted[-1] * frequency_hz, 6)))
    n_intervals = max(n_intervals, 0)

    # One division per sample keeps every grid point correctly rounded.
    grid_time = np.arange(n_intervals + 1) / frequency_hz
    grid_values = np.interp(grid_time, time_sorted, values_sorted)
    return grid_time, grid_values


try:
    # header=0: the first row is consumed as column names, which are then
    # replaced below; it is never treated as data.
    df = pd.read_csv(input_filename, header=0)

    # The file is assumed to have exactly two columns: time first, data second.
    df.columns = ['original_time', 'original_data']

    # Coerce both columns to numbers (unparseable cells become NaN) and drop
    # the rows that could not be converted, without mutating in place.
    df = df.assign(
        original_time=pd.to_numeric(df['original_time'], errors='coerce'),
        original_data=pd.to_numeric(df['original_data'], errors='coerce'),
    ).dropna(subset=['original_time', 'original_data'])

    if df.empty:
        print(f"Error: No valid numeric data found in {input_filename} after header processing.")
    else:
        # Shift the time axis so the earliest sample sits at t = 0
        min_time = df['original_time'].min()
        df = df.assign(time=df['original_time'] - min_time)

        # Interpolate onto the uniform grid 0, 1/f, 2/f, ... up to the record end
        df_sorted = df.sort_values(by='time')
        new_time_axis, resampled_data_values = resample_to_frequency(
            df_sorted['time'].to_numpy(),
            df_sorted['original_data'].to_numpy(),
            target_frequency_hz,
        )

        # Collect the resampled series under the new header names
        df_resampled = pd.DataFrame({'time': new_time_axis, 'data': resampled_data_values})

        # Write the result without the DataFrame index column
        df_resampled.to_csv(output_filename, index=False)
        print(f"Processing complete. Output saved to {output_filename}")
        print("New header: time,data")
        print("Time axis starts from 0.")
        print(f"Data resampled to {target_frequency_hz / 1e9} GHz.")

except FileNotFoundError:
    print(f"Error: The file {input_filename} was not found.")
except Exception as e:
    # Any other failure (e.g. an unexpected column count) is reported, not raised
    print(f"An error occurred: {e}")



Processing complete. Output saved to ../csv/data16.csv
New header: time,data
Time axis starts from 0.
Data resampled to 16.0 GHz.


In [4]:
"""
Use 256GHz sampling rate instead
"""

import pandas as pd
import numpy as np

# Input CSV (a header row followed by a time column and a data column) and output path
input_filename = "../csv/QDPSK_500Mbps_5u.csv"
output_filename = "../csv/data256.csv"

# Target resampling frequency and the corresponding sample spacing
target_frequency_hz = 256 * 1e9  # 256 GHz
resampling_interval_s = 1 / target_frequency_hz  # 3.90625 ps


def resample_to_frequency(time_s, values, frequency_hz):
    """Linearly interpolate a series onto a uniform time grid that starts at 0.

    The grid is ``k / frequency_hz`` for
    ``k = 0 .. floor(max(time_s) * frequency_hz)``. It is derived from an
    integer sample count rather than ``np.arange(0, stop, float_step)``,
    whose length can be off by one because of floating-point rounding. The
    last grid point at or before ``max(time_s)`` is therefore always part of
    the result, and a single input sample yields a single output sample.

    Parameters
    ----------
    time_s : array-like of float
        Sample times, expected to be shifted so the earliest one is 0.
        They do not need to be sorted.
    values : array-like of float
        Sample values, same length as ``time_s``.
    frequency_hz : float
        Target sampling frequency; must be positive.

    Returns
    -------
    tuple of numpy.ndarray
        ``(grid_time, grid_values)``, two 1-D arrays of equal length.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in shape, or ``frequency_hz`` is not
        positive.
    """
    time_s = np.asarray(time_s, dtype=float)
    values = np.asarray(values, dtype=float)
    if time_s.size == 0:
        raise ValueError("cannot resample an empty series")
    if time_s.shape != values.shape:
        raise ValueError("time_s and values must have the same shape")
    if not frequency_hz > 0:
        raise ValueError("frequency_hz must be positive")

    # np.interp requires increasing x-coordinates.
    order = np.argsort(time_s, kind="stable")
    time_sorted = time_s[order]
    values_sorted = values[order]

    # Rounding the fractional sample index absorbs representation error such as
    # 1279999.9999999998 for a duration that is meant to be exactly 1280000 steps.
    n_intervals = int(np.floor(np.round(time_sorted[-1] * frequency_hz, 6)))
    n_intervals = max(n_intervals, 0)

    # One division per sample keeps every grid point correctly rounded.
    grid_time = np.arange(n_intervals + 1) / frequency_hz
    grid_values = np.interp(grid_time, time_sorted, values_sorted)
    return grid_time, grid_values


try:
    # header=0: the first row is consumed as column names, which are then
    # replaced below; it is never treated as data.
    df = pd.read_csv(input_filename, header=0)

    # The file is assumed to have exactly two columns: time first, data second.
    df.columns = ['original_time', 'original_data']

    # Coerce both columns to numbers (unparseable cells become NaN) and drop
    # the rows that could not be converted, without mutating in place.
    df = df.assign(
        original_time=pd.to_numeric(df['original_time'], errors='coerce'),
        original_data=pd.to_numeric(df['original_data'], errors='coerce'),
    ).dropna(subset=['original_time', 'original_data'])

    if df.empty:
        print(f"Error: No valid numeric data found in {input_filename} after header processing.")
    else:
        # Shift the time axis so the earliest sample sits at t = 0
        min_time = df['original_time'].min()
        df = df.assign(time=df['original_time'] - min_time)

        # Interpolate onto the uniform grid 0, 1/f, 2/f, ... up to the record end
        df_sorted = df.sort_values(by='time')
        new_time_axis, resampled_data_values = resample_to_frequency(
            df_sorted['time'].to_numpy(),
            df_sorted['original_data'].to_numpy(),
            target_frequency_hz,
        )

        # Collect the resampled series under the new header names
        df_resampled = pd.DataFrame({'time': new_time_axis, 'data': resampled_data_values})

        # Write the result without the DataFrame index column
        df_resampled.to_csv(output_filename, index=False)
        print(f"Processing complete. Output saved to {output_filename}")
        print("New header: time,data")
        print("Time axis starts from 0.")
        print(f"Data resampled to {target_frequency_hz / 1e9} GHz.")

except FileNotFoundError:
    print(f"Error: The file {input_filename} was not found.")
except Exception as e:
    # Any other failure (e.g. an unexpected column count) is reported, not raised
    print(f"An error occurred: {e}")



Processing complete. Output saved to ../csv/data256.csv
New header: time,data
Time axis starts from 0.
Data resampled to 256.0 GHz.


In [2]:
"""
Process data without resampling - only adjust time axis and rename headers
"""

import pandas as pd
import numpy as np

# Source CSV and destination for the time-zeroed copy
input_filename = "../csv/QDPSK_500Mbps_5u.csv"
output_filename = "../csv/data_zero_time.csv"

try:
    # The first row of the file is the header; it is replaced right below
    df = pd.read_csv(input_filename, header=0)
    df.columns = ['time', 'data']

    # Coerce both columns to numbers (unparseable cells become NaN), then keep
    # only the rows where both values survived the conversion
    df = (
        df[['time', 'data']]
        .apply(pd.to_numeric, errors='coerce')
        .dropna(subset=['time', 'data'])
    )

    if not df.empty:
        # Shift the time axis so the earliest sample sits at t = 0
        min_time = df['time'].min()
        df = df.assign(time=df['time'] - min_time)

        # Write the result without the DataFrame index column
        df.to_csv(output_filename, index=False)
        print(f"Processing complete. Output saved to {output_filename}")
        print("Header: time,data")
        print("Time axis adjusted to start from 0.")
    else:
        print(f"Error: No valid numeric data found in {input_filename}.")

except FileNotFoundError:
    print(f"Error: The file {input_filename} was not found.")
except Exception as e:
    # Any other failure is reported as a message instead of being raised
    print(f"An error occurred: {e}")

Processing complete. Output saved to ../csv/data_zero_time.csv
Header: time,data
Time axis adjusted to start from 0.
