In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def summarize_timestamp_groups(grouped):
    """Build the per-timestamp summary table for data grouped by "timestamp".

    Args:
        grouped: The result of ``df.groupby("timestamp")`` on a frame that
            has a numeric "timestamp" column.

    Returns:
        A DataFrame with one row per unique timestamp, ordered by ascending
        timestamp, with the columns ``timestamp``, ``count`` (rows sharing
        that timestamp), ``min``, ``max``, ``range`` (``max - min``),
        ``delta_t`` (difference to the previous unique timestamp, NaN on the
        first row) and ``sampling_rate`` (``1.0 / delta_t``; Hz when the
        timestamps are expressed in seconds).
    """
    summary = grouped.size().reset_index(name="count")

    # The grouped min/max Series are indexed by timestamp value, whereas
    # `summary` carries a fresh 0..n-1 index after reset_index(). Assigning
    # the Series directly would align on those unrelated labels and produce
    # NaN / misplaced values, so assign the raw values positionally instead
    # (size(), min() and max() all emit groups in the same order).
    stamps = grouped["timestamp"]
    summary["min"] = stamps.min().to_numpy()
    summary["max"] = stamps.max().to_numpy()
    summary["range"] = summary["max"] - summary["min"]

    # Consecutive differences only make sense in time order; sort explicitly
    # rather than relying on the groupby having been created with sort=True.
    summary = summary.sort_values("timestamp").reset_index(drop=True)

    # diff() yields NaN for the first row and also copes with an empty table,
    # where prepending a placeholder to a list would raise a length mismatch.
    summary["delta_t"] = summary["timestamp"].diff()
    summary["sampling_rate"] = 1.0 / summary["delta_t"]
    return summary


# The guard keeps the names below at module level when this runs as a script
# or notebook cell, while letting the helper above be imported without
# triggering file I/O or plotting.
if __name__ == "__main__":
    # Load CSV file
    file_path = "eeg_data.csv"
    df = pd.read_csv(file_path)

    # Group by unique timestamps and analyze repeated instances
    grouped = df.groupby("timestamp")
    summary = summarize_timestamp_groups(grouped)

    # Time-series analysis: the first row has no predecessor, so drop its NaN
    unique_timestamps = summary["timestamp"].to_numpy()
    delta_t = summary["delta_t"].to_numpy()[1:]
    sampling_rates = summary["sampling_rate"].to_numpy()[1:]

    # Save results to CSV
    output_path = "eeg_analysis_summary.csv"
    summary.to_csv(output_path, index=False)

    # Plotting the histogram of sampling rates
    plt.figure(figsize=(10, 6))
    bins = 50  # Number of bins for the histogram
    plt.hist(sampling_rates, bins=bins, color="blue", edgecolor="black", alpha=0.7)
    plt.title("Histogram of Instantaneous Sampling Rates")
    plt.xlabel("Sampling Rate (Hz)")
    plt.ylabel("Frequency")
    plt.grid(axis="y", alpha=0.75)
    plt.show()

    # Display key stats
    print("Number of unique timestamps:", len(unique_timestamps))
    print("Summary statistics for delta t (seconds):")
    print(pd.Series(delta_t).describe())
    print("Summary statistics for sampling rate (Hz):")
    print(pd.Series(sampling_rates).describe())

    print(f"Results saved to {output_path}")