In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Read the raw recording from disk.
file_path = "data/kevin/eeg_data.csv"
df = pd.read_csv(file_path)

# Flag every row whose timestamp equals 0; these rows receive the sentinel id below.
is_zero_row = df["timestamp"].eq(0)

# +1 wherever a non-zero row directly follows a zero row, -1 for the reverse.
# The first row has no predecessor, so its NaN diff is filled with 0 and it
# never counts as the start of a new segment.
transition = (~is_zero_row).astype(int).diff().fillna(0)

# The running count of zero -> non-zero transitions labels each row, and zero
# rows are then overwritten with -1. A leading run of non-zero rows (before any
# transition has been seen) therefore keeps id 0.
df["segment_id"] = transition.gt(0).cumsum().mask(is_zero_row, -1)

# One group per segment id, including the -1 group that collects the zero rows.
segments = df.groupby("segment_id")

# Build the PDF report: one sampling-rate plot per analysed segment, followed by
# one or more text pages summarising the per-segment statistics.
pdf_path = "data/eeg_analysis_report_with_segments.pdf"

# Number of segment summaries written on each text page. Each summary wraps to
# several lines at 10 pt, so a small value keeps the text inside a letter page.
SEGMENTS_PER_SUMMARY_PAGE = 4

with PdfPages(pdf_path) as pdf:
    segment_stats = []
    for segment_id, segment_data in segments:
        # -1 is the sentinel group of zero-timestamp rows.
        # NOTE(review): id 0 is also skipped. It only exists when the file starts
        # with non-zero rows (before the first zero -> non-zero transition);
        # confirm that dropping that leading segment is intended.
        if segment_id == -1 or segment_id == 0:
            continue

        # Count how many rows share each timestamp. groupby sorts its keys, so
        # the unique timestamps come back in ascending order.
        summary = segment_data.groupby("timestamp").size().reset_index(name="count")
        unique_timestamps = summary["timestamp"].to_numpy()
        counts = summary["count"].to_numpy()

        # At least two distinct timestamps are needed to form one interval.
        # groupby drops NaN keys, so a segment can also end up with none at all;
        # `< 2` covers both cases and avoids an IndexError on the empty mode below.
        if unique_timestamps.shape[0] < 2:
            continue

        # Interval between consecutive unique timestamps (strictly positive
        # because the timestamps are unique and sorted).
        delta_t = np.diff(unique_timestamps)

        # Rows recorded at timestamp i divided by the time until timestamp i+1.
        # The "Hz" axis label below assumes timestamps are in seconds — TODO confirm.
        sampling_rates = counts[:-1] / delta_t

        # Descriptive statistics for both series; mode() returns every tied
        # value in ascending order, so the smallest one is reported.
        delta_t_series = pd.Series(delta_t)
        sampling_rate_series = pd.Series(sampling_rates)

        delta_t_stats = delta_t_series.describe()
        delta_t_median = np.median(delta_t)
        delta_t_mode = delta_t_series.mode().iloc[0]

        sampling_rate_stats = sampling_rate_series.describe()
        sampling_rate_median = np.median(sampling_rates)
        sampling_rate_mode = sampling_rate_series.mode().iloc[0]

        # Each rate is plotted against the timestamp that closes its interval.
        analysis_df = pd.DataFrame({
            "timestamp": unique_timestamps[1:],
            "delta_t": delta_t,
            "sampling_rate": sampling_rates,
        })

        segment_stats.append({
            "segment_id": segment_id,
            "delta_t_stats": delta_t_stats.to_dict(),
            "delta_t_median": delta_t_median,
            "delta_t_mode": delta_t_mode,
            "sampling_rate_stats": sampling_rate_stats.to_dict(),
            "sampling_rate_median": sampling_rate_median,
            "sampling_rate_mode": sampling_rate_mode,
        })

        # One page per segment. Explicit figure/axes handles make sure the
        # figure that is saved and closed is the one that was just drawn.
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(analysis_df["timestamp"], analysis_df["sampling_rate"], color="green", label="Sampling Rate")
        ax.set_title(f"Instantaneous Sampling Rate Over Time (Segment {segment_id})")
        ax.set_xlabel("Timestamp")
        ax.set_ylabel("Sampling Rate (Hz)")
        ax.grid()
        ax.legend()
        pdf.savefig(fig)
        plt.close(fig)

    # Split the summaries into pages; when no segment qualified, still emit a
    # single page that carries only the heading.
    summary_pages = [
        segment_stats[start:start + SEGMENTS_PER_SUMMARY_PAGE]
        for start in range(0, len(segment_stats), SEGMENTS_PER_SUMMARY_PAGE)
    ] or [[]]

    for page_stats in summary_pages:
        stats_text = "Summary of Segments:\n\n"
        for stat in page_stats:
            stats_text += (
                f"Segment ID: {stat['segment_id']}\n"
                f"Delta t Statistics: {stat['delta_t_stats']}\n"
                f"Median Delta t: {stat['delta_t_median']:.6f}\n"
                f"Mode Delta t: {stat['delta_t_mode']:.6f}\n"
                f"Sampling Rate Statistics: {stat['sampling_rate_stats']}\n"
                f"Median Sampling Rate: {stat['sampling_rate_median']:.6f}\n"
                f"Mode Sampling Rate: {stat['sampling_rate_mode']:.6f}\n\n"
            )

        # Standard letter-size page. The text is anchored at its top-left corner
        # so it flows down the page instead of growing upward from the bottom
        # edge and running off the top.
        fig = plt.figure(figsize=(8.5, 11))
        fig.text(0.05, 0.95, stats_text, fontsize=10, va="top", wrap=True)
        pdf.savefig(fig)
        plt.close(fig)

print(f"PDF report saved to {pdf_path}")

PDF report saved to eeg_analysis_report_with_segments.pdf


In [None]:
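# NOTE(review): every line of this cell is commented out, so it never runs. It is a
# variant of the cell above that overlays all segments on one combined figure
# (instead of one page per segment), reads "output1.csv", and skips only segment -1.
# Either delete it or restore it as a real cell.
# import pandas as pd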
# import numpy as np
# import matplotlib.pyplot as plt
# from matplotlib.backends.backend_pdf import PdfPages

# # Load CSV file
# file_path = "output1.csv"
# df = pd.read_csv(file_path)

# # Identify zero rows
# is_zero_row = df["timestamp"] == 0

# # Identify transitions from zero to non-zero groups
# transition = (~is_zero_row).astype(int).diff().fillna(0)

# # Create `segment_id` by cumulatively summing the positive transitions
# df["segment_id"] = (transition > 0).cumsum()

# # Ensure rows in zero groups have `segment_id` set to -1
# df.loc[is_zero_row, "segment_id"] = -1

# # Group by segments of successful connections
# segments = df.groupby("segment_id")

# # Prepare to save analysis for all segments
# pdf_path = "eeg_analysis_combined_segments.pdf"
# with PdfPages(pdf_path) as pdf:
#     # Initialize a single figure for combined plotting
#     plt.figure(figsize=(12, 8))
#     plt.title("Instantaneous Sampling Rate Across Segments")
#     plt.xlabel("Timestamp")
#     plt.ylabel("Sampling Rate (Hz)")
#     plt.grid()
    
#     segment_stats = []
#     for segment_id, segment_data in segments:
#         if segment_id == -1:
#             continue  # Skip zero or invalid segments
        
#         # Group by unique timestamps within the segment
#         grouped = segment_data.groupby("timestamp")

#         # Analyze repeated instances
#         summary = grouped.size().reset_index(name="count")  # Number of repeats per unique timestamp

#         # Get unique timestamps and their counts
#         unique_timestamps = summary["timestamp"].to_numpy()
#         counts = summary["count"].to_numpy()
#         if unique_timestamps.shape[0] < 2:
#             continue  # Skip segments with fewer than 2 timestamps
        
#         # Calculate delta times and sampling rates
#         delta_t = unique_timestamps[1:] - unique_timestamps[:-1]
#         sampling_rates = counts[:-1] / delta_t

#         # Append segment statistics
#         segment_stats.append({
#             "segment_id": segment_id,
#             "delta_t_stats": pd.Series(delta_t).describe().to_dict(),
#             "delta_t_median": np.median(delta_t),
#             "delta_t_mode": pd.Series(delta_t).mode().iloc[0] if len(delta_t) > 0 else np.nan,
#             "sampling_rate_stats": pd.Series(sampling_rates).describe().to_dict(),
#             "sampling_rate_median": np.median(sampling_rates),
#             "sampling_rate_mode": pd.Series(sampling_rates).mode().iloc[0] if len(sampling_rates) > 0 else np.nan,
#         })

#         # Prepare data for continuous plotting with NaN gaps for discontinuities
#         timestamps_with_gaps = np.concatenate(([np.nan], unique_timestamps[1:]))
#         sampling_rates_with_gaps = np.concatenate(([np.nan], sampling_rates))

#         # Plot the segment with distinct colors
#         plt.plot(
#             timestamps_with_gaps,
#             sampling_rates_with_gaps,
#             label=f"Segment {segment_id}",
#             linewidth=1.5
#         )

#     # Add legend
#     plt.legend(loc="best", fontsize="small")
#     pdf.savefig()  # Save the combined plot to PDF
#     plt.close()

#     # Save a summary page
#     plt.figure(figsize=(8.5, 11))  # Standard letter-size page
#     plt.axis('off')  # Turn off axes

#     stats_text = "Summary of Segments:\n\n"
#     for stat in segment_stats:
#         stats_text += (
#             f"Segment ID: {stat['segment_id']}\n"
#             f"Delta t Statistics: {stat['delta_t_stats']}\n"
#             f"Median Delta t: {stat['delta_t_median']:.6f}\n"
#             f"Mode Delta t: {stat['delta_t_mode']:.6f}\n"
#             f"Sampling Rate Statistics: {stat['sampling_rate_stats']}\n"
#             f"Median Sampling Rate: {stat['sampling_rate_median']:.6f}\n"
#             f"Mode Sampling Rate: {stat['sampling_rate_mode']:.6f}\n\n"
#         )

#     plt.text(0.1, 0.8, stats_text, fontsize=10, wrap=True)
#     pdf.savefig()
#     plt.close()

# print(f"PDF report saved to {pdf_path}")