In [None]:
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# from matplotlib.backends.backend_pdf import PdfPages

# # Load CSV file
# file_path = "temp.csv"
# df = pd.read_csv(file_path)

# # Group by unique timestamps
# grouped = df.groupby("timestamp")

# # Analyze repeated instances
# summary = grouped.size().reset_index(name="count")  # Number of repeats per unique timestamp
# print(summary.head())
# # Get unique timestamps and their counts
# unique_timestamps = summary["timestamp"].to_numpy()
# counts = summary["count"].to_numpy()

# # Calculate delta times between consecutive unique timestamps
# delta_t = unique_timestamps[1:] - unique_timestamps[:-1]

# # Calculate instantaneous sampling rates based on chunk intervals
# sampling_rates = counts[:-1] / delta_t  # Samples per second

# # Statistics for delta_t and sampling rate
# delta_t_stats = pd.Series(delta_t).describe()
# delta_t_median = np.median(delta_t)
# delta_t_mode = pd.Series(delta_t).mode().iloc[0]

# sampling_rate_stats = pd.Series(sampling_rates).describe()
# sampling_rate_median = np.median(sampling_rates)
# sampling_rate_mode = pd.Series(sampling_rates).mode().iloc[0]

# # Create a dataframe for delta_t and sampling rate analysis
# analysis_df = pd.DataFrame({
#     "timestamp": unique_timestamps[1:],  # Start from the second timestamp
#     "delta_t": delta_t,
#     "sampling_rate": sampling_rates
# })

# # Save analysis to a CSV
# analysis_csv_path = "eeg_delta_sampling_analysis.csv"
# analysis_df.to_csv(analysis_csv_path, index=False)

# # Create PDF report
# pdf_path = "eeg_analysis_report.pdf"
# with PdfPages(pdf_path) as pdf:
#     # Histogram of sampling rates
#     plt.figure(figsize=(10, 6))
#     bins = 50  # Number of bins for the histogram
#     plt.hist(sampling_rates, bins=bins, color="blue", edgecolor="black", alpha=0.7)
#     plt.title("Histogram of Instantaneous Sampling Rates")
#     plt.xlabel("Sampling Rate (Hz)")
#     plt.ylabel("Frequency")
#     plt.grid(axis="y", alpha=0.75)
#     pdf.savefig()  # Save histogram to PDF
#     plt.close()

#     # Time series plot of sampling rates
#     plt.figure(figsize=(10, 6))
#     plt.plot(analysis_df["timestamp"], analysis_df["sampling_rate"], color="green", label="Sampling Rate")
#     plt.title("Instantaneous Sampling Rate Over Time")
#     plt.xlabel("Timestamp")
#     plt.ylabel("Sampling Rate (Hz)")
#     plt.grid()
#     plt.legend()
#     pdf.savefig()  # Save time series plot to PDF
#     plt.close()

#     # Statistics page
#     plt.figure(figsize=(8.5, 11))  # Standard letter-size page
#     plt.axis('off')  # Turn off axes

#     # Create text summary
#     stats_text = (
#         f"Number of unique timestamps: {len(unique_timestamps)}\n\n"
#         "Summary statistics for delta t (seconds):\n"
#         f"{delta_t_stats}\n"
#         f"Median: {delta_t_median:.6f}\n"
#         f"Mode: {delta_t_mode:.6f}\n\n"
#         "Summary statistics for sampling rate (Hz):\n"
#         f"{sampling_rate_stats}\n"
#         f"Median: {sampling_rate_median:.6f}\n"
#         f"Mode: {sampling_rate_mode:.6f}\n\n"
#         f"Results saved to: {analysis_csv_path}"
#     )

#     plt.text(0.1, 0.1, stats_text, fontsize=10, wrap=True)  # Add text to the page
#     pdf.savefig()  # Save stats page to PDF
#     plt.close()

# print(f"PDF report saved to {pdf_path}")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Per-segment sampling-rate analysis of an EEG recording.
#
# The recording is split into "segments" of uninterrupted connection (runs of
# rows that are not all-zero). For each segment the script derives the interval
# between consecutive unique timestamps and an instantaneous sampling rate,
# plots the rate over time into a PDF, and finishes with a text summary page.

# Load CSV file
file_path = "eeg_data.csv"
df = pd.read_csv(file_path)

# Identify periods of disconnection: rows where every column equals 0.
# NOTE(review): the comparison includes the "timestamp" column, so a dropout
# row is only detected if its timestamp is 0 as well -- confirm with the
# recorder's output format.
is_disconnected = (df == 0).all(axis=1)

# Number the connected runs. A new segment begins after each dropout, so count
# dropout *starts* (a disconnected row whose predecessor was connected). Using
# starts rather than individual disconnected rows keeps the ids contiguous even
# when a dropout spans several rows. A second cumulative sum must not be applied
# here: it would give every row after the first dropout its own id.
dropout_starts = is_disconnected & ~is_disconnected.shift(fill_value=False)
df["segment_id"] = dropout_starts.cumsum()

# Filter out disconnected rows
valid_data = df[~is_disconnected].copy()

# Group by segments of successful connections
segments = valid_data.groupby("segment_id")

# Prepare to save analysis for each segment
pdf_path = "eeg_analysis_report_with_segments.pdf"
with PdfPages(pdf_path) as pdf:
    segment_stats = []

    for segment_id, segment_data in segments:
        # Count how many rows share each unique timestamp within the segment.
        # groupby sorts its keys, so the timestamps come out in ascending order.
        grouped = segment_data.groupby("timestamp")
        summary = grouped.size().reset_index(name="count")

        unique_timestamps = summary["timestamp"].to_numpy()
        counts = summary["count"].to_numpy()

        # At least two unique timestamps are needed to form one interval.
        # Without this guard the statistics below operate on empty arrays and
        # `.mode().iloc[0]` raises IndexError.
        if len(unique_timestamps) < 2:
            print(
                f"Skipping segment {segment_id}: "
                "fewer than two unique timestamps"
            )
            continue

        # Calculate delta times between consecutive unique timestamps
        delta_t = np.diff(unique_timestamps)

        # Instantaneous sampling rate per chunk interval (samples per second,
        # assuming timestamps are expressed in seconds).
        # NOTE(review): this pairs the chunk size at t[i] with the interval
        # t[i+1] - t[i], while the row below is labelled with t[i+1]; confirm
        # this matches how the recorder timestamps its chunks.
        sampling_rates = counts[:-1] / delta_t

        # Statistics for delta_t and sampling rate
        delta_t_stats = pd.Series(delta_t).describe()
        delta_t_median = np.median(delta_t)
        delta_t_mode = pd.Series(delta_t).mode().iloc[0]

        sampling_rate_stats = pd.Series(sampling_rates).describe()
        sampling_rate_median = np.median(sampling_rates)
        sampling_rate_mode = pd.Series(sampling_rates).mode().iloc[0]

        # Create a dataframe for delta_t and sampling rate analysis
        analysis_df = pd.DataFrame({
            "timestamp": unique_timestamps[1:],  # Start from the second timestamp
            "delta_t": delta_t,
            "sampling_rate": sampling_rates,
        })

        # Append segment statistics
        segment_stats.append({
            "segment_id": segment_id,
            "delta_t_stats": delta_t_stats.to_dict(),
            "delta_t_median": delta_t_median,
            "delta_t_mode": delta_t_mode,
            "sampling_rate_stats": sampling_rate_stats.to_dict(),
            "sampling_rate_median": sampling_rate_median,
            "sampling_rate_mode": sampling_rate_mode,
        })

        # Save a time series plot for the segment
        plt.figure(figsize=(10, 6))
        plt.plot(
            analysis_df["timestamp"],
            analysis_df["sampling_rate"],
            color="green",
            label="Sampling Rate",
        )
        plt.title(f"Instantaneous Sampling Rate Over Time (Segment {segment_id})")
        plt.xlabel("Timestamp")
        plt.ylabel("Sampling Rate (Hz)")
        plt.grid()
        plt.legend()
        pdf.savefig()  # Save time series plot to PDF
        plt.close()

    # Save a summary page
    plt.figure(figsize=(8.5, 11))  # Standard letter-size page
    plt.axis('off')  # Turn off axes

    # Build the summary in one pass instead of repeated string concatenation.
    stats_text = "Summary of Segments:\n\n" + "".join(
        f"Segment ID: {stat['segment_id']}\n"
        f"Delta t Statistics: {stat['delta_t_stats']}\n"
        f"Median Delta t: {stat['delta_t_median']:.6f}\n"
        f"Mode Delta t: {stat['delta_t_mode']:.6f}\n"
        f"Sampling Rate Statistics: {stat['sampling_rate_stats']}\n"
        f"Median Sampling Rate: {stat['sampling_rate_median']:.6f}\n"
        f"Mode Sampling Rate: {stat['sampling_rate_mode']:.6f}\n\n"
        for stat in segment_stats
    )

    # Anchor the text block by its top edge so additional lines flow down the
    # page; with the default alignment a multi-line block grows upward from the
    # anchor and runs off the top of the figure.
    plt.text(0.1, 0.95, stats_text, fontsize=10, wrap=True, va="top")
    pdf.savefig()
    plt.close()

print(f"PDF report saved to {pdf_path}")