In [None]:
import numpy as np
import pandas as pd

class DataAnalyzer:
    """Compare per-video heart-rate statistics against a fixation baseline.

    The baseline is every row whose ``SourceStimuliName`` equals
    ``'Fixation point'``.  Each metric is reported as a relative change,
    ``(video - baseline) / baseline``, and is NaN when the baseline value is
    zero or missing (the ratio is undefined in that case).
    """

    # 0-based position, within the frame returned by ``pd.read_csv``, of the
    # row that holds the real column names.
    # NOTE(review): presumably the rows above it are export metadata — confirm.
    DEFAULT_HEADER_ROW = 29

    # Stimulus labels analysed when the caller does not supply its own list.
    DEFAULT_VIDEO_NAMES = (
        'A1', 'A2', 'A3', 'A4', 'A', 'B', 'C', 'F', 'G', 'H',
        'J', 'K', 'M', 'N', 'O', 'P', 'Q', 'U', 'V', 'W',
    )

    def __init__(self, file_path, participant_name, header_row=DEFAULT_HEADER_ROW):
        """Remember the inputs and load the recording into ``self.df``.

        Args:
            file_path: Path of the CSV file to read.
            participant_name: Label copied into every result row.
            header_row: Row position passed on to :meth:`load_data`.
        """
        self.file_path = file_path
        self.participant_name = participant_name
        self.df = self.load_data(header_row)

    def load_data(self, header_row=DEFAULT_HEADER_ROW):
        """Read the CSV and promote row ``header_row`` to the column header.

        Rows up to and including ``header_row`` are discarded, and columns
        that contain only missing values are dropped.

        Args:
            header_row: 0-based position of the row holding the column names.

        Returns:
            The cleaned ``pandas.DataFrame``.

        Raises:
            IndexError: If the file has no row at position ``header_row``.
        """
        raw = pd.read_csv(self.file_path, low_memory=False)
        header = raw.iloc[header_row]
        # Copy the slice so the frame owns its data; mutating a view of
        # ``raw`` is what triggers pandas' SettingWithCopy warning.
        df = raw.iloc[header_row + 1:].copy()
        df.columns = header
        return df.dropna(axis=1, how="all")

    def compute_rmssd(self, intervals):
        """Return the root mean square of successive differences.

        Returns ``0`` when fewer than two values are supplied.
        """
        if len(intervals) < 2:
            return 0
        successive_diffs = np.diff(intervals)
        return np.sqrt(np.mean(successive_diffs ** 2))

    def compute_sdnn(self, intervals):
        """Return the population standard deviation (``np.std``, ddof=0).

        Returns ``0`` when fewer than two values are supplied.
        """
        if len(intervals) < 2:
            return 0
        return np.std(intervals)

    @staticmethod
    def _relative_change(value, baseline):
        """Return ``(value - baseline) / baseline``, or NaN if undefined."""
        # A zero or missing baseline makes the ratio meaningless; returning
        # NaN avoids ZeroDivisionError (plain ints) and inf (NumPy floats).
        if pd.isna(baseline) or baseline == 0:
            return np.nan
        return (value - baseline) / baseline

    @staticmethod
    def _show(table):
        """Render ``table`` with IPython's ``display`` if present, else print."""
        try:
            render = display  # injected into globals by IPython/Jupyter
        except NameError:
            render = print
        render(table)

    def _heart_rates(self, stimulus_name):
        """Return the numeric, non-missing heart-rate values for one stimulus."""
        rows = self.df[self.df['SourceStimuliName'] == stimulus_name]
        return pd.to_numeric(rows['Heart Rate PPG ALG']).dropna()

    def calculate_statistical_differences(self, video_series, fixation_series):
        """Return relative changes of six summary statistics.

        Args:
            video_series: ``pandas.Series`` of values for the video.
            fixation_series: ``pandas.Series`` of baseline values.

        Returns:
            Dict with keys ``mean_diff``, ``min_diff``, ``max_diff``,
            ``skewness_diff``, ``kurtosis_diff`` and ``std_dev_diff``; each
            value is ``(video - fixation) / fixation`` for that statistic,
            or NaN when the fixation statistic is zero or NaN.
        """
        # Result key -> name of the Series method that computes the statistic.
        statistic_methods = {
            'mean_diff': 'mean',
            'min_diff': 'min',
            'max_diff': 'max',
            'skewness_diff': 'skew',
            'kurtosis_diff': 'kurtosis',
            'std_dev_diff': 'std',
        }
        return {
            key: self._relative_change(
                getattr(video_series, method)(),
                getattr(fixation_series, method)(),
            )
            for key, method in statistic_methods.items()
        }

    def calculate_video_differences(self, video_names=None, *, show=True):
        """Build, optionally show, and return the per-video comparison table.

        NOTE(review): RMSSD and SDNN are applied directly to the values of the
        'Heart Rate PPG ALG' column; confirm that this column is the intended
        input for these metrics.

        Args:
            video_names: Stimulus labels to analyse; defaults to
                ``DEFAULT_VIDEO_NAMES``.
            show: When true, render the table (IPython ``display`` when
                available, otherwise ``print``).

        Returns:
            ``pandas.DataFrame`` with one row per video: participant name,
            video name, ``RMSSD diff``, ``SDNN diff`` and the six statistics
            from :meth:`calculate_statistical_differences`.
        """
        if video_names is None:
            video_names = self.DEFAULT_VIDEO_NAMES

        # The baseline does not depend on the video, so compute it once.
        fixation_heart_rates = self._heart_rates('Fixation point')
        fixation_rmssd = self.compute_rmssd(fixation_heart_rates)
        fixation_sdnn = self.compute_sdnn(fixation_heart_rates)

        video_differences = []
        for video_name in video_names:
            video_heart_rates = self._heart_rates(video_name)
            video_differences.append({
                'participant name': self.participant_name,
                'video name': video_name,
                'RMSSD diff': self._relative_change(
                    self.compute_rmssd(video_heart_rates), fixation_rmssd),
                'SDNN diff': self._relative_change(
                    self.compute_sdnn(video_heart_rates), fixation_sdnn),
                **self.calculate_statistical_differences(
                    video_heart_rates, fixation_heart_rates),
            })

        video_differences_df = pd.DataFrame(video_differences)
        if show:
            self._show(video_differences_df)
        return video_differences_df

# Example usage: load one participant's CSV and run the per-video comparison.
file_path = 'ITA02.csv'
participant_name = 'group33_v9_12'
analyzer = DataAnalyzer(
    file_path=file_path,
    participant_name=participant_name,
)
analyzer.calculate_video_differences()
