In [None]:
from typing import Tuple, Dict, List, Optional, Union
from abc import ABC, abstractmethod

import pandas as pd


In [None]:
class StatisticalTest(ABC):
    """Abstract interface that every statistical test in this notebook implements.

    Concrete subclasses must provide a display name and a ``calculate_stats``
    method.
    """

    @property
    @abstractmethod
    def name_displayed_in_gui(self) -> str:
        """Label under which the test is listed in the GUI."""
        pass
    
    
    @abstractmethod
    def calculate_stats(self, data: pd.DataFrame, configs: 'Configs', show: bool, save: bool) -> None:
        """Run the statistical test.

        ``Configs`` is written as a string (forward reference) because no such
        name is defined or imported in the visible cells; an unquoted annotation
        is evaluated when the class body runs and would raise ``NameError``.

        Args:
            data: the data to analyse.
            configs: configuration object (type declared elsewhere).
            show: NOTE(review): presumably toggles displaying results - confirm.
            save: NOTE(review): presumably toggles saving results - confirm.
        """
        pass

In [None]:
class OneSample(StatisticalTest):
    """Compare the values of one sample against a fixed reference value.

    The input frame is read by column position: column 0 holds the measured
    values, column 1 the group ids and column 2 the reference ("fixed") value,
    which is taken from the first row.

    All intermediate and final results are collected in ``self.lut`` under the
    keys ``'df_infos'``, ``'group_level_stats'``, ``'summary_stats'`` and
    ``'pairwise_comparisons'``.

    NOTE(review): the statistics come from a module-level name ``pg``
    (presumably ``import pingouin as pg``) that is not imported in the visible
    cells - confirm it is imported before this class is used.
    """
    
    @property
    def name_displayed_in_gui(self):
        """Label under which the test is listed in the GUI."""
        return 'Comparison of values from one sample to a reference value'
    
    
    def calculate_stats(self, database: 'Database', show: bool, save: bool) -> None:
        """Run the normality checks and the one-sample test, storing results in ``self.lut``.

        Args:
            database: object exposing the input DataFrame as ``database.data``.
                Annotated as a string because ``Database`` is not defined in
                the visible cells.
            show: currently unused by this method.
            save: currently unused by this method.
        """
        self.database = database
        self.df = self.database.data.copy()
        self.lut = self.initialize_lut()
        for group_id in self.lut['df_infos']['all_group_ids']:
            self.lut['group_level_stats'][group_id] = self.add_normality_check_on_group_level(group_id = group_id)
        
        # Pass the sample's group id explicitly instead of relying on the
        # variable leaked from the loop above.
        self.lut['summary_stats'] = self.compute_summary_stats(group_id = self.lut['df_infos']['group_id'])


    def compute_summary_stats(self, group_id: str) -> Dict:
        """Choose and run the one-sample test for the data of ``group_id``.

        A one-sample t-test is used when every group in
        ``self.lut['group_level_stats']`` was flagged as normally distributed;
        otherwise ``pg.wilcoxon`` is applied to the differences between the
        data and the fixed value.

        Args:
            group_id: id of the group whose data are compared to the fixed value.

        Returns:
            Dict with the keys ``'use_parametric_test'``, ``'performed_test'``
            and ``'stats_results'``.
        """
        group_level_stats = self.lut['group_level_stats']
        use_parametric_test = all(
            group_level_stats[current_group_id]['is_normally_distributed']
            for current_group_id in self.lut['df_infos']['all_group_ids']
        )
        # TODO: add check for homoscedasticity for the other stats
        data = group_level_stats[group_id]['data'].copy()
        fixed_value = self.lut['df_infos']['fixed_value']

        if use_parametric_test:
            performed_test = 'one_sample_t-test'
            stats_results = pg.ttest(data, fixed_value)
        else:
            performed_test = 'one_sample_wilcoxon_signed-rank_test'
            # NOTE(review): `correction='auto'` is carried over from the
            # original draft - confirm that pg.wilcoxon accepts this value.
            stats_results = pg.wilcoxon(data - fixed_value, correction='auto')

        summary_stats = {'use_parametric_test': use_parametric_test,
                         'performed_test': performed_test,
                         'stats_results': stats_results}
        return summary_stats


    def add_normality_check_on_group_level(self, group_id: str) -> Dict:
        """Extract the data of one group and test them for normality.

        Args:
            group_id: value in the group column that selects the rows to test.

        Returns:
            Dict with the keys ``'data'`` (copy of the group's values),
            ``'normality_test_results'`` (the full ``pg.normality`` output) and
            ``'is_normally_distributed'`` (bool).
        """
        data_column_name = self.lut['df_infos']['data_column_name']
        group_column_name = self.lut['df_infos']['group_column_name']
        data = self.df.loc[self.df[group_column_name] == group_id, data_column_name].values.copy()
        normality_test_results = pg.normality(data)
        # Positional access: does not depend on how the result is indexed.
        is_normally_distributed = bool(normality_test_results['normal'].iloc[0])
        normality_check = {'data': data,
                           'normality_test_results': normality_test_results,
                           'is_normally_distributed': is_normally_distributed}
        return normality_check
        
        
        
    def initialize_lut(self) -> Dict:
        """Build the empty results look-up table and fill in ``'df_infos'`` from ``self.df``.

        Returns:
            Dict with the keys ``'df_infos'``, ``'summary_stats'``,
            ``'group_level_stats'`` and ``'pairwise_comparisons'``.

        Raises:
            ValueError: if ``self.df`` has fewer than three columns or no rows.
        """
        if self.df.shape[1] < 3:
            raise ValueError('Expected at least three columns: data, group and fixed value.')
        if self.df.empty:
            raise ValueError('Cannot compute statistics on an empty DataFrame.')

        lut = {'df_infos': dict(),
               'summary_stats': dict(),
               'group_level_stats': dict(),
               'pairwise_comparisons': dict()}
        
        all_group_ids = list(self.df.iloc[:, 1].unique())
        df_infos = {'data_column_name': self.df.columns[0],
                    'group_column_name': self.df.columns[1],
                    'fixed_value_column_name': self.df.columns[2],
                    'fixed_value': self.df.iloc[0, 2],
                    'group_id': all_group_ids[0],
                    'all_group_ids': all_group_ids}
        
        lut['df_infos'] = df_infos
        
        return lut
                    
        
        
        
        
        

In [None]:
import pandas as pd

# Tiny three-column example frame used to try out positional column access.
data = dict(a=[1, 2, 3, 4],
            b=[2, 3, 4, 5],
            c=[23, 0, 0, 0])
df = pd.DataFrame(data)

# Cell output: an independent copy of the third column's values.
df.iloc[:, 2].to_numpy().copy()

array([23,  0,  0,  0])

In [None]:
def one_sample(df):
    """Compare the values of one sample against a fixed reference value.

    Columns are read by position: column 0 holds the measured values, column 1
    the group ids and column 2 the reference value (taken from its first row).
    Normality is checked on the rows of the first group; a one-sample t-test is
    run when they are flagged as normal, ``pg.wilcoxon`` on the differences to
    the reference value otherwise.

    NOTE(review): the test itself is run on *all* rows of the data column, not
    only on the first group's rows - identical when there is a single group;
    confirm this is intended for multi-group input.

    Args:
        df: DataFrame with at least three columns and one row.

    Returns:
        Dict with the column names, the fixed value, the list of groups, a
        per-group entry (keyed by the first group id) holding the data and its
        normality results, a ``'summary'`` entry holding the normality results
        and the test output under ``'pairwise_comparisons'``, and
        ``'performed_test'`` naming the test that was run.
    """
    data_col = df.columns[0]
    group_col = df.columns[1]
    fixed_val_col = df.columns[2]
    fixed_value = df[fixed_val_col].values[0]
    l_groups = list(df[group_col].unique())

    d_results = {'data_col': data_col,
                 'group_col': group_col,
                 'fixed_val_col': fixed_val_col,
                 'fixed_value': fixed_value,
                 'l_groups': l_groups}

    group_id = l_groups[0]
    group_values = df.loc[df[group_col] == group_id, data_col].values

    # Run the normality test once and reuse the result everywhere it is stored.
    normality_full = pg.normality(group_values)
    # Positional access: does not depend on how the result is indexed.
    normality_bool = normality_full['normal'].iloc[0]

    d_results[group_id] = {'data': group_values,
                           'normality_full': normality_full,
                           'normality_bool': normality_bool}

    # Separate copy so the two entries stay independent objects.
    d_results['summary'] = {'normality_full': normality_full.copy(),
                            'normality_bool': normality_bool}

    if normality_bool:
        d_results['summary']['pairwise_comparisons'] = pg.ttest(df[data_col].values, fixed_value)
        performed_test = 'one sample t-test'
    else:
        # NOTE(review): confirm that pg.wilcoxon accepts `correction='auto'`.
        d_results['summary']['pairwise_comparisons'] = pg.wilcoxon(df[data_col].values - fixed_value, correction='auto')
        # pg.wilcoxon is the signed-rank test; "rank-sum" names a different test.
        performed_test = 'one sample wilcoxon signed-rank test'

    d_results['performed_test'] = performed_test

    return d_results