# In [4]:
import sqlite3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pingouin as pg
from scipy.stats import friedmanchisquare
from rich.console import Console
from scipy.stats import shapiro
from scipy.stats import probplot
from tabulate import tabulate

def _plot_scenario_distribution(data, tested_variable):
    """Show a box plot of ``tested_variable`` per ScenarioCase with the raw points overlaid."""
    sns.boxplot(data=data, x='ScenarioCase', y=tested_variable, width=0.6, showfliers=False)
    sns.stripplot(data=data, x='ScenarioCase', y=tested_variable, color='black', alpha=0.5)
    plt.show()


def _analyse_position(data_combined_avg, position_name, observed_group, tested_variable):
    """Plot and statistically compare the scenarios for one position's raw query result."""
    # Average of each observed group within each scenario.
    data_average_aggregated = (
        data_combined_avg
        .groupby([observed_group, 'ScenarioCase'])[tested_variable]
        .mean()
        .reset_index()
    )
    # Computed once and reused so the Shapiro loop, the Q-Q plots and the
    # Friedman test all see the scenarios in the same order.
    scenarios = data_average_aggregated['ScenarioCase'].unique()

    # Bar chart of the averaged variable per observed group, coloured by scenario.
    plt.figure(figsize=(12, 6))
    sns.barplot(x=observed_group, y=tested_variable, hue='ScenarioCase',
                data=data_average_aggregated, palette='viridis')
    plt.title(f"Average {tested_variable} Across Scenarios ({position_name})")
    plt.xlabel(observed_group)
    plt.ylabel(f"Average {tested_variable}")
    plt.legend(title="Scenario Case")
    plt.show()

    data_position_average_scenario = (
        data_average_aggregated.groupby('ScenarioCase')[tested_variable].mean().reset_index()
    )
    print(data_position_average_scenario)

    # Check for normality: one Shapiro-Wilk test per scenario.
    normality_values = []
    print("\nShapiro-Wilk Normality Test Results per ScenarioCase:")
    for scenario in scenarios:
        data_scenario = data_average_aggregated.loc[
            data_average_aggregated['ScenarioCase'] == scenario, tested_variable]
        stat, p_value = shapiro(data_scenario)
        print(f"Scenario: {scenario:<15} | Shapiro-Wilk Statistic: {stat:.6f} | p-value: {p_value:.6f}")
        normality_values.append(p_value)

    # One Q-Q plot per scenario, laid out in a single row.
    n_scenarios = len(scenarios)
    fig, axs = plt.subplots(1, n_scenarios, figsize=(6 * n_scenarios, 6))
    # With a single scenario plt.subplots returns a bare Axes, not an array.
    if n_scenarios == 1:
        axs = [axs]

    for ax, scenario in zip(axs, scenarios):
        data_to_plot = data_average_aggregated.loc[
            data_average_aggregated['ScenarioCase'] == scenario, tested_variable]
        probplot(data_to_plot, dist="norm", plot=ax)
        ax.set_title(f"Q-Q Plot for Scenario: {scenario}")
        ax.set_xlabel("Theoretical Quantiles")
        ax.set_ylabel("Sample Quantiles")

    plt.tight_layout()
    plt.show()

    if any(value < 0.05 for value in normality_values):
        # At least one scenario deviates from normality: non-parametric path.
        # Friedman Test
        scenario_data = [
            data_average_aggregated[data_average_aggregated['ScenarioCase'] == case][tested_variable].values
            for case in scenarios]
        stat, p_value = friedmanchisquare(*scenario_data)

        print(f"\nFriedman Test Statistic: {stat}")
        print(f"p-value: {p_value}")

        # Interpretation
        if p_value < 0.05:
            print("Significant differences exist between the groups (ScenarioCase).")

            # Wilcoxon Test (post hoc, Holm-adjusted)
            pairwise_results = pg.pairwise_tests(dv=tested_variable,
                                                 within='ScenarioCase',
                                                 subject=observed_group,
                                                 data=data_average_aggregated,
                                                 padjust='holm',
                                                 parametric=False)

            print("\nPairwise Comparisons Table of Wilcoxon Test:")
            print(tabulate(pairwise_results, headers='keys', tablefmt='psql', showindex=False))
        else:
            print("No significant differences found.")

        _plot_scenario_distribution(data_average_aggregated, tested_variable)
        print()
        return

    # No scenario rejected normality: parametric path.
    # Mauchly's Test
    sphericity_results = pg.sphericity(data_average_aggregated, dv=tested_variable, within='ScenarioCase',
                                       subject=observed_group, method='mauchly', alpha=0.05)

    header = f"{'Spher':<10} {'W':<15} {'Chi2':<15} {'Dof':<10} {'P-Value':<15}"
    print("\nMauchly's Test for Sphericity Results:")
    print(header)
    print("-" * len(header))

    row = (f"{str(sphericity_results.spher):<10} "
           f"{sphericity_results.W:<15.6f} "
           f"{sphericity_results.chi2:<15.6f} "
           f"{sphericity_results.dof:<10} "
           f"{sphericity_results.pval:<15.6f}")
    print(row)

    rm_anova = pg.rm_anova(data=data_average_aggregated,
                           dv=tested_variable,
                           subject=observed_group,
                           within=['ScenarioCase'],
                           detailed=True)

    print("\nRepeated Measures ANOVA Results:\n")
    print(tabulate(rm_anova, headers='keys', tablefmt='psql', showindex=False))

    _plot_scenario_distribution(data_average_aggregated, tested_variable)

    pairwise_results = pg.pairwise_tests(dv=tested_variable,
                                         within='ScenarioCase',
                                         subject=observed_group,
                                         data=data_average_aggregated,
                                         padjust='holm')

    print("\nPairwise Comparisons Table of Pairwise T-Test:")
    print(tabulate(pairwise_results, headers='keys', tablefmt='psql', showindex=False))


def test_by_positions(observed_group, tested_variable, query_data, connection):
    """Compare scenarios for each group of test-case positions and report the results.

    For every position group ("in front", "behind", "left", "right",
    "horizontal", "vertical") the function runs ``query_data`` restricted to
    that group's test cases, averages ``tested_variable`` per
    ``observed_group`` and ``ScenarioCase``, and then prints/plots:

    * a bar chart and the per-scenario means,
    * Shapiro-Wilk results and Q-Q plots per scenario,
    * if any Shapiro-Wilk p-value is below 0.05: a Friedman test, followed by
      Holm-adjusted non-parametric pairwise tests when it is significant,
    * otherwise: Mauchly's sphericity test, a repeated-measures ANOVA and
      Holm-adjusted pairwise tests,
    * a box/strip plot of the aggregated values.

    Args:
        observed_group: Name of the result column identifying the repeated
            unit; used as the grouping key and as ``subject`` in the tests.
        tested_variable: Name of the numeric result column being analysed.
        query_data: SQL SELECT text without a WHERE clause; a
            ``WHERE TestCaseName IN (...)`` filter is appended to it. Its
            result must contain ``observed_group``, ``ScenarioCase`` and
            ``tested_variable``.
        connection: Database location passed to ``sqlite3.connect``.

    Returns:
        None. All results are printed or shown as figures.
    """
    positions = [
        ("in front", "(3, 4, 10, 11, 16, 17, 18, 21)"),
        ("behind", "(2, 8, 9, 13, 14, 15, 19, 20)"),
        ("left", "(7, 8, 10, 11, 12, 14, 17, 20)"),
        ("right", "(4 ,5 ,6 ,9 ,13,16,19,21)"),
        ("horizontal", "(2, 3, 5, 7, 9, 14, 16, 17)"),
        ("vertical", "(4, 6, 8, 10, 11, 12, 13, 15, 18, 19, 20, 21)")
    ]

    console = Console()

    conn = sqlite3.connect(connection)
    # try/finally rather than ``with conn``: the sqlite3 connection context
    # manager only commits/rolls back, it does not close the connection.
    try:
        for position_name, test_cases in positions:
            console.print(f"Analysis for test cases [bold]{position_name}[/bold] of the user\n", style="bold")

            # Fetch the rows belonging to this position group from the database.
            query_data_positions_avg = f"""{query_data}
                                    WHERE TestCaseName IN {test_cases}"""
            data_combined_avg = pd.read_sql_query(query_data_positions_avg, conn)

            # Without rows there is nothing to plot or test; the subplot grid
            # would otherwise fail with zero columns.
            if data_combined_avg.empty:
                print(f"No data returned for test cases {position_name}; skipping.\n")
                continue

            _analyse_position(data_combined_avg, position_name, observed_group, tested_variable)
    finally:
        conn.close()