In [33]:
import sys
from pathlib import Path
from dotenv import load_dotenv
import os

# Load environment variables from the .env file
load_dotenv()

# Workspace root taken from the WORKSPACE_PATH environment variable (None when unset).
WORKSPACE_PATH = os.getenv("WORKSPACE_PATH")

# Add the workspace root to the import path so the project packages imported
# in the following cells can be resolved from it.
if WORKSPACE_PATH is None:
    # Appending str(None) would put the literal entry "None" on sys.path and
    # only surface later as a confusing ModuleNotFoundError.
    print("Warning: WORKSPACE_PATH is not set; project imports may fail.")
elif WORKSPACE_PATH not in sys.path:
    # Guard against duplicate entries when the cell is re-run.
    sys.path.append(WORKSPACE_PATH)


In [34]:
from utils.dataframe_utils import read_excel_file, write_to_excel

In [40]:
from src.anova_data_preparation import (
    sort_columns_by_rules,
    regex_patterns,
    sorting_rules,
)

ModuleNotFoundError: No module named 'config'

In [35]:
# Root directory for analysis results, read from the RESULTS_DIR environment variable.
_results_dir_env = os.getenv("RESULTS_DIR")
if _results_dir_env is None:
    # Path(None) would raise a bare TypeError that does not mention the variable.
    raise RuntimeError(
        "Environment variable RESULTS_DIR is not set; define it in the .env file."
    )
RESULTS_DIR = Path(_results_dir_env)

In [39]:
# Merge the per-correlation ANOVA tables into one workbook per result prefix.
#   input : RESULTS_DIR/analysis_data/ANOVA/{corr_type}_{prefix}_anova_data.xlsx
#   output: RESULTS_DIR/analysis_data/ANOVA/{prefix}_anova_data.xlsx
result_prefixes = ["best", "all"]
corr_types = ["pearson", "spearman"]

anova_dir = RESULTS_DIR / "analysis_data" / "ANOVA"
merge_key = "pair_number"


def _suffix_columns(frame, suffix, key=merge_key):
    """Return ``frame`` with ``_<suffix>`` appended to every column name except ``key``."""
    return frame.rename(columns=lambda col: col if col == key else f"{col}_{suffix}")


for prefix in result_prefixes:
    # Fresh accumulator per prefix: (corr_type, DataFrame) for every file that was found.
    merged_results = []
    for corr_type in corr_types:
        file_path = anova_dir / f"{corr_type}_{prefix}_anova_data.xlsx"
        try:
            results_df = read_excel_file(file_path)
            merged_results.append((corr_type, results_df))
        except FileNotFoundError:
            print(f"File not found: {file_path}")

    if not merged_results:
        print(f"Not enough data to merge {prefix} results.")
        continue

    if len(merged_results) < len(corr_types):
        # The output is still written (as before), but make it visible that it
        # lacks the columns of at least one correlation type.
        loaded = ", ".join(name for name, _ in merged_results)
        print(f"Warning: {prefix} results only contain data for: {loaded}")

    # Start from the first table, then join the remaining ones on the merge key.
    first_corr_type, first_df = merged_results[0]
    merged_df = _suffix_columns(first_df, first_corr_type)
    for corr_type, df in merged_results[1:]:
        # Default inner join: pairs missing from either table are dropped.
        # Columns are already suffixed per correlation type, so `suffixes`
        # only acts as a fallback for unexpected name clashes.
        merged_df = merged_df.merge(
            _suffix_columns(df, corr_type),
            on=merge_key,
            suffixes=("", f"_{corr_type}"),
        )

    merged_df = sort_columns_by_rules(merged_df, regex_patterns, sorting_rules)
    # NOTE(review): presumably sort_columns_by_rules returns a frame indexed by
    # pair number; reset_index would raise if a "pair_number" column still
    # existed at this point - confirm against the helper.
    merged_df = merged_df.reset_index(names="pair_number")

    # Save the merged results to an Excel file
    write_to_excel(merged_df, anova_dir / (prefix + "_anova_data.xlsx"))

DataFrame successfully written to c:\Users\huber\OneDrive\Dokumenty\GitHub\swps_synchronization_study\results\analysis_data\ANOVA\best_anova_data.xlsx
DataFrame successfully written to c:\Users\huber\OneDrive\Dokumenty\GitHub\swps_synchronization_study\results\analysis_data\ANOVA\all_anova_data.xlsx
