In [1]:
import os
import pandas as pd

def compare_excel_folders(folder1, folder2, sheet_name):
    # List all Excel files in both folders
    folder1_files = [f for f in os.listdir(folder1) if f.endswith('.xlsx')]
    folder2_files = [f for f in os.listdir(folder2) if f.endswith('.xlsx')]
    
    # Iterate through files with the same name in both folders
    for file in folder1_files:
        if file in folder2_files:
            file1_path = os.path.join(folder1, file)
            file2_path = os.path.join(folder2, file)
            
            # Read the specified sheet from both Excel files
            try:
                df1 = pd.read_excel(file1_path, sheet_name=sheet_name)
                df2 = pd.read_excel(file2_path, sheet_name=sheet_name)
            except Exception as e:
                print(f"Error loading sheet '{sheet_name}' from {file}: {e}")
                continue
            
            # Find common columns between both DataFrames
            common_columns = df1.columns.intersection(df2.columns)
            
            if len(common_columns) == 0:
                print(f"No common columns found in {file}")
                continue
            
            # Iterate over common columns and compare values
            differences_found = False
            for column in common_columns:
                if not df1[column].equals(df2[column]):
                    differences_found = True
                    print(f"{file}: Differences found in column '{column}'")
                    
                    # Optionally print the differing rows
                    diff_rows = df1[df1[column] != df2[column]]
                    print(f"Differences in column '{column}':\n{diff_rows}\n")
            
            if not differences_found:
                print(f"{file}: No differences found in common columns.")
        else:
            print(f"{file} exists in folder1 but not in folder2")

# Example usage:
folder1 = r'D:\test\202410_v360評価用データセット'
folder2 = r'D:\test\check'
sheet_name = 'v360_test_dataset'  

compare_excel_folders(folder1, folder2, sheet_name)


confidence_F1_curve_V360.xlsx: Differences found in column '240704101619_runs_train_202403_7_BGs_2percent_ex1'
Differences in column '240704101619_runs_train_202403_7_BGs_2percent_ex1':
      Confidence  240704101619_runs_train_202403_7_BGs_2percent_ex1  \
1000         NaN                                                NaN   
1001         NaN                                                NaN   
1002         NaN                                                NaN   
1003         NaN                                                NaN   
1004      max_F1                                                NaN   
1005    max_conf                                                NaN   
1006  row_number                                                NaN   

      240704110918_runs_train_202403_7_BGs_2percent_ex2  \
1000                                                NaN   
1001                                                NaN   
1002                                                NaN   
1003     