In [1]:
# DomainNet-126

In [8]:
import os
import re
import pandas as pd

def read_and_save_accuracy_to_excel(directory, output_file, max_epochs=51):
    """Collect per-epoch accuracies from ``.err`` logs and write them to Excel.

    Every regular file located directly inside ``directory`` (no recursion)
    whose name ends with ``.err`` is scanned for log lines of the form
    ``[INFO] <date> <time> target.py:<n> Accuracy of direct prediction: <x.y>``.
    The matched values, in the order they appear in the file, fill the epoch
    columns of one row per file.

    Args:
        directory: Folder containing the ``.err`` log files.
        output_file: Path of the Excel workbook to write.
        max_epochs: Number of epoch columns. Files with fewer matches are
            padded with 0; matches beyond this count are dropped.

    Returns:
        pandas.DataFrame with columns ``Filename``, ``Epoch 0`` ..
        ``Epoch {max_epochs - 1}`` and ``Max Accuracy`` (the row-wise maximum
        over the epoch columns). The same frame is written to ``output_file``.
    """
    pattern = re.compile(r'\[INFO\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} target\.py:\d+ Accuracy of direct prediction: (\d+\.\d+)')

    rows = []
    # os.listdir() returns entries in arbitrary order; sort so the row order
    # (and therefore the saved workbook) is reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.err'):
            continue
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):
            continue
        print(f"Reading file: {filename}")

        with open(file_path, 'r', encoding='utf-8') as file:
            accuracies = [
                float(match.group(1))
                for match in map(pattern.search, file)
                if match
            ]

        # Force exactly max_epochs values: truncate extras, pad missing with 0.
        accuracies = accuracies[:max_epochs]
        accuracies += [0] * (max_epochs - len(accuracies))
        rows.append([filename] + accuracies)

    columns = ['Filename'] + [f'Epoch {i}' for i in range(max_epochs)]
    df = pd.DataFrame(rows, columns=columns)

    # Best accuracy per file: plain maximum over all epoch columns
    # (the 'Filename' column is skipped).
    df['Max Accuracy'] = df.iloc[:, 1:].max(axis=1)

    df.to_excel(output_file, index=False)
    print(f"Data saved to {output_file}")

    print(df,df['Filename'])
    return df

# Input folder holding the DomainNet-126 ``.err`` logs and the workbook that
# will receive the per-epoch accuracy table. Edit both paths for your setup.
directory_path = "Results/rc_log_shuffle_patch_mix_o_all_true/domainnet-126"
output_file = "Results/rc_log_shuffle_patch_mix_o_all_true/accuracy_domainnet.xlsx"

# Parse the logs, write the workbook, and keep the resulting table in memory.
df = read_and_save_accuracy_to_excel(directory_path, output_file)

import pandas as pd
import re

# The accuracy table is taken from the in-memory `df` produced above.
# Disabled alternative: load a previously saved workbook from disk instead.
# input_file = "Results_icip/rc_log_shuffle_patch_mix_o_l_all/accuracy_domainnet-126.xlsx"
# accuracy_df = pd.read_excel(input_file)

# NOTE: plain assignment binds a second name to the SAME DataFrame object
# (no copy), so columns added to `df` in place are visible via `accuracy_df`.
accuracy_df = df
# Disabled alternative for `df` as well:
# df = pd.read_excel(input_file)
# Function to extract Mix_Ratio, Patch_Height, SPM_Start, and Source_Target from the filename
def extract_info_from_filename(filename):
    """Parse run hyper-parameters out of a DomainNet ``.err`` log filename.

    Layout expected by the regex (underscore-separated):
    ``..._<mix_ratio>_resnet<digits>_<patch_height>_<spm_start>_<nn>_<c>_<source>_<target>_<digits>.err``

    The ``<c>`` field may be empty, which shows up as a double underscore,
    e.g. ``2022_0.8_resnet50_56_8.0_3__clipart_sketch_20323775.err``. Such
    names are accepted and yield ``c = None``. The ``<nn>`` field must be
    present for a match but is not part of the returned tuple.

    Args:
        filename: Log file name (only the name is inspected, not the content).

    Returns:
        Tuple ``(mix_ratio, patch_height, spm_start, c, source_target)`` as
        ``(float, int, float, float | None, str)``; five ``None`` values when
        the filename does not follow the expected layout.
    """
    match = re.search(
        # `[\d.]*` (rather than `+`) for <c> lets an empty field match.
        r'_([\d.]+)_resnet\d+_(\d+)_([\d.]+)_(\d+)_([\d.]*)_([\w]+_[\w]+)_\d+\.err',
        filename,
    )
    if match is None:
        return None, None, None, None, None

    mix_ratio = float(match.group(1))
    patch_height = int(match.group(2))
    spm_start = float(match.group(3))
    c_text = match.group(5)
    c = float(c_text) if c_text else None  # empty <c> field -> missing value
    source_target = match.group(6)
    return mix_ratio, patch_height, spm_start, c, source_target

# Parse every filename into a 5-tuple and transpose the tuples with zip(*...)
# so each hyper-parameter becomes its own column on `df` (added in place).
# NOTE: with zero rows zip(*...) yields nothing and this unpacking raises ValueError.
df['Mix_Ratio'], df['Patch_Height'], df['SPM_Start'], df['C'], df['Source_Target'] = zip(*df['Filename'].map(extract_info_from_filename))

# Normalize column names by stripping any leading/trailing spaces.
# No merge is performed here: `accuracy_df` is bound to the same object as `df`
# at this point, so both statements act on one DataFrame.
df.columns = df.columns.str.strip()
accuracy_df.columns = accuracy_df.columns.str.strip()


# Drop the 'Filename' column as it's no longer needed.
# drop() returns a new frame, so from here on `df` and `accuracy_df` differ.
df = df.drop(columns=['Filename'])

# Keep only the hyper-parameter columns plus 'Max Accuracy', in this order
# (all per-epoch columns are discarded).
df = df[['Mix_Ratio', 'Patch_Height', 'SPM_Start', 'Source_Target', 'C', 'Max Accuracy']]

print("Columns in df:", df.columns, accuracy_df.columns)

# Sort by 'Mix_Ratio', 'Patch_Height', 'SPM_Start', then 'Source_Target'
# ('C' is not used as a sort key).
df_sorted = df.sort_values(by=['Mix_Ratio', 'Patch_Height', 'SPM_Start', 'Source_Target'])

# Destination for the sorted summary; this rebinds the global `output_file`
# that previously pointed at the per-epoch accuracy workbook.
output_file = "Results/rc_log_shuffle_patch_mix_o_all_true/output_domainnet.xlsx"

print(df_sorted)
# Bare column of best accuracies without the index, one value per line.
print(df_sorted['Max Accuracy'].to_string(index=False))
df_sorted.to_excel(output_file, index=False)

print(f"Sorted data saved to {output_file}")


Reading file: 2022_0.8_resnet50_56_8.0_3__clipart_sketch_20323775.err
Reading file: 2022_0.8_resnet50_56_8.0_3__painting_clipart_20323774.err
Reading file: 2022_0.8_resnet50_56_8.0_3__painting_real_20323778.err
Reading file: 2022_0.8_resnet50_56_8.0_3__real_clipart_20323772.err
Reading file: 2022_0.8_resnet50_56_8.0_3__real_painting_20323773.err
Reading file: 2022_0.8_resnet50_56_8.0_3__real_sketch_20323777.err
Reading file: 2022_0.8_resnet50_56_8.0_3__sketch_painting_20323776.err
Data saved to Results/rc_log_shuffle_patch_mix_o_all_true/accuracy_domainnet.xlsx
                                            Filename  Epoch 0  Epoch 1  \
0  2022_0.8_resnet50_56_8.0_3__clipart_sketch_203...    46.47    56.45   
1  2022_0.8_resnet50_56_8.0_3__painting_clipart_2...    52.98    65.06   
2  2022_0.8_resnet50_56_8.0_3__painting_real_2032...    74.82    80.26   
3  2022_0.8_resnet50_56_8.0_3__real_clipart_20323...    55.35    64.97   
4  2022_0.8_resnet50_56_8.0_3__real_painting_2032...    62.67 

In [3]:
# VISDA-C

In [6]:
import os
import re
import pandas as pd
import numpy as np  # Import NumPy for numerical operations

def read_and_save_mean_accuracy_to_excel(directory, output_file, max_epochs=101):
    """Collect per-epoch mean class accuracies from ``.err`` logs into Excel.

    ``directory`` is walked recursively. Each file whose name ends with
    ``.err`` is scanned for log lines of the form
    ``[INFO] <date> <time> utils.py:<n> Accuracy per class: [...], mean: <x.y>``.
    Matching lines are de-duplicated by their ``<date> <time>`` stamp: only the
    first line carrying a given timestamp is recorded for a file. Reading a
    file stops once ``max_epochs`` values have been collected.

    Args:
        directory: Root folder to search for ``.err`` files.
        output_file: Path of the Excel workbook to write.
        max_epochs: Number of epoch columns. Rows with fewer values are
            padded with NaN.

    Returns:
        pandas.DataFrame with columns ``Filename``, ``Epoch 1`` ..
        ``Epoch {max_epochs}`` and ``Max Accuracy`` (maximum of the collected
        values, NaN when none were found). Returns ``None`` if any exception
        occurred; the error is printed rather than raised.
    """
    pattern_mean = re.compile(
        r'\[INFO\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} utils\.py:\d+ Accuracy per class: \[.*?\], mean: (\d+\.\d+)'
    )
    # Compiled once here instead of being rebuilt for every matching line.
    pattern_timestamp = re.compile(r'\[INFO\] (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})')

    try:
        rows = []

        for root, dirs, files in os.walk(directory):
            # Sorting in place makes os.walk's traversal (and the row order)
            # deterministic across runs and file systems.
            dirs.sort()
            for filename in sorted(files):
                if not filename.endswith('.err'):
                    continue
                file_path = os.path.join(root, filename)
                if not os.path.isfile(file_path):
                    continue
                print(f"Reading file: {filename}")

                means = []
                seen_timestamps = set()
                with open(file_path, 'r', encoding='utf-8') as file:
                    for line in file:
                        if len(means) >= max_epochs:
                            break
                        match_mean = pattern_mean.search(line)
                        if not match_mean:
                            continue
                        timestamp_match = pattern_timestamp.search(line)
                        if not timestamp_match:
                            continue
                        timestamp = timestamp_match.group(1)
                        # A repeated timestamp is treated as the same epoch.
                        if timestamp in seen_timestamps:
                            continue
                        seen_timestamps.add(timestamp)
                        means.append(float(match_mean.group(1)))

                # Take the max before padding so an empty file gives NaN
                # without triggering numpy's all-NaN RuntimeWarning.
                max_accuracy = max(means) if means else np.nan
                padding = [np.nan] * (max_epochs - len(means))
                rows.append([filename] + means + padding + [max_accuracy])

        columns = ['Filename'] + [f'Epoch {i+1}' for i in range(max_epochs)] + ['Max Accuracy']
        df = pd.DataFrame(rows, columns=columns)

        df.to_excel(output_file, index=False)
        print(f"Data saved to {output_file}")
        return df

    except Exception as e:
        # Best-effort behavior kept from the original: report and carry on.
        print(f"An error occurred: {e}")
        return None

# Root folder searched (recursively) for VisDA-C ``.err`` logs and the workbook
# that will receive the per-epoch mean-accuracy table. Edit for your setup.
directory_path = "Results/rc_log_shuffle_patch_mix_o_all_true/VISDA-C"
output_file = "Results/rc_log_shuffle_patch_mix_o_all_true/accuracy_visda.xlsx"

# Parse the logs and write the workbook; the result is re-read from disk below.
read_and_save_mean_accuracy_to_excel(directory_path, output_file)

import pandas as pd
import re

# Load the table back from the workbook written above.
# NOTE: read_and_save_mean_accuracy_to_excel only prints on failure, so if the
# write did not succeed this reads whatever already exists at `output_file`
# (possibly stale data) or raises if the file is missing.
df = pd.read_excel(output_file)

# Function to extract Mix_Ratio, Patch_Height, and SPM_Start from the filename
def extract_info_from_filename(filename):
    """Parse run hyper-parameters out of a VisDA-C ``.err`` log filename.

    Layout accepted by the regex (underscore-separated):
    ``..._<mix_ratio>_resnet<digits>_<patch_height>_<spm_start>[_<nn>_<c>]_<word>_<word>_<digits>.err``

    The ``<nn>_<c>`` pair is optional because names such as
    ``2022_0.8_resnet101_56_8.0_train_validation_shuffle_patch_mix_o_all_20323779.err``
    do not carry it; ``<c>`` may also be present but empty (double
    underscore). Missing values are returned as ``None``.

    Args:
        filename: Log file name (only the name is inspected, not the content).

    Returns:
        Tuple ``(mix_ratio, patch_height, spm_start, nn, c)`` as
        ``(float, int, float, int | None, float | None)``; five ``None``
        values when the filename does not follow the expected layout.
    """
    match = re.search(
        # `(?:_(\d+)_([\d.]*))?` makes the <nn>_<c> pair optional; when it is
        # present the greedy match yields the same groups as the strict form.
        r'_([\d.]+)_resnet\d+_(\d+)_([\d.]+)(?:_(\d+)_([\d.]*))?_([\w]+_[\w]+)_\d+\.err',
        filename,
    )
    if match is None:
        return None, None, None, None, None

    mix_ratio = float(match.group(1))
    patch_height = int(match.group(2))
    spm_start = float(match.group(3))
    nn_text = match.group(4)
    c_text = match.group(5)
    nn = int(nn_text) if nn_text else None
    c = float(c_text) if c_text else None
    return mix_ratio, patch_height, spm_start, nn, c

# Parse every filename into a 5-tuple and transpose the tuples with zip(*...)
# so each hyper-parameter becomes its own column on `df` (added in place).
# NOTE: with zero rows zip(*...) yields nothing and this unpacking raises ValueError.
df['Mix_Ratio'], df['Patch_Height'], df['SPM_Start'], df['NN'], df['C'] = zip(*df['Filename'].map(extract_info_from_filename))

# Warn if extraction failed for any row. Only the first three fields are
# checked; missing 'NN' or 'C' values alone do not trigger the warning.
if df['Mix_Ratio'].isnull().any() or df['Patch_Height'].isnull().any() or df['SPM_Start'].isnull().any():
    print("Warning: Some filenames did not match the expected pattern and were not processed correctly.")

# Drop the 'Filename' column as it's no longer needed
df = df.drop(columns=['Filename'])

print("Columns in df:", df.columns)

# Keep only the five hyper-parameter columns followed by 'Max Accuracy'
# (all per-epoch columns are discarded).
df = df[['Mix_Ratio', 'Patch_Height', 'SPM_Start', 'NN', 'C', 'Max Accuracy']]

# Sort by 'Mix_Ratio', 'Patch_Height', 'SPM_Start', 'NN', then 'C'
df_sorted = df.sort_values(by=['Mix_Ratio', 'Patch_Height', 'SPM_Start', 'NN', 'C'])

# Save the sorted summary to its own workbook; this rebinds the global
# `output_file` that previously pointed at the per-epoch accuracy workbook.
output_file = "Results/rc_log_shuffle_patch_mix_o_all_true/output_visda.xlsx"
df_sorted.to_excel(output_file, index=False)

print(f"Sorted data saved to {output_file}")

Reading file: 2022_0.8_resnet101_56_8.0_train_validation_shuffle_patch_mix_o_all_20323779.err
Data saved to Results/rc_log_shuffle_patch_mix_o_all_true/accuracy_visda.xlsx
Columns in df: Index(['Epoch 1', 'Epoch 2', 'Epoch 3', 'Epoch 4', 'Epoch 5', 'Epoch 6',
       'Epoch 7', 'Epoch 8', 'Epoch 9', 'Epoch 10',
       ...
       'Epoch 98', 'Epoch 99', 'Epoch 100', 'Epoch 101', 'Max Accuracy',
       'Mix_Ratio', 'Patch_Height', 'SPM_Start', 'NN', 'C'],
      dtype='object', length=107)
Sorted data saved to Results/rc_log_shuffle_patch_mix_o_all_true/output_visda.xlsx
