In [7]:
import os
import pandas as pd

data_sizes = [50.0, 60.0, 72.0, 80.0, 90.0, 100] # 30.0, 40.0, 
model_names = ["lora_large_clip", "resnet50", "resnet101", "resnext50", "resnext101"]
results_path = r"C:\Users\my\Desktop\Yolo\yolo111\lora_vit\results"

# Initialize a DataFrame for accuracy_data_size.csv
accuracy_data_size_df = pd.DataFrame(columns=["data size", "Ours", "ResNet-50", "ResNet-101", "ResNext-50", "ResNext-101"])

data_path = "./data/"

for data_size in data_sizes:
    accuracy_data = {"data size": data_size}
    for model_name in model_names:
        folder_name = f"mobileODT{data_size}%_{model_name}"
        valid_log_path = os.path.join(results_path, folder_name, f"mobileODT{data_size}%_valid_log.csv")
        
        # Read validation log
        valid_log_df = pd.read_csv(valid_log_path)
        
        # Find the highest validation accuracy
        max_valid_acc = valid_log_df["ACC"].max()
        
        # Map model names to column names correctly
        if model_name == "lora_large_clip":
            accuracy_data["Ours"] = max_valid_acc
        else:
            model_column_name = model_name.replace("resnet", "ResNet-").replace("resnext", "ResNext-").replace("50", "50").replace("101", "101")
            accuracy_data[model_column_name] = max_valid_acc

    # Append to the DataFrame
    accuracy_data_size_df = pd.concat([accuracy_data_size_df, pd.DataFrame([accuracy_data])], ignore_index=True)


# Save accuracy_data_size.csv
accuracy_data_size_df.to_csv(data_path + "accuracy_data_size.csv", index=False)

# For each data size, generate accuracy_epoch_{data_percentage}.csv and train_loss_epoch_{data_percentage}.csv
for data_size in data_sizes:
    accuracy_epoch_dict = {}
    train_loss_epoch_dict = {}
    
    for model_name in model_names:
        folder_name = f"mobileODT{data_size}%_{model_name}"
        train_log_path = os.path.join(results_path, folder_name, f"mobileODT{data_size}%_train_log.csv")
        valid_log_path = os.path.join(results_path, folder_name, f"mobileODT{data_size}%_valid_log.csv")
        
        # Read logs
        train_log_df = pd.read_csv(train_log_path)
        valid_log_df = pd.read_csv(valid_log_path)

        model_prefix = "Ours" if model_name == "lora_large_clip" else model_name.replace("resnet", "ResNet-").replace("resnext", "ResNext-")

        # Process train and valid logs
        for epoch in train_log_df["epoch"].unique():
            if epoch not in accuracy_epoch_dict:
                accuracy_epoch_dict[epoch] = {"Epoch": epoch}
                train_loss_epoch_dict[epoch] = {"Epoch": epoch}
            
            accuracy_epoch_dict[epoch][f"{model_prefix}_Train"] = train_log_df.loc[train_log_df["epoch"] == epoch, "ACC"].values[0]
            accuracy_epoch_dict[epoch][f"{model_prefix}_Test"] = valid_log_df.loc[valid_log_df["epoch"] == epoch, "ACC"].values[0]
            train_loss_epoch_dict[epoch][model_prefix] = train_log_df.loc[train_log_df["epoch"] == epoch, "LOSS"].values[0]

    accuracy_epoch_df = pd.DataFrame.from_dict(accuracy_epoch_dict, orient="index").sort_values(by="Epoch")
    train_loss_epoch_df = pd.DataFrame.from_dict(train_loss_epoch_dict, orient="index").sort_values(by="Epoch")


    accuracy_epoch_df.to_csv(data_path + f"accuracy_epoch_{data_size}%.csv", index=False)
    train_loss_epoch_df.to_csv(data_path + f"train_loss_epoch_{data_size}%.csv", index=False)

print("Datasets have been created successfully.")


  accuracy_data_size_df = pd.concat([accuracy_data_size_df, pd.DataFrame([accuracy_data])], ignore_index=True)


Datasets have been created successfully.
