# Training

In [97]:
import csv
import pandas as pd

train_commands = []

base_path = "/visinf/home/vilab25/Confidence-based-generative-data-Augmentation-for-Meta-Learning"
for mode in ["run_within_domain", "run_cross_domain"]:
    if "run_within_domain":
        mode_shortcut = "WD"
    else:
        mode_shortcut = "CD"
    

    for model in ["finetuning", "maml"]:
        
        for dropout_mode in ["baseline", "dropout"]:
            
            for k in [1, 5, 10]:
                cmd_strig = f"""{base_path}/scripts/isolated_run.sh \
                {base_path}/scripts/train/train.sh \
                --datasets_dir "/fastdata/vilab24/meta-album" \
                --{mode} \
                --model_dir "baselines/{model}" \
                --config_path "{base_path}/configs/train/{dropout_mode}/{mode_shortcut}/{dropout_mode}_k_{k}.yml" \
                --output_dir "/fastdata/vilab25/output/scheduler/training" \
                --log_file /visinf/home/vilab25/logs/train_{mode}_{model}_{dropout_mode}_k_{k}.log
                """
                
                train_commands.append(cmd_strig)
                
                

# # Write commands to CSV file
# csv_file_path = "scripts1.csv"
# with open(csv_file_path, mode='w', newline='') as file:
#     writer = csv.writer(file)

#     writer.writerow(["ID", "command", "status", "attached to process", "attached to gpu", "process alive", "expected_cpu_usage (in %)", "expected_gpu_usage (in MB)", "timestamp_started", "last_timestamp"])
#     for index, cmd in enumerate(train_commands[::2], start=1):
#         writer.writerow([index, cmd, "waiting", " ", " ", " ", "1800", "3500", " ", " "])

# print(f"CSV file '{csv_file_path}' has been created successfully.")

# # Write commands to CSV file
# csv_file_path = "scripts2.csv"
# with open(csv_file_path, mode='w', newline='') as file:
#     writer = csv.writer(file)

#     writer.writerow(["ID", "command", "status", "attached to process", "attached to gpu", "process alive", "expected_cpu_usage (in %)", "expected_gpu_usage (in MB)", "timestamp_started", "last_timestamp"])
#     for index, cmd in enumerate(train_commands[1::2], start=1):
#         writer.writerow([index, cmd, "waiting", " ", " ", " ", "1800", "3500", " ", " "])

# print(f"CSV file '{csv_file_path}' has been created successfully.")

CSV file 'scripts1.csv' has been created successfully.
CSV file 'scripts2.csv' has been created successfully.


# Eval

In [3]:
import itertools


dropout_mode = ["baseline", "dropout"]
confidence_method = ['"ConstantConfidenceProvider"', '"MCDropoutConfidenceEstimator"', '"GTConfidenceEstimator"']
data_aug = ['null', '"StandardAugmentation"', '"GenerativeAugmentation"', '"PseudoAugmentation"']

ignored_config = []
configs = []
for config in itertools.product(dropout_mode, confidence_method, data_aug):
    match config:
        case ("baseline", '"ConstantConfidenceProvider"', "null"): 
            configs.append(config)
        case ("baseline", _, _): 
            ignored_config.append(config)
        case ("dropout", '"ConstantConfidenceProvider"', _):
            configs.append(config)
        case ("dropout", _, "null"):
            ignored_config.append(config)
        case ("dropout", '"MCDropoutConfidenceEstimator"' | '"GTConfidenceEstimator"', _):
            configs.append(config)       
        case _:
            print(config)
            
            
model = ["finetuning", "maml"]
mode = ["run_within_domain", "run_cross_domain"]
k = [1, 5, 10]

base_product = itertools.product(model, mode, k)

full_config_dict = []

for model, mode, k in base_product:
    for dropout_mode, confidence_method, data_aug  in configs:
        full_config_dict.append({"model": model,
         "mode": mode,
         "k": k,
         "k_config": k + 10*(data_aug == '"PseudoAugmentation"'),
         "dropout_mode": dropout_mode,
         "confidence_method": confidence_method,
         "confidence": 1 if data_aug == "null" else 0,
         "data_aug": data_aug,
         })


In [112]:
def cmd_script_eval(base_path, config, config_path):
    k = config['k']
    mode = config['mode'] 
    dropout_mode = config['dropout_mode']
    model = config['model']
    return f"""{base_path}/scripts/isolated_run.sh \
            {base_path}/scripts/eval/eval.sh \
            --datasets_dir "/fastdata/vilab24/meta-album" \
            --{mode} \
            --model_path "/fastdata/vilab25/output/scheduler/training/{dropout_mode}_k_{k}/{model}" \
            --config_path "{config_path}" \
            --output_dir "/fastdata/vilab25/output/scheduler/eval/" \
            --log_file /visinf/home/vilab25/logs/eval_{dropout_mode}_{k}_{model}_{mode}.log
            """

def generate_config(config):
    return f"""dataset: 
    test: 
        n_ways:
            value: 5
        k_shots: 
            value: {config["k_config"]}
        query_size: 20
            
evaluation:
        tasks_per_dataset: {250 if config["data_aug"] == '"GenerativeAugmentation"' else 1000}
        confidence_estimators:
            use: {config["confidence_method"]}
            ConstantConfidenceProvider:
                confidence: {config["confidence"]}
            GTConfidenceEstimator: {{}} 
            MCDropoutConfidenceEstimator:
                num_samples: 100

        augmentors:
            use: {config["data_aug"]}
            PseudoAugmentation: 
                keep_original_data: True
                augmentation_size: 
                    scale: 1
                    offset: 3 
                    maximum: 20
                    threshold: 0.8
            StandardAugmentation:
                threshold: 0.75
                scale: 2
                keep_original_data: True
                rotation: 25
                flip: True
            GenerativeAugmentation:
                keep_original_data: True
                augmentation_size: 
                    scale: 1
                    offset: 3 
                    maximum: 20
                    threshold: 0.8
                annotator_type: "hed"
    """

def generate_config_path(config):    
    confidence_method =  {'"ConstantConfidenceProvider"': "constant_confidence",
                        '"MCDropoutConfidenceEstimator"': "mc", 
                        '"GTConfidenceEstimator"': "gt"}[config["confidence_method"]]
    
    data_aug = {"null": "baseline",
                '"StandardAugmentation"': "standard_aug",
                '"GenerativeAugmentation"': "generative_aug",
                '"PseudoAugmentation"': "pseudo_aug"}[config['data_aug']]

    k = config["k"]
    return f"configs/eval/{data_aug}_{confidence_method}_k_{k}.yml"

In [113]:
all_paths = [generate_config_path(config) for config in full_config_dict]
all_configs = [generate_config(config) for config in full_config_dict]
base_path = "/visinf/home/vilab25/Confidence-based-generative-data-Augmentation-for-Meta-Learning"

cmd_scripts = [cmd_script_eval(base_path, config, config_path) for config, config_path in zip(full_config_dict, all_paths)]

In [114]:
df= pd.DataFrame(full_config_dict)
df["config_path"] = all_paths
df["config"] = all_configs
df["cmd_scripts"] = cmd_scripts

In [115]:
# Check for duplicates
print(df.groupby("cmd_scripts")["config"].value_counts().sort_values())

print(df.groupby("config_path")["config"].nunique().sort_values())

cmd_scripts                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           config                                                                                                                                                                                                                                                                                                                            

In [116]:
# Anlegen von ymls
grouped = df.groupby('config_path')

for config_path, group_df in grouped:
    with open(base_path+"/"+config_path, 'w') as f:
        f.write(group_df['config'].values[0] )

In [117]:
import re
import os


output_dir = "fastdata/vilab25/output/scheduler/eval/"
# Define a function to extract the desired substring
def extract_paths(text, path_type):
    pattern = rf'--{path_type}_path\s(.*?)\s'
    match = re.search(pattern, text)
    if match:
        return match.groups()
    else:
        return (None)

model_cfg_name = df['cmd_scripts'].apply(lambda x: pd.Series(extract_paths(x, "model")))[0].str.split("/").str[-2]
eval_cfg_name = df['cmd_scripts'].apply(lambda x: pd.Series(extract_paths(x, "config")))[0].str.split("/").str[-1].str.replace('.yml"', "") 
eval_base_path = (output_dir + "train_cfg_" +  model_cfg_name + "/eval_cfg_" + \
    eval_cfg_name + "/" +df["model"] + "/" + \
    df["mode"].str.replace("run_", "").str.replace("_", "-"))

def check_files(path):
    files_to_check = ['config.yaml', 'evaluation.pkl', 'predictions.pkl', 'training_config.yaml']
    all_files_exist = all([os.path.exists(os.path.join(path, file)) for file in files_to_check])
    return all_files_exist

datasets = ["DOG","INS_2", "PLT_NET", "MED_LF", "PNU" , "RSICB" , "APL" , "TEX_DTD" , "ACT_40" , "MD_5_BIS"]

cfg_finished = []
for eval_base in eval_base_path:
    finished = False
    if "within-domain" in eval_base:
        finished = all([check_files(f"/{eval_base}/{dataset}") for dataset in datasets])
    else:
        finished = check_files(f"/{eval_base}/")
    cfg_finished.append(finished)
    
df["finished"] = cfg_finished

In [129]:
baselines = list(df[df["dropout_mode"] == "baseline"]["cmd_scripts"])
remaining_df = df[df["dropout_mode"] != "baseline"]
# Write commands to CSV file
csv_file_path = "baselines.csv"
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)

    writer.writerow(["ID", "command", "status", "attached to process", "attached to gpu", "process alive", "expected_cpu_usage (in %)", "expected_gpu_usage (in MB)", "timestamp_started", "last_timestamp"])
    for index, cmd in enumerate(baselines, start=1):
        writer.writerow([index, cmd, "waiting", " ", " ", " ", "1800", "3500", " ", " "])

In [130]:
generative = list(remaining_df[remaining_df["data_aug"] == '"GenerativeAugmentation"']["cmd_scripts"])
remaining_df = remaining_df[remaining_df["data_aug"] != '"GenerativeAugmentation"']
# Write commands to CSV file
csv_file_path = "generative.csv"
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)

    writer.writerow(["ID", "command", "status", "attached to process", "attached to gpu", "process alive", "expected_cpu_usage (in %)", "expected_gpu_usage (in MB)", "timestamp_started", "last_timestamp"])
    for index, cmd in enumerate(generative, start=1):
        writer.writerow([index, cmd, "waiting", " ", " ", " ", "1800", "20000", " ", " "])

In [131]:
standard = list(remaining_df[remaining_df["data_aug"] == '"StandardAugmentation"']["cmd_scripts"])
remaining_df = remaining_df[remaining_df["data_aug"] != '"StandardAugmentation"']
# Write commands to CSV file
csv_file_path = "standard.csv"
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)

    writer.writerow(["ID", "command", "status", "attached to process", "attached to gpu", "process alive", "expected_cpu_usage (in %)", "expected_gpu_usage (in MB)", "timestamp_started", "last_timestamp"])
    for index, cmd in enumerate(standard, start=1):
        writer.writerow([index, cmd, "waiting", " ", " ", " ", "1800", "20000", " ", " "])

In [134]:
pseudo = list(remaining_df[remaining_df["data_aug"] == '"PseudoAugmentation"']["cmd_scripts"])
remaining_df = remaining_df[remaining_df["data_aug"] != '"PseudoAugmentation"']
# Write commands to CSV file
csv_file_path = "pseudo.csv"
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)

    writer.writerow(["ID", "command", "status", "attached to process", "attached to gpu", "process alive", "expected_cpu_usage (in %)", "expected_gpu_usage (in MB)", "timestamp_started", "last_timestamp"])
    for index, cmd in enumerate(pseudo, start=1):
        writer.writerow([index, cmd, "waiting", " ", " ", " ", "1800", "20000", " ", " "])

In [135]:
dropout_baselines = list(remaining_df["cmd_scripts"])
remaining_df = remaining_df[[False for _ in range(remaining_df.shape[0])]]

# Write commands to CSV file
csv_file_path = "dropout_baselines.csv"
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)

    writer.writerow(["ID", "command", "status", "attached to process", "attached to gpu", "process alive", "expected_cpu_usage (in %)", "expected_gpu_usage (in MB)", "timestamp_started", "last_timestamp"])
    for index, cmd in enumerate(dropout_baselines, start=1):
        writer.writerow([index, cmd, "waiting", " ", " ", " ", "1800", "20000", " ", " "])

In [141]:
remaining_df[[False for _ in range(remaining_df.shape[0])]]

Unnamed: 0,model,mode,k,k_config,dropout_mode,confidence_method,confidence,data_aug,config_path,config,cmd_scripts,finished
