In [3]:
import os
import pandas as pd

In [4]:
base_dir = "reduced_evolutions/two_stage_surr_evolution_v2"
target_hash = "c0f1f7885c"


def find_individual_dirs(base_dir, target_hash, generations=range(1, 16)):
    """Locate every entry named ``target_hash`` inside the generation folders.

    Args:
        base_dir: Folder that contains the ``generation_<n>`` sub-folders.
        target_hash: Entry name to look for inside each generation folder.
        generations: Generation numbers to scan (defaults to 1..15).

    Returns:
        List of ``<base_dir>/generation_<n>/<target_hash>`` paths, in the
        order the generations were scanned.
    """
    matches = []
    for generation_num in generations:
        generation_dir = os.path.join(base_dir, f'generation_{generation_num}')

        # os.listdir raises FileNotFoundError on a missing folder; skip it
        # instead, the same way the compile cells of this notebook do.
        if not os.path.isdir(generation_dir):
            print(f"Generation {generation_num} not found")
            continue

        if target_hash in os.listdir(generation_dir):
            individual_dir = os.path.join(generation_dir, target_hash)
            print(f"Found target hash in {individual_dir}")
            matches.append(individual_dir)
    return matches


target_dirs = find_individual_dirs(base_dir, target_hash)

Found target hash in reduced_evolutions/two_stage_surr_evolution_v2/generation_1/c0f1f7885c


In [None]:
# "valid" dataset: the best epoch (lowest val_epoch_loss) of every individual
# whose metrics.csv has an 'epoch_num' column. Individuals whose metrics.csv
# lacks that column are skipped (presumably runs that never logged an epoch --
# TODO confirm).

base_dir = "reduced_evolutions/two_stage_surr_evolution_v2"


def collect_valid_best_epochs(base_dir, generations=range(1, 16), train_generations=(1, 2)):
    """Collect each individual's best-epoch row, split into train/valid groups.

    Args:
        base_dir: Folder holding ``out.csv`` and the ``generation_<n>`` folders.
        generations: Generation numbers to scan (defaults to 1..15).
        train_generations: Generations whose rows go to the first returned
            list; rows of all other generations go to the second one.

    Returns:
        ``(train_rows, valid_rows)``: two lists of pandas Series. Each Series
        is the metrics row with the minimum ``val_epoch_loss`` plus the added
        ``generation``, ``individual_hash`` and ``genome`` entries.

    Problems (missing files/columns, unreadable CSVs, hashes absent from
    out.csv) are printed and the affected individual is skipped; nothing is
    raised.
    """
    train_rows, valid_rows = [], []

    # out.csv is the same file for every individual, so load it once up front
    # instead of re-reading it inside the inner loop.
    out_file = os.path.join(base_dir, 'out.csv')
    if not os.path.isfile(out_file):
        print(f"out.csv not found at {out_file}")
        return train_rows, valid_rows
    try:
        # Read hashes as text so an all-digit hash is not parsed as a number
        # (which would never compare equal to the directory name).
        out_df = pd.read_csv(out_file, dtype={'hash': str})
    except Exception as e:
        print(f"Error processing {out_file}: {e}")
        return train_rows, valid_rows
    if 'genome' not in out_df.columns:
        print(f"Genome column not found in {out_file}")
        return train_rows, valid_rows
    if 'hash' not in out_df.columns:
        print(f"Hash column not found in {out_file}")
        return train_rows, valid_rows
    # drop_duplicates keeps the first occurrence, i.e. the row a boolean
    # filter followed by .iloc[0] would have picked.
    genome_by_hash = (
        out_df.drop_duplicates(subset='hash')
        .set_index('hash')['genome']
        .to_dict()
    )

    for generation_num in generations:
        print(f"Processing generation {generation_num}")
        generation_dir = os.path.join(base_dir, f'generation_{generation_num}')

        if not os.path.isdir(generation_dir):
            print(f"Generation {generation_num} not found")
            continue

        # sorted(): os.listdir order is arbitrary; sorting makes the row order
        # of the written CSVs reproducible.
        for individual_hash in sorted(os.listdir(generation_dir)):
            metric_file = os.path.join(generation_dir, individual_hash, 'metrics.csv')
            if not os.path.isfile(metric_file):
                print(f"Metric file not found at {metric_file}")
                continue

            try:
                metrics_df = pd.read_csv(metric_file)
                if 'epoch_num' not in metrics_df.columns:
                    continue

                # Select the best epoch (minimum val_epoch_loss). .copy()
                # detaches the row from metrics_df so adding keys below does
                # not trigger SettingWithCopyWarning.
                best_epoch_row = metrics_df.loc[metrics_df['val_epoch_loss'].idxmin()].copy()
            except Exception as e:
                # Best effort: one unreadable/malformed metrics file must not
                # abort the whole compilation.
                print(f"Error processing {metric_file}: {e}")
                continue

            if individual_hash not in genome_by_hash:
                print(f"No genome found for hash {individual_hash} in {out_file}")
                continue

            best_epoch_row['generation'] = generation_num
            best_epoch_row['individual_hash'] = individual_hash
            best_epoch_row['genome'] = genome_by_hash[individual_hash]

            if generation_num in train_generations:
                train_rows.append(best_epoch_row)
            else:
                valid_rows.append(best_epoch_row)

    return train_rows, valid_rows


compiled_data_gen_1_2, compiled_data_gen_3_above = collect_valid_best_epochs(base_dir)

compiled_df_gen_1_2 = pd.DataFrame(compiled_data_gen_1_2)
compiled_df_gen_3_above = pd.DataFrame(compiled_data_gen_3_above)

compiled_df_gen_1_2.to_csv("valid_data_train.csv", index=False)
compiled_df_gen_3_above.to_csv("valid_data_valid.csv", index=False)

In [None]:
# "invalid" dataset: the best epoch (lowest val_epoch_loss) of every
# individual. An individual whose metrics.csv has no 'epoch_num' column is
# represented by a single placeholder row (losses 300, scores/counts -300,
# epoch_num 0) instead of being skipped.

base_dir = "reduced_evolutions/two_stage_surr_evolution_v2"


def collect_best_epochs_with_placeholders(base_dir, generations=range(1, 16), train_generations=(1, 2)):
    """Collect each individual's best-epoch row, using a placeholder when needed.

    Args:
        base_dir: Folder holding ``out.csv`` and the ``generation_<n>`` folders.
        generations: Generation numbers to scan (defaults to 1..15).
        train_generations: Generations whose rows go to the first returned
            list; rows of all other generations go to the second one.

    Returns:
        ``(train_rows, valid_rows)``: two lists of pandas Series. Each Series
        is the metrics row with the minimum ``val_epoch_loss`` (or the
        placeholder row when metrics.csv has no ``epoch_num`` column) plus the
        added ``generation``, ``individual_hash`` and ``genome`` entries.

    Problems (missing files/columns, unreadable CSVs, hashes absent from
    out.csv) are printed and the affected individual is skipped; nothing is
    raised.
    """
    train_rows, valid_rows = [], []

    # Stand-in metrics for an individual whose metrics.csv has no 'epoch_num'
    # column. Key order is kept because it determines the column order of a
    # frame built from this row.
    placeholder_row = {
        'train_epoch_loss': 300,
        'uw_val_epoch_loss': 300,
        'val_epoch_loss': 300,
        'iou_loss': 300,
        'giou_loss': 300,
        'diou_loss': 300,
        'ciou_loss': 300,
        'center_loss': 300,
        'size_loss': 300,
        'obj_loss': 300,
        'precision': -300,
        'recall': -300,
        'f1_score': -300,
        'average_precision': -300,
        'true_positives': -300,
        'false_positives': -300,
        'false_negatives': -300,
        'epoch_num': 0
    }

    # out.csv is the same file for every individual, so load it once up front
    # instead of re-reading it inside the inner loop.
    out_file = os.path.join(base_dir, 'out.csv')
    if not os.path.isfile(out_file):
        print(f"out.csv not found at {out_file}")
        return train_rows, valid_rows
    try:
        # Read hashes as text so an all-digit hash is not parsed as a number
        # (which would never compare equal to the directory name).
        out_df = pd.read_csv(out_file, dtype={'hash': str})
    except Exception as e:
        print(f"Error processing {out_file}: {e}")
        return train_rows, valid_rows
    if 'genome' not in out_df.columns:
        print(f"Genome column not found in {out_file}")
        return train_rows, valid_rows
    if 'hash' not in out_df.columns:
        print(f"Hash column not found in {out_file}")
        return train_rows, valid_rows
    # drop_duplicates keeps the first occurrence, i.e. the row a boolean
    # filter followed by .iloc[0] would have picked.
    genome_by_hash = (
        out_df.drop_duplicates(subset='hash')
        .set_index('hash')['genome']
        .to_dict()
    )

    for generation_num in generations:
        print(f"Processing generation {generation_num}")
        generation_dir = os.path.join(base_dir, f'generation_{generation_num}')

        if not os.path.isdir(generation_dir):
            print(f"Generation {generation_num} not found")
            continue

        # sorted(): os.listdir order is arbitrary; sorting makes the row order
        # of the written CSVs reproducible.
        for individual_hash in sorted(os.listdir(generation_dir)):
            metric_file = os.path.join(generation_dir, individual_hash, 'metrics.csv')
            if not os.path.isfile(metric_file):
                print(f"Metric file not found at {metric_file}")
                continue

            try:
                metrics_df = pd.read_csv(metric_file)
                if 'epoch_num' not in metrics_df.columns:
                    metrics_df = pd.DataFrame([placeholder_row])

                # .copy() detaches the row from metrics_df so adding keys
                # below does not trigger SettingWithCopyWarning.
                best_epoch_row = metrics_df.loc[metrics_df['val_epoch_loss'].idxmin()].copy()
            except Exception as e:
                # Best effort: one unreadable/malformed metrics file must not
                # abort the whole compilation.
                print(f"Error processing {metric_file}: {e}")
                continue

            if individual_hash not in genome_by_hash:
                print(f"No genome found for hash {individual_hash} in {out_file}")
                continue

            best_epoch_row['generation'] = generation_num
            best_epoch_row['individual_hash'] = individual_hash
            best_epoch_row['genome'] = genome_by_hash[individual_hash]

            if generation_num in train_generations:
                train_rows.append(best_epoch_row)
            else:
                valid_rows.append(best_epoch_row)

    return train_rows, valid_rows


compiled_data_gen_1_2, compiled_data_gen_3_above = collect_best_epochs_with_placeholders(base_dir)

compiled_df_gen_1_2 = pd.DataFrame(compiled_data_gen_1_2)
compiled_df_gen_3_above = pd.DataFrame(compiled_data_gen_3_above)

compiled_df_gen_1_2.to_csv("invalid_data_train.csv", index=False)
compiled_df_gen_3_above.to_csv("invalid_data_valid.csv", index=False)



In [None]:
# "compiled" dataset: every epoch row of every individual. An individual whose
# metrics.csv has no 'epoch_num' column contributes a single placeholder row
# (losses 300, scores/counts -300, epoch_num 0).

# Already imported in the first cell; kept so this cell also runs on its own.
import os
import pandas as pd

base_dir = "reduced_evolutions/two_stage_surr_evolution_v2"


def collect_all_epochs(base_dir, generations=range(1, 16)):
    """Collect every metrics row of every individual as a list of records.

    Args:
        base_dir: Folder holding ``out.csv`` and the ``generation_<n>`` folders.
        generations: Generation numbers to scan (defaults to 1..15).

    Returns:
        List of dicts, one per metrics row (or one placeholder dict for an
        individual whose metrics.csv has no ``epoch_num`` column), each with
        the added ``generation``, ``individual_hash`` and ``genome`` keys.

    Problems (missing files/columns, unreadable CSVs, hashes absent from
    out.csv) are printed and the affected individual is skipped; nothing is
    raised.
    """
    records = []

    # Stand-in metrics for an individual whose metrics.csv has no 'epoch_num'
    # column. Key order is kept because it determines the column order of a
    # frame built from this row.
    placeholder_row = {
        'train_epoch_loss': 300,
        'uw_val_epoch_loss': 300,
        'val_epoch_loss': 300,
        'iou_loss': 300,
        'giou_loss': 300,
        'diou_loss': 300,
        'ciou_loss': 300,
        'center_loss': 300,
        'size_loss': 300,
        'obj_loss': 300,
        'precision': -300,
        'recall': -300,
        'f1_score': -300,
        'average_precision': -300,
        'true_positives': -300,
        'false_positives': -300,
        'false_negatives': -300,
        'epoch_num': 0
    }

    # out.csv is the same file for every individual, so load it once up front
    # instead of re-reading it inside the inner loop.
    out_file = os.path.join(base_dir, 'out.csv')
    if not os.path.isfile(out_file):
        print(f"out.csv not found at {out_file}")
        return records
    try:
        # Read hashes as text so an all-digit hash is not parsed as a number
        # (which would never compare equal to the directory name).
        out_df = pd.read_csv(out_file, dtype={'hash': str})
    except Exception as e:
        print(f"Error processing {out_file}: {e}")
        return records
    if 'genome' not in out_df.columns:
        print(f"Genome column not found in {out_file}")
        return records
    if 'hash' not in out_df.columns:
        print(f"Hash column not found in {out_file}")
        return records
    # drop_duplicates keeps the first occurrence, i.e. the row a boolean
    # filter followed by .iloc[0] would have picked.
    genome_by_hash = (
        out_df.drop_duplicates(subset='hash')
        .set_index('hash')['genome']
        .to_dict()
    )

    for generation_num in generations:
        print(f"Processing generation {generation_num}")
        generation_dir = os.path.join(base_dir, f'generation_{generation_num}')

        if not os.path.isdir(generation_dir):
            print(f"Generation {generation_num} not found")
            continue

        # sorted(): os.listdir order is arbitrary; sorting makes the row order
        # of the written CSV reproducible.
        for individual_hash in sorted(os.listdir(generation_dir)):
            metric_file = os.path.join(generation_dir, individual_hash, 'metrics.csv')
            if not os.path.isfile(metric_file):
                print(f"Metric file not found at {metric_file}")
                continue

            try:
                metrics_df = pd.read_csv(metric_file)
            except Exception as e:
                # Best effort: one unreadable metrics file must not abort the
                # whole compilation.
                print(f"Error processing {metric_file}: {e}")
                continue

            if 'epoch_num' not in metrics_df.columns:
                metrics_df = pd.DataFrame([placeholder_row])

            if individual_hash not in genome_by_hash:
                print(f"No genome found for hash {individual_hash} in {out_file}")
                continue

            # Add generation, individual hash, and genome to all rows
            metrics_df['generation'] = generation_num
            metrics_df['individual_hash'] = individual_hash
            metrics_df['genome'] = genome_by_hash[individual_hash]

            records.extend(metrics_df.to_dict('records'))

    return records


compiled_data = collect_all_epochs(base_dir)

compiled_df = pd.DataFrame(compiled_data)
compiled_df.to_csv("compiled_data.csv", index=False)

In [None]:
# compiled_data_best_epoch: the best epoch (lowest val_epoch_loss) of every
# individual whose metrics.csv has an 'epoch_num' column. Individuals whose
# metrics.csv lacks that column are skipped.

base_dir = "reduced_evolutions/two_stage_surr_evolution_v2"


def collect_best_epochs(base_dir, generations=range(1, 16)):
    """Collect the best-epoch metrics row of each individual in one list.

    Args:
        base_dir: Folder holding ``out.csv`` and the ``generation_<n>`` folders.
        generations: Generation numbers to scan (defaults to 1..15).

    Returns:
        List of pandas Series. Each Series is the metrics row with the minimum
        ``val_epoch_loss`` plus the added ``generation``, ``individual_hash``
        and ``genome`` entries.

    Problems (missing files/columns, unreadable CSVs, hashes absent from
    out.csv) are printed and the affected individual is skipped; nothing is
    raised.
    """
    best_rows = []

    # out.csv is the same file for every individual, so load it once up front
    # instead of re-reading it inside the inner loop.
    out_file = os.path.join(base_dir, 'out.csv')
    if not os.path.isfile(out_file):
        print(f"out.csv not found at {out_file}")
        return best_rows
    try:
        # Read hashes as text so an all-digit hash is not parsed as a number
        # (which would never compare equal to the directory name).
        out_df = pd.read_csv(out_file, dtype={'hash': str})
    except Exception as e:
        print(f"Error processing {out_file}: {e}")
        return best_rows
    if 'genome' not in out_df.columns:
        print(f"Genome column not found in {out_file}")
        return best_rows
    if 'hash' not in out_df.columns:
        print(f"Hash column not found in {out_file}")
        return best_rows
    # drop_duplicates keeps the first occurrence, i.e. the row a boolean
    # filter followed by .iloc[0] would have picked.
    genome_by_hash = (
        out_df.drop_duplicates(subset='hash')
        .set_index('hash')['genome']
        .to_dict()
    )

    for generation_num in generations:
        print(f"Processing generation {generation_num}")
        generation_dir = os.path.join(base_dir, f'generation_{generation_num}')

        if not os.path.isdir(generation_dir):
            print(f"Generation {generation_num} not found")
            continue

        # sorted(): os.listdir order is arbitrary; sorting makes the row order
        # of the written CSV reproducible.
        for individual_hash in sorted(os.listdir(generation_dir)):
            metric_file = os.path.join(generation_dir, individual_hash, 'metrics.csv')
            if not os.path.isfile(metric_file):
                print(f"Metric file not found at {metric_file}")
                continue

            try:
                metrics_df = pd.read_csv(metric_file)
                if 'epoch_num' not in metrics_df.columns:
                    continue

                # Select the best epoch (minimum val_epoch_loss). .copy()
                # detaches the row from metrics_df so adding keys below does
                # not trigger SettingWithCopyWarning.
                best_epoch_row = metrics_df.loc[metrics_df['val_epoch_loss'].idxmin()].copy()
            except Exception as e:
                # Best effort: one unreadable/malformed metrics file must not
                # abort the whole compilation.
                print(f"Error processing {metric_file}: {e}")
                continue

            if individual_hash not in genome_by_hash:
                print(f"No genome found for hash {individual_hash} in {out_file}")
                continue

            best_epoch_row['generation'] = generation_num
            best_epoch_row['individual_hash'] = individual_hash
            best_epoch_row['genome'] = genome_by_hash[individual_hash]

            best_rows.append(best_epoch_row)

    return best_rows


compiled_data_best_epoch = collect_best_epochs(base_dir)

compiled_df_best_epoch = pd.DataFrame(compiled_data_best_epoch)

compiled_df_best_epoch.to_csv("compiled_data_best_epoch.csv", index=False)