In [1]:
import os
from utils.Logger import Logger
import pandas as pd

In [None]:
# Directory holding the per-chunk CSV logs. File names are appended to this
# string directly (log_folder + file), so the trailing slash is required.
log_folder = "./log/llama3.0/"

In [19]:
# List the log directory in sorted order: os.listdir() returns entries in
# arbitrary order, and the cells below derive run ids from each file's position
# in rs_files / ga_files, so an unsorted listing makes the numbering unstable.
log_files = sorted(os.listdir(log_folder))

# "-suffix" companion files are opened next to their main log later on, so they
# are excluded from both lists.
main_logs = [file for file in log_files if "suffix" not in file]

# Random-search logs are recognised by "rs" in the file name; everything else
# is treated as a genetic-algorithm log.
rs_files = [file for file in main_logs if "rs" in file]
ga_files = [file for file in main_logs if "rs" not in file]

# Merging execution chunks

In [None]:
# random search
# Merge the per-chunk random-search logs (and their "-suffix" companions) into
# a single pair of CSV files, shifting run ids so they stay unique across chunks.
output_file = "./log/llama3.0-rs.csv"
runs_per_file = 3  # run ids of the i-th chunk are shifted by i * runs_per_file
runs_list = []
suffixes_list = []

for i, file in enumerate(rs_files):
  run_offset = i * runs_per_file

  run_info = pd.read_csv(log_folder + file)
  suffixes_file = pd.read_csv(log_folder + file[:-4] + "-suffix.csv")

  # Shift every run id in one arithmetic step. Replacing ids one value at a
  # time can rewrite an id that was itself produced by an earlier replacement
  # whenever a shifted id equals another id still waiting to be replaced.
  run_info['run'] = run_info['run'] + run_offset
  suffixes_file['run'] = suffixes_file['run'] + run_offset

  runs_list.append(run_info)
  suffixes_list.append(suffixes_file)

if not runs_list:
  raise FileNotFoundError("no random-search log files found in " + log_folder)

# Concatenating whole frames keeps each column's dtype; the integer casts are
# kept so the exported id/counter columns are always written as integers.
export_file = pd.concat(runs_list, ignore_index=True)
export_file = export_file.astype({'run': int, 'iteration': int, 'sure_generated': int})
export_file.to_csv(output_file, index=False)

export_file = pd.concat(suffixes_list, ignore_index=True)
export_file['run'] = export_file['run'].astype(int)
export_file.to_csv(output_file[:-4] + "-suffix.csv", index=False)

In [None]:
# genetic algorithm
# Merge the per-chunk genetic-algorithm logs (and their "-suffix" companions)
# into a single pair of CSV files, shifting run ids so they stay unique across
# chunks.
output_file = "./log/llama3.0-ga.csv"
runs_per_file = 3  # run ids of the i-th chunk are shifted by i * runs_per_file
runs_list = []
suffixes_list = []

for i, file in enumerate(ga_files):
  run_offset = i * runs_per_file

  run_info = pd.read_csv(log_folder + file)
  suffixes_file = pd.read_csv(log_folder + file[:-4] + "-suffix.csv")

  # Shift every run id in one arithmetic step. Replacing ids one value at a
  # time can rewrite an id that was itself produced by an earlier replacement
  # whenever a shifted id equals another id still waiting to be replaced.
  run_info['run'] = run_info['run'] + run_offset
  suffixes_file['run'] = suffixes_file['run'] + run_offset

  runs_list.append(run_info)
  suffixes_list.append(suffixes_file)

if not runs_list:
  raise FileNotFoundError("no genetic-algorithm log files found in " + log_folder)

# Concatenating whole frames keeps each column's dtype; the integer casts are
# kept so the exported id/counter columns are always written as integers.
export_file = pd.concat(runs_list, ignore_index=True)
export_file = export_file.astype({'run': int, 'iteration': int, 'sure_count': int})
export_file.to_csv(output_file, index=False)

export_file = pd.concat(suffixes_list, ignore_index=True)
export_file['run'] = export_file['run'].astype(int)
export_file.to_csv(output_file[:-4] + "-suffix.csv", index=False)

# File for transferability

In [20]:
# Accumulator for transferability rows of the form [type, run, fitness, suffix].
# The random-search and genetic-algorithm cells below both append to it, so
# re-run this cell before re-running either of them to avoid duplicate rows.
data = []

In [None]:
# random search
# For every random-search run, keep the row with the highest fitness and pair
# it with that run's suffix. Relies on rs_files, log_folder, runs_per_file and
# data from earlier cells; rows are appended to data.

def extract_row_info(row, type):
  """Build one transferability record: [type, run, fitness, suffix]."""
  return [type, int(row['run']), row['fitness'], row['suffix']]

for i, file in enumerate(rs_files):
  run_offset = i * runs_per_file

  run_info = pd.read_csv(log_folder + file)
  # Shift run ids arithmetically; value-by-value replacement can rewrite an id
  # that an earlier replacement has just produced.
  run_info['run'] = run_info['run'] + run_offset

  # Index of the best-fitness row within each run.
  idx = run_info.groupby('run')['fitness'].idxmax()
  runs = run_info.loc[idx].reset_index(drop=True)

  suffixes_file = pd.read_csv(log_folder + file[:-4] + "-suffix.csv")
  suffixes_file['run'] = suffixes_file['run'] + run_offset

  runs = pd.merge(runs, suffixes_file, on=['run'])
  # Iterate rows explicitly so an empty merge result contributes no records
  # (list() over an empty frame would yield its column names instead).
  data += [extract_row_info(row, type="rs") for _, row in runs.iterrows()]

In [None]:
# genetic algorithm
# For every genetic-algorithm run, keep the row of the last iteration (its
# fitness_max value is recorded as the fitness) and pair it with that run's
# suffix. Relies on ga_files, log_folder, runs_per_file and data from earlier
# cells; rows are appended to data.

def extract_row_info(row, type):
  """Build one transferability record: [type, run, fitness_max, suffix]."""
  return [type, int(row['run']), row['fitness_max'], row['suffix']]

for i, file in enumerate(ga_files):
  run_offset = i * runs_per_file

  run_info = pd.read_csv(log_folder + file)
  # Shift run ids arithmetically; value-by-value replacement can rewrite an id
  # that an earlier replacement has just produced.
  run_info['run'] = run_info['run'] + run_offset

  # Index of the highest-iteration row within each run.
  idx = run_info.groupby('run')['iteration'].idxmax()
  runs = run_info.loc[idx].reset_index(drop=True)

  suffixes_file = pd.read_csv(log_folder + file[:-4] + "-suffix.csv")
  suffixes_file['run'] = suffixes_file['run'] + run_offset

  runs = pd.merge(runs, suffixes_file, on=['run'])
  # Iterate rows explicitly so an empty merge result contributes no records
  # (list() over an empty frame would yield its column names instead).
  data += [extract_row_info(row, type="ga") for _, row in runs.iterrows()]

In [23]:
# Assemble the collected transferability rows into a typed table.
columns = ['type', 'run', 'fitness', 'suffix']
column_types = {'type': str, 'run': int, 'fitness': float, 'suffix': str}
log = pd.DataFrame(data, columns=columns).astype(column_types)

In [26]:
# Write the transferability table to its own file. output_file must not be
# reused here: it still holds the genetic-algorithm merge target
# ("./log/llama3.0-ga.csv"), so writing to it would overwrite the merged GA log.
# NOTE(review): this path is assumed to be the file the suffix-conversion cell
# reads as transferability_info; confirm the intended destination.
transferability_log_file = "./transferability/llama3.0/self-transferability-info.csv"
os.makedirs(os.path.dirname(transferability_log_file), exist_ok=True)
log.to_csv(transferability_log_file, index=False)

# Export converted suffixes

In [None]:
from utils.Chat import Chat
import pandas as pd
import ast

def get_suffix(suffix, chat):
    """Turn a serialized suffix string into text via chat.detokenize.

    Every "|" in suffix is replaced with "," and the result is parsed as a
    Python literal. The parsed value is passed to chat.detokenize as a
    one-element batch, and the first element of what it returns is returned.
    """
    parsed = ast.literal_eval(suffix.replace("|", ','))
    decoded_batch = chat.detokenize([parsed])
    return decoded_batch[0]

# Read the transferability table, replace every serialized suffix with the
# text returned by get_suffix, and write the result to a new CSV.
transferability_info = "./transferability/llama3.0/self-transferability-info.csv"
output_file  = "./transferability/llama3.0/self-transferability-converted.csv"

data = pd.read_csv(transferability_info)
data['suffix'] = data['suffix'].astype(str)

# NOTE(review): the paths above say "llama3.0" while this checkpoint is
# Llama-3.2; confirm it is the model whose tokenizer produced these suffixes.
model_name = "meta-llama/Llama-3.2-3B-Instruct"

device = 'cuda'
quantized = False
chat = Chat(model_name, device=device, quantized=quantized)

# get_suffix needs the chat object to detokenize; calling it with the suffix
# alone raises TypeError because of the missing positional argument.
data['suffix'] = [get_suffix(suffix, chat) for suffix in data['suffix']]

data.to_csv(output_file, index=False)