## Handling of results from a single run of the simulation

In [25]:
# Import libraries
import pandas as pd
import json
import os 
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

In [None]:
# Experiment configuration and the command line that reproduces the run

# --- Settings of the current experiment ---
model = "llama3.1-8b"
temperature = 1
initial_examples_generated = 10
feedback_iterations = 10
one_shot = True
spec_type = "both"  # one of: "natural", "formal", "both"

# --- Names of the input specification files ---
formal_specification_name = "formal-specification.h"
natural_language_specification_name = "natural-language-specification.h"
function_signature_name = "function-signature.h"

# --- Prompt flavour derived from the one_shot switch ---
if one_shot:
    prompt_type = "one-shot"
else:
    prompt_type = "zero-shot"

# The output folder name encodes every setting, joined by dashes
output_folder_name = "-".join(
    str(setting)
    for setting in (
        model,
        initial_examples_generated,
        feedback_iterations,
        temperature,
        prompt_type,
        spec_type,
    )
)

# Adjacent string literals are concatenated by the parser, so each line
# below contributes one option to the final command.
command = (
    "python3 main.py generate_code_folder"
    " -d ../paper_problems/"
    f" -ieg {initial_examples_generated}"
    f" -iter {feedback_iterations}"
    f" -temp {temperature}"
    " -wpt 5"
    f" -o ../output/{output_folder_name}"
    " -output-file generated_code.c"
    f" -fsf {formal_specification_name}"
    f" -nl {natural_language_specification_name}"
    f" -sig {function_signature_name}"
    f" -pt {prompt_type}"
    f" -spectype {spec_type}"
    f" -model {model}"
)

print(f"output_folder: {output_folder_name}")
print(command)

output_folder: llama3.1-8b-10-10-1-one-shot-both
python3 main.py generate_code_folder -d ../paper_problems/ -ieg 10 -iter 10 -temp 1 -wpt 5 -o ../output/llama3.1-8b-10-10-1-one-shot-both -output-file generated_code.c -fsf formal-specification.h -nl natural-language-specification.h -sig function-signature.h -pt one-shot -spectype both -model llama3.1-8b


In [18]:
# Collect the results of every problem folder of this run into one dataframe.
#
# For each folder under ../output/<output_folder_name>/ the file results.txt
# (JSON) is read, and five rows are appended to iteration_array:
#   verified goals ("a / b"), passed tests ("a / b"), verified-goal ratio,
#   passed-test ratio, and generated code length (number of newlines).


def _fraction_to_float(fraction):
    """Convert a "<numerator> / <denominator>" string into a ratio.

    Parses the two integers explicitly instead of calling eval() on text
    that was read from a results file. A zero denominator (e.g. "0 / 0")
    yields 0 rather than raising ZeroDivisionError.
    """
    numerator, denominator = (int(part) for part in fraction.split("/"))
    if denominator == 0:
        return 0
    return numerator / denominator


# List all folders in the output directory of this run
folders = os.listdir(f'../output/{output_folder_name}')

# Sort numerically on the number that prefixes each folder name ("<number>-...")
folders.sort(key=lambda x: int(x.split('-')[0]))

# Rows that will become the dataframe (five rows per folder)
iteration_array = []

# Optional filter: only keep folders whose number is at least the given value
# folders = [folder for folder in folders if int(folder.split('-')[0]) >= 427]

# Per-folder totals, one entry per folder, in the same order as `folders`
total_completions_used = []
total_tokens_used = []
total_completions = []
verification_time = []

# For each folder read the output and add it to the rows
for folder_name in folders:

    # results.txt holds a JSON object with the totals and per-completion info
    with open(f"../output/{output_folder_name}/{folder_name}/results.txt", 'r') as file:
        data = json.load(file)

    verified_goals = []
    passed_tests = []
    verified_goals_percentage = []
    passed_tests_percentage = []
    generated_code_length = []

    total_completions_used.append(data['total_completions_used'])
    total_tokens_used.append(data['total_tokens_used'])
    total_completions.append(data['total_completions_requested'])
    verification_time.append(data['total_time_taken_verification'])

    # All completions: first the initial code generation, then every
    # code-improvement iteration, in order
    completions = list(data["initial_code_generation_information"][0]['completions'])
    for improvement in data["code_improvement_information"]:
        completions.extend(improvement['completions'])

    for completion in completions:
        goals = completion['verified_goals_count']
        verified_goals.append(goals)
        verified_goals_percentage.append(_fraction_to_float(goals))

        test_information = completion["test_information"]
        if len(test_information) == 1:
            # Per the original note: on a compilation error the test
            # information has length 1; the pass ratio is recorded as 0.
            summary = test_information['summary']
            passed_tests.append(f"{summary['passed']} / {summary['total']}")
            passed_tests_percentage.append(0)
        else:
            # Otherwise the last entry carries the summary that is reported
            summary = test_information[-1]['summary']
            passed_tests.append(f"{summary['passed']} / {summary['total']}")
            passed_tests_percentage.append(summary['passed'] / summary['total'])

        # Length of the generated code, measured as its number of newlines
        generated_code_length.append(completion['gpt_output'].count("\n"))

    # Five rows per folder; the dataframe is transposed when displayed
    iteration_array.append(verified_goals)
    iteration_array.append(passed_tests)
    iteration_array.append(verified_goals_percentage)
    iteration_array.append(passed_tests_percentage)
    iteration_array.append(generated_code_length)

# Create a dataframe with the information of the iterations
df = pd.DataFrame(iteration_array)


In [19]:
# Show the collected results transposed, so each dataframe row becomes a column
print(df.transpose())

         0        1     2     3     4          5        6         7   \
0     0 / 0    0 / 0     0     0    10    27 / 28  26 / 34  0.964286   
1   21 / 21  30 / 30   1.0   1.0     7    17 / 18  21 / 34  0.944444   
2      None     None  None  None  None  112 / 123  29 / 34  0.910569   
3      None     None  None  None  None    37 / 38  33 / 34  0.973684   
4      None     None  None  None  None    25 / 26  21 / 34  0.961538   
..      ...      ...   ...   ...   ...        ...      ...       ...   
78     None     None  None  None  None       None     None      None   
79     None     None  None  None  None       None     None      None   
80     None     None  None  None  None       None     None      None   
81     None     None  None  None  None       None     None      None   
82     None     None  None  None  None       None     None      None   

          8     9   ...       65       66        67        68    69       70  \
0   0.764706     8  ...  37 / 38   7 / 34  0.973684  0.

## Use Google Sheets and Google Drive to store the information and results


In [20]:
# Write the results of this run to a new worksheet in the "Paper Results"
# spreadsheet, created as a copy of the "result_template" worksheet.

# Define the scope
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']

# Load the service-account credentials
creds = ServiceAccountCredentials.from_json_keyfile_name('../tmp/vecogen-1ec545f23e03.json', scope)

# Authorize the gspread client
client = gspread.authorize(creds)

# Open the spreadsheet that holds all results
sheet = client.open('Paper Results')

# Get the template worksheet
template_sheet_descriptive_name = "result_template"
template_sheet_descriptive = sheet.worksheet(template_sheet_descriptive_name)

# Duplicate the template and name the copy after the output folder
new_sheet_name = output_folder_name

result_sheet = sheet.duplicate_sheet(template_sheet_descriptive.id)
result_sheet.update_title(new_sheet_name)

# Move the new worksheet to the last position.
# "requests" is a list of request objects in the Sheets batchUpdate body.
sheet.batch_update({
    "requests": [
        {
            "updateSheetProperties": {
                "properties": {
                    "sheetId": result_sheet.id,
                    "index": len(sheet.worksheets())  # Move to the last position
                },
                "fields": "index"
            }
        }
    ]
})

# Fetch the new worksheet by its title
worksheet = sheet.worksheet(new_sheet_name)

# Target range for the per-completion data
start_cell = 'R50'
end_cell =  'ZZZ199'

# Convert DataFrame to list of lists
data_to_update = df.values.tolist()

# Update the range with the DataFrame values. Named arguments are used
# because the positional order of Worksheet.update() differs between
# gspread versions.
worksheet.update(range_name=start_cell + ':' + end_cell, values=data_to_update)

# Experiment settings and totals in the header cells
worksheet.update_acell('C3', model)
worksheet.update_acell('C4', temperature)
worksheet.update_acell('C5', initial_examples_generated)
worksheet.update_acell('C6', feedback_iterations)
worksheet.update_acell('C7', spec_type)
worksheet.update_acell('C8', command)
worksheet.update_acell('C9', sum(verification_time))
worksheet.update_acell('C11', prompt_type)

# Per-problem totals: one filled row followed by four blank rows, so every
# problem occupies five rows, matching the five dataframe rows per problem.
# Iterating over the collected lists (instead of a fixed count) keeps this
# working for any number of problems.
rows_per_problem = 5
total_extra_data = []

for tokens, completions_used, completions_requested, seconds in zip(
    total_tokens_used, total_completions_used, total_completions, verification_time
):
    total_extra_data.append([tokens, completions_used, completions_requested, seconds])
    total_extra_data.extend(["", "", "", ""] for _ in range(rows_per_problem - 1))

worksheet.update(range_name="L50:O199", values=total_extra_data)


  worksheet.update(start_cell + ':' + end_cell, data_to_update)
  worksheet.update("L50:O199", total_extra_data)


{'spreadsheetId': '18N_AtAxvDFzIaBw-eZGcjJy54zkE8kW_lD0UvPoYomQ',
 'updatedRange': "'llama-3.1-70b-10-10-1-zero-shot-both'!L50:O124",
 'updatedRows': 75,
 'updatedColumns': 4,
 'updatedCells': 300}

## Store the data in the Google Drive folder

In [21]:
# Credentials for the Drive API: a service-account key file restricted to
# the Drive scope
SCOPES = ['https://www.googleapis.com/auth/drive']
service_account_key_path = '../tmp/vecogen-1ec545f23e03.json'
creds = Credentials.from_service_account_file(service_account_key_path, scopes=SCOPES)

# Drive v3 client used by the helper functions below
drive_service = build('drive', 'v3', credentials=creds)

def create_folder(name, parent_id=None):
    """Create a Google Drive folder called *name* and return its ID.

    If *parent_id* is given (truthy), the folder is created inside that
    parent; otherwise no parent is set in the request.
    """
    metadata = {'name': name, 'mimeType': 'application/vnd.google-apps.folder'}
    if parent_id:
        metadata['parents'] = [parent_id]

    # Only the ID of the created folder is requested back
    request = drive_service.files().create(body=metadata, fields='id')
    created = request.execute()
    return created.get('id')

def upload_file(file_path, parent_id):
    """Upload the file at *file_path* into the Drive folder *parent_id*.

    The Drive file keeps the local file's base name. Returns the ID of the
    uploaded file.
    """
    metadata = {
        'name': os.path.basename(file_path),
        'parents': [parent_id],
    }
    payload = MediaFileUpload(file_path, resumable=True)

    # Only the ID of the uploaded file is requested back
    request = drive_service.files().create(body=metadata, media_body=payload, fields='id')
    uploaded = request.execute()
    return uploaded.get('id')

def upload_folder(local_folder_path, parent_id=None):
    """Upload a folder to Google Drive, recursively including all files and subfolders.

    Args:
        local_folder_path: Path of the local folder to upload. A trailing
            path separator is tolerated.
        parent_id: ID of the Drive folder to create the copy in; passed on
            to create_folder.

    Returns:
        The ID of the Drive folder that was created for local_folder_path.
    """
    # normpath strips a trailing separator; without it basename("dir/") is ""
    # and the Drive folder would be created with an empty name.
    folder_name = os.path.basename(os.path.normpath(local_folder_path))
    folder_id = create_folder(folder_name, parent_id)
    for item in os.listdir(local_folder_path):
        item_path = os.path.join(local_folder_path, item)
        if os.path.isdir(item_path):
            upload_folder(item_path, folder_id)
        else:
            upload_file(item_path, folder_id)
    return folder_id

def list_folders():
    """Print the name and ID of every non-trashed folder returned by the Drive API.

    Follows nextPageToken so that folders beyond the first result page are
    listed as well. Prints 'No folders found.' when there are none.
    """
    query = "mimeType='application/vnd.google-apps.folder' and trashed=false"
    folders = []
    page_token = None

    while True:
        results = drive_service.files().list(
            q=query,
            spaces='drive',
            fields='nextPageToken, files(id, name)',
            pageToken=page_token,
        ).execute()
        folders.extend(results.get('files', []))

        # No token in the response means this was the last page
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    if not folders:
        print('No folders found.')
    else:
        print('Folders:')
        for folder in folders:
            print(f"Name: {folder['name']}, ID: {folder['id']}")

# Destination Drive folder and the local output folder of this run
drive_parent_folder_id = '1ZvsBlLV94f1TTG-2aIfeX_v86I4bOxq8'
local_folder_path = f'../output/{output_folder_name}'

# Recursively upload the local folder into the Drive parent folder
upload_folder(local_folder_path, parent_id=drive_parent_folder_id)

print(f'Folder {local_folder_path} uploaded to Google Drive folder with ID {drive_parent_folder_id}')

# Store the link to the Drive parent folder in cell C10 of the results worksheet
drive_folder_url = f"https://drive.google.com/drive/folders/{drive_parent_folder_id}"
worksheet.update_acell('C10', drive_folder_url)


Folder ../output/llama-3.1-70b-10-10-1-zero-shot-both uploaded to Google Drive folder with ID 1ZvsBlLV94f1TTG-2aIfeX_v86I4bOxq8


{'spreadsheetId': '18N_AtAxvDFzIaBw-eZGcjJy54zkE8kW_lD0UvPoYomQ',
 'updatedRange': "'llama-3.1-70b-10-10-1-zero-shot-both'!C10",
 'updatedRows': 1,
 'updatedColumns': 1,
 'updatedCells': 1}