## Handling of results from a single run of the simulation

In [2]:
# Import libraries
import pandas as pd
import json
import os 
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

In [2]:
# Define the parameters and print the command

# The settings of the current experiment
model = "3.5"
temperature = 1
initial_examples_generated = 10
feedback_iterations = 10
reboot = 1000

# The dataset can be one of the following: solution-tied", "descriptive"
dataset = "solution-tied"
header_file_name = "specification-solution-tied.h"
spec_file_name = "specification-solution-tied.h"
prompt_type = "zero-shot"
extra_info = ""

# Get the output folder name
output_folder = f"{model}-{initial_examples_generated}-{feedback_iterations}-{temperature}-{dataset}{extra_info}-{prompt_type}"

# Print the command to generate the code
command = f"python3 main.py generate_code_folder -d ../no_loop_problems/ -ieg {initial_examples_generated} -iter {feedback_iterations} -temp {temperature}  -reboot {reboot} -wpt 5 -o ../output/{output_folder} -output-file generated_code.c -sfn {header_file_name} -fsf {spec_file_name} -pt {prompt_type} -model CodeLlama"

natural_language_only = False
if natural_language_only:
    command += " -nl"

print(f"output_folder: {output_folder}")
print(command)

output_folder: 3.5-10-10-1-solution-tied-zero-shot
python3 main.py generate_code_folder -d ../no_loop_problems/ -ieg 10 -iter 10 -temp 1  -reboot 1000 -wpt 5 -o ../output/3.5-10-10-1-solution-tied-zero-shot -output-file generated_code.c -sfn specification-solution-tied.h -fsf specification-solution-tied.h -pt zero-shot -model CodeLlama -nl


In [4]:
# print all folders in the directory sorted by name
folders = os.listdir(f'../output/{output_folder}')

# Sort on number
folders.sort(key=lambda x: int(x.split('-')[0]))

# Create an empty dataframe that will be filled with info of the iterations
iteration_array = []

# Filter the folders if need be, only take ones higher than 300
# folders = [folder for folder in folders if int(folder.split('-')[0]) >= 427]

total_completions_used = []
total_tokens_used = []
total_completions = []

# For each folder read the output and add it to the dataframe
for folder_name in folders:

    # Read the output given in the file 
    with open(f"../output/{output_folder}/{folder_name}/results.txt", 'r') as file:
        # Read the file which contains an array
        data = json.load(file)

    verified_goals = []
    passed_tests = []
    verified_goals_percentage = []
    passed_tests_percentage = []
    generated_code_length = []

    total_completions_used.append(data[-1]['total_completions_used'])
    total_tokens_used.append(data[-1]['total_tokens_used'])
    total_completions.append(data[-1]['total_completions'])

    # for each iteration in the array add it to the dataframe
    for i in data:
        verified_goals.append(i['verified_goals'])

        # If the goals is 0 / 0 the percentage is 0
        if (i['verified_goals'] == "0 / 0"):
            verified_goals_percentage.append(0)
        else:
            verified_goals_percentage.append(eval(i['verified_goals']))
        # If there is a compilation error the length is only 1
        if (len(i["test_information"]) == 1):
            passed_tests.append(f"{i['test_information']['summary']['passed']} / {i['test_information']['summary']['total']}")
            passed_tests_percentage.append(0)
        else:
            passed_tests.append(f"{i['test_information'][-1]['summary']['passed']} / {i['test_information'][-1]['summary']['total']}")
            passed_tests_percentage.append(i['test_information'][-1]['summary']['passed'] / i['test_information'][-1]['summary']['total'])

        # Get the length of the generated code at this iteration by counting the newlines
        generated_code_length.append(i['gpt_output'].count("\n"))

    # Add it as a column to the dataframe
    iteration_array.append(verified_goals)
    iteration_array.append(passed_tests)
    iteration_array.append(verified_goals_percentage)
    iteration_array.append(passed_tests_percentage)
    iteration_array.append(generated_code_length)

# Create a dataframe with the information of the iterations
df = pd.DataFrame(iteration_array)

## Work together with google sheets and google drive to store the information and results


In [5]:
# define the scope
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']

# add credentials to the account
creds = ServiceAccountCredentials.from_json_keyfile_name('../tmp/vecogen-6422006a7b0e.json', scope)

# authorize the clientsheet 
client = gspread.authorize(creds)

# get the instance of the Spreadsheet
sheet = client.open('Master Thesis Merlijn Results')

# get the template sheets
template_sheet_solution_tied_name = "template-solution-tied"
template_sheet_solution_tied = sheet.worksheet(template_sheet_solution_tied_name)
template_sheet_descriptive_name = "template-descriptive"
template_sheet_descriptive = sheet.worksheet(template_sheet_descriptive_name)

# Copy the sheet called "template" to a new sheet
new_sheet_name = f"{model}-{initial_examples_generated}-{feedback_iterations}-{temperature}-{dataset}"  + str(extra_info if extra_info != "" else "")

# Copy the template sheet to a new sheet
if (dataset == "solution-tied"):
    template_sheet = template_sheet_solution_tied
else:
    template_sheet = template_sheet_descriptive
   

result_sheet = sheet.duplicate_sheet(template_sheet.id)
result_sheet.update_title(new_sheet_name)

# Change the order to the last sheet
sheet.batch_update({
    "requests": {
        "updateSheetProperties": {
            "properties": {
                "sheetId": result_sheet.id,
                "index": len(sheet.worksheets())  # Move to the last position
            },
            "fields": "index"
        }
    }
})

# Add the data to the sheet
worksheet = sheet.worksheet(new_sheet_name)

# Add the data to the sheet
start_cell = 'R50'

if (dataset == "solution-tied"):
    end_cell =  'ZZZ199'
else:
    end_cell =  'ZZZ199'

# Convert DataFrame to list of lists
data_to_update = df.values.tolist()

# Update the range with the DataFrame values
worksheet.update(start_cell + ':' + end_cell, data_to_update)

worksheet.update_acell('C3', model)
worksheet.update_acell('C4', temperature)
worksheet.update_acell('C5', initial_examples_generated)
worksheet.update_acell('C6', feedback_iterations)
worksheet.update_acell('C7', dataset)
worksheet.update_acell('C8', command)
worksheet.update_acell('C9', dataset + " " + extra_info)


# Create on array that has all the values
total_extra_data = []

for i in range(int(df.shape[0] / 5)):
    total_extra_data.append([total_tokens_used[i], total_completions_used[i], total_completions[i]])
    total_extra_data.append(["", "", ""])
    total_extra_data.append(["", "", ""])
    total_extra_data.append(["", "", ""])
    total_extra_data.append(["", "", ""])

worksheet.update("L50:N199", total_extra_data)


  worksheet.update(start_cell + ':' + end_cell, data_to_update)
  worksheet.update("L50:N199", total_extra_data)


{'spreadsheetId': '1QgsGGnlo6DWp0rS5wwbnouUjDsMZSUqd3fofFc2aKAQ',
 'updatedRange': "'3.5-10-10-1-solution-tied-nl9'!L50:N199",
 'updatedRows': 150,
 'updatedColumns': 3,
 'updatedCells': 450}

## Store the data in the google drive folder    

In [6]:
# Define the scope and authenticate using service account credentials
SCOPES = ['https://www.googleapis.com/auth/drive']
creds = Credentials.from_service_account_file('../tmp/vecogen-6422006a7b0e.json', scopes=SCOPES)

# Authenticate Google Drive API client
drive_service = build('drive', 'v3', credentials=creds)

def create_folder(name, parent_id=None):
    """Create a folder in Google Drive."""
    folder_metadata = {
        'name': name,
        'mimeType': 'application/vnd.google-apps.folder'
    }
    if parent_id:
        folder_metadata['parents'] = [parent_id]
    folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
    return folder.get('id')

def upload_file(file_path, parent_id):
    """Upload a file to Google Drive."""
    file_name = os.path.basename(file_path)
    media = MediaFileUpload(file_path, resumable=True)
    file_metadata = {
        'name': file_name,
        'parents': [parent_id]
    }
    file = drive_service.files().create(body=file_metadata, media_body=media, fields='id').execute()
    return file.get('id')

def upload_folder(local_folder_path, parent_id=None):
    """Upload a folder to Google Drive, recursively including all files and subfolders."""
    folder_name = os.path.basename(local_folder_path)
    folder_id = create_folder(folder_name, parent_id)
    for item in os.listdir(local_folder_path):
        item_path = os.path.join(local_folder_path, item)
        if os.path.isdir(item_path):
            upload_folder(item_path, folder_id)
        else:
            upload_file(item_path, folder_id)

def list_folders():
    """List all folders in Google Drive."""
    query = "mimeType='application/vnd.google-apps.folder' and trashed=false"
    results = drive_service.files().list(q=query, spaces='drive', fields='nextPageToken, files(id, name)').execute()
    folders = results.get('files', [])

    if not folders:
        print('No folders found.')
    else:
        print('Folders:')
        for folder in folders:
            print(f"Name: {folder['name']}, ID: {folder['id']}")

# Define the local folder you want to upload and the Google Drive parent folder ID
local_folder_path = f'../output/{output_folder}'
drive_parent_folder_id = '1ZvsBlLV94f1TTG-2aIfeX_v86I4bOxq8'

# Start the folder upload process
upload_folder(local_folder_path, drive_parent_folder_id)

print(f'Folder {local_folder_path} uploaded to Google Drive folder with ID {drive_parent_folder_id}')

# Put the link in C10



Folder ../output/3.5-10-10-1-solution-tied-nl9-one-shot uploaded to Google Drive folder with ID 1ZvsBlLV94f1TTG-2aIfeX_v86I4bOxq8
