In [None]:
from openai import OpenAI
import os
import urllib.request
import subprocess
import time
import keyboard
from abc import ABC, abstractmethod
import json
import http.server
from http.server import HTTPServer
import threading
import socket
import platform
import win32com.client
import psutil
import re
import sys
import paramiko

# CONFIGURATION:

# Set to False after the first run to avoid repeating the setup (Set to False if you changed any VM or Docker relevant configuration)
FIRST_TIME_SETUP = True

# General
MAX_NUMBER_OF_PROBLEMS_TO_TEST_IN_BATCH = 10 # To separate the testing into batches to avoid long running times
EXPERIMENT_NAME = 'Experiment 1' # If there are multiple experiments, their results will be stored in the same table, so this is used to differentiate them
START_FROM_PROBLEM_NAME = None # Set this if the script didn't finish or batches are being used and you want to continue from a specific problem without losing the collected results from previous problems
ATTEMPTS = 2 # Number of attempts to generate a solution for a problem

# VM setup:
VM_NAME = 'debian_vbox'
DEBIAN_ISO = 'https://cdimage.debian.org/debian-cd/current/amd64/iso-cd/debian-12.9.0-amd64-netinst.iso' #'https://cdimage.debian.org/mirror/cdimage/archive/12.1.0/i386/iso-cd/debian-12.1.0-i386-netinst.iso'
ISO_PATH = 'iso'
VM_FOLDER = 'debian'
SHARED_FOLDER = 'scripts'
PRESEED_FOLDER = 'preseed'
PRESEED_PORT = 8080 # If this port is being used already, it can be changed
# IMPORTANT: Change the guest additions url to the version of VirtualBox you are using
GUEST_ADDITIONS_ISO_URL = 'https://download.virtualbox.org/virtualbox/7.1.6/VBoxGuestAdditions_7.1.6.iso'

# Script generation:
TEMPERATURES = [0, 0.2, 0.7]
TOP_P = [0.1, 0.4, 0.9]
NOT_A_BASH = 'NOT_A_BASH_SCRIPT'
SYSTEM_CONTEXT = """You answer requests with only a block of Bash code for Debian
Linux to achieve the requested effect. If prompted again with issues about the
code, you provide an improved code. You assume that you are working as
root. You may use any utilities, that can be installed available via `apt install`, assume that they are already installed."""
KEYS_DIR = 'apiKeys'
# List of LLMs to test (OpenAI models don't require a url, rest are specified)
MODELS = [
    {'name': 'deepseek-r1', 'keyFile': 'llama.key', 'url': 'https://api.llama-api.com'},
    {'name': 'deepseek-v3', 'keyFile': 'llama.key', 'url': 'https://api.llama-api.com'},
    {'name': 'gpt-3.5-turbo', 'keyFile': 'openai.key', 'url': None},
    {'name': 'gpt-4o', 'keyFile': 'openai.key', 'url': None},
    {'name': 'o1-mini', 'keyFile': 'openai.key', 'url': None},
    {'name':'llama3.3-70b', 'keyFile': 'llama.key', 'url':'https://api.llama-api.com'},
    {'name':'qwen/qwen2.5-coder-32b-instruct', 'keyFile': 'nvidia.key', 'url':'https://integrate.api.nvidia.com/v1'},
]

for model in MODELS:
    with open(f'{KEYS_DIR}/{model["keyFile"]}', 'r') as f:
        model['key'] = f.read().strip()


# Testing:
HOST_SCRIPTS_DIR = 'scripts'
SHELLCHECK_VM_SCRIPTS_DIR = '/usr/scripts' # Script path in the shellcheck Docker container
DEBIAN_VM_SCRIPTS_DIR = f'/media/sf_{SHARED_FOLDER}' # Shared folder in the VM
START_CHECKPOINT = 'start_checkpoint' # Debian VM checkpoint used in the beggining of the setup for each test case, also to prevent having to repeat Debian installation
# Utilities that will be needed to setup the tests (all are installed before START_CHECKPOINT to speed up tests)
STARTING_UTILITIES = ['git', 'openssh-server', 'apt-file', 'jq']
SKIP_CORRECT_SCRIPTS = True # If set to True, the prewritten correct scripts will not be tested, this can save time if they were already executed before but is not recommended otherwise since they insure the tests are not faulty

# BASIC FUNCTIONS:
def run_command(command):
    res = subprocess.run(command,
                         shell=True, 
                         stderr=subprocess.PIPE, 
                         stdout=subprocess.PIPE)
    if res.returncode != 0 and ('docker' not in command or res.returncode != 1):
        raise Exception(f'Error running command (return code {res.returncode}): {res.stderr}')
    
    return type('', (object,),{
        "stdout": res.stdout.decode() if res.stdout != None else None,
        "stderr": res.stderr.decode() if res.stderr != None else None
    })

cached_ip = None
def debian_exec (*commands, raise_errors=True):
    # Initialize SSH client
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    # Connect to the VM
    grep_utility = 'findstr' if platform.system() == 'Windows' else 'grep'
    attempts = 2
    ip = None
    exception = None
    while ip == None and attempts > 0:
        try:
            if cached_ip != None:
                ip = cached_ip
            else:
                ip = re.search("'([0-9\.]+)'", run_command(f'VBoxManage guestproperty enumerate "debian_vbox" | {grep_utility} IP').stdout).group(1)
            client.connect(ip, username='root', password='preseed', timeout=10)
        except Exception as e:
            ip = None
            cached_ip = None
            attempts -= 1
            time.sleep(5)
            exception = e
    if ip == None:
        raise exception
    
    cached_ip = ip

    _, stdout, stderr = client.exec_command(' && '.join(commands))
    exit_code = stdout.channel.recv_exit_status()
    if raise_errors and exit_code != 0:
        client.close()
        raise Exception(f'Error running command {commands}: (exit code: {exit_code}) {stderr.read().decode()}')
    
    ret = type('', (object,),{
        "stdout": stdout.read().decode() if stdout != None else None,
        "stderr": stderr.read().decode() if stderr != None else None
    })

    client.close()

    return ret

def save_script (script_name, script_content):
    with open(f'{HOST_SCRIPTS_DIR}/{script_name.replace("/", "_")}.sh', 'w', newline='\n') as f:
        f.write(script_content.replace('\r\n', '\n'))

def read_script (script_name):
    if not os.path.exists(f'{HOST_SCRIPTS_DIR}/{script_name.replace("/", "_")}.sh'):
        return None
    with open(f'{HOST_SCRIPTS_DIR}/{script_name.replace("/", "_")}.sh', 'r', newline='\n') as f:
        return f.read()
    
def append_to_script_json(obj):
    curr = None
    if os.path.exists(f'{HOST_SCRIPTS_DIR}/scripts.json'):
        with open(f'{HOST_SCRIPTS_DIR}/scripts.json', 'r') as f:
            curr = json.load(f)
            curr.append(obj)
    else:
        curr = [obj]
    
    with open(f'{HOST_SCRIPTS_DIR}/scripts.json', 'w') as f:
        json.dump(curr, f)

def get_script_data_from_json():
    if os.path.exists(f'{HOST_SCRIPTS_DIR}/scripts.json'):
        with open(f'{HOST_SCRIPTS_DIR}/scripts.json', 'r') as f:
            return json.load(f)
    return []

def debian_poweroff():
    try:
        run_command(f'VBoxManage controlvm {VM_NAME} poweroff')
        time.sleep(1) # Wait for VM to power off
    except:
        pass # VM is not running

def debian_exec_script (directory, script_name):
    if not os.path.exists(f'{HOST_SCRIPTS_DIR}/{script_name}.sh'):
        raise Exception(f'Script {script_name}.sh does not exist in {HOST_SCRIPTS_DIR}')

    return debian_exec (f'cd {directory}', 
                        f'bash {DEBIAN_VM_SCRIPTS_DIR}/{script_name}.sh',
                        raise_errors=False)

def debian_checkpoint (checkpoint_name):
    try:
        # Delete the checkpoint if it already exists
        run_command(f'VBoxManage snapshot {VM_NAME} delete {checkpoint_name}')
    except:
        pass
    run_command(f'VBoxManage snapshot {VM_NAME} take {checkpoint_name}')

def debian_startup():
    run_command(f'VBoxManage startvm "{VM_NAME}" --type headless')
    time.sleep(7)


def debian_checkpoint_reset (checkpoint_name):
    debian_poweroff()
    run_command(f'VBoxManage snapshot {VM_NAME} restore {checkpoint_name}')
    debian_startup()
    
def debian_install_utilities():
    # apt-update is already run from the preseed file
    for utility in STARTING_UTILITIES:
        debian_exec(f'DEBIAN_FRONTEND="noninteractive" apt-get install -y {utility}')
    
    debian_exec(f'apt-file update') # For automatic installation of missing utilities used in scripts

def docker_exec (container, command):
    return run_command(f'docker exec {container} {command}')

def shellcheck_exec (command):
    return docker_exec('shellcheck', f'bash -c "{command}"')

def shellcheck_copy_scripts():
    return run_command(f'docker cp {HOST_SCRIPTS_DIR}/. shellcheck:{SHELLCHECK_VM_SCRIPTS_DIR}')

def shellcheckRunOnScript (script_name):
    if '#!' in read_script(script_name):
        res = shellcheck_exec(f'shellcheck --format=json {SHELLCHECK_VM_SCRIPTS_DIR}/{script_name}.sh').stdout
    else:
        res = shellcheck_exec(f'shellcheck --format=json <(echo \\"#!/bin/bash\\n\\"; cat {SHELLCHECK_VM_SCRIPTS_DIR}/{script_name}.sh)').stdout
    return json.loads(res)
def postgresExec (command):
    return docker_exec('postgres_db', command)

def saveResultRow (
    experiment_name,
    problem_name,
    problem_category,
    problem_complexity,
    attempt,
    generated_code,
    temperature,
    topp,
    model_name,
    case_name,
    case_complexity,
    error,
    is_correct,
    result,
    sh_style_cnt,
    sh_info_cnt,
    sh_warning_cnt,
    sh_error_cnt,
    sh_output,
    score,
    tokens_used,
    ms_to_generate
):
    query = f"""
    INSERT INTO ExperimentResults (
        experiment_name, 
        problem_name, 
        problem_category, 
        problem_complexity, 
        attempt, 
        generated_code, 
        temperature, 
        topp, 
        model_name, 
        case_name, 
        case_complexity, 
        error, 
        is_correct, 
        result, 
        sh_style_cnt, 
        sh_info_cnt, 
        sh_warning_cnt, 
        sh_error_cnt, 
        sh_output, 
        score,
        tokens_used,
        ms_to_generate
    ) VALUES (
        '{experiment_name.replace("'", "''")}', 
        '{problem_name.replace("'", "''")}', 
        '{problem_category.replace("'", "''")}', 
        {problem_complexity}, 
        {attempt}, 
        '{generated_code.replace("'", "''")}', 
        {temperature}, 
        {topp}, 
        '{model_name.replace("'", "''")}', 
        '{case_name.replace("'", "''")}', 
        {case_complexity}, 
        '{error.replace("'", "''")}', 
        {is_correct}, 
        '{result.replace("'", "''")}', 
        {sh_style_cnt}, 
        {sh_info_cnt}, 
        {sh_warning_cnt}, 
        {sh_error_cnt}, 
        '{sh_output.replace("'", "''")}', 
        {score},
        {tokens_used},
        {ms_to_generate}
    );
    """.replace('\n', ' ').replace('    ', ' ').replace('  ', ' ')
    postgresExec(f'psql -U superset_user -d superset_db -c "{query}"')

def clearProblem(experiment_name, problem_name):
    query = f"""
    DELETE FROM ExperimentResults
    WHERE experiment_name = '{experiment_name}' AND problem_name = '{problem_name}';
    """.replace('\n', ' ').replace('    ', ' ').replace('  ', ' ')
    postgresExec(f'psql -U superset_user -d superset_db -c "{query}"')

def resultExists(experiment_name, problem_name, model_name, case_name, topp, temperature, attempt):
    query = f"""
    SELECT COUNT(*) FROM ExperimentResults
    WHERE experiment_name = '{experiment_name}' AND problem_name = '{problem_name}' AND model_name = '{model_name}' AND case_name = '{case_name}' AND topp = {topp} AND temperature = {temperature} AND attempt = {attempt};
    """.replace('\n', ' ').replace('    ', ' ').replace('  ', ' ')
    return int(postgresExec(f'psql -U superset_user -d superset_db -t -c "{query}"').stdout.strip()) > 0

def setup_vbox_api():
    print('Starting virtualbox http server to access the SOAP API...')
    def start_vbox_server():
        run_command('vboxwebsrv -H 127.0.0.1 -v')
    vbox_server_thread = threading.Thread(target=start_vbox_server, daemon=True)
    vbox_server_thread.start()


  ip = re.search("'([0-9\.]+)'", run_command(f'VBoxManage guestproperty enumerate "debian_vbox" | {grep_utility} IP').stdout).group(1)


# Setup external tools
### Requirements:
1. Installed VirtualBox (scripted was tested with version 7.1.6)
2. Installed and running Docker Desktop (script was tested with version 4.37.0)
3. Internet connection
4. ~15GB free space
5. Windows OS (though only small changes are needed to adapt the code to be run on a linux/mac system)

### Setup Debian VM in VirtualBox:

In [2]:
# Create the directory if it does not exist
os.makedirs(ISO_PATH, exist_ok=True)

if not os.path.isfile(f'{ISO_PATH}/{VM_NAME}.iso'):
    print('Downloading Debian ISO...')
    urllib.request.urlretrieve(DEBIAN_ISO, f'{ISO_PATH}/{VM_NAME}.iso')
    print('Download complete!')
else:
    print('Debian ISO already downloaded, skipping')

if not os.path.isfile(f'{ISO_PATH}/VBoxGuestAdditions.iso'):
    print('Downloading VirtualBox Guest Additions ISO...')
    urllib.request.urlretrieve(GUEST_ADDITIONS_ISO_URL, f'{ISO_PATH}/VBoxGuestAdditions.iso')
    print('Download complete!')
else:
    print('VirtualBox Guest Additions ISO already downloaded, skipping')

Debian ISO already downloaded, skipping
VirtualBox Guest Additions ISO already downloaded, skipping


In [19]:
if FIRST_TIME_SETUP:
    def vm_exists(vm_name):
        vms = run_command('VBoxManage list vms').stdout
        return f'"{vm_name}"' in vms  # VBoxManage encloses VM names in quotes

    if vm_exists(VM_NAME):
        # Delete the VM if it already exists
        debian_poweroff()
        run_command(f'VBoxManage unregistervm {VM_NAME} --delete')
    # Create a new VM
    run_command(f'VBoxManage createvm --name "{VM_NAME}" --ostype Debian_64 --register')

    # Set VM settings (VM specs may be adjusted as needed, though lowering them has been observed to cause instability in the VM)
    run_command(f'VBoxManage modifyvm "{VM_NAME}" --memory 8192 --cpus 7 --vram 64 --graphicscontroller VMSVGA')

    # Create virtual hard disk (~15GB)
    os.makedirs(f'{VM_FOLDER}', exist_ok=True)
    run_command(f'VBoxManage createmedium disk --filename "{VM_FOLDER}/{VM_NAME}.vdi" --size 15000')

    # Attach storage controller
    run_command(f'VBoxManage storagectl "{VM_NAME}" --name "SATA Controller" --add sata --controller IntelAhci')

    # Attach hard disk
    run_command(f'VBoxManage storageattach "{VM_NAME}" --storagectl "SATA Controller" --port 0 --device 0 --type hdd --medium "{VM_FOLDER}/{VM_NAME}.vdi"')

    # Attach Debian ISO
    run_command(f'VBoxManage storagectl "{VM_NAME}" --name "IDE Controller" --add ide')
    run_command(f'VBoxManage storageattach "{VM_NAME}" --storagectl "IDE Controller" --port 1 --device 0 --type dvddrive --medium "{ISO_PATH}/{VM_NAME}.iso"')

    # Add a second sata controller for guest additions iso (because IDE Controller is already used)
    run_command(f'VBoxManage storageattach "{VM_NAME}" --storagectl "SATA Controller" --port 1 --device 0 --type dvddrive --medium "{ISO_PATH}/VBoxGuestAdditions.iso"')

    run_command(f'VBoxManage modifyvm "{VM_NAME}" --biosbootmenu disabled')

    # Add scripts as shared folder
    if not os.path.exists(SHARED_FOLDER):
        os.makedirs(SHARED_FOLDER)
    run_command(f'VBoxManage sharedfolder add "{VM_NAME}" --name "scripts" --hostpath "{SHARED_FOLDER}" --automount')

    # Set only one network adapter for the NAT connection
    def get_network_adapter_full_name(interface):
        # Retrieve the full name of a network adapter.
        if platform.system() == "Windows" and win32com:
            objWMIService = win32com.client.Dispatch("WbemScripting.SWbemLocator").ConnectServer(".", "root\cimv2")
            adapters = objWMIService.ExecQuery("SELECT * FROM Win32_NetworkAdapter")
            for adapter in adapters:
                if adapter.NetConnectionID == interface:
                    return adapter.Name
        return interface  # Fallback to the interface name if lookup fails

    def get_active_network_adapter():
        # Find a network adapter that has internet access
        for interface, addrs in psutil.net_if_addrs().items():
            for addr in addrs:
                if addr.family == socket.AF_INET:  # IPv4 addresses only
                    try:
                        # Create a socket using this adapter to test connectivity
                        test_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                        test_socket.bind((addr.address, 0))
                        test_socket.connect(("8.8.8.8", 80))
                        test_socket.close()
                        return get_network_adapter_full_name(interface)  # Return full adapter name
                    except Exception:
                        pass
        return None

    active_adapter = get_active_network_adapter()
    if active_adapter == None:
        raise "Network connected adapter not found!"

    run_command(f'VBoxManage modifyvm "{VM_NAME}" --nic1 bridged')
    run_command(f'VBoxManage modifyvm "{VM_NAME}" --bridgeadapter1 "{active_adapter}"')

    # Start the VM
    print("Starting VM...")
    debian_startup()
    time.sleep(7) # Make sure the boot menu has time to open
    print('Debian started. Installing...')
    
    def debian_enter_input(command):
        # Enter input in the VM (as if from keyboard), useful before guest additions has been installed
        # VirtualBox uses keyboard scancode set 1 (https://users.utcluj.ro/~baruch/sie/labor/PS2/Scan_Codes_Set_1.htm)
        def get_text_hex(scancode):
            text = hex(scancode)[2:].lower()
            if len(text) % 2 == 1:
                text = '0' + text
            if len(text) > 2:
                text = ' '.join([text[i:i+2] for i in range(0, len(text), 2)])
            return text
        
        scan_codes = []
        for char in command:
            scan_code_numeric = keyboard.key_to_scan_codes(char.lower())[0]

            needs_shift = False
            if char.isupper() or char in '~!@#$%^&*()_+{}|:"<>?':
                scan_codes.append('2a') # Press left shift
                needs_shift = True

            scan_codes.append(get_text_hex(scan_code_numeric))
            scan_codes.append(get_text_hex(scan_code_numeric | 0x80)) # Release key

            if needs_shift:
                scan_codes.append('aa') # Release left shift

        run_command(f'VBoxManage controlvm {VM_NAME} keyboardputscancode {" ".join(scan_codes)}')
        run_command(f'VBoxManage controlvm {VM_NAME} keyboardputscancode 1c 9c') # Click Enter

    def navigate_down(times):
        for _ in range(times):
            run_command(f'VBoxManage controlvm {VM_NAME} keyboardputscancode e0 50 e0 d0') # Click down
            time.sleep(0.2)
    
    # Navigate "Advanced options"
    navigate_down(2)
    run_command(f'VBoxManage controlvm {VM_NAME} keyboardputscancode 1c 9c') # Click Enter

    # Navigate "Automated install"
    navigate_down(6)
    run_command(f'VBoxManage controlvm {VM_NAME} keyboardputscancode 1c 9c') # Click Enter

    # Start local http server to host the preseed file

    # Get host address in local network
    def get_ipv4_address():
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))  # Connect to an external server
            return s.getsockname()[0]
    host_ip = get_ipv4_address()

    # Start server on a background thread since server_forever is synchronous
    server = HTTPServer(("0.0.0.0", PRESEED_PORT), http.server.SimpleHTTPRequestHandler)
    current_dir = os.getcwd()
    os.chdir(PRESEED_FOLDER)
    try:
        def start_preseed_http_server():
            server.serve_forever()
        server_thread = threading.Thread(target=start_preseed_http_server, daemon=True)
        server_thread.start()

        time.sleep(100) # Wait for preseed prompt to load
        debian_enter_input(f'http://{host_ip}:{PRESEED_PORT}/preseed.cfg') # Virtual box host address IP
        time.sleep(600) # Wait for the installation
    except Exception as e:
        server.shutdown()
        os.chdir(current_dir)
        raise e
    server.shutdown()
    os.chdir(current_dir)

    print('OS installed! Installing utilities...')
    debian_install_utilities()
    debian_checkpoint(START_CHECKPOINT)
else:
    print('Restoring VM checkpoint...')
    debian_checkpoint_reset(START_CHECKPOINT)
    
print('VM is ready to use!')

  objWMIService = win32com.client.Dispatch("WbemScripting.SWbemLocator").ConnectServer(".", "root\cimv2")


Restoring VM checkpoint...
VM is ready to use!


### Setup the dockerized tools (ShellCheck, Postgres DB and Apache Superset):

In [7]:
%%cmd
# This won't delete data since it's kept on host (via Docker's volumes)
docker compose down
docker compose build
docker compose up -d

Microsoft Windows [Version 10.0.26100.3194]
(c) Microsoft Corporation. All rights reserved.

c:\Users\Banknote\Documents\1_Mani Dokumenti\bakalaura darbs\kods\Project># This won't delete data since it's kept on host (via Docker's volumes)


'#' is not recognized as an internal or external command,
operable program or batch file.



c:\Users\Banknote\Documents\1_Mani Dokumenti\bakalaura darbs\kods\Project>docker compose down


 Container shellcheck  Stopping
 Container superset  Stopping
 Container shellcheck  Stopped
 Container shellcheck  Removing
 Container shellcheck  Removed
 Container superset  Stopped
 Container superset  Removing
 Container superset  Removed
 Container postgres_db  Stopping
 Container postgres_db  Stopped
 Container postgres_db  Removing
 Container postgres_db  Removed
 Network project_default  Removing
 Network project_default  Removed



c:\Users\Banknote\Documents\1_Mani Dokumenti\bakalaura darbs\kods\Project>docker compose build


 Service shellcheck  Building
 Service postgres  Building


#0 building with "desktop-linux" instance using docker driver

#1 [postgres internal] load build definition from Dockerfile_postgres
#1 transferring dockerfile: 381B 0.0s done
#1 DONE 0.0s

#2 [shellcheck internal] load build definition from Dockerfile_shellcheck
#2 transferring dockerfile: 195B 0.0s done
#2 DONE 0.0s

#3 [postgres internal] load metadata for docker.io/library/postgres:latest
#3 ...

#4 [shellcheck auth] library/alpine:pull token for registry-1.docker.io
#4 DONE 0.0s

#5 [postgres auth] library/postgres:pull token for registry-1.docker.io
#5 DONE 0.0s

#6 [shellcheck internal] load metadata for docker.io/library/alpine:latest
#6 DONE 1.2s

#3 [postgres internal] load metadata for docker.io/library/postgres:latest


 Service shellcheck  Built


#3 ...

#7 [shellcheck internal] load .dockerignore
#7 transferring context: 2B done
#7 DONE 0.0s

#8 [shellcheck 1/2] FROM docker.io/library/alpine:latest@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c
#8 DONE 0.0s

#9 [shellcheck 2/2] RUN apk add --no-cache     shellcheck     bash
#9 CACHED

#10 [shellcheck] exporting to image
#10 exporting layers done
#10 writing image sha256:d722c8ac82cef9dfb46e87e378992d7b1ca3b255a4be8be722f5fd5e5d3f0bf0 done
#10 naming to docker.io/library/sh-img done
#10 DONE 0.0s

#11 [shellcheck] resolving provenance for metadata file
#11 DONE 0.0s

#3 [postgres internal] load metadata for docker.io/library/postgres:latest
#3 DONE 1.7s

#12 [postgres internal] load .dockerignore
#12 transferring context: 2B done
#12 DONE 0.0s

#13 [postgres internal] load build context
#13 transferring context: 30B done
#13 DONE 0.0s

#14 [postgres 1/2] FROM docker.io/library/postgres:latest@sha256:1c47b7139517eeb5e9747882983297a745517c3d70d2451e376f9f

 Service postgres  Built
 Service superset  Building


#17 DONE 0.0s

#18 [superset internal] load build definition from Dockerfile_superset
#18 transferring dockerfile: 1.75kB done
#18 DONE 0.0s

#19 [superset internal] load metadata for docker.io/apache/superset:4.1.1
#19 ...

#20 [superset auth] apache/superset:pull token for registry-1.docker.io
#20 DONE 0.0s

#19 [superset internal] load metadata for docker.io/apache/superset:4.1.1
#19 DONE 1.0s


 Service superset  Built



#21 [superset internal] load .dockerignore
#21 transferring context: 2B done
#21 DONE 0.0s

#22 [superset 1/2] FROM docker.io/apache/superset:4.1.1@sha256:1d1fdaaeb19ce9cdba71620ee1cc6117d73813b2f3b422ce5a1bf752c247b7c0
#22 DONE 0.0s

#23 [superset 2/2] RUN apt-get update &&     apt-get -y install libpq-dev gcc &&     pip install --no-cache-dir Pillow &&     pip install --no-cache-dir psycopg2	&&     superset db upgrade &&     superset init &&     superset fab create-admin         --username "admin"         --password "admin"         --firstname "Admin"         --lastname "User"         --email "admin@example.com"
#23 CACHED

#24 [superset] exporting to image
#24 exporting layers done
#24 writing image sha256:d5c3bf4bf437ec3c5b7e397123f96dfcae2b1eaa827d63db42d12dd34b01380f done
#24 naming to docker.io/library/superset-img done
#24 DONE 0.0s

#25 [superset] resolving provenance for metadata file
#25 DONE 0.0s

c:\Users\Banknote\Documents\1_Mani Dokumenti\bakalaura darbs\kods\Project>do

 Network project_default  Creating
 Network project_default  Created
 Container postgres_db  Creating
 Container shellcheck  Creating
 Container shellcheck  Created
 Container postgres_db  Created
 Container superset  Creating
 Container superset  Created
 Container postgres_db  Starting
 Container shellcheck  Starting
 Container postgres_db  Started
 Container superset  Starting
 Container shellcheck  Started
 Container superset  Started



c:\Users\Banknote\Documents\1_Mani Dokumenti\bakalaura darbs\kods\Project>

# Script generation implementation

In [20]:
def extract_bash_code(text):
    pattern = re.compile(r'(?:bash|sh)```(.*?)```', re.DOTALL)
    matches1 = re.finditer(pattern, text)
    pattern = re.compile(r'```(?:bash|sh)\n(.*?)```', re.DOTALL)
    matches2 = re.finditer(pattern, text)

    results = [s[1].strip('\n') for s in sorted([(match.start(), match.group(1)) for match in matches1] + [(match.start(), match.group(1)) for match in matches2])]
    ret = "\n".join(results)

    # If no code marked with bash or sh is found try to extract all code blocks
    if len(ret) == 0:	
        pattern = re.compile(r'```(.*?)```', re.DOTALL)
        matches = pattern.findall(text)
        ret = "\n".join([match.strip('\n') for match in matches])
    
    return ret

'''
previous_conversation = [
    {
        'ai_answer': 'string',
        'issues': 'string'
    },
    ...
]
'''
def generate_script(problem_text, model, temp, top_p, previous_conversation):
    model_is_o1 = 'o1' in model['name']

    # o1 doesn't support system role
    system_role = 'system' if not model_is_o1 else 'user'
    
    messages = [
        {
            'role': system_role, 
            'content': SYSTEM_CONTEXT
        },
        {
            'role': 'user',
            'content': problem_text
        },
    ]

    for prev in previous_conversation:
        messages.append({
            'role': 'assistant',
            'content': prev['ai_answer']
        })
        
        messages.append({
            'role': 'user',
            'content': prev['issues']
        })

    attempts_remaining = 3
    completion = None
    exception = None
    while attempts_remaining > 0 and completion == None: 
        try:
            client = OpenAI(api_key=model['key'], base_url=model['url'])
            completion = client.chat.completions.create(
                model=model['name'],
                messages=messages,
                temperature=temp,
                top_p=top_p
            )

            if isinstance(completion, list) and 'error' in completion[0].model_extra:
                raise f'Error generating script: {completion[0].model_etxra["error"]}'
        except Exception as e:
            exception = e
            attempts_remaining -= 1
            time.sleep(5)
            completion = None

    if completion == None:
        raise exception

    tokens_used = completion.usage.completion_tokens

    # Get rid of formatting around the script
    # If no bash script is found, return NOT_A_BASH
    generated_code = extract_bash_code(completion.choices[0].message.content)
    if len(generated_code) == 0:
        return NOT_A_BASH, tokens_used
    else:
        return generated_code, tokens_used


# Generate all scripts required for the experiment
def generate_scripts_all(problem_text):
    results = []
    for model in MODELS:
        model_is_o1 = 'o1' in model['name']

        if not model_is_o1:
            for temp in TEMPERATURES:
                for top_p in TOP_P:
                    # Check if the script is already generated
                    script_found = False
                    for script in SCRIPTS_ALREADY_GENERATED:
                        if script['problem_text'] == problem_text and script['model'] == model['name'] and script['temperature'] == temp and script['top_p'] == top_p:
                            text = read_script(script['script_name'])
                            if text != None:
                                script_found = True
                                results.append({
                                    'model': model['name'],
                                    'script': text,
                                    'tokens_used': script['tokens_used'],
                                    'top-p': top_p,
                                    'time_taken_ms': script['time_taken_ms'],
                                    'temperature': temp,
                                    'loaded_from_cache': True
                                })
                                break
                    if script_found:
                        continue
                    
                    start_time = time.time()
                    result, tokens_used = generate_script(problem_text, model, temp, top_p, [])
                    time_elapsed = time.time() - start_time

                    results.append({
                        'model': model['name'],
                        'temperature': temp,
                        'top-p': top_p,
                        'script': result,
                        'time_taken_ms': time_elapsed * 1000,
                        'tokens_used': tokens_used,
                        'loaded_from_cache': False
                    })
        else:
            # Check if the script is already generated
            script_found = False
            for script in SCRIPTS_ALREADY_GENERATED:
                if script['problem_text'] == problem_text and script['model'] == model['name'] and script['temperature'] == 1 and script['top_p'] == 1:
                    text = read_script(script['script_name'])
                    if text != None:
                        script_found = True
                        results.append({
                            'model': model['name'],
                            'script': text,
                            'tokens_used': script['tokens_used'],
                            'temperature': 1,
                            'top-p': 1,
                            'time_taken_ms': script['time_taken_ms'],
                            'loaded_from_cache': True
                        })
                        break
            if script_found:
                continue

            # o1 doesn't support temperature and top-p besides 1
            start_time = time.time()
            result, tokens_used = generate_script(problem_text, model, 1, 1, [])
            time_elapsed = time.time() - start_time

            results.append({
                'model': model['name'],
                'temperature': 1,
                'top-p': 1,
                'script': result,
                'time_taken_ms': time_elapsed * 1000,
                'tokens_used': tokens_used,
                'loaded_from_cache': False
            })
    return results


# Testing implementation

In [21]:
class Problem(ABC):

    # Implementē katram uzdevumam
    name = None
    category = None
    level = None
    text = None
    PROBLEM_CHECKPOINT = None

    test_cases = []

    def __init__(self, test_cases):
        self.test_cases = test_cases

    # Generated scripts
    scripts = []

    # Rezultāti
    results = dict()
    '''
    { 
        script_name: {
            test_cases: {
                {test_case}: {
                    test_name: 'string',
                    test_level: 'string',
                    functional_executed: bool,
                    functional_test_success: bool,
                    static_clean: bool,
                    functional_error: 'string',
                    functional_issue: 'string',
                    output: 'string',
                }
            },
            static_issues: 'string',
            sh_style_cnt: int,
            sh_info_cnt: int,
            sh_warning_cnt: int,
            sh_error_cnt: int,
        },
        ...
    }
    '''

    # Ģenerē visus skriptus pirmajam mēģinājumam, saglabā tos objektā
    def generateScripts(self):
        scripts = generate_scripts_all(self.text)
        for script in scripts:
            script_name = f'{self.name}_{script["model"]}_{script["temperature"]}_{script["top-p"]}'
            script["name"] = script_name
            self.scripts.append(script)
            if not script['loaded_from_cache']:
                save_script(script_name, script['script'])
                append_to_script_json({
                    'problem_text': self.text,
                    'model': script['model'],
                    'temperature': script['temperature'],
                    'top_p': script['top-p'],
                    'script_name': script_name,
                    'time_taken_ms': script['time_taken_ms'],
                    'tokens_used': script['tokens_used']
                })

    # Otreizēji ģenerē konkrētas parametrizācijas skriptu padodot iepriekšējo skriptu un problēmu, atgriež skripta nosaukumu
    def regenerateScript(self, model, temperature, top_p, issue):
        original_name = f'{self.name}_{model}_{temperature}_{top_p}'
        original_content = read_script(original_name)

        model_params = None
        for m in MODELS:
            if m['name'] == model:
                model_params = m
                break

        start_time = time.time()
        new_script_content, tokens_used = generate_script(self.text, model_params, temperature, top_p, [{'ai_answer': f'```bash\n{original_content}\n```', 'issues': issue}])
        time_elapsed = time.time() - start_time
        
        new_name = f'{original_name}_2'
        save_script(new_name, new_script_content)
        return {
                'model': model,
                'name': new_name,
                'temperature': temperature,
                'top-p': top_p,
                'script': new_script_content,
                'time_taken_ms': time_elapsed * 1000,
                'tokens_used': tokens_used,
                'loaded_from_cache': False
        }

    def reset_checkpoint(self):
        debian_checkpoint_reset(START_CHECKPOINT)
        self.init_problem()

    @abstractmethod
    def init_problem(self):   
        '''
        Uzstāda uzdevuma pamata konfigurāciju
        '''
        pass

    def run_static(self, script_name):
        '''
        1. Pārbauda skriptu ar statiskiem ShellCheck testiem
        2. Saglabā rezultātus objektā
    
        Piemēr ShellCheck izvadei:
        [
            {
                "file": "usr/scripts/example.sh",
                "line": 1,
                "endLine": 1,
                "column": 12,
                "endColumn": 12,
                "level": "error",
                "code": 1017,
                "message": "Literal carriage return. Run script through tr -d '\\r' .",
                "fix": null
            },
            ...
        ]
        '''
        result = shellcheckRunOnScript(script_name)

        # Only unique issues are counted
        unique_static_issues_dict = {}
        for issue in result:
            unique_static_issues_dict[issue['code']] = issue

        self.results[script_name]['sh_style_cnt'] = len([issue for issue in unique_static_issues_dict.values() if issue['level'] == 'style'])
        self.results[script_name]['sh_info_cnt'] = len([issue for issue in unique_static_issues_dict.values() if issue['level'] == 'info'])
        self.results[script_name]['sh_warning_cnt'] = len([issue for issue in unique_static_issues_dict.values() if issue['level'] == 'warning'])
        self.results[script_name]['sh_error_cnt'] = len([issue for issue in unique_static_issues_dict.values() if issue['level'] == 'error'])

        if len([issue for issue in unique_static_issues_dict.values() if issue['level'] != 'info']) > 0:
            self.results[script_name]['static_clean'] = False
        else:
            self.results[script_name]['static_clean'] = True

        # Formatē static_issues teksstu no JSON rezultātiem modelim saprotamā veidā
        self.results[script_name]['static_issues'] = ''
        for issue in result:
            if issue['line'] == issue['endLine']:
                self.results[script_name]['static_issues'] += f"Line {issue['line']}: {issue['message']}\n"
            else:
                self.results[script_name]['static_issues'] += f"Lines {issue['line']} - {issue['endLine']}: {issue['message']}\n"

    # Palaiž testus padotajam skriptam, atgriež rezultātus
    def run_test_cases(self, script_name, skip_static=False):
        self.results[script_name] = {'test_cases': dict()}
        if not skip_static:
            self.run_static(script_name)

        for test_case in self.test_cases:
            test_case.run(script_name, self)

        return self.results[script_name]

class TestCase(ABC):
    
    # Šo jādefinē katrai mantojošajai klasei
    name = None
    level = None
    CHECKPOINT_NAME = None

    def run(self, script_name, problem, skip_checkpoint=False):
        if not skip_checkpoint:
            problem.reset_checkpoint()
        self.setup_functional(problem)
        self.run_functional_internal(problem, script_name)

        # If the issue was due to a utility not being installed, install it and rerun the test
        if not problem.results[script_name]['test_cases'][self.name]['functional_test_success'] and 'command not found' in problem.results[script_name]['test_cases'][self.name]['functional_error']:
            missing_utility = re.search(r"([A-Za-z0-9\-\_]+): command not found", problem.results[script_name]['test_cases'][self.name]['functional_error']).group(1)
            apt_file_output = debian_exec(f'apt-file search --regexp "/{missing_utility}$').stdout # Only search for executables with the exact name
            package_found = False
            
            # Since often the utility has the same name as the package apt-file search can return multiple pacakges, prioritize the one that has the utility name in the beginning
            if f'{missing_utility}:' in apt_file_output:
                package = missing_utility
                package_found = True
            else:
                regex_res = re.search(r"^([A-Za-z0-9\-\_]+):", apt_file_output, re.MULTILINE)
                if regex_res:
                    package = regex_res.group(1)
                    package_found = True
            if not package_found:
                problem.results[script_name][self.name]['functional_issue'] = f'The script attempted to use a utility {missing_utility} that is not available in the Debian repositories (via apt-get install)'
                return

            debian_checkpoint_reset(START_CHECKPOINT)
            print(f'Installing {package}...')
            debian_exec(f'DEBIAN_FRONTEND="noninteractive" apt-get install -y {package}')
            debian_checkpoint(START_CHECKPOINT) # Save the checkpoint in case other tests also require the utility
            self.run(script_name, problem, skip_checkpoint=True) # rerun the test

    @abstractmethod
    def setup_functional(self, problem):
        '''
        Uzstāda testa konfigurāciju
        '''

    @abstractmethod
    def run_functional_internal(self, problem, script_name):
        '''
        1. Darbina skriptu ar nosaukumu script_name
        2. Pārbauda rezultātus un saglabā tos objektā (jāuzstāda functional_executed, functional_test_success, functional_error, functional_issue)
        '''
        pass
    

def get_check_command(condition):
    return f'if [ {condition} ]; then echo "YES"; else echo "NO"; fi'

# Experiment problems

In [22]:
problems = []

def test_exec(test, problem, script_name, find_issue_func, exec_dir):
        result_dict = dict()

        result_dict['test_name'] = test.name
        result_dict['test_level'] = test.level
        
        results = debian_exec_script(exec_dir, script_name)
        
        # Check results
        issue = find_issue_func(test, problem, results)
        if issue:
            result_dict['functional_error'] = results.stderr
            result_dict['functional_issue'] = issue
            result_dict['output'] = results.stdout
            result_dict['functional_test_success'] = False
        else:
            result_dict['functional_test_success'] = True

        problem.results[script_name]['test_cases'][test.name] = result_dict

'''
Kategorija: Ikdienas uzdevumu automatizācija 
Sarežģītības līmenis: 1
Uzdevuma teksts: Delete all the image files in the current directory and it’s subdirectories
Testa gadījumi 
    1. (1. līmenis): Attēli ar populārāriem paplašinājumiem tajā pašā direktorijā
    2. (1. līmenis): Attēli ar populārāriem paplašinājumiem 1 apakšdirektorijā
    3. (2. līmenis): Attēli ar populārāriem paplašinājumiem sākotnējā direktorijā un vairāku līmeņu apakšdirektorijās katrā pa attēlam un vienai citai datnei
    4. (3. līmenis): Attēli ar nepopulārāriem paplašinājumiem sākotnējā direktorijā un vairāku līmeņu apakšdirektorijās katrā pa attēlam un vienai citai datnei
    5. (3. līmenis): 1 attēls ar paplašinājumu “JPG” (Debian noklusējuma ext4 failsistēmā paplašinājumi ir reģistrjūtīgi) un viena direktorija ar attēla paplašinājumu nosaukumā
'''

class DeleteImages(Problem):
    name = 'DeleteImages'
    category = 'Ikdienas uzdevumu automatizācija'
    level = 1
    text = 'Delete all the image files in the current directory and it’s subdirectories'

    image_dir = '/usr/images_to_delete'

    def init_problem(self):
        debian_exec(f'mkdir -p {self.image_dir}')

class TestCaseDeleteImages(TestCase):
    name = 'Attēli ar populārāriem paplašinājumiem tajā pašā direktorijā'
    level = 1

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.image_dir}',
                    'touch image1.jpg',
                    'touch image2.png',
                    'touch image3.jpeg'
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.image_dir}',
                                get_check_command('-e image1.jpg'),
                                get_check_command('-e image2.png'), 
                                get_check_command('-e image3.jpeg')
                                )
                if 'YES' in out.stdout.strip():
                    return 'One or more images were not deleted.'
            except:
                    return 'The directory was corrupted. Checking whether the images were deleted is not possible.'
        test_exec(self, problem, script_name, find_issue, problem.image_dir)

class TestCaseDeleteImages2(TestCase):
    name = 'Attēli ar populārāriem paplašinājumiem 1 apakšdirektorijā'
    level = 1

    def setup_functional(self, problem):
        debian_exec(f'mkdir -p {problem.image_dir}/subdir',
                    f'cd {problem.image_dir}/subdir',
                    'touch image1.jpg',
                    'touch image2.png',
                    'touch image3.jpeg'
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.image_dir}/subdir',
                                get_check_command('-e image1.jpg'),
                                get_check_command('-e image2.png'), 
                                get_check_command('-e image3.jpeg')
                                )
                if 'YES' in out.stdout.strip():
                    return 'One or more images were not deleted.'
            except:
                return 'The directory was corrupted. Checking whether the images were deleted is not possible.'
        test_exec(self, problem, script_name, find_issue, problem.image_dir)
    
class TestCaseDeleteImages3(TestCase):
    name = 'Attēli ar populārāriem paplašinājumiem sākotnējā direktorijā un vairāku līmeņu apakšdirektorijās katrā pa attēlam un vienai citai datnei'
    level = 2

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.image_dir}',
                    'touch image1.jpg',
                    'touch text1.txt',
                    'mkdir -p subdir1',
                    'cd subdir1',
                    'touch image2.png',
                    'touch text2.txt',
                    'cd ..',
                    'mkdir -p subdir2',
                    'cd subdir2',
                    'touch image3.jpeg',
                    'touch text3.txt',
                    'mkdir -p subdir3',
                    'cd subdir3',
                    'touch image4.jpg',
                    'touch text4.json' 
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.image_dir}',
                                  get_check_command('-e image1.jpg')
                                )
                if out.stdout.strip() != 'NO':
                    return "An image was not deleted"
                
                out = debian_exec(f'cd {problem.image_dir}',
                                    get_check_command('-e text1.txt')
                                )
                if out.stdout.strip() != 'YES':
                    return "A non-image file was deleted"

                subdirectories_not_deleted = False
                subdirectories_too_much_deleted = False

                out = debian_exec(f'cd {problem.image_dir}/subdir1',
                                    get_check_command('-e image2.png')
                                )

                if out.stdout.strip() != 'NO':
                    subdirectories_not_deleted = True
                
                out = debian_exec(f'cd {problem.image_dir}/subdir1',
                                    get_check_command('-e text2.txt')
                                )
                if out.stdout.strip() != 'YES':
                    subdirectories_too_much_deleted = True
                
                out = debian_exec(f'cd {problem.image_dir}/subdir2',
                                    get_check_command('-e image3.jpeg')
                                )
                if out.stdout.strip() != 'NO':
                    subdirectories_not_deleted = True
                
                out = debian_exec(f'cd {problem.image_dir}/subdir2',
                                    get_check_command('-e text3.txt')
                                )
                if out.stdout.strip() != 'YES':
                    subdirectories_too_much_deleted = True
                
                out = debian_exec(f'cd {problem.image_dir}/subdir2/subdir3',
                                    get_check_command('-e image4.jpg')
                                )
                if out.stdout.strip() != 'NO':
                    subdirectories_not_deleted = True
                
                out = debian_exec(f'cd {problem.image_dir}/subdir2/subdir3',
                                    get_check_command('-e text4.json')
                                )
                if out.stdout.strip() != 'YES':
                    subdirectories_too_much_deleted = True

                if subdirectories_not_deleted:
                    return "An image was not deleted in a subdirectory"
                
                if subdirectories_too_much_deleted:
                    return "A non-image file was deleted in a subdirectory"
            except:
                return 'The directory was corrupted. Checking whether the images were deleted is not possible.'

        test_exec(self, problem, script_name, find_issue, problem.image_dir)

class TestCaseDeleteImages4(TestCase):
    name = 'Attēli ar nepopulārāriem paplašinājumiem sākotnējā direktorijā un vairāku līmeņu apakšdirektorijās katrā pa attēlam un vienai citai datnei'
    level = 3

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.image_dir}',
                    'touch image1.apng',
                    'touch text1.txt',
                    'mkdir -p subdir1',
                    'cd subdir1',
                    'touch image2.avif',
                    'touch text2.txt',
                    'cd ..',
                    'mkdir -p subdir2',
                    'cd subdir2',
                    'touch image3.jfif',
                    'touch text3.txt',
                    'mkdir -p subdir3',
                    'cd subdir3',
                    'touch image4.webp',
                    'touch text4.json' 
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.image_dir}',
                                  get_check_command('-e image1.apng')
                                )
                if out.stdout.strip() != 'NO':
                    return "An image with extensions .apng was not deleted"
                
                out = debian_exec(f'cd {problem.image_dir}',
                                    get_check_command('-e text1.txt')
                                )
                if out.stdout.strip() != 'YES':
                    return "A non-image file was deleted"

                subdirectories_too_much_deleted = False

                out = debian_exec(f'cd {problem.image_dir}/subdir1',
                                    get_check_command('-e image2.avif')
                                )

                if out.stdout.strip() != 'NO':
                    return "An image with extensions .avif was not deleted"
                
                out = debian_exec(f'cd {problem.image_dir}/subdir1',
                                    get_check_command('-e text2.txt')
                                )
                if out.stdout.strip() != 'YES':
                    subdirectories_too_much_deleted = True
                
                out = debian_exec(f'cd {problem.image_dir}/subdir2',
                                    get_check_command('-e image3.jfif')
                                )
                if out.stdout.strip() != 'NO':
                    return "An image with extensions .jfif was not deleted"
                
                out = debian_exec(f'cd {problem.image_dir}/subdir2',
                                    get_check_command('-e text3.txt')
                                )
                if out.stdout.strip() != 'YES':
                    subdirectories_too_much_deleted = True
                
                out = debian_exec(f'cd {problem.image_dir}/subdir2/subdir3',
                                    get_check_command('-e image4.webp')
                                )
                if out.stdout.strip() != 'NO':
                    return "An image with extensions .webp was not deleted"
                
                out = debian_exec(f'cd {problem.image_dir}/subdir2/subdir3',
                                    get_check_command('-e text4.json')
                                )
                if out.stdout.strip() != 'YES':
                    subdirectories_too_much_deleted = True
                
                if subdirectories_too_much_deleted:
                    return "A non-image file was deleted in a subdirectory"
            except:
                return 'The directory was corrupted. Checking whether the images were deleted is not possible.'

        test_exec(self, problem, script_name, find_issue, problem.image_dir)

class TestCaseDeleteImages5(TestCase):
    name = '1 attēls ar paplašinājumu “JPG” un 1 direktorija ar attēla paplašinājumu nosaukumā'
    level = 3

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.image_dir}',
                    'touch image1.JPG',
                    'mkdir -p data.jpg'
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.image_dir}',
                                  get_check_command('-e image1.JPG')
                                )
                if out.stdout.strip() != 'YES':
                    return "An file with extension .JPG was deleted. The system uses default case-sensitive ext4 filesystem."
                
                out = debian_exec(f'cd {problem.image_dir}',
                                    get_check_command('-d data.jpg')
                                )
                if out.stdout.strip() != 'YES':
                    return 'A directory with the name "data.jpg" was deleted.'  
            except:
                return 'The directory was corrupted. Checking whether the images were deleted is not possible.'

        test_exec(self, problem, script_name, find_issue, problem.image_dir)

DeleteImagesInstance = DeleteImages([
                                    TestCaseDeleteImages(),
                                    TestCaseDeleteImages2(), 
                                    TestCaseDeleteImages3(), 
                                    TestCaseDeleteImages4(),
                                    TestCaseDeleteImages5()
                                ])
problems.append(DeleteImagesInstance)

'''
Kategorija: Ikdienas uzdevumu automatizācija 
Sarežģītības līmenis: 3
Uzdevuma teksts: In the current directory, find json data in files with “user” keys and print out their corresponding values alphabetically (they will be strings) in the format “Here are the users: {user_1}, {user_2} … {user_n)”. If there are no users print “No users found”.
Testa gadījumi 
    1. (1. līmenis): Viena datne ar vienu korektu json, kur 3 lietotāji
    2. (1. līmenis): Viena datne ar vienu json, kur nav neviena lietotāja
    3. (2. līmenis): Divas json datnes ar lietotājiem abās un vēl viena papildus datne ar lietotājiem apakšdirektorijā (kuru nevajadzētu ņemt vērā)
    4. (2. līmenis): Viena datne, kur ir ievadīts “users”: “Tommy Black”, bet nav json, otra ar json, kur ir citi lietotāji                 
    5. (2. līmenis): Viena datne, kur ir vairāki json ar lietotājiem abos 
    6. (3. līmenis): Viena datne are 1 korektu json ar lietotājiem un otru nekorekta formāta, bet līdzīgu json (arī ar lietotājiem)
'''

class FindInJSON(Problem):
    name = 'FindInJSON'
    category = 'Ikdienas uzdevumu automatizācija'
    level = 3
    text = 'In the current directory, find json data in files with “user” keys and print out their corresponding values (they will be strings) in the format “Here are the users: {user_1}, {user_2} … {user_n)”. If there are no users print “No users found”.'

    json_dir = '/usr/json_files'

    def init_problem(self):
        debian_exec(f'mkdir -p {self.json_dir}')

class TestCaseFindInJSON1(TestCase):
    name = 'Viena datne ar vienu korektu json, kur 3 lietotāji'
    level = 1

    json_data = """
        {
            "username": "Jack32",
            "user": "Jack Larson",
            "array": [1, 2, 3]
        }
    """

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.json_dir}',
                    'touch data.json',
                    f"echo -e '{self.json_data.replace('\t', '\\t').replace('\n', '\\n')}' > data.json", 
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            if 'Here are the users: Jack Larson' not in results.stdout:
                if 'Jack' in results.stdout and 'Larson' in results.stdout:
                    return 'String with spaces outputted incorrectly.'
                return 'A user was not found!'
        test_exec(self, problem, script_name, find_issue, problem.json_dir)

class TestCaseFindInJSON2(TestCase):
    name = 'Viena datne ar vienu json, kur nav neviena lietotāja'
    level = 1

    json_data = """
        {
            "username": "Jack32",
            "array": [1, 2, 3]
        }
    """

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.json_dir}',
                    'touch data.json',
                    f"echo -e '{self.json_data.replace('\t', '\\t').replace('\n', '\\n')}' > data.json", 
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            if 'No users found' not in results.stdout:
                return 'A user was found when there were none to be found!'
        test_exec(self, problem, script_name, find_issue, problem.json_dir)

class TestCaseFindInJSON3(TestCase):
    name = 'Divas json datnes ar lietotājiem abās un vēl viena papildus datne ar lietotājiem apakšdirektorijā'
    level = 2

    json_data1 = """
        {
            "username": "JohnDoe",
            "user": "John Doe",
            "array": [1, 2, 3]
        }
    """

    json_data2 = """
        {
            "username": "Jack32",
            "user": "Jack Larson",
            "array": [1, 2, 3]
        }
    """

    json_data3 = """
        {
            "data": {
                "user": "Anne Page"
            }
        }
    """

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.json_dir}',
                    'touch data1.json',
                    f"echo -e '{self.json_data1.replace('\t', '\\t').replace('\n', '\\n')}' > data1.json", 
                    'touch data2.json',
                    f"echo -e '{self.json_data2.replace('\t', '\\t').replace('\n', '\\n')}' > data2.json", 
                    'mkdir -p subdir',
                    f'cd subdir',
                    'touch data3.json',
                    f"echo -e '{self.json_data3.replace('\t', '\\t').replace('\n', '\\n')}' > data3.json", 
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            if 'Anne Page' in results.stdout:
                return 'The script should only search in the current directory, not subdirectories.'

            if 'Here are the users: Jack Larson, John Doe' not in results.stdout:
                if 'Jack Larson' in results.stdout and 'John Doe' in results.stdout:
                    return 'Users were found but the output should be alphabetically sorted.'
                return 'A user was not found!'
        test_exec(self, problem, script_name, find_issue, problem.json_dir)

class TestCaseFindInJSON4(TestCase):
    name = 'Viena datne, kur ir ievadīts “users”: “Tommy Black”, bet nav json, otra ar json, kur ir citi lietotāji'
    level = 2

    json_data = """
        {
            "username": "Jack32",
            "user": "Jack Larson",
            "array": [1, 2, 3],
            "someData": {
                "user": "Henry Watson",
                "moreData": {
                    "user": "Lyle Johnson"
                }
            }
        }
    """

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.json_dir}',
                    'touch data1.txt',
                    'echo \'"users\\": "Tommy Black"\' > data1.txt', 
                    'touch data2.json',
                    f"echo -e '{self.json_data.replace('\t', '\\t').replace('\n', '\\n')}' > data2.json", 
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            if 'Tommy Black' in results.stdout:
                return 'The script should only search in valid format JSON.'
            if 'Here are the users: Henry Watson, Jack Larson, Lyle Johnson' not in results.stdout:
                if 'Jack Larson' in results.stdout and 'Henry Watson' in results.stdout and 'Lyle Johnson' in results.stdout:
                    return 'Users were found but the output should be alphabetically sorted.'
                return 'A user was not found!'

        test_exec(self, problem, script_name, find_issue, problem.json_dir)

class TestCaseFindInJSON5(TestCase):
    name = 'Viena datne, kur ir vairāki json ar lietotājiem abos'
    level = 2

    js_data = """
        let x =  "{
            "username": "JohnDoe",
            "user": "John Doe",
            "array": [1, 2, 3]
        }"

        function y() {
            return 3;
        }

        z("{
            "username": "Jack32",
            "user": "Jack Larson",
            "array": [1, 2, 3]
        }")
    """

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.json_dir}',
                    'touch program.js',
                    f"echo -e '{self.js_data.replace('\t', '\\t').replace('\n', '\\n')}' > program.js"
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            if 'Here are the users: Jack Larson, John Doe' not in results.stdout:
                if 'Jack Larson' in results.stdout and 'John Doe' in results.stdout:
                    return 'Users were found but the output should be alphabetically sorted.'
                return 'A user was not found, there can be multiple json blocks in a file!'
        test_exec(self, problem, script_name, find_issue, problem.json_dir)

class TestCaseFindInJSON6(TestCase):
    name = 'Viena datne are 1 korektu json ar lietotājiem un otru nekorekta formāta, bet līdzīgu json (arī ar lietotājiem)'
    level = 3

    json_data1 = """
        {
            "username": "Jack32",
            "user": "Jack Larson",
            "array": [1, 2, 3],
            "someData": {
                "user": "Henry Watson",
                "moreData": {
                    "user": "Lyle Johnson"
                }
            }
        }
    """

    json_data2 = """
        {
            "someData": {
                "user": "Tommy Black",,
            }
        }
    """

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.json_dir}',
                    'touch data1.json',
                    f"echo -e '{self.json_data1.replace('\t', '\\t').replace('\n', '\\n')}' > data1.json", 
                    'touch data2.json',
                    f"echo -e '{self.json_data2.replace('\t', '\\t').replace('\n', '\\n')}' > data2.json", 
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            if 'Tommy Black' in results.stdout:
                return 'The script should only search in valid format JSON.'
            if 'Here are the users: Henry Watson, Jack Larson, Lyle Johnson' not in results.stdout:
                if 'Jack Larson' in results.stdout and 'Henry Watson' in results.stdout and 'Lyle Johnson' in results.stdout:
                    return 'Users were found but the output should be alphabetically sorted.'
                return 'A user was not found!'
        test_exec(self, problem, script_name, find_issue, problem.json_dir)

FindInJSONInstance = FindInJSON([
                                TestCaseFindInJSON1(),
                                TestCaseFindInJSON2(),
                                TestCaseFindInJSON3(),
                                TestCaseFindInJSON4(),
                                TestCaseFindInJSON5(),
                                TestCaseFindInJSON6()
                            ])
problems.append(FindInJSONInstance)

'''
Kategorija: DevOps CI/CD kategorija 
Sarežģītības līmenis: 2
Uzdevuma teksts: Pull changes from the “develop” branch of remote origin GIT repositry, 
to the same name branch in the current directory’s repository. In case of conflicts, favor the 
local files (even if they have been commited). 
Testa gadījumi 
    1. (1. līmenis): Repozitorijos izmaiņas nerada konfliktus 
    2. (1. līmenis): Lokālajā repozitorijā ir commit, kas rada konfliktus 
    3. (2. līmenis): Repozitorijos izmaiņas nerada konfliktus, bet lokālā repozitorija 
    atrodas zarā “main”  nevis “develop” 
    4. (2. līmenis): Lokālajā repozitorijā “develop” zarā ir commit, kas rada                 
    konfliktus, bet lokālā repozitorija atrodas zarā “main”   
    5. (3. līmenis): Lokālajā repozitorijā “develop” zarā ir nesaglabātas izmaiņas (kurām 
    būtu jāveic stash un tās jāatjauno).  
    6. (3. līmenis): Lokālajā repozitorijā “develop” zarā ir commit, kas rada                 
    konfliktus 1 datnei, bet lokālā repozitorija atrodas zarā “main”, kurā ir 
    nesaglabātas izmaiņas (kurām nav jāparādās “develop”, bet tās arī nedrīkst 
    pazaudēt).
'''

class PullChangesFromGit(Problem):
    name = 'PullChangesFromGit'
    category = 'DevOps CI/CD'
    level = 2
    text = 'Pull changes from the “develop” branch of remote origin GIT repositry, to the same name branch in the current directory’s repository. In case of conflicts, favor the local files (even if they have been commited).'

    server_dir = '/usr/git_server.git'
    local_dir = '/usr/git_local'
    pusher_dir = '/usr/git_pusher'
    server_url = f'file://{server_dir}'

    def init_problem(self):
        debian_exec(f'mkdir -p {self.server_dir}')
        debian_exec('git config --global user.email "experiment@example.com"')
        debian_exec('git config --global user.name "experiment"')
        debian_exec(f'cd {self.server_dir}',
                        'git init --bare -b develop',
                        f'mkdir -p {self.pusher_dir}',
                        f'cd {self.pusher_dir}',
                        f'git init -b develop',
                        f'git remote add origin {self.server_url}',
                        'touch text.txt',
                        'echo "Hello, Jake!" > text.txt',
                        'git add .',
                        'git commit -m "First commit"',
                        'git push origin develop'
                    )
        debian_exec(f'mkdir -p {self.local_dir}',
                    f'cd {self.local_dir}',
                    f'git clone {self.server_url} .'
                    )

class TestCasePullChangesFromGit1(TestCase):
    name = 'Nav konfliktu'
    level = 1

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.pusher_dir}',
                'echo "Hello, Tom!" > text.txt',
                'git add .',
                'git commit -m "Second commit"',
                'git push origin develop'
            )

    def run_functional_internal(self, problem, script_name):        
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.local_dir}',
                            get_check_command('-e text.txt')
                            )
                if out.stdout.strip() == 'NO':
                    return 'Incorrect changes have been pulled. A file is missing.'
        
                out = debian_exec(f'cd {problem.local_dir}',
                                'cat text.txt')
                
                if out.stdout.strip() == 'Hello, Jake!':
                    return 'Changes weren\'t pulled.'
                
                if out.stdout.strip() != 'Hello, Tom!':
                    return 'Incorrect changes have been pulled. File content is incorrect.'

            except:
                return 'The directory gets corrupted after running the script. Attempting to check results gives an error.'
        
        test_exec(self, problem, script_name, find_issue, problem.local_dir)

class TestCasePullChangesFromGit2(TestCase):
    name = 'Lokālajā repozitorijā ir commit, kas rada konfliktus'
    level = 1

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.pusher_dir}',
                        'echo "Hello, Tom!" > text.txt',
                        'touch text2.txt',
                        'git add .',
                        'git commit -m "Second commit"',
                        'git push origin develop'
                    )
        debian_exec(f'cd {problem.local_dir}',
                        'echo "Hello, Bob!" > text.txt',
                        'git add .',
                        'git commit -m "Local commit"'
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.local_dir}',
                            get_check_command('-e text.txt')
                            )
                if out.stdout.strip() == 'NO':
                    return 'Incorrect changes have been pulled. A file is missing.'
        
                out = debian_exec(f'cd {problem.local_dir}',
                            get_check_command('-e text2.txt')
                            )
                if out.stdout.strip() == 'NO':
                    return 'Changes have not been pulled. A file is missing.'

                out = debian_exec(f'cd {problem.local_dir}',
                                   'cat text.txt')
                
                if out.stdout.strip() == 'Hello, Tom!':
                    return 'The script seems to actually prioritize changes from origin. Local changes should be favored.'
            
                if out.stdout.strip() != 'Hello, Bob!':
                    return 'File content seems to be incorrect after running the script. It is equal to neither local, nor remote changes.'
            except:
                return 'The directory gets corrupted after running the script. Attempting to check results gives an error.'
            
        test_exec(self, problem, script_name, find_issue, problem.local_dir)
class TestCasePullChangesFromGit3(TestCase):
    name = 'Repozitorijos izmaiņas nerada konfliktus, bet lokālā repozitorija atrodas zarā “main” nevis “develop”'
    level = 2

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.pusher_dir}',
                        'echo "Hello, Tom!" > text.txt',
                        'git add .',
                        'git commit -m "Second commit"',
                        'git push origin develop'
                    )
        debian_exec(f'cd {problem.local_dir}',
                        'git checkout -b main'
                    )
        
    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.local_dir}',
                            get_check_command('-e text.txt')
                            )
                if out.stdout.strip() == 'NO':
                    return 'Incorrect changes have been pulled. A file is missing.'
        
                out = debian_exec(f'cd {problem.local_dir}',
                                'cat text.txt')
                
                if out.stdout.strip() == 'Hello, Jake!':
                    return 'Changes weren\'t pulled.'
                
                if out.stdout.strip() != 'Hello, Tom!':
                    return 'Incorrect changes have been pulled. File content is incorrect.'

            except:
                return 'The directory gets corrupted after running the script. Attempting to check results gives an error.'
        
        test_exec(self, problem, script_name, find_issue, problem.local_dir)

class TestCasePullChangesFromGit4(TestCase):
    name = 'Lokālajā repozitorijā “develop” zarā ir commit, kas rada konfliktus, bet lokālā repozitorija atrodas zarā “main”'
    level = 2

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.pusher_dir}',
                        'echo "Hello, Tom!" > text.txt',
                        'touch text2.txt',
                        'git add .',
                        'git commit -m "Second commit"',
                        'git push origin develop'
                    )
        debian_exec(f'cd {problem.local_dir}',
                'echo "Hello, Bob!" > text.txt',
                'git add .',
                'git commit -m "Local commit"'
            )
        debian_exec(f'cd {problem.local_dir}',
                'git checkout -b main'
            )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.local_dir}',
                                   'git rev-parse --abbrev-ref HEAD'
                                 )
                if out.stdout.strip() != 'develop':
                    return 'The repository is not in develop branch after running the script.'

                out = debian_exec(f'cd {problem.local_dir}',
                            get_check_command('-e text.txt')
                            )
                if out.stdout.strip() == 'NO':
                    return 'Incorrect changes have been pulled. A file is missing.'

                out = debian_exec(f'cd {problem.local_dir}',
                                    get_check_command('-e text2.txt')
                                    )
                if out.stdout.strip() == 'NO':
                    return 'Changes have not been pulled. A file is missing.'

                out = debian_exec(f'cd {problem.local_dir}',
                                   'cat text.txt')
                
                if out.stdout.strip() == 'Hello, Tom!':
                    return 'The script seems to actually prioritize changes from origin. Local changes should be favored.'
            
                if out.stdout.strip() != 'Hello, Bob!':
                    return 'File content seems to be incorrect after running the script. It is equal to neither local, nor remote changes.'
            except:
                return 'The directory gets corrupted after running the script. Attempting to check results gives an error.'

        test_exec(self, problem, script_name, find_issue, problem.local_dir)
class TestCasePullChangesFromGit5(TestCase):
    name = 'Lokālajā repozitorijā “develop” zarā ir nesaglabātas izmaiņas (kurām būtu jāveic stash un tās jāatjauno)'
    level = 3

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.pusher_dir}',
                        'echo "Hello, Tom!" > text.txt',
                        'touch text2.txt',
                        'git add .',
                        'git commit -m "Second commit"',
                        'git push origin develop'
                    )
        debian_exec(f'cd {problem.local_dir}',
                    'echo "Hello, Bob!" > text.txt'
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.local_dir}',
                            get_check_command('-e text.txt')
                            )
                if out.stdout.strip() == 'NO':
                    return 'Incorrect changes have been pulled. A local file is missing.'
        
                out = debian_exec(f'cd {problem.local_dir}',
                    get_check_command('-e text2.txt')
                    )
                if out.stdout.strip() == 'NO':
                    return 'Changes have not been pulled. A file is missing.'
                
                out = debian_exec(f'cd {problem.local_dir}',
                            'cat text.txt'
                            )

                if out.stdout.strip() == 'Hello, Tom!':
                    return 'Changes were pulled but in the scenario where there were local uncommited changes to the files, they should also have been kept.'

                if out.stdout.strip() != 'Hello, Bob!':
                    return 'After running the script, the file content is incorrect. Neither local changes have been preserved, not remote changes have been pulled.'

                out = debian_exec(f'cd {problem.local_dir}',
                                   'git log -1 --pretty=%s'
                                 )
                
                if out.stdout.strip() != 'Second commit':
                    return 'Changes weren\'t pulled.'
                
            except:
                return 'The directory gets corrupted after running the script. Attempting to check results gives an error.'
        
        test_exec(self, problem, script_name, find_issue, problem.local_dir)
    
class TestCasePullChangesFromGit6(TestCase):
    name = 'Lokālajā repozitorijā “develop” zarā ir commit, kas rada konfliktus 1 datnei, bet lokālā repozitorija atrodas zarā “main”, kurā ir nesaglabātas izmaiņas (kurām nav jāparādās “develop”, bet tās arī nedrīkst pazaudēt)'
    level = 3

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.pusher_dir}',
                        'echo "Hello, Tom!" > text.txt',
                        'touch text2.txt',
                        'git add .',
                        'git commit -m "Second commit"',
                        'git push origin develop'
                    )
        debian_exec(f'cd {problem.local_dir}',
                        'echo "Hello, Bob!" > text.txt',
                        'git add .',
                        'git commit -m "Local commit"',
                        'git checkout -b main',
                        'echo "Hello, Alice!" > text.txt'
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.local_dir}',
                            get_check_command('-e text.txt')
                )
                if out.stdout.strip() == 'NO':
                    return 'Incorrect changes have been pulled. A file is missing.'

                out = debian_exec(f'cd {problem.local_dir}',
                    get_check_command('-e text2.txt')
                    )
                if out.stdout.strip() == 'NO':
                    return 'Changes have not been pulled. A file is missing.'

                out = debian_exec(f'cd {problem.local_dir}',
                                    'cat text.txt'
                )

                if out.stdout.strip() == 'Hello, Alice!':
                    return 'Uncommited changes shouldn\'t have been kept in the scenario where the repository starts on a different branch than develop but stashed instead.' 

                if out.stdout.strip() == 'Hello, Tom!':
                    return 'The script seems to actually prioritize changes from origin. Local changes should be favored.'

                if out.stdout.strip() != 'Hello, Bob!':
                    return 'Incorrect changes have been pulled. File content is incorrect.'

                out = debian_exec(f'cd {problem.local_dir}',
                                   'git rev-parse --abbrev-ref HEAD'
                                 )
                if out.stdout.strip() != 'develop':
                    return 'The repository is not in develop branch after running the script.'
                
                out = debian_exec(f'cd {problem.local_dir}',
                                  'git checkout stash -- .',
                                  'cat text.txt')
                
                if out.stdout.strip() != 'Hello, Alice!':
                    return 'In the scenario where the repository starts on a different branch, any uncommited local changes should have been stashed!'
                
            except:
                return 'The directory gets corrupted after running the script. Attempting to check results gives an error.'
        
        test_exec(self, problem, script_name, find_issue, problem.local_dir)

PullChangesFromGitInstance = PullChangesFromGit(
            [
                TestCasePullChangesFromGit1(),
                TestCasePullChangesFromGit2(),
                TestCasePullChangesFromGit3(),
                TestCasePullChangesFromGit4(),
                TestCasePullChangesFromGit5(),
                TestCasePullChangesFromGit6()
            ]
)
problems.append(PullChangesFromGitInstance)


'''
Kategorija: DevOps CI/CD kategorija 
Sarežģītības līmenis: 2 
Uzdevuma teksts: Delete all files from the current directory and it’s subdirectories which would be ignored according to the .gitignore file in the directory
Testa gadījumi 
    1. (1. līmenis): Vairākas ignorētas un neignorētas datnes, bet nav apakšdirektoriju
    2. (2. līmenis): Vairākas ignorētas un neignorētas datnes tai skaitā apakšdirektorijās
    3. (3. līmenis): Vairākas ignorētas un neignorētas datnes tai skaitā apakšdirektorijās, bet nav git repozitorija (jo uzdevumā nav teikts, ka izmanto git repozitoriju)
'''

class DeleteGitIgnoredFiles(Problem):
    name = 'DeleteGitIgnoredFiles'
    category = 'DevOps CI/CD'
    level = 2
    text = 'Delete all files from the current directory and it’s subdirectories which would be ignored according to the .gitignore file in the directory.'

    ignore_dir = '/usr/ignore_files'

    def init_problem(self):
        debian_exec(f'mkdir -p {self.ignore_dir}',
                    f'cd {self.ignore_dir}',
                    'git init',
                    'touch .gitignore',
                    )

class TestCaseDeleteGitIgnoredFiles1(TestCase):
    name = 'Vairākas ignorētas un neignorētas datnes, bet nav apakšdirektoriju'
    level = 1

    ignore_file = '# ignore ALL .log files\n*.log\n'


    def setup_functional(self, problem):
        debian_exec(f'cd {problem.ignore_dir}',
                    f'echo -e "{self.ignore_file}" > .gitignore',
                    'touch program.js',
                    'touch data.log',
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.ignore_dir}',
                            get_check_command('-e program.js')
                            )
                if out.stdout.strip() == 'NO':
                    return 'A file was deleted that should not have been.'
                
                out = debian_exec(f'cd {problem.ignore_dir}',
                            get_check_command('-e data.log')
                            )
                if out.stdout.strip() == 'YES':
                    return 'A file was not deleted that should have been.'
                
                out = debian_exec(f'cd {problem.ignore_dir}',
                    get_check_command('-e .gitignore')
                )
                if out.stdout.strip() == 'NO':
                    return 'The .gitignore file was deleted.'
            except:
                return 'The directory was corrupted. Checking whether the files were deleted is not possible.'

        test_exec(self, problem, script_name, find_issue, problem.ignore_dir)
    
class TestCaseDeleteGitIgnoredFiles2(TestCase):
    name = 'Vairākas ignorētas un neignorētas datnes tai skaitā apakšdirektorijās'
    level = 2

    ignore_file = '# ignore ALL .log files\n*.log\n\n# ignore ALL files in ANY directory named temp\ntemp/*\n!temp/.gitkeep'


    def setup_functional(self, problem):
        debian_exec(f'cd {problem.ignore_dir}',
                    f'echo -e "{self.ignore_file}" > .gitignore',
                    'touch program.js',
                    'touch data.log',
                    'mkdir -p subdir',
                    'cd subdir',
                    'touch program2.js',
                    'touch data2.log',
                    'cd ..',
                    'mkdir -p temp',
                    'cd temp',
                    'touch program3.js',
                    'touch .gitkeep',
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.ignore_dir}',
                            get_check_command('-e program.js')
                            )
                if out.stdout.strip() == 'NO':
                    return 'A file was deleted that should not have been.'
                
                out = debian_exec(f'cd {problem.ignore_dir}',
                            get_check_command('-e data.log')
                            )
                if out.stdout.strip() == 'YES':
                    return 'A file was not deleted that should have been.'

                out = debian_exec(f'cd {problem.ignore_dir}/subdir',
                            get_check_command('-e program2.js')
                            )
                if out.stdout.strip() == 'NO':
                    return 'A file in a subdirectory was deleted that should not have been.'
                
                out = debian_exec(f'cd {problem.ignore_dir}/subdir',
                            get_check_command('-e data2.log')
                            )
                if out.stdout.strip() == 'YES':
                    return 'A file in a subdirectory was not deleted that should have been.'
                
                out = debian_exec(f'cd {problem.ignore_dir}/temp',
                                  get_check_command('-e program3.js')
                            )
                if out.stdout.strip() == 'YES':
                    return 'A file in a directory was not deleted that should have been.'

                out = debian_exec(f'cd {problem.ignore_dir}',
                                  get_check_command('-e .gitignore')
                                )
                if out.stdout.strip() == 'NO':
                    return 'The .gitignore file was deleted.'
            except:
                return 'The directory was corrupted. Checking whether the files were deleted is not possible.'

        test_exec(self, problem, script_name, find_issue, problem.ignore_dir)

class TestCaseDeleteGitIgnoredFiles3(TestCase):
    name = 'Vairākas ignorētas un neignorētas datnes tai skaitā apakšdirektorijās, bet nav git repozitorija'
    level = 3

    ignore_file = '# ignore ALL .log files\n*.log\n\n# ignore ALL files in ANY directory named temp\ntemp/*\n!temp/.gitkeep'

    def setup_functional(self, problem):
        debian_exec(f'cd {problem.ignore_dir}',
                    'rm -rf .git',
                    f'echo -e "{self.ignore_file}" > .gitignore',
                    'touch program.js',
                    'touch data.log',
                    'mkdir -p subdir',
                    'cd subdir',
                    'touch program2.js',
                    'touch data2.log',
                    'cd ..',
                    'mkdir -p temp',
                    'cd temp',
                    'touch program3.js',
                    'touch .gitkeep',
                    )

    def run_functional_internal(self, problem, script_name):
        def find_issue(test, problem, results):
            try:
                out = debian_exec(f'cd {problem.ignore_dir}',
                            get_check_command('-e program.js')
                            )
                if out.stdout.strip() == 'NO':
                    return 'A file was deleted that should not have been.'
                
                out = debian_exec(f'cd {problem.ignore_dir}',
                            get_check_command('-e data.log')
                            )
                if out.stdout.strip() == 'YES':
                    return 'A file was not deleted that should have been.'

                out = debian_exec(f'cd {problem.ignore_dir}/subdir',
                            get_check_command('-e program2.js')
                            )
                if out.stdout.strip() == 'NO':
                    return 'A file in a subdirectory was deleted that should not have been.'
                
                out = debian_exec(f'cd {problem.ignore_dir}/subdir',
                            get_check_command('-e data2.log')
                            )
                if out.stdout.strip() == 'YES':
                    return 'A file in a subdirectory was not deleted that should have been.'
                
                out = debian_exec(f'cd {problem.ignore_dir}/temp',
                                  get_check_command('-e program3.js')
                            )
                if out.stdout.strip() == 'YES':
                    return 'A file in a directory was not deleted that should have been.'

                out = debian_exec(f'cd {problem.ignore_dir}',
                                  get_check_command('-e .gitignore')
                                )
                if out.stdout.strip() == 'NO':
                    return 'The .gitignore file was deleted.'
                                
                out = debian_exec(f'cd {problem.ignore_dir}',
                                   get_check_command('-d .git'))
                
                if out.stdout.strip() == 'YES':
                    return 'The script shouldn\'t have initiated a git repository in the directory.'
            except:
                return 'The directory was corrupted. Checking whether the files were deleted is not possible.'

        test_exec(self, problem, script_name, find_issue, problem.ignore_dir)

DeleteGitIgnoredFilesInstance = DeleteGitIgnoredFiles([
                                                        TestCaseDeleteGitIgnoredFiles1(),
                                                        TestCaseDeleteGitIgnoredFiles2(),
                                                        TestCaseDeleteGitIgnoredFiles3()
                                                    ])
problems.append(DeleteGitIgnoredFilesInstance)


# Start experiment

In [None]:
starting_problem_found = START_FROM_PROBLEM_NAME == None
last_finished_problem = None

def attempt_score(results):
    functional_score = 0
    has_failed_test = False
    for _, test in results['test_cases'].items():
        functional_score += 6 / len(results['test_cases'].items()) if test['functional_test_success'] else 0
        has_failed_test = has_failed_test or not test['functional_test_success']
    if not has_failed_test:
        functional_score += 2

    static_score = 5
    static_score -= results['sh_style_cnt'] * 1
    static_score -= results['sh_warning_cnt'] * 2
    static_score -= results['sh_error_cnt'] * 3

    if static_score < 0:
        static_score = 0

    return functional_score + static_score

try:
    number_of_problems_tested = 0

    SCRIPTS_ALREADY_GENERATED = get_script_data_from_json()

    for i, problem in enumerate(problems):
        tested_cache = dict()
        if number_of_problems_tested >= MAX_NUMBER_OF_PROBLEMS_TO_TEST_IN_BATCH:
            print(f'Maximum number of problems to test in batch reached ({MAX_NUMBER_OF_PROBLEMS_TO_TEST_IN_BATCH}), stopping after finishing problem {last_finished_problem}')
            break
        if not starting_problem_found and (problem.name == START_FROM_PROBLEM_NAME or START_FROM_PROBLEM_NAME == None):
            total_in_batch = MAX_NUMBER_OF_PROBLEMS_TO_TEST_IN_BATCH if len(problems) - i > MAX_NUMBER_OF_PROBLEMS_TO_TEST_IN_BATCH else len(problems) - i
            starting_problem_found = True

        if starting_problem_found:
            if not SKIP_CORRECT_SCRIPTS:
                results_correct = problem.run_test_cases(f'correct_{problem.name}', skip_static=True)
                all_correct = True
                for _, test in results_correct['test_cases'].items():
                    all_correct = all_correct and test['functional_test_success']
                if not all_correct:
                    print(results_correct)
                    raise Exception(f'Correct example script for problem {problem.name} failed to execute correctly. There might be an issue with the tests.')

            #clearProblem(EXPERIMENT_NAME, problem.name) # In case last execution failed midway through, remove any partial results
            problem.generateScripts()
            shellcheck_copy_scripts() # Copy the generated scripts onto the shellcheck container for static analysis
            for script in problem.scripts:
                regenerated_script = None
                for attempt in range(1, ATTEMPTS + 1):
                    if regenerated_script != None:
                        curr_script = regenerated_script
                    else:
                        curr_script = script

                    if curr_script['script'] == NOT_A_BASH:
                        saveResultRow(EXPERIMENT_NAME, problem.name, problem.category,
                                    problem.level, attempt, curr_script['script'], curr_script['temperature'], 
                                    curr_script['top-p'], curr_script['model'], 'N/A', 'N/A',
                                    error, success, 'N/A', 0, 
                                    0, 0, 0,
                                    'N/A', 0, curr_script['tokens_used'],
                                    curr_script['time_taken_ms'])
                        break

                    # Often scripts are identical (especially for the same model with different parameters), so we can skip testing them again
                    if curr_script['script'] in tested_cache:
                        results = tested_cache[curr_script['script']]
                    else:
                        attempts_run_tests = 2
                        results = None
                        while attempts_run_tests > 0 and results == None:
                            try:
                                results = problem.run_test_cases(curr_script['name'])
                            except Exception as e:
                                attempts_run_tests -= 1
                                results = None
                                if attempts_run_tests == 0:
                                    raise e
                        tested_cache[curr_script['script']] = results
                    
                    static_issues = results['static_issues'] if results['static_issues'] else ''
                    success = results['static_clean']

                    first_functional_error = ""
                    first_functional_issue = ""
                    output_on_error = ""

                    for _, test in results['test_cases'].items():
                        error = test['functional_error'] if 'functional_error' in test else ''
                        functional_issue = test['functional_issue'] if 'functional_issue' in test else ''
                        success = success and test['functional_test_success']

                        if len(first_functional_error) == 0 and len(error) > 0:
                            first_functional_error = error
                            output_on_error = test['output'] if 'output' in test else ''
                        if len(first_functional_issue) == 0 and len(functional_issue) > 0:
                            first_functional_issue = functional_issue
                            output_on_error = test['output'] if 'output' in test else ''

                        saveResultRow(EXPERIMENT_NAME, problem.name, problem.category,
                                    problem.level, attempt, curr_script['script'], curr_script['temperature'], 
                                    curr_script['top-p'], curr_script['model'], test['test_name'], test['test_level'],
                                    error, success, functional_issue, results['sh_style_cnt'], 
                                    results['sh_info_cnt'], results['sh_warning_cnt'], results['sh_error_cnt'],
                                    static_issues, attempt_score(results), curr_script['tokens_used'],
                                    curr_script['time_taken_ms'])
                        
                    if not success and attempt < ATTEMPTS:
                        issues_explanation = ''
                        if len(first_functional_issue) > 0:
                            issues_explanation += first_functional_issue + '\n'
 
                        if len(first_functional_error) > 0:
                            issues_explanation += f'The stderr was { ' (truncated to last 500 characters) ' if len(first_functional_error) > 500 else ''}:\n {first_functional_error[-500:]}\n'
                        if len(first_functional_error) > 0 or len(first_functional_issue) > 0 and len(output_on_error) > 0:
                            issues_explanation += f'The stdout was { ' (truncated to last 500 characters) ' if len(output_on_error) > 500 else ''}:\n {output_on_error[-500:]}\n'

                        if len(static_issues) > 0:
                            issues_explanation += f'A static analysis of the script found the following issues:\n{static_issues}\n'

                        regenerated_script = problem.regenerateScript(curr_script['model'], curr_script['temperature'], curr_script['top-p'], issues_explanation)
                        shellcheck_copy_scripts() # Recopy scripts to include the new script
                    else:
                        break
            last_finished_problem = problem.name
            number_of_problems_tested += 1
            if i == len(problems) - 2:
                print('All problems have been tested! Results may be reviewed in the Postgres database or Apache Superset.')
            else:
                print(f'[{number_of_problems_tested}/{total_in_batch}] Problem {problem.name} has been tested!')
except Exception as e:
    print(f'Script terminated while working on problem {problem.name}, last successful problem was {last_finished_problem}')
    raise e

Script terminated while working on problem DeleteImages, last successful problem was None


  ip = re.search("'([0-9\.]+)'", run_command(f'VBoxManage guestproperty enumerate "debian_vbox" | {grep_utility} IP').stdout).group(1)


Exception: Error running command (return code 255): b"'jpeg\\' is not recognized as an internal or external command,\r\noperable program or batch file.\r\n"