# Function Generation
## Imports and Helpers

In [1]:
import os
from pathlib import Path
import json
from IPython.display import display, Markdown
from dotenv import load_dotenv
import shutil
from tqdm.auto import tqdm

from CreatePrompt import CreatePrompt
from LLMInterface import LLMInterface
from Gemini import Gemini
from OpenAIModel import OpenAIModel
from CodeQwen import CodeQwen
from ArtigenzCoder import ArtigenzCoder
from DeepSeek import DeepSeek
from LocalLLM import LocalLLM
from CodebleuCalculator import codebleu_score_calculator, avg_codebleu_score_calculator


In [2]:
# Set the PYTORCH_CUDA_ALLOC_CONF environment variable for this process.
# NOTE(review): "expandable_segments:True" is a PyTorch CUDA caching-allocator
# option, presumably set to reduce memory fragmentation — confirm it is still needed.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "expandable_segments:True"


In [3]:
# Load environment variables from a .env file (python-dotenv).
# NOTE(review): presumably supplies the API keys used by the model wrappers — confirm.
load_dotenv()


True

---

## Reading and Validating Config File

In [None]:
# Ask the user interactively for the location of the JSON config file.
config_file_path = input("Enter the path to the config file: ")


In [None]:
# Parse the JSON config. The context manager closes the file handle, which a
# bare `json.load(open(...))` would leave open until garbage collection.
with open(config_file_path) as config_file:
    config = json.load(config_file)

# Echo the parsed config as the cell output.
config


In [4]:
def validate_config(config: dict):
    """Check that every key the notebook relies on is present in *config*.

    Only key presence is checked; values are not inspected.

    Args:
        config: The parsed configuration mapping.

    Raises:
        ValueError: Naming the first missing key, in the order listed below.
    """
    expected_keys = (
        # General settings
        "language",
        "summarize_codebase",
        "codebase_readme_path",
        "files_to_summarize_paths",
        # Codebase summary
        "codebase_summary_prompt_template",
        "codebase_summary_prompt_save_path",
        "codebase_summary_save_path",
        # Function description
        "function_description_prompt_template",
        "function_description_prompt_save_path",
        "function_description_save_path",
        # Function generation prompts (types 1-3)
        "function_generation_prompt_template_type1",
        "function_generation_prompt_type1_save_path",
        "function_generation_prompt_template_type2",
        "function_generation_prompt_type2_save_path",
        "function_generation_prompt_template_type3",
        "function_generation_prompt_type3_save_path",
        # Target function and few-shot examples
        "chosen_function_path",
        "chosen_function",
        "original_function_save_path",
        "example_function_description1",
        "example_function_code1",
        "example_function_description2",
        "example_function_code2",
        # Output directories for generated functions
        "generated_function_type1_save_dir",
        "generated_function_type2_save_dir",
        "generated_function_type3_save_dir",
        # CodeBLEU evaluation
        "run_codebleu",
        "codebleu_type1_save_dir",
        "codebleu_type2_save_dir",
        "codebleu_type3_save_dir",
    )

    # Stop at the first absent key so the error names exactly one key.
    first_absent = next((name for name in expected_keys if name not in config), None)
    if first_absent is not None:
        raise ValueError(f"Missing required key: {first_absent}")


In [None]:
# Fail fast: validate_config raises ValueError if a required key is missing,
# so the confirmation below is only printed for a complete config.
validate_config(config)
print("Config validated")


---

## Create prompts

### Codebase summarization prompt

In [None]:
# Build the codebase-summarization prompt from the configured template,
# source files and language. The prompt save path is passed to CreatePrompt too.
try:
    summarization_prompt = CreatePrompt.create_summary_prompt(
        config['codebase_summary_prompt_template'],
        config['files_to_summarize_paths'],
        config['language'],
        config['codebase_summary_prompt_save_path'],
    )
except Exception as e:
    # Best-effort: report the failure and let the notebook continue.
    print(f"Error creating summarization prompt: {e}")
else:
    # Announce success only when the prompt was actually created.
    print("Created summarization prompt")


### Function description prompt

In [None]:
# Build the function-description prompt for the chosen function from the
# configured template and language. The prompt save path is passed to CreatePrompt too.
try:
    function_description_prompt = CreatePrompt.create_func_description_prompt(
        config['function_description_prompt_template'],
        config['chosen_function_path'],
        config['language'],
        config['function_description_prompt_save_path'],
    )
except Exception as e:
    # Best-effort: report the failure and let the notebook continue.
    print(f"Error creating function description prompt: {e}")
else:
    # Announce success only when the prompt was actually created.
    print("Created function description prompt")


---

# Read summarization and function description prompts

In [None]:
# Reload the saved summarization prompt text from disk.
summarization_prompt = Path(config['codebase_summary_prompt_save_path']).read_text()


In [None]:
# Reload the saved function-description prompt text from disk.
function_description_prompt = Path(config['function_description_prompt_save_path']).read_text()


---

# Summarize codebase and generate function description

In [None]:
# Model wrapper used below for the codebase summary and the function description.
gemini = Gemini()


In [None]:
# Ask Gemini to summarize the codebase from the summarization prompt.
codebase_summary = gemini.generate(summarization_prompt)
# write_to_file receives only the destination path.
# NOTE(review): presumably it persists the response of the preceding
# generate() call — confirm in the Gemini / LLMInterface implementation.
gemini.write_to_file(config['codebase_summary_save_path'])
print("Summarized codebase")


In [None]:
# Ask Gemini to describe the chosen function from the description prompt.
function_description = gemini.generate(function_description_prompt)
# write_to_file receives only the destination path.
# NOTE(review): presumably it persists the response of the preceding
# generate() call — confirm in the Gemini / LLMInterface implementation.
gemini.write_to_file(config['function_description_save_path'])
print("Generated function description")


In [None]:
# Keep a copy of the original function; it is the reference passed to the
# CodeBLEU calculator later. An existing copy is never overwritten.
if not os.path.exists(config['original_function_save_path']):
    shutil.copyfile(config['chosen_function_path'], config['original_function_save_path'])


---

## Generating new function

### Function Generation Type 1 Prompt 

In [None]:
# Type 1 prompt: built from the template, the codebase README and the saved
# function description. The prompt save path is passed to CreatePrompt as well.
function_generation_prompt_type1 = CreatePrompt.create_func_generation_prompt_type1(
                                    config['function_generation_prompt_template_type1'], 
                                    config['codebase_readme_path'], 
                                    config['function_description_save_path'], 
                                    config['function_generation_prompt_type1_save_path']
                                    )
print("Created function generation prompt type 1")


### Function Generation Type 2 Prompt 

In [None]:
# Type 2 prompt: like type 1, but it takes the generated codebase summary
# where type 1 takes the README.
function_generation_prompt_type2 = CreatePrompt.create_func_generation_prompt_type2(
                                    config['function_generation_prompt_template_type2'], 
                                    config['codebase_summary_save_path'], 
                                    config['function_description_save_path'], 
                                    config['function_generation_prompt_type2_save_path']
                                    )
print("Created function generation prompt type 2")


### Function Generation Type 3 Prompt 

In [None]:
# Type 3 prompt: codebase summary plus two example (description, code) pairs.
# An empty save path in the config means "skip type 3".
if config["function_generation_prompt_type3_save_path"] == "":
    # Keep the name defined so the later cell that collects all three prompts
    # does not raise NameError when type 3 is skipped.
    function_generation_prompt_type3 = ""
    print("Skipping function generation prompt type 3")
else:
    example_functions = [
        [config['example_function_description1'], config['example_function_code1']],
        [config['example_function_description2'], config['example_function_code2']],
    ]

    function_generation_prompt_type3 = CreatePrompt.create_func_generation_prompt_type3(
        config['function_generation_prompt_template_type3'],
        config['codebase_summary_save_path'],
        example_functions,
        config['function_description_save_path'],
        config['function_generation_prompt_type3_save_path'],
    )
    print("Created function generation prompt type 3")


### Large Language Models

In [5]:
# Display name -> model wrapper instance. The generation loop iterates this
# dict; the keys also become the per-model sub-directory names for generated
# files and the CodeBLEU result file names.
# Constructing the wrappers happens here, at cell execution time.
model_dict = {
    "GPT-3_5-Turbo": OpenAIModel("gpt-3.5-turbo"), 
    "GPT-4": OpenAIModel("gpt-4"), 
    "DeepSeek-Coder-V2": DeepSeek(), 
    "CodeQwen1_5-7B-Chat": CodeQwen(), 
    "Artigenz-Coder-DS-6_7B": ArtigenzCoder()
    }


Loaded as API: https://userName-codeqwen1-5-7b-chat.hf.space ✔
Loaded as API: https://userName-artigenz-artigenz-coder-ds-6-7b.hf.space ✔


In [7]:
# Toggle: set to True to reload previously saved prompts from disk instead of
# reusing the ones created earlier in this session.
# The flag deliberately has its own name: it used to share the name of the
# function below and was overwritten by the `def`, so it could never take effect.
load_prompts_from_disk = False


def load_func_generation_prompts(config):
    """Read the saved function-generation prompts from disk.

    Args:
        config: Mapping holding the
            ``function_generation_prompt_type{1,2,3}_save_path`` entries.

    Returns:
        A tuple ``(type1, type2, type3)`` of prompt texts. ``type3`` is the
        empty string when its configured save path is ``""``.

    Raises:
        KeyError: If a save-path key is missing from *config*.
        OSError: If a configured prompt file cannot be opened.
    """
    with open(config['function_generation_prompt_type1_save_path'], 'r') as f:
        function_generation_prompt_type1 = f.read()

    with open(config['function_generation_prompt_type2_save_path'], 'r') as f:
        function_generation_prompt_type2 = f.read()

    # Type 3 is optional: an empty save path means it was never generated.
    function_generation_prompt_type3 = ""
    if config["function_generation_prompt_type3_save_path"] != "":
        with open(config['function_generation_prompt_type3_save_path'], 'r') as f:
            function_generation_prompt_type3 = f.read()

    return function_generation_prompt_type1, function_generation_prompt_type2, function_generation_prompt_type3


# Only touch the disk (and require `config`) when explicitly requested.
if load_prompts_from_disk:
    (
        function_generation_prompt_type1,
        function_generation_prompt_type2,
        function_generation_prompt_type3,
    ) = load_func_generation_prompts(config)

In [None]:
# Prompt text per prompt type; indexed by the generation loop.
function_generation_prompts = {
    "type1": function_generation_prompt_type1,
    "type2": function_generation_prompt_type2,
    "type3": function_generation_prompt_type3
}

# Root output directory per prompt type; the generation loop appends a
# per-model sub-directory and the generated file name.
generated_function_save_dirs = {
    "type1": config['generated_function_type1_save_dir'],
    "type2": config['generated_function_type2_save_dir'],
    "type3": config['generated_function_type3_save_dir']
}


In [None]:
# Display names of the models whose outputs are tracked.
model_names = [
    "GPT-3_5-Turbo",
    "GPT-4",
    "DeepSeek-Coder-V2",
    "CodeQwen1_5-7B-Chat",
    "Artigenz-Coder-DS-6_7B",
]

# Prompt variants, in the order they are processed.
prompt_types = [
    "type1",
    "type2",
    "type3",
]

# prompt type -> model name -> list of saved generated-function paths.
# Every (prompt type, model) pair gets its own fresh list.
generated_function_save_paths = {
    kind: {model: [] for model in model_names}
    for kind in prompt_types
}


In [None]:
# Number of functions to generate per (prompt type, model) pair.
generation_count = 1

In [None]:
# Split the target file name once; only the index suffix varies in the loop.
# os.path.splitext splits on the LAST dot and keeps it in the extension, so
# names with several dots ("a.b.py") keep their real extension and names
# without a dot no longer raise IndexError.
function_stem, function_ext = os.path.splitext(config["chosen_function"])

for prompt_type in prompt_types:

    # An empty type 3 output directory in the config means "skip type 3".
    if prompt_type == "type3" and config["generated_function_type3_save_dir"] == "":
        print("Skipping function generation prompt type 3")
        continue

    print(f"Running prompt type {prompt_type}")

    # Loop-invariant lookups for this prompt type.
    prompt = function_generation_prompts[prompt_type]
    save_dir = generated_function_save_dirs[prompt_type]

    for model_name, model in model_dict.items():
        print(f"Running {model_name} model")

        # 1-based index so the saved files are numbered _1, _2, ...
        for i in range(1, generation_count + 1):

            generated_function = model.generate(prompt)

            generated_function_save_filename = f"{function_stem}_{i}{function_ext}"
            generated_function_save_path = f"{save_dir}/{model_name}/GENERATED-{generated_function_save_filename}"
            # write_to_file receives only the path.
            # NOTE(review): presumably it persists the result of the
            # generate() call above — confirm in LLMInterface.
            model.write_to_file(generated_function_save_path)

            # Remember where each output went for the CodeBLEU step.
            generated_function_save_paths[prompt_type][model_name].append(generated_function_save_path)
            print(f"Generated function {i}")


---

## CodeBLEU Calculation

In [None]:
# Output directory for CodeBLEU results, per prompt type.
codebleu_save_dirs = {
    "type1": config['codebleu_type1_save_dir'],
    "type2": config['codebleu_type2_save_dir'],
    "type3": config['codebleu_type3_save_dir']
}


In [None]:
# Score every generated function against the original with CodeBLEU and
# write one averaged result file per (prompt type, model).
# The key is 'run_codebleu', matching the key validate_config requires;
# the misspelled 'run_codebley' raised KeyError.
if config['run_codebleu']:

    for prompt_type in prompt_types:

        # An empty type 3 CodeBLEU directory in the config means "skip type 3".
        if prompt_type == "type3" and config["codebleu_type3_save_dir"] == "":
            print("Skipping codebleu for prompt type 3")
            continue

        print(f"Running codebleu for prompt {prompt_type}")
        for model_name, generated_function_paths in generated_function_save_paths[prompt_type].items():

            # Nothing was generated for this pair (e.g. generation was
            # skipped), so there is nothing to average.
            if not generated_function_paths:
                print(f"No generated functions for {model_name}, skipping codebleu\n")
                continue

            codebleu_scores = [
                codebleu_score_calculator(
                    config['original_function_save_path'],
                    generated_function_path,
                )
                for generated_function_path in generated_function_paths
            ]

            avg_codebleu_score = avg_codebleu_score_calculator(codebleu_scores)
            print(f"Calculated codebleu score for {model_name}\n")

            codebleu_save_path = f"{codebleu_save_dirs[prompt_type]}/{model_name}.txt"

            # Create the result directory on demand before writing.
            os.makedirs(os.path.dirname(codebleu_save_path), exist_ok=True)
            with open(codebleu_save_path, 'w') as f:
                f.write(f"CodeBLEU Result:\n{avg_codebleu_score}\n")


---