### Imports

In [1]:
import os
import extract_class
import clang_parser
import opensource_codellm
import argparse

### CodeGenerator class

In [2]:
class CodeGenerator:
    """Extract a class from a project and ask an LLM to generate a binding for it.

    The pipeline implemented by :meth:`run` is:

    1. call ``extract_class.extract_class_main`` on the project directory,
    2. save the returned class definition under ``<output_dir>/<project name>/``,
    3. build a prompt and send it to one of two LLM back ends,
    4. save the generated binding next to the class definition.

    Args:
        proj_dir: Path of the project to analyse.
        output_dir: Directory that receives all generated files. It is
            created on construction if it does not exist yet.
    """

    def __init__(self, proj_dir, output_dir):
        self.proj_dir = proj_dir
        self.output_dir = output_dir
        self.create_output_dir()

    def _project_name(self):
        """Return the last path component of ``proj_dir``.

        ``normpath`` drops a trailing separator first, so ``"proj/"`` yields
        ``"proj"`` instead of the empty string a plain ``split("/")[-1]`` gives.
        """
        return os.path.basename(os.path.normpath(self.proj_dir))

    def create_output_dir(self):
        """Create ``output_dir`` (and missing parents); no-op if it already exists."""
        os.makedirs(self.output_dir, exist_ok=True)

    def write_data(self, content, save_path):
        """Write ``content`` to ``save_path`` as UTF-8 text, overwriting any existing file."""
        with open(save_path, 'w', encoding='utf-8') as file:
            file.write(content)

    def output_destination(self, new_dir):
        """Return the path ``<new_dir>/parsed_<project name>.txt``."""
        return os.path.join(new_dir, f"parsed_{self._project_name()}.txt")

    def generate_binding(self, user_choice, class_definition):
        """Generate binding code for ``class_definition`` with the selected back end.

        Args:
            user_choice: ``1`` for the SCB NCAT LLM server (Phind-CodeLlama-34B-v2),
                ``2`` for the together.ai API.
            class_definition: Source text of the class, forwarded to
                ``opensource_codellm.promt_generation``.

        Returns:
            Whatever the selected ``opensource_codellm`` back end returns;
            :meth:`run` unpacks it as ``(generated_binding, execution_time)``.
            ``(None, None)`` when ``user_choice`` is not 1 or 2.
        """
        # Validate before building the prompt so an invalid option does no work.
        if user_choice not in (1, 2):
            print("Invalid option. Please select a valid option (1 or 2).")
            return None, None

        generated_promt = opensource_codellm.promt_generation(class_definition)
        if user_choice == 1:
            print("Selected : SCB NCAT LLM server")
            model = opensource_codellm.init_axle_models()
            return opensource_codellm.phind_LLM("Phind/Phind-CodeLlama-34B-v2", generated_promt, model)

        print("Code generation from together.ai API")
        return opensource_codellm.together_api(generated_promt)

    def run(self, user_choice=1):
        """Run the full extract -> generate -> save pipeline.

        Args:
            user_choice: Back end to use (see :meth:`generate_binding`).
                Defaults to ``1``, the SCB NCAT LLM server. For an interactive
                session pass ``int(input("Enter the number of your choice: "))``.
        """
        project_name = self._project_name()
        new_dir = os.path.join(self.output_dir, project_name)
        os.makedirs(new_dir, exist_ok=True)

        output_dest = self.output_destination(new_dir)
        class_definition, selected_class = extract_class.extract_class_main(self.proj_dir, output_dest)
        self.write_data(class_definition, os.path.join(new_dir, f"{project_name}_{selected_class}_class.cpp"))

        print("Select an option:")
        print("1. Generate code from SCB NCAT LLM server (Internal server)")
        print("2. Generate code from together.ai API")

        # Report the option that is actually used; the default is option 1.
        print(f"Proceeding with option {user_choice} (default: 1 - SCB NCAT LLM server)")
        generated_binding, execution_time = self.generate_binding(user_choice, class_definition)
        # Compare the time against None so a legitimate 0 does not discard the result.
        if generated_binding and execution_time is not None:
            print(f" Model code generation time: {execution_time}")
            self.write_data(generated_binding, os.path.join(new_dir, f"GenAi_{selected_class}_binding.cpp"))

In [None]:
if __name__ == "__main__":
    # Ask for the project to analyse first, then for the directory that
    # receives the generated files, and run the whole pipeline once.
    proj_dir, output_dir = input("project_directory : "), input("ouptput_directory : ")
    generator = CodeGenerator(proj_dir=proj_dir, output_dir=output_dir)
    generator.run()

project_directory :  filepattern
ouptput_directory :  output_folder


Available classes:
1. FilesystemStream : filepattern/src/filepattern/cpp/util/fs_stream.hpp
2. FilePattern : filepattern/src/filepattern/cpp/include/filepattern.h
3. ExternalMergeSort : filepattern/src/filepattern/cpp/util/sort.hpp
Selected class: FilesystemStream
File: filepattern/src/filepattern/cpp/util/fs_stream.hpp
Select an option:
1. Generate code from SCB NCAT LLM server (Internal server)
2. Generate code from together.ai API
By default - Chosing together.ai API server
Selected : SCB NCAT LLM server


Downloading config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/35.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/7 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00007.bin:   0%|          | 0.00/9.85G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00007.bin:   0%|          | 0.00/9.69G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00007.bin:   0%|          | 0.00/9.69G [00:00<?, ?B/s]

Downloading (…)l-00004-of-00007.bin:   0%|          | 0.00/9.69G [00:00<?, ?B/s]

Downloading (…)l-00005-of-00007.bin:   0%|          | 0.00/9.69G [00:00<?, ?B/s]

Downloading (…)l-00006-of-00007.bin:   0%|          | 0.00/9.69G [00:00<?, ?B/s]

Downloading (…)l-00007-of-00007.bin:   0%|          | 0.00/9.19G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/824 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/434 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Phind-CodeLlama-34B-v2 generation in process
