In [None]:
import os
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

def convert_and_save_code(
    source_dir,
    target_base_dir,
    frameworks,
    model_id="./2_finetune/combined_model",
    max_new_tokens=200,
):
    """Convert every ``.py`` file under *source_dir* into each target framework.

    Each source file is wrapped in an instruction prompt, passed to a 4-bit
    quantized causal language model, and the generated text is written to
    ``<target_base_dir>/<framework>/<path relative to source_dir>``.

    Args:
        source_dir: Directory that is walked recursively for ``.py`` files.
        target_base_dir: Root directory under which one sub-tree per
            framework is created, mirroring the layout of *source_dir*.
        frameworks: Iterable of framework names inserted into the prompt and
            used as the first path component below *target_base_dir*.
        model_id: Path or identifier handed to ``from_pretrained`` for both
            the tokenizer and the model (defaults to the fine-tuned model).
        max_new_tokens: Upper bound on generated tokens per conversion.

    Returns:
        None. Results are written to disk. If there are no ``.py`` files or
        no frameworks, the function returns before loading the model.
    """
    frameworks = list(frameworks)

    # Snapshot the inputs before anything is written, so the progress total
    # matches the work actually done (only ``.py`` files are converted) and
    # output directories created during the run are never picked up as input.
    py_files = [
        (root, name)
        for root, _dirs, names in os.walk(source_dir)
        for name in names
        if name.endswith(".py")
    ]
    if not py_files or not frameworks:
        # Nothing to do: avoid the cost of loading the model.
        return

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quantization_config,
        device_map="auto",
    )

    total_steps = len(py_files) * len(frameworks)
    with tqdm(total=total_steps, desc="Conversion Progress", leave=False, ncols=75) as pbar:
        for root, name in py_files:
            with open(os.path.join(root, name), "r", encoding="utf-8") as f:
                code_content = f.read()

            # Sub-directory of this file relative to the source root; reused
            # to mirror the source layout under every framework directory.
            rel_dir = os.path.relpath(root, source_dir)

            for framework in frameworks:
                prompt = f"<s>[INST] Convert the following code to the {framework} framework\n\n{code_content}[/INST]"
                # Follow the device the model was actually placed on rather
                # than assuming "cuda".
                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
                outputs = model.generate(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs.get("attention_mask"),
                    max_new_tokens=max_new_tokens,
                    do_sample=True,
                    top_p=0.9,
                    temperature=0.1,
                )

                # The returned sequence starts with the prompt tokens; keep
                # only the newly generated part so the saved file holds the
                # conversion and not the instruction plus original code.
                prompt_len = inputs["input_ids"].shape[1]
                converted_code = tokenizer.decode(
                    outputs[0][prompt_len:], skip_special_tokens=True
                )

                # Write below target_base_dir so the source file is never
                # overwritten by its own conversion.
                target_dir = os.path.normpath(
                    os.path.join(target_base_dir, framework, rel_dir)
                )
                os.makedirs(target_dir, exist_ok=True)
                with open(os.path.join(target_dir, name), "w", encoding="utf-8") as f:
                    f.write(converted_code)

                pbar.update(1)

# Frameworks the model is asked to convert each script into
frameworks = ["MLX", "OneFlow", "MindSpore"]

# Directory walked for input scripts, and the base directory passed as the
# conversion target
source_dir = "./1_historical_bug_collection/Github_Issues_code"
target_base_dir = "./converted_bug_codes"

# Kick off the conversion run
convert_and_save_code(
    source_dir=source_dir,
    target_base_dir=target_base_dir,
    frameworks=frameworks,
)