<a href="https://colab.research.google.com/github/AkshayNagamalla/codeOptimisationGenAI/blob/main/backend_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 %%capture
# Install Unsloth from PyPI together with a pinned xformers build.
!pip install unsloth "xformers==0.0.28.post2"
# Also get the latest nightly Unsloth!
# The PyPI release is removed first, then Unsloth (with the "colab-new" extra) is
# reinstalled straight from the GitHub repository, bypassing pip's cache.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
# NOTE(review): xformers was already installed, pinned to 0.0.28.post2, by the
# install cell above; this unpinned install looks redundant — confirm it is needed.
!pip install xformers

In [None]:
from unsloth import FastLanguageModel
from transformers import TextStreamer
from datasets import load_dataset
from unsloth.chat_templates import get_chat_template, standardize_sharegpt
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
from datetime import datetime
import torch

# Model Initialization
# Settings shared with later cells (the trainer reuses `max_seq_length`).
max_seq_length = 128000
dtype = None
load_in_4bit = True

# Load the 4-bit instruct checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# LoRA adapter options, gathered in one place so they are easy to tune.
lora_options = {
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0,
    "bias": "none",
    "target_modules": [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    "use_gradient_checkpointing": "unsloth",
    "random_state": 3407,
    "use_rslora": False,
    "loftq_config": None,
}
model = FastLanguageModel.get_peft_model(model, **lora_options)

# Switch the tokenizer to the "llama-3.1" chat template used for formatting below.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")


In [None]:
# Dataset Preparation
# Alternative public dataset, kept for reference:
#dataset = load_dataset("mlabonne/FineTome-100k", split="train")
# NOTE(review): the JSON file is read from a Google Drive path, but no cell in
# view mounts Drive — presumably it is mounted manually; confirm before running.
dataset = standardize_sharegpt(
    load_dataset(
        "json",
        data_files="/content/drive/MyDrive/Colab Notebooks/optimized_dataset.json",
        split="train",
    )
)

def formatting_prompts_func(examples):
    """
    Render every conversation in a batch through the tokenizer's chat template.

    Args:
        examples: Batch mapping with a "conversations" entry holding one
            conversation per row.

    Returns:
        dict with a single "text" key: the rendered (untokenized) string for
        each conversation, in input order. No generation prompt is appended.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Add a "text" column by running formatting_prompts_func over the dataset in batches.
dataset = dataset.map(formatting_prompts_func, batched=True)


In [None]:
# Fine-tuning Preparation
from unsloth.chat_templates import train_on_responses_only

# Pick the mixed-precision mode once: bf16 when supported, otherwise fp16.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="outputs",
    report_to="none",
    seed=3407,
    # Batching / schedule
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=60,
    logging_steps=1,
    # Optimizer
    optim="adamw_8bit",
    learning_rate=2e-4,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    # Precision
    fp16=not use_bf16,
    bf16=use_bf16,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

# Wrap the trainer using the user/assistant header markers of the chat template.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|start_header_id|>user<|end_header_id|>\n\n",
    response_part="<|start_header_id|>assistant<|end_header_id|>\n\n",
)

trainer.train()
# Prepare the fine-tuned model for the generation cells below.
FastLanguageModel.for_inference(model)


In [None]:
# Streamer handed to model.generate() in process_query; skip_prompt=True is set
# so that the prompt itself is not echoed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

# Context to store user queries
# (module-level list; process_query appends one dated entry per call)
user_context = []

def process_query(query):
    """
    Process the query, append it to the user context, and generate a response.

    The query is stamped with today's date and appended to the module-level
    ``user_context`` list; the whole accumulated context is then sent to the
    model as a single user message. Generation is streamed through
    ``text_streamer`` while it runs.

    Args:
        query: The user's query text.

    Returns:
        str: The decoded text of the newly generated tokens only, with special
        tokens removed. The prompt (accumulated context) is not included.
    """
    current_date = datetime.now().strftime("%Y-%m-%d")
    user_context.append(f"{current_date}\nUser: {query}")
    full_context = "\n".join(user_context)
    messages = [{"role": "user", "content": full_context}]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")
    output_tokens = model.generate(
        input_ids=inputs,
        streamer=text_streamer,
        max_new_tokens=128000,
        temperature=0.8,
        min_p=0.1,
    )
    # generate() returns the prompt tokens followed by the continuation, so
    # drop the prompt; otherwise the "response" would repeat the whole context.
    prompt_length = inputs.shape[-1]
    generated_tokens = output_tokens[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)

In [None]:
def convert_code(code_snippet, target_language):
    """
    Wrap a code snippet in boilerplate for the requested language.

    The language name is matched case-insensitively. Supported values are
    "python", "java", "c" and "c++"; the snippet is inserted verbatim into the
    matching template. Any other value yields a "not supported" comment line
    that names the lower-cased language.
    """
    language = target_language.lower()

    # One builder per supported language; only the selected one is evaluated.
    builders = {
        "python": lambda: f"# Converted to Python:\n{code_snippet}",
        "java": lambda: f"// Converted to Java:\npublic class Main {{\n    public static void main(String[] args) {{\n        {code_snippet}\n    }}\n}}",
        "c": lambda: f"/* Converted to C */\n#include <stdio.h>\nint main() {{\n    {code_snippet}\n    return 0;\n}}",
        "c++": lambda: f"// Converted to C++\n#include <iostream>\nusing namespace std;\nint main() {{\n    {code_snippet}\n    return 0;\n}}",
    }

    build = builders.get(language)
    if build is None:
        return f"// Conversion to {language} not supported."
    return build()


In [None]:
def parse_conversion_request(user_input):
    """
    Parse conversion request from user input.

    Expected form: ``<prefix>: <code> | <language>``. Everything after the
    first ':' is the payload. The text after the LAST '|' in the payload is
    the target language, so code that itself contains '|' (``a | b``,
    ``||``) stays intact as long as a language is supplied.

    Args:
        user_input: Raw request string containing at least one ':'.

    Returns:
        tuple: ``(code_snippet, target_language)``, both stripped of
        surrounding whitespace. The language falls back to "Python" when the
        payload has no '|' or nothing follows the last '|'.

    Raises:
        IndexError: If ``user_input`` contains no ':'.
    """
    payload = user_input.split(":", 1)[1].strip()
    code_part, separator, language_part = payload.rpartition("|")
    if not separator:
        # No '|' at all: the whole payload is code, use the default language.
        return payload, "Python"
    # An empty language (e.g. a trailing '|') also falls back to the default.
    return code_part.strip(), language_part.strip() or "Python"


In [None]:
def main():
    """
    Run the interactive console loop.

    Reads queries from standard input until the user types 'exit' (any case)
    or the input stream ends or is interrupted. Blank lines are ignored.
    Inputs starting with 'convert:' (any case) are handled by
    parse_conversion_request and convert_code and the result is printed;
    every other input is passed to process_query, which streams the model's
    answer itself, so its return value is not printed again here.
    """
    print("Welcome to the Llama Model Interactive Query Processor!")
    while True:
        print("\nEnter your query (or type 'exit' to quit):")
        try:
            user_input = input("> ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt: leave the loop cleanly
            # instead of surfacing a traceback.
            print("\nExiting the program. Goodbye!")
            break

        if not user_input:
            # Nothing typed: do not spend a generation on an empty query.
            continue

        command = user_input.lower()
        if command == "exit":
            print("Exiting the program. Goodbye!")
            break

        # Match the prefix case-insensitively, consistent with 'exit'.
        if command.startswith("convert:"):
            code_snippet, target_language = parse_conversion_request(user_input)
            converted_code = convert_code(code_snippet, target_language)
            print(f"\nConverted Code:\n{converted_code}")
        else:
            process_query(user_input)

# Start the interactive loop when this code runs as the top-level program
# (__name__ is "__main__"), but not when it is imported as a module.
if __name__ == "__main__":
    main()
