<a href="https://colab.research.google.com/github/AkshayNagamalla/codeOptimisationGenAI/blob/main/final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install bitsandbytes
!pip install transformers
!pip install torch

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
from datetime import datetime

# Model identifier handed to both from_pretrained calls below
# (presumably a Hugging Face Hub repo id -- confirm it is not a local path).
model_path = "hemanth2102/PS-G210"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Load the weights as float16 and move the model to the "cuda" device;
# this runs at import time and needs a CUDA-capable GPU to succeed.
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16).to("cuda")

# Running log of user queries only (model replies are never stored here).
# process_query appends one date-stamped entry per query and joins the whole
# list into the prompt, so the list grows for the lifetime of the session.
user_context = []

def convert_code(code_snippet, target_language):
    """
    Wrap a code snippet in boilerplate for the requested language.

    No translation is performed: the snippet is inserted verbatim into a
    language-specific template (a header comment for Python, a ``Main`` class
    for Java, a ``main`` function for C and C++).

    Args:
        code_snippet: Source text to embed, used exactly as given.
        target_language: Language name, matched case-insensitively against
            "python", "java", "c" and "c++".

    Returns:
        The wrapped snippet, or a one-line "not supported" comment naming the
        lower-cased language when it is not one of the four listed above.
    """
    language = target_language.lower()

    if language == "python":
        return f"# Converted to Python:\n{code_snippet}"

    if language == "java":
        return (
            "// Converted to Java:\n"
            "public class Main {\n"
            "    public static void main(String[] args) {\n"
            f"        {code_snippet}\n"
            "    }\n"
            "}"
        )

    if language == "c":
        return (
            "/* Converted to C */\n"
            "#include <stdio.h>\n"
            "int main() {\n"
            f"    {code_snippet}\n"
            "    return 0;\n"
            "}"
        )

    if language == "c++":
        return (
            "// Converted to C++\n"
            "#include <iostream>\n"
            "using namespace std;\n"
            "int main() {\n"
            f"    {code_snippet}\n"
            "    return 0;\n"
            "}"
        )

    # Anything else: report the (lower-cased) name back to the caller
    return f"// Conversion to {language} not supported."

def process_query(query):
    """
    Answer a user query, either by wrapping code or by asking the model.

    Queries of the form ``convert: <code> | <language>`` are handled locally
    by ``convert_code`` and are not added to the conversation context. The
    language is whatever follows the LAST ``|`` so that snippets containing
    ``|`` or ``||`` stay intact; it defaults to "Python" when no ``|`` is
    present. Every other query is date-stamped, appended to the module-level
    ``user_context`` list, and the whole accumulated context is sent to the
    model as a single user message.

    Args:
        query: The raw text entered by the user.

    Returns:
        The wrapped code for conversion requests, otherwise the text newly
        generated by the model (the prompt itself is not echoed back).
    """
    # Check if the query is a conversion request
    if query.startswith("convert:"):
        request = query.split(":", 1)[1].strip()
        # Split on the last "|" only, so operators such as "||" inside the
        # snippet are not mistaken for the language separator.
        parts = request.rsplit("|", 1)
        code_snippet = parts[0].strip()
        target_language = parts[1].strip() if len(parts) > 1 else "Python"
        return convert_code(code_snippet, target_language)

    # Get the current date
    current_date = datetime.now().strftime("%Y-%m-%d")

    # Append the new query and the current date to the user context
    user_context.append(f"{current_date}\nUser: {query}")
    full_context = "\n".join(user_context)

    # Prepare inputs for the model
    messages = [
        {"role": "user", "content": full_context},
    ]

    # return_dict=True yields both input_ids and attention_mask regardless of
    # the library's default return type; tensors follow the model's device
    # instead of assuming "cuda".
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = encoded["input_ids"].shape[-1]

    # Generate output. do_sample=True is required for temperature / min_p to
    # have any effect; without it they are ignored unless the model's own
    # generation config already enables sampling.
    output_tokens = model.generate(
        **encoded,
        max_new_tokens=128000,  # Adjust token length as needed
        use_cache=True,
        do_sample=True,
        temperature=0.8,
        min_p=0.1,
    )

    # Decode only the tokens produced after the prompt; the returned sequence
    # starts with the prompt, which would otherwise be echoed to the user.
    generated_tokens = output_tokens[0][prompt_length:]
    raw_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # Filter out unwanted phrases (kept as a safety net in case the model
    # reproduces chat-template header text in its reply)
    unwanted_phrases = [
        "Cutting Knowledge Date:",  # Add any phrases to exclude
        "Today Date:"
    ]
    for phrase in unwanted_phrases:
        raw_output = raw_output.replace(phrase, "").strip()

    # The reply is returned but never stored: user_context holds queries only
    return raw_output



def main():
    """
    Run the interactive prompt loop until the user quits.

    Reads queries from standard input, passes each non-empty one to
    ``process_query`` and prints the result. The loop ends when the user
    types 'exit' (any letter case) or when input is closed or interrupted
    (EOF / Ctrl-C) at the prompt.
    """
    print("Welcome to the Llama Model Interactive Query Processor!")
    while True:
        print("\nEnter your query (or type 'exit' to quit):")
        try:
            query = input("> ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt is treated like 'exit'
            # rather than ending the session with a traceback.
            print("\nExiting the program. Goodbye!")
            break

        if query.lower() == 'exit':
            print("Exiting the program. Goodbye!")
            break

        if not query:
            # A blank line would add an empty entry to the shared context and
            # trigger a full generation pass for nothing; just re-prompt.
            continue

        # Process the query with accumulated context
        new_output = process_query(query)

        # Display the output
        print(f"\nOutput:\n{new_output}")

# Start the interactive loop only when this file is run as the entry point,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
