In [None]:
# Initial Model
# Step 1: Install dependencies
# Run this in your terminal
# pip install transformers torch accelerate

from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

# Step 2: Load the model and tokenizer
def load_model(model_name_or_path="meta-llama/CodeLlama-7b-hf"):
    """
    Fetch a Code Llama checkpoint together with its tokenizer.

    Args:
        model_name_or_path (str): Identifier handed unchanged to both
            ``from_pretrained`` calls (hub name or local directory).

    Returns:
        tuple: ``(model, tokenizer)``, in that order.
    """
    # Tokenizer first, then the weights, mirroring the loading order callers
    # have always seen.
    code_tokenizer = LlamaTokenizer.from_pretrained(model_name_or_path)

    # float16 weights; device placement is delegated via device_map="auto".
    weight_options = {"torch_dtype": torch.float16, "device_map": "auto"}
    code_model = LlamaForCausalLM.from_pretrained(model_name_or_path, **weight_options)

    return code_model, code_tokenizer

# Step 3: Inference
if __name__ == "__main__":
    # Bring up the default checkpoint and its tokenizer
    model, tokenizer = load_model()

    # The request is phrased as a code comment for the model to continue
    input_prompt = "# Write a Python function to calculate the factorial of a number"

    # Encode the prompt, then move the encoded batch onto the model's device
    inputs = tokenizer(input_prompt, return_tensors="pt")
    inputs = inputs.to(model.device)

    # Sampling settings for this run, gathered in one place
    sampling_options = dict(
        max_new_tokens=100,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
    )

    # Gradients are not needed for inference
    with torch.no_grad():
        output_tokens = model.generate(**inputs, **sampling_options)

    # Decode the first returned sequence and show it
    first_sequence = output_tokens[0]
    output_text = tokenizer.decode(first_sequence, skip_special_tokens=True)
    print(output_text)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def load_model(model_path):
    """
    Load a causal language model and its tokenizer from ``model_path``.

    A progress message is printed before loading starts and another once
    both objects are available.

    Parameters:
        model_path (str): Location passed to both ``from_pretrained`` calls.

    Returns:
        model, tokenizer: The loaded model (requested in float16) and tokenizer.
    """
    print("Loading model and tokenizer...")

    loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
    loaded_model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
    )

    print("Model loaded successfully.")
    return loaded_model, loaded_tokenizer

def query_model(model, tokenizer, system_prompt, user_prompt, max_new_tokens=256):
    """
    Generate a response using the CodeLlama model.

    The prompt sent to the model is ``system_prompt``, a blank line, then
    ``user_prompt``. Only the tokens produced *after* the prompt are decoded,
    so the returned text never contains the prompt and is not affected by
    blank or repeated lines inside it.

    Parameters:
        model: The loaded CodeLlama model.
        tokenizer: The tokenizer for the model.
        system_prompt (str): Instructions for the model.
        user_prompt (str): Code or query for the model.
        max_new_tokens (int): Upper bound on the number of generated tokens,
            not counting the prompt. Defaults to 256.

    Returns:
        str: The generated text with repeated lines removed (the first
        occurrence of each line is kept, in order). Sampling is enabled, so
        the text can differ between calls.
    """
    prompt = f"{system_prompt}\n\n{user_prompt}"

    # Determine the device and make sure the model lives on it
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # Tokenize the input and move it to the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            # Forward everything the tokenizer produced (normally input_ids
            # and attention_mask) instead of the bare input_ids.
            **inputs,
            # Budget applies to the answer only; a long prompt no longer
            # consumes it the way max_length did.
            max_new_tokens=max_new_tokens,
            # temperature / top_p only take effect when sampling is on.
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Drop the echoed prompt at token level rather than by counting text lines.
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # Filter repeated lines, keeping first occurrences in their original order
    response_lines = list(dict.fromkeys(response.split("\n")))
    return "\n".join(response_lines)

def interactive_debugging(model, tokenizer):
    """
    Run one prompt-and-answer debugging round on the console.

    The user first types a system prompt, then pastes code line by line.
    A line that equals ``END`` (ignoring case and surrounding whitespace)
    finishes the paste and is not part of the code. The collected text is
    sent through ``query_model`` and the answer is printed.

    Parameters:
        model: The loaded CodeLlama model.
        tokenizer: The tokenizer for the model.
    """
    print("=== CodeLlama Debugger ===")
    system_prompt = input("System Prompt: ")

    print("\nPaste the code that needs debugging (end with 'END'): ")
    pasted_lines = []
    entry = input()
    # Keep reading until the terminator line shows up
    while entry.strip().upper() != "END":
        pasted_lines.append(entry)
        entry = input()
    user_prompt = "\n".join(pasted_lines)

    print("\nGenerating response from CodeLlama...")
    answer = query_model(model, tokenizer, system_prompt, user_prompt)

    print("\n=== CodeLlama Response ===")
    print(answer)

# Location of the local CodeLlama checkpoint -- point this at your own copy
model_path = "/path/to/your/codellama"
model, tokenizer = load_model(model_path)

# For a console-driven session, enable the call below instead
# interactive_debugging(model, tokenizer)

# Canned example: a fixed instruction plus a snippet with a syntax error
system_prompt = " ".join(
    [
        "You are an expert Python debugging assistant.",
        "Your task is to analyze the provided code, identify bugs, explain them clearly,",
        "and provide a corrected version of the code.",
    ]
)

# The empty first and last entries give the prompt its leading and trailing newline
user_prompt = "\n".join(
    [
        "",
        "Here is some Python code that throws a syntax error. Identify the issue, explain why it happens, and rewrite the code to fix the bug.",
        "",
        "def buggy_function(x):",
        "    return x ** 2 + x /",
        "buggy_function(5)",
        "",
    ]
)

response = query_model(model, tokenizer, system_prompt, user_prompt)
print("\n=== Predefined Example Response ===", response, sep="\n")


In [None]:
#Code Generation
# Step 1: Install dependencies
# Run this in your terminal
# pip install transformers torch accelerate

from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

# Step 2: Load the model and tokenizer
def load_model(model_name_or_path="meta-llama/CodeLlama-7b-hf"):
    """
    Load the Code Llama tokenizer and model used for code generation.

    Args:
        model_name_or_path (str): Value forwarded as-is to the tokenizer's
            and the model's ``from_pretrained``.

    Returns:
        model, tokenizer: The model (requested in float16 with
        ``device_map="auto"``) followed by the tokenizer.
    """
    source = model_name_or_path

    llama_tokenizer = LlamaTokenizer.from_pretrained(source)
    llama_model = LlamaForCausalLM.from_pretrained(
        source,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    return llama_model, llama_tokenizer

# Step 3: Inference
if __name__ == "__main__":
    # Model and tokenizer for the default checkpoint
    model, tokenizer = load_model()

    # Task description, written as a comment the model is asked to continue
    input_prompt = "# Write a Python function to calculate the factorial of a number"

    # Tokenize, then co-locate the encoded prompt with the model
    target_device = model.device
    inputs = tokenizer(input_prompt, return_tensors="pt").to(target_device)

    # Generation settings: up to 100 new tokens, sampled
    generation_settings = {
        "max_new_tokens": 100,
        "temperature": 0.7,
        "top_p": 0.95,
        "do_sample": True,
    }

    # No gradients are needed while generating
    with torch.no_grad():
        output_tokens = model.generate(**inputs, **generation_settings)

    # Convert the first generated sequence back to text
    output_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    print(output_text)

In [None]:
#Code Debugging

# Step 1: Install dependencies
# Run this in your terminal
# pip install transformers torch accelerate

from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

# Step 2: Load the model and tokenizer
def load_model(model_name_or_path="meta-llama/CodeLlama-7b-hf"):
    """
    Prepare the Code Llama model/tokenizer pair used by the debugging demo.

    Args:
        model_name_or_path (str): Name or path given to both
            ``from_pretrained`` calls.

    Returns:
        model, tokenizer: Loaded model and tokenizer, model first.
    """
    # Tokenizer is created before the model, as in the rest of the notebook
    tok = LlamaTokenizer.from_pretrained(model_name_or_path)

    # Request float16 weights and automatic device mapping
    net = LlamaForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="auto",
        torch_dtype=torch.float16,
    )

    loaded_pair = (net, tok)
    return loaded_pair

# Step 3: Code debugging with Code Llama
if __name__ == "__main__":
    # Load the model and tokenizer
    model, tokenizer = load_model()

    # Example input prompt for code debugging.
    # The mis-indented `if` inside the snippet is the bug the model is asked to fix.
    input_prompt = """
# The following Python code has a bug. Fix the code:

def find_max_value(lst):
    max_val = lst[0]
    for i in range(1, len(lst)):
    if lst[i] > max_val:
        max_val = lst[i]
    return max_val
"""

    # Each stage reports a short message and then aborts the cell/script.
    # `raise SystemExit(1)` is used instead of the interactive `exit` helper so
    # that execution really stops here, and `from e` keeps the original error
    # attached for debugging.

    # Tokenize input
    try:
        inputs = tokenizer(input_prompt, return_tensors="pt").to(model.device)
    except Exception as e:
        print(f"Error during tokenization: {e}")
        raise SystemExit(1) from e

    # Generate code output
    try:
        with torch.no_grad():
            output_tokens = model.generate(
                **inputs,
                max_new_tokens=150,  # Room for a complete corrected function
                # Deterministic decoding. temperature / top_p are deliberately
                # not passed: they only apply when do_sample=True.
                do_sample=False
            )
    except Exception as e:
        print(f"Error during code generation: {e}")
        raise SystemExit(1) from e

    # Decode output tokens
    try:
        output_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
        print("Suggested Fix:\n")
        print(output_text)
    except Exception as e:
        print(f"Error during decoding: {e}")
        raise SystemExit(1) from e

In [None]:
# Code Debugging, Part 2
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

def get_code_completion(messages, max_tokens=512, model_path="/path/to/local/codellama"):
    """
    Produce a completion for a chat-style list of messages.

    The tokenizer and model are loaded from ``model_path`` on every call.
    Each message is rendered as ``[SYSTEM] ...``, ``[USER] ...`` or
    ``[ASSISTANT] ...`` followed by a newline; messages with any other role
    are skipped.

    Parameters:
        messages (list[dict]): Items with a ``"role"`` and a ``"content"`` key.
        max_tokens (int): Maximum number of tokens to generate beyond the prompt.
        model_path (str): Location passed to both ``from_pretrained`` calls.

    Returns:
        str: The newly generated text, stripped of surrounding whitespace.
        Sampling is enabled, so the text can differ between calls.
    """
    # Load the tokenizer and model
    tokenizer = LlamaTokenizer.from_pretrained(model_path)
    model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto")

    # Prepare the input prompt from messages
    role_tags = {"system": "[SYSTEM]", "user": "[USER]", "assistant": "[ASSISTANT]"}
    prompt = "".join(
        f"{role_tags[message['role']]} {message['content']}\n"
        for message in messages
        if message["role"] in role_tags
    )

    # Tokenize the prompt and keep the full encoding (not just input_ids) so
    # the attention mask, when the tokenizer provides one, reaches generate().
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Generate completion
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_tokens,  # same budget as prompt length + max_tokens
            do_sample=True,
            top_p=0.7,
            temperature=0.7,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Extract the assistant's response by dropping the prompt *tokens*.
    # Slicing the decoded string by len(prompt) is unreliable because decoding
    # is not guaranteed to reproduce the prompt character for character.
    completion_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# Instruction that frames the assistant as a Python code reviewer
system_message = dict(
    role="system",
    content="You are an expert programmer that helps to review Python code for bugs.",
)

# The question, followed by the snippet under review (kept indented by four spaces)
user_message = dict(
    role="user",
    content="\n".join(
        [
            "Where is the bug in this code?",
            "",
            "    def fib(n):",
            "        if n <= 0:",
            "            return n",
            "        else:",
            "            return fib(n-1) + fib(n-2)",
        ]
    ),
)

messages = [system_message, user_message]

# Ask the model and show what it produced
chat_completion = get_code_completion(messages)
print(chat_completion)

In [None]:
#Code Documentation

import subprocess
import os

def generate_documentation(code_snippet):
    """
    Generate documentation for a given code snippet using a locally hosted version of CodeLlama.

    The snippet is written to a uniquely named temporary ``.py`` file, the
    ``./codellama`` executable is run with that file as ``--input`` and
    ``--task generate_documentation``, and the temporary file is removed
    afterwards, including when launching the executable fails.

    Parameters:
        code_snippet (str): The code that requires documentation.

    Returns:
        str: The process's standard output when it exits with code 0,
        otherwise ``"Error generating documentation: "`` followed by its
        standard error.

    Raises:
        OSError: Propagated from ``subprocess.run`` if the executable cannot
            be started (for example, it does not exist).
    """
    # Function-scope import keeps this block self-contained within the cell.
    import tempfile

    # Define the path to your local CodeLlama executable or script
    codellama_path = "./codellama"

    # A unique temporary file avoids clobbering a real "temp_code.py" in the
    # working directory. delete=False lets us close the file before the child
    # process opens it; removal is handled explicitly below.
    temp_file = tempfile.NamedTemporaryFile(
        "w", suffix=".py", delete=False, encoding="utf-8"
    )
    temp_path = temp_file.name

    try:
        with temp_file:
            temp_file.write(code_snippet)

        # Run CodeLlama with the code snippet as input
        result = subprocess.run(
            [codellama_path, "--input", temp_path, "--task", "generate_documentation"],
            capture_output=True,
            text=True,
        )
    finally:
        # Clean up the temporary file on success and on failure alike
        os.remove(temp_path)

    # Return the generated documentation
    if result.returncode == 0:
        return result.stdout
    return f"Error generating documentation: {result.stderr}"

if __name__ == "__main__":
    # Example code snippet to document.
    # The outer literal is delimited with ''' because the sample contains its
    # own """docstring"""; using """ for both ends the string early and makes
    # the whole cell a SyntaxError.
    code = '''
    def add_numbers(a, b):
        """Add two numbers together.

        Parameters:
            a (int): The first number.
            b (int): The second number.

        Returns:
            int: The sum of a and b.
        """
        return a + b
    '''

    # Generate and print documentation
    documentation = generate_documentation(code)
    print(documentation)
