In [None]:
# Initial Model
# Step 1: Install dependencies
# Run this in your terminal
# pip install transformers torch accelerate

from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

# Step 2: Load the model and tokenizer
def load_model(model_name_or_path="meta-llama/CodeLlama-7b-hf"):
    """
    Fetch a Code Llama checkpoint and its tokenizer via Hugging Face Transformers.

    The tokenizer is loaded first, then the model; the model is requested in
    float16 with device_map="auto".

    Args:
        model_name_or_path (str): Local path or Hub name of the checkpoint.

    Returns:
        tuple: (model, tokenizer), in that order.
    """
    checkpoint = model_name_or_path
    llama_tokenizer = LlamaTokenizer.from_pretrained(checkpoint)

    # Keep the model-loading options together so they are easy to scan.
    model_options = {"torch_dtype": torch.float16, "device_map": "auto"}
    llama_model = LlamaForCausalLM.from_pretrained(checkpoint, **model_options)

    return llama_model, llama_tokenizer

# Step 3: Inference
if __name__ == "__main__":
    # Fetch the default checkpoint together with its tokenizer.
    model, tokenizer = load_model()

    # A comment-style instruction that the model is asked to continue.
    input_prompt = "# Write a Python function to calculate the factorial of a number"

    # Encode the prompt, then move the encoded tensors to the model's device.
    inputs = tokenizer(input_prompt, return_tensors="pt")
    inputs = inputs.to(model.device)

    # Sample at most 100 new tokens; no gradients are needed for inference.
    with torch.no_grad():
        output_tokens = model.generate(
            **inputs,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
            max_new_tokens=100,
        )

    # Convert the first returned sequence back to text (special tokens dropped) and show it.
    output_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    print(output_text)

In [None]:
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

def debug_code_with_model(model_path="", max_tokens=512):
    """
    Debug pasted code using the CodeLlama model.

    Reads a system prompt and a block of code from standard input (the code is
    terminated by a line that is just END, case-insensitive), asks the model to
    explain and fix the code, and prints the model's answer. Nothing is returned.

    Args:
        model_path (str): Path to the pre-trained CodeLlama model. The default
            is an empty string, which cannot identify a checkpoint, so a real
            path (or model name) must be supplied.
        max_tokens (int): Maximum number of tokens to generate.

    Raises:
        ValueError: If model_path is empty.
    """
    # Fail fast with a clear message instead of letting from_pretrained("")
    # fail later with a less obvious error.
    if not model_path:
        raise ValueError("model_path is empty; pass the path or name of a CodeLlama checkpoint.")

    # Load the tokenizer and model
    print("Loading the model and tokenizer...")
    tokenizer = LlamaTokenizer.from_pretrained(model_path)
    model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.float16)
    print("Model and tokenizer loaded successfully.")

    # Prompt for system instructions
    system_prompt = input("Enter the system prompt (e.g., 'You are a debugging assistant'): ")

    # Prompt for user input (code to debug); collect lines until the END sentinel
    print("\nPaste the code you want to debug (end with 'END'):")
    user_code_lines = []
    while True:
        line = input()
        if line.strip().upper() == "END":
            break
        user_code_lines.append(line)
    user_code = "\n".join(user_code_lines)

    # Prepare the complete input prompt
    prompt = f"{system_prompt}\n\nHere is the code that needs debugging:\n\n{user_code}\n\n" \
             "Explain the issue with the code and provide a corrected version."

    # Tokenize the input and move to device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[-1]

    # Generate the output
    print("\nGenerating response...")
    with torch.no_grad():
        output_tokens = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.7,
            top_p=0.95,
            do_sample=True
        )

    # The returned sequence starts with the prompt tokens. Drop them by token
    # count rather than by len(prompt) characters: the decoded text is not
    # guaranteed to reproduce the prompt character-for-character, so character
    # slicing can cut into the answer or leave prompt residue behind.
    generated_tokens = output_tokens[0][prompt_token_count:]
    output_cleaned = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    # Display the model's response
    print("\n=== Model Response ===")
    print(output_cleaned)

# Run the debugging function
# debug_code_with_model defaults model_path to "", which cannot be loaded, so the
# checkpoint is passed explicitly. This is the same Code Llama checkpoint name used
# by load_model in this notebook; replace it with a local path if the weights are
# stored on disk.
debug_code_with_model(model_path="meta-llama/CodeLlama-7b-hf")

In [None]:
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

def debug_code_with_model(model_path="", max_tokens=512):
    """
    Debug pasted code using the CodeLlama model.

    Reads a system prompt and a block of code from standard input (the code is
    terminated by a line that is just END, case-insensitive), asks the model to
    explain the issue and rewrite the code, and prints the model's answer.
    Nothing is returned.

    Args:
        model_path (str): Path to the pre-trained CodeLlama model. The default
            is an empty string, which cannot identify a checkpoint, so a real
            path (or model name) must be supplied.
        max_tokens (int): Maximum number of tokens to generate.

    Raises:
        ValueError: If model_path is empty.
    """
    # Reject the unusable empty default up front with a clear message.
    if not model_path:
        raise ValueError("model_path is empty; pass the path or name of a CodeLlama checkpoint.")

    # Load the tokenizer and model
    print("Loading the model and tokenizer...")
    tokenizer = LlamaTokenizer.from_pretrained(model_path)
    model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.float16)
    print("Model and tokenizer loaded successfully.")

    # Prompt for system instructions
    system_prompt = input("Enter the system prompt (e.g., 'You are a debugging assistant'): ")

    # Prompt for user input (code to debug); collect lines until the END sentinel
    print("\nPaste the code you want to debug (end with 'END'):")
    user_code_lines = []
    while True:
        line = input()
        if line.strip().upper() == "END":
            break
        user_code_lines.append(line)
    user_code = "\n".join(user_code_lines)

    # Prepare the complete input prompt
    prompt = f"{system_prompt}\n\nHere is the code that needs debugging:\n\n{user_code}\n\n" \
             "Explain the issue, and rewrite the code to fix the bug."

    # Tokenize the input and move to device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[-1]

    # Generate the output
    print("\nGenerating response...")
    with torch.no_grad():
        output_tokens = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.7,
            top_p=0.95,
            do_sample=True
        )

    # Decode only the tokens that follow the prompt. Cutting the decoded text
    # at len(prompt) characters is unreliable because decoding is not
    # guaranteed to reproduce the prompt character-for-character.
    generated_tokens = output_tokens[0][prompt_token_count:]
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # Display the model's response
    print("\n=== Model Response ===")
    print(output_text)

# Run the debugging function
# debug_code_with_model defaults model_path to "", which cannot be loaded, so the
# checkpoint is passed explicitly. This is the same Code Llama checkpoint name used
# by load_model in this notebook; replace it with a local path if the weights are
# stored on disk.
debug_code_with_model(model_path="meta-llama/CodeLlama-7b-hf")


In [None]:
# Working Debugging Assistant

from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

def get_code_completion(messages, max_tokens=512, model_path="/commons/copra_share/VIPER_NLP/hf_model_hub/code_llama"):
    """
    Generate code completions using the CodeLlama model.

    The messages are concatenated into a single plain-text prompt (each message
    followed by a blank line), a continuation is sampled from the model, and
    only that continuation is returned. The tokenizer and model are loaded from
    model_path on every call.

    Args:
        messages (list): A list of strings containing system and user prompts.
        max_tokens (int): Maximum number of tokens to generate.
        model_path (str): Path to the pre-trained CodeLlama model.

    Returns:
        str: Generated code completion from the model, without the prompt.
    """
    # Load the tokenizer and model
    print("Loading the model and tokenizer...")
    tokenizer = LlamaTokenizer.from_pretrained(model_path)
    model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.float16)
    print("Model and tokenizer loaded successfully.")

    # Prepare the input prompt from messages: every message is followed by a blank line
    prompt = "".join(message + "\n\n" for message in messages)

    # Tokenize the input and move to device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[-1]

    # Generate output
    print("Generating response...")
    with torch.no_grad():
        output_tokens = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.7,
            top_p=0.95,
            do_sample=True
        )

    # The returned sequence starts with the prompt tokens. Strip them by token
    # count instead of slicing the decoded text at len(prompt) characters:
    # decoding is not guaranteed to reproduce the prompt exactly, so character
    # slicing can truncate the completion or leave prompt text in it.
    generated_tokens = output_tokens[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)

# Example usage: one role-setting instruction followed by two task descriptions.
messages = [
    "You are an expert Python debugging assistant.",
    "Write a Python function that takes in two numbers (a base and a height) and returns the area of a triangle.",
    "It should take in two numbers, calculate the area using the formula 'area = 0.5 * base * height', and return the result.",
]

# Cap the completion at 100 new tokens, then print a header line followed by the reply.
response = get_code_completion(messages=messages, max_tokens=100)
print("\n=== Model Response ===", response, sep="\n")

In [None]:
#Code Generation
# Step 1: Install dependencies
# Run this in your terminal
# pip install transformers torch accelerate

from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

# Step 2: Load the model and tokenizer
def load_model(model_name_or_path="meta-llama/CodeLlama-7b-hf"):
    """
    Load a Code Llama model and the matching tokenizer from Hugging Face.

    The tokenizer is created before the model. The model is requested with
    float16 weights and device_map="auto".

    Args:
        model_name_or_path (str): Path or name of the model.

    Returns:
        tuple: The loaded model followed by the loaded tokenizer.
    """
    tokenizer = LlamaTokenizer.from_pretrained(model_name_or_path)
    return (
        LlamaForCausalLM.from_pretrained(
            model_name_or_path,
            device_map="auto",
            torch_dtype=torch.float16,
        ),
        tokenizer,
    )

# Step 3: Inference
if __name__ == "__main__":
    # Obtain the model/tokenizer pair for the default checkpoint.
    model, tokenizer = load_model()

    # The prompt is written as a code comment for the model to continue.
    input_prompt = "# Write a Python function to calculate the factorial of a number"

    # Tokenize, then place the resulting tensors on the device the model lives on.
    inputs = tokenizer(input_prompt, return_tensors="pt")
    inputs = inputs.to(model.device)

    # Inference only, so gradient tracking is switched off while sampling.
    with torch.no_grad():
        output_tokens = model.generate(
            **inputs,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            max_new_tokens=100,
        )

    # Decode the first returned sequence without special tokens and print it.
    output_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    print(output_text)

In [None]:
#Code Documentation

import subprocess
import os

def generate_documentation(code_snippet):
    """
    Generate documentation for a given code snippet using a locally hosted version of CodeLlama.

    The snippet is written to a uniquely named temporary ``.py`` file, the
    ``./codellama`` executable is run on that file with
    ``--task generate_documentation``, and the temporary file is removed
    afterwards, including when the subprocess call raises.

    Parameters:
        code_snippet (str): The code that requires documentation.

    Returns:
        str: The process's standard output when it exits with status 0;
        otherwise ``"Error generating documentation: "`` followed by its
        standard error.

    Raises:
        OSError: Propagated from ``subprocess.run`` (for example
            ``FileNotFoundError`` when ``./codellama`` does not exist).
    """
    import tempfile

    # Define the path to your local CodeLlama executable or script
    codellama_path = "./codellama"

    # Use a unique temporary file instead of a fixed "temp_code.py" in the
    # working directory, so an existing file of that name is never overwritten
    # or deleted and concurrent calls cannot collide. delete=False keeps the
    # file on disk after it is closed so the subprocess can open it.
    temp_file = tempfile.NamedTemporaryFile(
        "w", suffix=".py", encoding="utf-8", delete=False
    )
    try:
        with temp_file:
            temp_file.write(code_snippet)

        # Run CodeLlama with the code snippet as input
        result = subprocess.run(
            [codellama_path, "--input", temp_file.name, "--task", "generate_documentation"],
            capture_output=True,
            text=True
        )
    finally:
        # Clean up the temporary file even if writing or the subprocess failed
        os.remove(temp_file.name)

    # Return the generated documentation
    if result.returncode == 0:
        return result.stdout
    return f"Error generating documentation: {result.stderr}"

if __name__ == "__main__":
    # Example code snippet to document.
    # The outer literal is delimited with ''' because the snippet itself
    # contains a """ docstring; using """ for both ends the outer string at the
    # inner docstring and makes the cell a syntax error.
    code = '''
    def add_numbers(a, b):
        """Add two numbers together.

        Parameters:
            a (int): The first number.
            b (int): The second number.

        Returns:
            int: The sum of a and b.
        """
        return a + b
    '''

    # Generate and print documentation
    documentation = generate_documentation(code)
    print(documentation)


In [None]:
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

def get_code_completion(messages, max_tokens=512, model_path="/commons/copra_share/VIPER_NLP/hf_model_hub/code_llama"):
    """
    Generate code completions using the CodeLlama model.

    The messages are joined with single newlines and followed by a blank line
    to form the prompt, a continuation is sampled from the model, and only
    that continuation is returned. The tokenizer and model are loaded from
    model_path on every call.

    Args:
        messages (list): A list of strings containing system and user prompts.
        max_tokens (int): Maximum number of tokens to generate.
        model_path (str): Path to the pre-trained CodeLlama model. The default
            matches the one used by the other get_code_completion definition
            in this notebook.

    Returns:
        str: Generated code completion from the model, without the prompt.
    """
    # Load the tokenizer and model
    print("Loading the model and tokenizer...")
    tokenizer = LlamaTokenizer.from_pretrained(model_path)
    model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.float16)
    print("Model and tokenizer loaded successfully.")

    # Prepare the input prompt from messages
    prompt = "\n".join(messages) + "\n\n"

    # Tokenize the input and move to device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[-1]

    # Generate output
    print("Generating response...")
    with torch.no_grad():
        output_tokens = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.6,  # Lower temperature for more focused output
            top_p=0.9,       # Reduce randomness slightly
            do_sample=True
        )

    # Return the generated part only. The prompt is removed by token count,
    # not by slicing the decoded text at len(prompt) characters, because
    # decoding is not guaranteed to reproduce the prompt exactly.
    generated_tokens = output_tokens[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)