In [1]:
import os
import re
import sys
import json
import requests
from dotenv import load_dotenv
from IPython.display import Markdown, display
import gradio as gr
from huggingface_hub import login, InferenceClient
from transformers import AutoTokenizer

KeyboardInterrupt: 

In [None]:
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Hugging Face access token; the placeholder default applies when HF_TOKEN is unset.
HF_TOKEN = os.environ.get('HF_TOKEN', 'your-key-if-not-using-env')

# Authenticate with the Hugging Face Hub and also store the token as a git credential.
login(token=HF_TOKEN, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
# Display label -> Hugging Face repository id (also used to load the tokenizer).
MODEL_NAMES = {
    "Mixtral": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Llama3": "meta-llama/Meta-Llama-3-8B-Instruct",
    "DeepSeek": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
}

# Display label -> hosted inference URL. Only Mixtral and Llama3 have an
# endpoint; "DeepSeek" is intentionally absent from this mapping.
MODEL_ENDPOINTS = {
    label: "https://api-inference.huggingface.co/models/" + MODEL_NAMES[label]
    for label in ("Mixtral", "Llama3")
}

In [None]:
def system_prompt_for_comment():
    """Return the system prompt that casts the model as a Python documentation expert.

    Returns:
        str: A multi-line prompt. It starts with a newline, every following
        line carries a four-space indent, and it ends with a bare four-space
        indent after the last newline.
    """
    indent = " " * 4
    guidance = (
        "You are a Python documentation expert. When writing documentation:",
        "- Follow PEP 257 and Google docstring style guidelines",
        "- Write clear, concise explanations",
        "- Include practical examples",
        "- Highlight edge cases and limitations",
        "- Use type hints in docstrings",
        "- Add inline comments only for complex logic",
        "- Never skip documenting parameters or return values",
        "- Validate that all documentation is accurate and complete",
    )
    # Leading newline, one indented line per entry, then a trailing indent.
    body = "".join(f"{indent}{line}\n" for line in guidance)
    return f"\n{body}{indent}"

def system_prompt_for_unit_test():
    """Return the system prompt that casts the model as a Python testing engineer.

    Returns:
        str: A multi-line prompt. It starts with a newline, every following
        line carries a four-space indent, and it ends with a bare four-space
        indent after the last newline.
    """
    indent = " " * 4
    principles = (
        "You are an expert Python testing engineer who specializes in creating comprehensive unit tests. Follow these principles:",
        "- Use pytest as the testing framework",
        "- Follow the Arrange-Act-Assert pattern",
        "- Test both valid and invalid inputs",
        "- Include edge cases and boundary conditions",
        "- Write descriptive test names that explain the scenario being tested",
        "- Create independent tests that don't rely on each other",
        "- Use appropriate fixtures and parametrize when needed",
        "- Add clear comments explaining complex test logic",
        "- Cover error cases and exceptions",
        "- Achieve high code coverage while maintaining meaningful tests",
    )
    # Leading newline, one indented line per entry, then a trailing indent.
    body = "".join(f"{indent}{line}\n" for line in principles)
    return f"\n{body}{indent}"


In [None]:
def user_prompt_for_comment(code):
    """Build the user prompt that asks for docstrings and inline comments.

    Args:
        code: The Python source to document. It is interpolated verbatim
            (without indentation) after the instruction lines.

    Returns:
        str: The instruction text, a line holding only the indent, ``code``,
        and a trailing newline plus four-space indent.
    """
    indent = " " * 4
    # An empty entry becomes a line that holds only the four-space indent.
    instructions = (
        "Please document this Python code with:",
        "",
        "1. A docstring containing:",
        "- A clear description of purpose and functionality",
        "- All parameters with types and descriptions",
        "- Return values with types",
        "- Exceptions that may be raised",
        "- Any important notes or limitations",
        "",
        "2. Strategic inline comments for:",
        "- Complex algorithms or business logic",
        "- Non-obvious implementation choices",
        "- Performance considerations",
        "- Edge cases",
        "",
        "Here's the code to document:",
    )
    header = "".join(f"{indent}{line}\n" for line in instructions)
    return f"\n{header}{indent}\n{code}\n{indent}"

def user_prompt_for_unit_test(code):
    """Build the user prompt that asks for unit tests covering ``code``.

    Args:
        code: The Python source to test. It is interpolated verbatim
            (without indentation) after the instruction lines.

    Returns:
        str: The instruction text, a line holding only the indent, ``code``,
        and a trailing newline plus four-space indent.
    """
    indent = " " * 4
    # An empty entry becomes a line that holds only the four-space indent.
    instructions = (
        "Please generate unit tests for the following Python code. Include:",
        "",
        "1. Test cases for:",
        "- Normal/expected inputs",
        "- Edge cases and boundary values",
        "- Invalid inputs and error conditions",
        "- Different combinations of parameters",
        "- All public methods and functions",
        "",
        "2. For each test:",
        "- Clear test function names describing the scenario",
        "- Setup code (fixtures if needed)",
        "- Test data preparation",
        "- Expected outcomes",
        "- Assertions checking results",
        "- Comments explaining complex test logic",
        "",
        "3. Include any necessary:",
        "- Imports",
        "- Fixtures",
        "- Mock objects",
        "- Helper functions",
        "- Test data generators",
        "",
        "Here's the code to test:",
    )
    header = "".join(f"{indent}{line}\n" for line in instructions)
    return f"\n{header}{indent}\n{code}\n{indent}"


In [None]:
def messages_for_comment(python, model_name):
    """Assemble the chat messages for a documentation request.

    Args:
        python: Source code to document; passed to ``user_prompt_for_comment``.
        model_name: Currently unused when building the messages.

    Returns:
        list[dict]: Two messages, system first and user second, each with
        ``"role"`` and ``"content"`` keys.
    """
    system_message = {"role": "system", "content": system_prompt_for_comment()}
    user_message = {"role": "user", "content": user_prompt_for_comment(python)}
    return [system_message, user_message]

def messages_for_unit_test(python, model_name):
    """Assemble the chat messages for a unit-test generation request.

    Args:
        python: Source code to test; passed to ``user_prompt_for_unit_test``.
        model_name: Currently unused when building the messages.

    Returns:
        list[dict]: Two messages, system first and user second, each with
        ``"role"`` and ``"content"`` keys.
    """
    system_message = {"role": "system", "content": system_prompt_for_unit_test()}
    user_message = {"role": "user", "content": user_prompt_for_unit_test(python)}
    return [system_message, user_message]


## Uncomment the code below if you want to run a model such as DeepSeek locally on your own device

In [None]:
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# # Optional cache to avoid reloading model every time
# _local_models = {}

# def run_local_deepseek(prompt: str) -> str:
#     model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

#     if "DeepSeek" not in _local_models:
#         tokenizer = AutoTokenizer.from_pretrained(model_id)
#         model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).eval()
#         device = "cuda" if torch.cuda.is_available() else "cpu"
#         model.to(device)
#         _local_models["DeepSeek"] = (tokenizer, model, device)

#     tokenizer, model, device = _local_models["DeepSeek"]

#     inputs = tokenizer(prompt, return_tensors="pt").to(device)
#     outputs = model.generate(**inputs, max_new_tokens=300)
#     return tokenizer.decode(outputs[0], skip_special_tokens=True)


# import re

# def stream_with_hf_api(model_name, messages):
#     """Generate a response using Hugging Face's API or local inference for DeepSeek"""
#     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAMES[model_name])
#     text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

#     if model_name == "DeepSeek":
#         # Run locally
#         output = run_local_deepseek(text)
#         clean_output = re.sub(r'```(?:python)?\n?|\n?```', '', output.strip())
#         yield clean_output
#     else:
#         # Use Hugging Face hosted model
#         client = InferenceClient(MODEL_ENDPOINTS[model_name], token=HF_TOKEN)
#         stream = client.text_generation(
#             text,
#             stream=True,
#             details=True,
#             max_new_tokens=3000,
#             temperature=0.7,
#             top_p=0.95
#         )
#         result = ""
#         for r in stream:
#             result += r.token.text
#             yield re.sub(r'```(?:python)?\n?|\n?```', '', result.strip())

# def comment_code(python, model):
#     try:
#         result = stream_with_hf_api(model, messages_for_comment(python, model))
#         for stream_so_far in result:
#             yield stream_so_far
#     except Exception as e:
#         yield f"Error generating comments: {str(e)}"


In [None]:
def stream_with_hf_api(model_name, messages):
    """Stream a cleaned-up completion from a Hugging Face hosted model.

    The chat ``messages`` are rendered into one prompt with the model's own
    chat template and sent to the hosted text-generation endpoint. After every
    received token the accumulated response is yielded with Markdown code
    fences removed, so a consumer can display progressively longer output.

    Args:
        model_name: Key into both ``MODEL_NAMES`` (tokenizer repo id) and
            ``MODEL_ENDPOINTS`` (hosted inference URL).
        messages: Chat messages as a list of ``{"role", "content"}`` dicts.

    Yields:
        str: The whole response generated so far, stripped of surrounding
        whitespace and of code-fence markers.

    Raises:
        KeyError: If ``model_name`` is missing from ``MODEL_ENDPOINTS`` or
            ``MODEL_NAMES``. As this is a generator, the error surfaces when
            iteration starts, not when the function is called.
    """
    # Resolve the endpoint first: a model without a hosted endpoint then fails
    # before any tokenizer is fetched.
    endpoint = MODEL_ENDPOINTS[model_name]

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAMES[model_name])
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    client = InferenceClient(endpoint, token=HF_TOKEN)
    stream = client.text_generation(
        text,
        stream=True,
        details=True,
        max_new_tokens=3000,
        temperature=0.7,
        top_p=0.95
    )

    # Matches an opening fence (optionally tagged "python") or a closing fence.
    fence_pattern = re.compile(r'```(?:python)?\n?|\n?```')

    result = ""
    for chunk in stream:
        result += chunk.token.text
        # Yield (not return) so the loop keeps consuming tokens; returning here
        # would end the function after the very first token.
        yield fence_pattern.sub('', result.strip())


In [None]:
def comment_code(python, model):
    """Yield documentation output for ``python`` as produced by ``model``.

    Args:
        python: Source code to document.
        model: Model label passed to ``messages_for_comment`` and
            ``stream_with_hf_api``.

    Yields:
        Each item produced by iterating the result of ``stream_with_hf_api``.
        If anything raises, a single ``"Error generating comments: ..."``
        string is yielded instead of propagating the exception.
    """
    try:
        chat_messages = messages_for_comment(python, model)
        yield from stream_with_hf_api(model, chat_messages)
    except Exception as error:
        yield f"Error generating comments: {str(error)}"


In [None]:
def get_unit_test(python, model):
    """Generate unit tests for the provided Python code"""
    try:
        result = stream_with_hf_api(model, messages_for_unit_test(python, model))
        for stream_so_far in result:
            yield stream_so_far
    except Exception as e:
        yield f"Error generating unit tests: {str(e)}"

In [None]:
# Example program used as the initial value of the "Python Code" textbox in
# the UI. It is held as plain text only; this cell does not execute it.
sample_code = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [None]:
# Custom CSS for the app. The class names defined here are attached to
# individual components below through their `elem_classes` argument.
css = """
.container {max-width: 1200px; margin: auto;}
.model-selection {background-color: #f3f4f6; padding: 10px; border-radius: 8px;}
.code-box {font-family: monospace; border: 1px solid #e5e7eb;}
.button-primary {background-color: #3b82f6 !important;}
.button-secondary {background-color: #10b981 !important;}
"""

# Create the Gradio interface
# Layout, top to bottom: title and description, a row with the input and
# output textboxes side by side, the model selector, then the action buttons.
with gr.Blocks(css=css, title="Python Code Documentation and Testing Generator") as ui:
    gr.Markdown("# Python Code Documentation and Testing Generator", elem_classes=["container"])
    gr.Markdown("This tool generates comprehensive docstrings, comments, and unit tests for your Python code using Hugging Face models.")
    
    with gr.Row():
        # Left column: the code to process, pre-filled with `sample_code`.
        with gr.Column():
            python_input = gr.Textbox(
                label="Python Code", 
                value=sample_code, 
                lines=15,
                elem_classes=["code-box"]
            )
        
        # Right column: shared output area written to by both buttons.
        with gr.Column():
            result_output = gr.Textbox(
                label="Generated Output", 
                lines=15,
                elem_classes=["code-box"]
            )
    
    with gr.Row(elem_classes=["model-selection"]):
        # NOTE(review): every key of MODEL_NAMES is offered here, including
        # "DeepSeek", which has no entry in MODEL_ENDPOINTS.
        model_dropdown = gr.Dropdown(
            choices=list(MODEL_NAMES.keys()),
            value="Mixtral",
            label="Select Model"
        )
    
    with gr.Row():
        comment_button = gr.Button("Generate Documentation", elem_classes=["button-primary"])
        unit_test_button = gr.Button("Generate Unit Tests", elem_classes=["button-secondary"])
    
    # Both handlers are generator functions that receive (code, model label)
    # and write whatever they yield into the same output textbox.
    comment_button.click(
        comment_code, 
        inputs=[python_input, model_dropdown], 
        outputs=[result_output]
    )
    
    unit_test_button.click(
        get_unit_test, 
        inputs=[python_input, model_dropdown], 
        outputs=[result_output]
    )


In [None]:
# Start the app when this cell/script runs as the main module.
# share=True additionally asks Gradio to create a public share link.
if __name__ == "__main__":
    ui.launch(share=True)

* Running on local URL:  http://127.0.0.1:7862

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.
