In [7]:
import guidance
import os
from guidance import models, gen, any_char, any_char_but, regex, substring, substring_no_empty, with_temperature, system, user, assistant
from typing import Optional
import nbformat

In [4]:
# Local GGUF weights loaded through llama.cpp. The location can be overridden
# with the MIXTRAL_MODEL_PATH environment variable so the notebook is not tied
# to one machine's directory layout; the original path remains the default.
MIXTRAL_MODEL_PATH = os.getenv(
    "MIXTRAL_MODEL_PATH",
    "/Users/nicholasking/code/models/mixtral-8x7b-instruct-v0.1.Q3_K_M.gguf",
)
mistral = models.LlamaCpp(MIXTRAL_MODEL_PATH, n_gpu_layers=-1, n_ctx=4096)

# Azure OpenAI settings come from the environment.
# NOTE(review): when a variable is unset, the "Please set ..." placeholder text
# is passed to the client as if it were a real value, so a missing variable
# will not be reported here - confirm that the client fails in a readable way.
model = os.getenv("AZUREAI_CHAT_MODEL", "Please set the model")
azure_endpoint = os.getenv("AZUREAI_CHAT_ENDPOINT", "Please set the endpoint")
api_key = os.getenv("AZUREAI_CHAT_KEY", "Please set API key")

gpt4 = models.AzureOpenAI(
    model=model,
    azure_endpoint=azure_endpoint,
    api_key=api_key,
)

In [None]:
import os
import re
import guidance

def extract_text_from_ipynb(notebook_file):
    """Flatten a notebook into one Markdown-style string.

    The notebook is read with ``nbformat`` as a version-4 document. Code cells
    are wrapped in a ```` ```python ```` fence, markdown cells are emitted
    verbatim, and every emitted cell is followed by a blank line. Cells of any
    other type are skipped.

    Args:
        notebook_file: Value handed to ``nbformat.read`` as the notebook to load.

    Returns:
        The concatenated text of all code and markdown cells, in notebook order.
    """
    notebook = nbformat.read(notebook_file, as_version=4)
    chunks = []
    for cell in notebook['cells']:
        kind = cell['cell_type']
        if kind == 'code':
            chunks.append("```python\n" + cell['source'] + "\n```\n\n")
        elif kind == 'markdown':
            chunks.append(cell['source'] + "\n\n")
    return "".join(chunks)

def walk_and_match_files(start_path, include_file_regex=None, exclude_file_regex=None):
    """Recursively list files under ``start_path`` whose names pass both filters.

    Both patterns are applied to the bare file name (not the full path) using
    ``re.match``, so they are anchored at the start of the name.

    Args:
        start_path: Directory to walk.
        include_file_regex: If given, only names matching this pattern are kept.
        exclude_file_regex: If given, names matching this pattern are dropped.

    Returns:
        A list of paths (directory joined with file name) for every kept file.
    """
    def _is_wanted(name):
        # The include filter is checked first; the exclude filter is only
        # consulted for names that were included.
        if include_file_regex is not None and not re.match(include_file_regex, name):
            return False
        if exclude_file_regex is not None and re.match(exclude_file_regex, name):
            return False
        return True

    return [
        os.path.join(directory, name)
        for directory, _subdirs, names in os.walk(start_path)
        for name in names
        if _is_wanted(name)
    ]

def read_files(file_paths):
    """Read the contents of the matched files.

    Args:
        file_paths: Iterable of file paths to load.

    Returns:
        A dict mapping each path to its text. Paths ending in ``.ipynb`` are
        converted with ``extract_text_from_ipynb``; every other file is read
        as UTF-8 text.

    Raises:
        OSError: If a non-notebook file cannot be opened.
        UnicodeDecodeError: If a non-notebook file is not valid UTF-8.
    """
    file_contents = {}
    for file_path in file_paths:
        if file_path.endswith('.ipynb'):
            # The notebook helper is given the path, not a handle, so opening
            # the file here would only create a handle that is never read.
            file_contents[file_path] = extract_text_from_ipynb(file_path)
        else:
            with open(file_path, 'r', encoding='utf-8') as f:
                file_contents[file_path] = f.read()
    return file_contents

def format_for_analysis(file_contents):
    """Format the contents for model prompting.

    Each file becomes a ``## File: <path>`` heading followed by its content in
    a fenced code block whose language tag is the file's extension (without
    the dot). Files with no extension get an untagged fence.

    Args:
        file_contents: Mapping of file path to file text.

    Returns:
        One string with a section per file, in the mapping's iteration order;
        an empty string when the mapping is empty.
    """
    sections = []
    for file_path, content in file_contents.items():
        # splitext inspects only the final path component, so a dot in a
        # directory name or an extensionless file no longer leaks part of the
        # path into the fence's language tag.
        language_tag = os.path.splitext(file_path)[1][1:]
        sections.append(f"## File: {file_path}\n```{language_tag}\n{content}\n```\n\n")
    return "".join(sections)

# Orchestrator
def build_code_prompt(repo_paths, include_file_regex=None, exclude_file_regex=None):
    """Build the code-analysis prompt for one or more repositories.

    Files are gathered from every path in ``repo_paths`` with
    ``walk_and_match_files``, loaded with ``read_files``, rendered with
    ``format_for_analysis``, and placed under a fixed "# Code Analysis" header.

    Args:
        repo_paths: Iterable of directories to scan.
        include_file_regex: Passed through to ``walk_and_match_files``.
        exclude_file_regex: Passed through to ``walk_and_match_files``.

    Returns:
        The prompt text: header, instruction line, blank line, formatted files.
    """
    # Flatten the per-repository matches into one list, preserving repo order.
    matched_paths = [
        path
        for repo_root in repo_paths
        for path in walk_and_match_files(repo_root, include_file_regex, exclude_file_regex)
    ]
    rendered_files = format_for_analysis(read_files(matched_paths))
    return f"# Code Analysis\nPlease analyze the code provided below.\n\n{rendered_files}"

@guidance
def analyze_code(lm, code_prompt: str, system_prompt: Optional[str] = None, user_message: Optional[str] = None, **kwargs):
    """Append a code-analysis prompt to ``lm`` followed by a ``gen`` call.

    When ``lm`` is a ``models.Chat`` instance the prompt is laid out in
    system / user / assistant role blocks, and a built-in system prompt is
    used if none is supplied. For any other model everything is appended as
    plain text, and the instructions section appears only when
    ``system_prompt`` is given.

    Args:
        lm: Model object the prompt is appended to.
        code_prompt: The code listing to analyze.
        system_prompt: Optional instructions for the model.
        user_message: Optional extra text appended under a
            "# User Message" heading.
        **kwargs: Forwarded to ``gen``. ``temperature`` falls back to 0.8 and
            ``max_tokens`` to 1000 when the caller does not provide them.

    Returns:
        ``lm`` after the prompt and the ``gen`` call have been appended.
    """
    # Fill in generation defaults without overriding caller-supplied values.
    for option, fallback in (('temperature', 0.8), ('max_tokens', 1000)):
        kwargs.setdefault(option, fallback)

    if not isinstance(lm, models.Chat):
        # Plain-completion path: one flat prompt, optional sections in order.
        lm += code_prompt
        if system_prompt is not None:
            lm += f"\n# Instructions\n{system_prompt}"
        if user_message is not None:
            lm += f"\n# User Message\n{user_message}"
        lm += gen(**kwargs)
        return lm

    # Chat path: only here does a missing system prompt get a default.
    instructions = system_prompt
    if instructions is None:
        instructions = "Provide insights into code quality, potential issues, and suggestions for improvement. Answer any questions the user has."
    with system():
        lm += instructions
    with user():
        lm += code_prompt
        if user_message is not None:
            lm += f"\n# User Message\n{user_message}"
    with assistant():
        lm += gen(**kwargs)
    return lm

# Example usage
repo_paths = ['/path/to/repo1', '/path/to/repo2']
# The file-name filters are applied with re.match, which anchors at the start
# of the name, so the pattern has to consume the whole name up to the extension.
# A bare r'\.(py)$' would never match a name such as "module.py".
file_regex = r'.*\.(py)$'