In [36]:
# install the latest version of the openai library
!pip install openai -q --upgrade

# imports
from openai import OpenAI
import json
import time
import requests


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [37]:
# Load the OpenAI key from a file.
# Newlines are removed as before; the extra strip() also drops a trailing '\r'
# (file saved with CRLF line endings) or stray spaces, which would otherwise
# end up inside the credential sent to the API.
with open('../secrets/openAI_key', 'r') as file:
    openAI_key = file.read().replace('\n', '').strip()

# Load the GitHub token the same way
with open('../secrets/github_token', 'r') as file:
    github_token = file.read().replace('\n', '').strip()

In [38]:
# Initialize the OpenAI client with the API key read from the secrets file
client = OpenAI(api_key=openAI_key)

In [39]:
"""
ASK THE CODE API functions
"""

def get_repo_structure(url, branch=None, relativePaths=None, timeout=30):
    """Request a repository's file structure from the AskTheCode API.

    Args:
        url: Repository URL (required by the endpoint).
        branch: Optional branch; sent as null when not given.
        relativePaths: Optional relative paths; sent as null when not given.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body on HTTP 200. In every other case (non-200
        status, network failure, body that is not JSON) a string starting with
        "Failed to get the repository structure" is returned instead of raising.
    """
    # URL of Askthecode API endpoint
    get_repo_structure_url = "https://gabriel.askthecode.ai/api/repository/structure"

    # parameters: url(required), branch(optional), relativePaths(optional)
    params = {
        'url': url,
        'branch': branch,
        'relativePaths': relativePaths
    }

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {github_token}",  # token loaded at notebook start
        "Content-Type": "application/json"
    }

    # Without a timeout a stalled connection would block the caller forever
    try:
        response = requests.post(
            get_repo_structure_url, json=params, headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        return f"Failed to get the repository structure: {exc}"

    if response.status_code != 200:
        return f"Failed to get the repository structure: {response.status_code}, Reason: {response.reason}"

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body cannot be decoded as JSON
        return "Failed to get the repository structure: response was not valid JSON"


# get repo content
def get_repo_content(url, filePaths, branch=None, relativePath=None, timeout=30):
    """Request file contents of a repository from the AskTheCode API.

    Args:
        url: Repository URL (required by the endpoint).
        filePaths: File paths to fetch (required by the endpoint).
        branch: Optional branch; sent as null when not given.
        relativePath: Optional relative path; sent as null when not given.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body on HTTP 200. In every other case (non-200
        status, network failure, body that is not JSON) a string starting with
        "Failed to get the repository content" is returned instead of raising.
    """
    # URL of Askthecode API endpoint
    get_repo_content_url = "https://gabriel.askthecode.ai/api/repository/content"

    # parameters: url(required), filePaths(required), branch(optional), relativePath(optional)
    params = {
        'url': url,
        'filePaths': filePaths,
        'branch': branch,
        'relativePath': relativePath
    }

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {github_token}",
        "Content-Type": "application/json"
    }

    # Without a timeout a stalled connection would block the caller forever
    try:
        response = requests.post(
            get_repo_content_url, json=params, headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        return f"Failed to get the repository content: {exc}"

    if response.status_code != 200:
        return f"Failed to get the repository content: {response.status_code}"

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body cannot be decoded as JSON
        return "Failed to get the repository content: response was not valid JSON"

# get repo branches
def get_repo_branches(url, timeout=30):
    """Request the branch list of a repository from the AskTheCode API.

    Args:
        url: Repository URL (required by the endpoint).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body on HTTP 200. In every other case (non-200
        status, network failure, body that is not JSON) a string starting with
        "Failed to get the repository branches" is returned instead of raising.
    """
    # URL of Askthecode API endpoint
    get_repo_branches_url = "https://gabriel.askthecode.ai/api/repository/branch/list"

    # parameters: url(required)
    params = {
        'url': url
    }

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {github_token}",
        "Content-Type": "application/json"
    }

    # Without a timeout a stalled connection would block the caller forever
    try:
        response = requests.post(
            get_repo_branches_url, json=params, headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        return f"Failed to get the repository branches: {exc}"

    if response.status_code != 200:
        return f"Failed to get the repository branches: {response.status_code}"

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body cannot be decoded as JSON
        return "Failed to get the repository branches: response was not valid JSON"
    
# Get commit history
def get_commit_history(url, branch=None, filePath=None, timeout=30):
    """Request commit history from the AskTheCode API.

    Args:
        url: Repository URL (required by the endpoint).
        branch: Optional branch; sent as null when not given.
        filePath: Optional file path; sent as null when not given.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body on HTTP 200. In every other case (non-200
        status, network failure, body that is not JSON) a string starting with
        "Failed to get the commit history" is returned instead of raising.
    """
    # URL of Askthecode API endpoint
    get_commit_history_url = "https://gabriel.askthecode.ai/api/repository/commit/history"

    # parameters: url(required), branch(optional), filePath(optional)
    params = {
        'url': url,
        'branch': branch,
        'filePath': filePath
    }

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {github_token}",
        "Content-Type": "application/json"
    }

    # Without a timeout a stalled connection would block the caller forever
    try:
        response = requests.post(
            get_commit_history_url, json=params, headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        return f"Failed to get the commit history: {exc}"

    if response.status_code != 200:
        return f"Failed to get the commit history: {response.status_code}"

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body cannot be decoded as JSON
        return "Failed to get the commit history: response was not valid JSON"
    
# search repo code
def search_repo_code(url, searchKeywords, branch=None, relativePath=None, searchHitLinesCount=None, skipMatchesCount=None, timeout=30):
    """Run a keyword search over a repository through the AskTheCode API.

    Args:
        url: Repository URL (required by the endpoint).
        searchKeywords: Keywords to search for (required by the endpoint).
        branch: Optional branch; sent as null when not given.
        relativePath: Optional relative path; sent as null when not given.
        searchHitLinesCount: Optional value; sent as null when not given.
        skipMatchesCount: Optional value; sent as null when not given.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body on HTTP 200. In every other case (non-200
        status, network failure, body that is not JSON) a string starting with
        "Failed to search the repository code" is returned instead of raising.
    """
    # URL of Askthecode API endpoint
    search_repo_code_url = "https://gabriel.askthecode.ai/api/search/repository/code"

    # parameters: url(required), searchKeywords(required), branch(optional), relativePath(optional), searchHitLinesCount(optional), skipMatchesCount(optional)
    params = {
        'url': url,
        'searchKeywords': searchKeywords,
        'branch': branch,
        'relativePath': relativePath,
        'searchHitLinesCount': searchHitLinesCount,
        'skipMatchesCount': skipMatchesCount
    }

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {github_token}",
        "Content-Type": "application/json"
    }

    # Without a timeout a stalled connection would block the caller forever
    try:
        response = requests.post(
            search_repo_code_url, json=params, headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        return f"Failed to search the repository code: {exc}"

    if response.status_code != 200:
        return f"Failed to search the repository code: {response.status_code}"

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body cannot be decoded as JSON
        return "Failed to search the repository code: response was not valid JSON"


In [40]:
""""
Test the functions
"""
# Each helper is called once against a public repository and its raw response is printed.

# Structure retrieval
test_repo_structure_url = get_repo_structure(
    url="https://github.com/recommenders-team/recommenders"
)
print("Here is the structure of the repository:", test_repo_structure_url, sep="\n")

# Content retrieval
test_repo_content_url = get_repo_content(
    url="https://github.com/recommenders-team/recommenders",
    filePaths=[".github/.codecov.yml"],
)
print("Here is the content of the repository:", test_repo_content_url, sep="\n")

# Branches retrieval
test_repo_branches_url = get_repo_branches(
    url="https://github.com/RecandChat/CodeCompass"
)
print("Here are the branches of the repository:", test_repo_branches_url, sep="\n")

# Commit history retrieval
test_commit_history_url = get_commit_history(
    url="https://github.com/recommenders-team/recommenders",
    filePath=".devcontainer/devcontainer.json",
)
print("Here is the commit history of the repository:", test_commit_history_url, sep="\n")

# Code search
search_keywords = ["Neural News Recommendation"]
test_search_repo_code_url = search_repo_code(
    url="https://github.com/recommenders-team/recommenders",
    searchKeywords=search_keywords,
)
print("Here is the search result:", test_search_repo_code_url, sep="\n")






Here is the structure of the repository:
{'branchName': 'main', 'files': ['.devcontainer/Dockerfile', '.devcontainer/devcontainer.json', '.github/.codecov.yml', '.github/CODEOWNERS', '.github/ISSUE_TEMPLATE.md', '.github/ISSUE_TEMPLATE/bug_report.md', '.github/ISSUE_TEMPLATE/feature_request.md', '.github/ISSUE_TEMPLATE/general-ask.md', '.github/PULL_REQUEST_TEMPLATE.md', '.github/actions/azureml-test/action.yml', '.github/actions/get-test-groups/action.yml', '.github/workflows/azureml-cpu-nightly.yml', '.github/workflows/azureml-gpu-nightly.yml', '.github/workflows/azureml-release-pipeline.yml', '.github/workflows/azureml-spark-nightly.yml', '.github/workflows/azureml-unit-tests.yml', '.github/workflows/sarplus.yml', '.github/workflows/update_documentation.yml', 'AUTHORS.md', 'CODE_OF_CONDUCT.md', 'CONTRIBUTING.md', 'GLOSSARY.md', 'LICENSE', 'MANIFEST.in', 'NEWS.md', 'README.md', 'SECURITY.md', 'SETUP.md', 'contrib/README.md', 'contrib/azureml_designer_modules/README.md', 'contrib/azur

### Assistant API

In [41]:
# Function-tool declarations handed to the assistant. Each entry describes one
# of the AskTheCode helper functions: its name, a description for the model,
# and a schema for its arguments.
tools = [
    # --- get_repo_structure -------------------------------------------------
    {
        "type": "function",
        "function": {
            "name": "get_repo_structure",
            "description": "Retrieves the Github repository file structure to analyze it and be able to query only relevant files. If the provided URL contains specific branch and directory information, prioritize using that over querying the entire repository structure.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {
                        "minLength": 1,
                        "type": "string",
                        "description": "Full Github repository URL provided by the user. For example: https://github.com/[owner]/[repo]/blob/[branch]/[file-path]#[additional-parameters]. The URL MUST be identical to the one, that was provided by the user, you MUST NEVER alter or truncate it. This is crucial for valid responses. You should NEVER truncate additional-parameters.",
                    },
                    "branch": {
                        "type": "string",
                        "description": "Repository branch. Provide only if user has explicitly specified it or the previous plugin response contains it.",
                        "nullable": True,
                    },
                    "relativePaths": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Relative paths to retrieve. USE only paths you are certain that exist, NEVER invent them. If the provided URL contains a specific directory path, extract and use it. Otherwise, this should be a directory path or pattern only. Patterns accept * symbol as 'any substring'",
                        "nullable": True,
                    },
                },
                "required": ["url"],
                "additionalProperties": False,
            },
        },
    },
    # --- get_repo_content ---------------------------------------------------
    {
        "type": "function",
        "function": {
            "name": "get_repo_content",
            "description": "Retrieves github repository file contents, possibly filtered by file names. Line numbers can be specified in URL as well. NEVER query this endpoint without previously querying get_repo_structure endpoint and when the next step is set to get_repo_structure.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {
                        "minLength": 1,
                        "type": "string",
                        "description": "Full Github repository URL provided by the user. For example: https://github.com/[owner]/[repo]/blob/[branch]/[file-path]#[additional-parameters]. The URL MUST be identical to the one, that was provided by the user, you MUST NEVER alter or truncate it. This is crucial for valid responses. You should NEVER truncate additional-parameters.",
                    },
                    "branch": {
                        "type": "string",
                        "description": "Repository branch. Provide only if user has explicitly specified it or the previous assistant response contains it. When requesting file from commit, use commit SHA.",
                        "nullable": True,
                    },
                    "relativePath": {
                        "type": "string",
                        "description": "Relative paths to the directory. Provide only if user has explicitly specified it or the previous plugin response contains it.",
                        "nullable": True,
                    },
                    "filePaths": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Files to query the content of. Order them by relevance descendant. This should NEVER contain the repository branch. First determine the branch if possible, and only then the file paths. Pass only if you are sure about the file path, call get_repo_structure otherwise",
                    },
                },
                "required": ["url", "filePaths"],
                "additionalProperties": False,
            },
        },
    },
    # --- get_repo_branches --------------------------------------------------
    {
        "type": "function",
        "function": {
            "name": "get_repo_branches",
            "description": "Retrieves a list of branches from a Github repository given its URL.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {
                        "minLength": 1,
                        "type": "string",
                        "description": "Full Github repository URL provided by the user. For example: https://github.com/[owner]/[repo]/blob/[branch]/[file-path]#[additional-parameters]. The URL MUST be identical to the one, that was provided by the user, you MUST NEVER alter or truncate it. This is crucial for valid responses. You should NEVER truncate additional-parameters.",
                    },
                },
                "required": ["url"],
                "additionalProperties": False,
            },
        },
    },
    # --- get_commit_history -------------------------------------------------
    {
        "type": "function",
        "function": {
            "name": "get_commit_history",
            "description": "Returns the commits history for the specific file in the repository. If the file path is not provided, the history of the entire repository will be returned. If the branch is not provided, the default branch will be used.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {
                        "minLength": 1,
                        "type": "string",
                        "description": "Full Github repository URL provided by the user. For example: https://github.com/[owner]/[repo]/blob/[branch]/[file-path]#[additional-parameters]. The URL MUST be identical to the one, that was provided by the user, you MUST NEVER alter or truncate it. This is crucial for valid responses. You should NEVER truncate additional-parameters.",
                    },
                    "branch": {
                        "type": "string",
                        "description": "Repository branch. Provide only if user has explicitly specified it or the previous assistant response contains it.",
                        "nullable": True,
                    },
                    "filePath": {
                        "type": "string",
                        "description": "Path to the file to request the commit history for. Use path relative to the root directory of the repository.",
                        "nullable": True,
                    },
                },
                "required": ["url"],
                "additionalProperties": False,
            },
        },
    },
    # --- search_repo_code ---------------------------------------------------
    {
        "type": "function",
        "function": {
            "name": "search_repo_code",
            "description": "Search code by user specified keywords. Use when user explicitly asked to search for something. Otherwise prefer to fetch the repository structure. Invoke only with user-specified, specific keywords (e.g., file, class, method names). Avoid generic terms.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {
                        "minLength": 1,
                        "type": "string",
                        "description": "Full Github repository URL provided by the user. For example: https://github.com/[owner]/[repo]/blob/[branch]/[file-path]#[additional-parameters]. The URL MUST be identical to the one, that was provided by the user, you MUST NEVER alter or truncate it. This is crucial for valid responses. You should NEVER truncate additional-parameters.",
                    },
                    "searchKeywords": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Search keywords. Invoke only with user-specified keywords. Never use keywords that are not part of the user prompt. When user asks to search for function definitions in a specific file (not directory) and you cannot parse them from file content, pass function keyword relevant for the file language..",
                    },
                    "branch": {
                        "type": "string",
                        "description": "Repository branch. Provide only if user has explicitly specified it or the previous plugin response contains it. When requesting file from commit, use commit SHA.",
                        "nullable": True,
                    },
                    "relativePath": {
                        "type": "string",
                        "description": "Relative path to the file or directory to search in. Provide only if user has explicitly specified it or the previous plugin response contains it.",
                        "nullable": True,
                    },
                    "searchHitLinesCount": {
                        "type": "integer",
                        "description": "Number of lines to retrieve. Set only when explicitly asked to retrieve the specified amount of lines by the user.",
                        "format": "int32",
                        "nullable": True,
                    },
                    "skipMatchesCount": {
                        "type": "integer",
                        "description": "Number of matches to skip in the file. use only when user is searching over file and you need to search for matches that were omitted from the previous search request",
                        "format": "int32",
                        "nullable": True,
                    },
                },
                "required": ["url", "searchKeywords"],
                "additionalProperties": False,
            },
        },
    },
    # Add more tools here
]

In [42]:
# Create the assistant and attach the function tools declared in `tools`
assistant = client.beta.assistants.create(
    name="codecompass",
    instructions=(
        "You are a helpful assistant that analyzes code from github repositories "
        "and files when given a github url. You will answer questions about the "
        "structure of a repository, the content of a files, or any other "
        "code-related queries."
    ),
    model="gpt-3.5-turbo-0125",
    tools=tools,
)

In [43]:
# Utility function to create a message and run

def create_message_and_run(assistant, query, thread=None):
    """Post `query` as a user message and start a run of `assistant` on the thread.

    When `thread` is missing (or falsy) a new thread is created first, so the
    helper can be used for the first turn as well as for follow-ups.

    Returns:
        (run, thread): the run that was just created and the thread it runs on.
    """
    thread = thread or client.beta.threads.create()

    # The created message object itself is not used afterwards
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=query,
    )
    new_run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id,
    )
    return new_run, thread

# Utility function to get details of function to be called

def get_function_details(run):
    """Return (name, arguments, call id) of the run's first pending tool call.

    Only `tool_calls[0]` is read; any further tool calls on the same required
    action are ignored. The required action and the extracted values are also
    printed.
    """
    print("\nrun.required_action\n", run.required_action)

    first_call = run.required_action.submit_tool_outputs.tool_calls[0]
    function_name = first_call.function.name
    arguments = first_call.function.arguments
    function_id = first_call.id

    print(f"function_name: {function_name} and arguments: {arguments}")

    return function_name, arguments, function_id

# Utility function to submit the function response

def submit_tool_outputs(run, thread, function_id, function_response):
    """Send one tool result back to the run and return the run object from the API.

    `function_response` is converted with str() before it is submitted as the
    output for the tool call identified by `function_id`.
    """
    tool_output = {
        "tool_call_id": function_id,
        "output": str(function_response),
    }
    return client.beta.threads.runs.submit_tool_outputs(
        thread_id=thread.id,
        run_id=run.id,
        tool_outputs=[tool_output],
    )

# Dispatch table: tool name requested by the assistant -> local Python function.
# Every function declared in `tools` needs an entry here; a tool without one is
# answered with a "does not exist" error instead of being executed.
available_functions = {
    "get_repo_structure": get_repo_structure,
    "get_repo_content": get_repo_content,
    "get_repo_branches": get_repo_branches,
    "get_commit_history": get_commit_history,
    "search_repo_code": search_repo_code,
}

# execute the function

def execute_function_call(function_name, arguments):
    """Run the local function registered under `function_name`.

    Args:
        function_name: Key to look up in `available_functions`.
        arguments: JSON text encoding an object of keyword arguments. An empty
            string or None is treated as "no arguments".

    Returns:
        Whatever the looked-up function returns, or an "Error: ..." string when
        the name is unknown, the arguments are not a JSON object, or they do
        not match the function's parameters. Errors are returned rather than
        raised so the caller can pass them on as the tool output.
    """
    function = available_functions.get(function_name, None)
    if not function:
        return f"Error: function {function_name} does not exist"

    try:
        parsed_arguments = json.loads(arguments) if arguments else {}
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError; TypeError covers non-string input
        return f"Error: could not parse arguments for {function_name}: {exc}"

    if not isinstance(parsed_arguments, dict):
        # Only a JSON object can be expanded into keyword arguments
        return f"Error: arguments for {function_name} must be a JSON object"

    try:
        return function(**parsed_arguments)
    except TypeError as exc:
        # Typically a missing or unexpected keyword argument.
        # NOTE: this also reports a TypeError raised inside the function itself.
        return f"Error: could not call {function_name}: {exc}"

In [44]:
# Start a first run with a sample query on a fresh thread
query = "I want to know about my repository"
run, thread = create_message_and_run(assistant=assistant, query=query)

In [45]:
run

Run(id='run_csiOafvzGiuJcmeVaUkfTwG5', assistant_id='asst_Xwv6OQ2NEREDCHeRuxgyAeT5', cancelled_at=None, completed_at=None, created_at=1711483171, expires_at=1711483771, failed_at=None, file_ids=[], instructions='You are a helpful assistant that analyzes code from github repositories and files when given a github url. You will answer questions about the structure of a repository, the content of a files, or any other code-related queries.', last_error=None, metadata={}, model='gpt-3.5-turbo-0125', object='thread.run', required_action=None, started_at=None, status='queued', thread_id='thread_fXbGGI9cj8fLgObFxBdzzU01', tools=[FunctionTool(function=FunctionDefinition(name='get_repo_structure', description='Retrieves the Github repository file structure to analyze it and be able to query only relevant files. If the provided URL contains specific branch and directory information, prioritize using that over querying the entire repository structure.', parameters={'type': 'object', 'properties':

In [46]:
# Poll the run once per second, executing requested tool calls and reading the
# next user message from stdin after each completed reply. Type STOP to leave.
while True:
    # Retrieve the run status from the thread we created in the previous cell
    run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    print("run status", run.status)

    if run.status == "requires_action":
        # The assistant asked for a tool: execute it locally and submit the result
        function_name, arguments, function_id = get_function_details(run)
        function_response = execute_function_call(function_name, arguments)
        run = submit_tool_outputs(run, thread, function_id, function_response)
        continue

    if run.status == "completed":  # means gpt has an output
        messages = client.beta.threads.messages.list(thread_id=thread.id)
        latest_message = messages.data[0]
        text = latest_message.content[0].text.value
        print(text)

        user_input = input()
        if user_input == "STOP":
            break

        run, thread = create_message_and_run(assistant=assistant, query=user_input, thread=thread)
        continue

    if run.status in ("failed", "cancelled", "expired", "incomplete"):
        # The run will not progress from these statuses; without this exit the
        # loop would keep polling forever.
        print("run ended without completing:", run.last_error)
        break

    time.sleep(1)

run status in_progress
run status in_progress
run status requires_action

run.required_action
 RequiredAction(submit_tool_outputs=RequiredActionSubmitToolOutputs(tool_calls=[RequiredActionFunctionToolCall(id='call_f94dJjcJFjWD5BVK9lDna791', function=Function(arguments='{"url":"https://github.com/eniallator/call-me-dumb"}', name='get_repo_structure'), type='function')]), type='submit_tool_outputs')
function_name: get_repo_structure and arguments: {"url":"https://github.com/eniallator/call-me-dumb"}
run status in_progress
run status in_progress
run status in_progress
run status in_progress
run status completed
I encountered an error while trying to retrieve the requested repository or branch. Please verify that the provided URL is valid. If the repository is private, make sure to use GitHub authentication when using the AskTheCode plugin, and ensure that the account being used has access to the requested repository. You can find more information on our [website](https://askthecode.ai) an