In [24]:
import openai
from dotenv import load_dotenv
from github import Github, GithubException
import re

In [2]:
def parse_python_code(input_str):
    pattern = r'```python\n(.*?)\n```'
    result = re.search(pattern, input_str, re.DOTALL)
    if result:
        return result.group(1)
    else:
        return "No match found"

In [3]:
load_dotenv()

True

In [4]:
# Login and get repo
repo_path = "Scott-Huston/gpt_dev"

g = Github(os.getenv("GH_TOKEN"))
repo = g.get_repo(repo_path)

In [5]:
# Get file paths for all files in repo
# get_contents only gets root level at first

filepaths = []
contents = repo.get_contents("")
while contents:
    file_content = contents.pop(0)
    if file_content.type == "dir":
        # Add contents of subfolder
        contents.extend(repo.get_contents(file_content.path))
    else:
        filepaths.append(file_content.path)

filepaths

['.gitignore', 'README.md']

In [6]:
# Represent contents of repo in a long string
repo_content_string = ""

for filepath in filepaths:
    repo_content_string += f"FILE PATH:\n{filepath}\nFILE CONTENT:\n"

    file_content = repo.get_contents(filepath).decoded_content
    repo_content_string += f"{file_content}\n\n"

repo_content_string

'FILE PATH:\n.gitignore\nFILE CONTENT:\nb"# Byte-compiled / optimized / DLL files\\n__pycache__/\\n*.py[cod]\\n*$py.class\\n\\n# C extensions\\n*.so\\n\\n# Distribution / packaging\\n.Python\\nbuild/\\ndevelop-eggs/\\ndist/\\ndownloads/\\neggs/\\n.eggs/\\nlib/\\nlib64/\\nparts/\\nsdist/\\nvar/\\nwheels/\\nshare/python-wheels/\\n*.egg-info/\\n.installed.cfg\\n*.egg\\nMANIFEST\\n\\n# PyInstaller\\n#  Usually these files are written by a python script from a template\\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\\n*.manifest\\n*.spec\\n\\n# Installer logs\\npip-log.txt\\npip-delete-this-directory.txt\\n\\n# Unit test / coverage reports\\nhtmlcov/\\n.tox/\\n.nox/\\n.coverage\\n.coverage.*\\n.cache\\nnosetests.xml\\ncoverage.xml\\n*.cover\\n*.py,cover\\n.hypothesis/\\n.pytest_cache/\\ncover/\\n\\n# Translations\\n*.mo\\n*.pot\\n\\n# Django stuff:\\n*.log\\nlocal_settings.py\\ndb.sqlite3\\ndb.sqlite3-journal\\n\\n# Flask stuff:\\ninstance/\\n.webassets-cac

In [7]:
# Authenticate
openai.api_key = os.getenv("OPENAI_API_KEY")

In [8]:
system_message = "You are a world-class 10X programmer." 

# You will be given the contents \
#     of a repository and a description of a task. You will first ask any clarifying questions \
#     that could be useful to resolve any ambiguity. After that, you will submit a pull request \
#     to complete the task that was described by outputting code "

task_description = "Please add a markdown file to the repo with an overview of how a human-level large language model \
could be used to create pull requests with high quality code. Feel free to add any additional ideas for how the model \
could be useful for code development generally."

clarification_prompt = "Do not attempt to complete the task yet. Are there any questions you have about the task before you begin?"

first_message = f"""Here are the contents of a code repository on Github:
{repo_content_string}
Task Description:
{task_description}
{clarification_prompt}
"""

response = openai.ChatCompletion.create(
  model="gpt-4",
  messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": first_message}
        # {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        # {"role": "user", "content": "Where was it played?"}
    ]
)



In [9]:
print(response)

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "No questions. I will create a markdown file with the requested content. Here it is:\n\nFILE PATH:\nlanguage_model_code_dev.md\n\nFILE CONTENT:\n```\n# Human-Level Language Model for Code Development\n\nHuman-level large language models, such as GPT, can revolutionize the way we develop code by automating several aspects of the coding process. In this document, we explore the potential usage of such models for creating high-quality pull requests and code development in general.\n\n## Creating High-Quality Pull Requests\n\n1. **Code Review**: A human-level language model can analyze the code changes and provide suggestions to improve code quality or catch potential issues before they become a problem. This can also help in maintaining a consistent code style throughout the project.\n\n2. **Commit Messages**: The language model can summarize the changes in a commit and automatica

In [10]:
first_response_content = response["choices"][0]["message"]["content"]
first_response_content

"No questions. I will create a markdown file with the requested content. Here it is:\n\nFILE PATH:\nlanguage_model_code_dev.md\n\nFILE CONTENT:\n```\n# Human-Level Language Model for Code Development\n\nHuman-level large language models, such as GPT, can revolutionize the way we develop code by automating several aspects of the coding process. In this document, we explore the potential usage of such models for creating high-quality pull requests and code development in general.\n\n## Creating High-Quality Pull Requests\n\n1. **Code Review**: A human-level language model can analyze the code changes and provide suggestions to improve code quality or catch potential issues before they become a problem. This can also help in maintaining a consistent code style throughout the project.\n\n2. **Commit Messages**: The language model can summarize the changes in a commit and automatically generate a concise and informative commit message.\n\n3. **Automated Testing**: Based on the code changes,

In [25]:
source_branch = '{repo.default_branch}'
target_branch = 'test'

try:
    # Test to see if branch already exists
    repo.get_branch("test")
except GithubException as e:
    if e.status == 404:
        # If branch does not exist, create it
        sb = repo.get_branch(source_branch)
        repo.create_git_ref(ref='refs/heads/' + target_branch, sha=sb.commit.sha)

repo.get_branch("test")

Branch(name="test")

In [27]:
create_pr_message = f"""Ok, now please create a pull request to complete the task using the PyGithub Python library. You should create a new branch, create/modify/delete files as necessary, and then create a pull request.

# Example of creating a branch

source_branch = '{repo.default_branch}'
target_branch = 'test'

try:
    # Test to see if branch already exists
    repo.get_branch("test")
except GithubException as e:
    if e.status == 404:
        # If branch does not exist, create it
        sb = repo.get_branch(source_branch)
        repo.create_git_ref(ref='refs/heads/' + target_branch, sha=sb.commit.sha)

# Example of creating a new file
file_content = "This is the file content"
commit_message = "create test file"
repo.create_file(path="test.txt", message=commit_message, content=file_content, branch="test")

# Example of updating a file
contents = repo.get_contents("test.txt", ref="test")
repo.update_file(contents.path, "more tests", "more tests", contents.sha, branch="test")

# Example of deleting a file
contents = repo.get_contents("test.txt", ref="test")
repo.delete_file(contents.path, message="remove test", sha=contents.sha, branch="test")

# Example of creating the PR
body = '''
This PR created, updated and deleted a test file
'''
repo.create_pull(title="Test file", body=body, head="test", base=repo.default_branch)


YOUR ENTIRE RESPONSE SHOULD BE EXCLUSIVELY PYTHON CODE. YOUR RESPONSE MUST RUN IN PYTHON AS-IS TO CREATE THE PULL REQUEST.

Your response will be run in a python script after the following code has already been run.

from github import Github, GithubException

# Login and get repo
g = Github(os.getenv("GH_TOKEN"))
repo = g.get_repo({repo_path})

"""

response = openai.ChatCompletion.create(
  model="gpt-4",
  messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": first_message},
        {"role": "assistant", "content": first_response_content},
        {"role": "user", "content": create_pr_message}
    ]
)

In [28]:
create_pr_response = response["choices"][0]["message"]["content"]
create_pr_response

'```python\nfrom github import Github, GithubException\nimport os\n\n# Login and get repo\ng = Github(os.getenv("GH_TOKEN"))\nrepo = g.get_repo("Scott-Huston/gpt_dev")\n\n# Create a new branch\nsource_branch = \'main\'\ntarget_branch = \'language_model_code_dev\'\n\ntry:\n    # Test to see if branch already exists\n    repo.get_branch(target_branch)\nexcept GithubException as e:\n    if e.status == 404:\n        # If branch does not exist, create it\n        sb = repo.get_branch(source_branch)\n        repo.create_git_ref(ref=\'refs/heads/\' + target_branch, sha=sb.commit.sha)\n\n# Create a new file\nfile_content = \'\'\'\n# Human-Level Language Model for Code Development\n\nHuman-level large language models, such as GPT, can revolutionize the way we develop code by automating several aspects of the coding process. In this document, we explore the potential usage of such models for creating high-quality pull requests and code development in general.\n\n## Creating High-Quality Pull Req

In [29]:
exec(parse_python_code(create_pr_response))

In [19]:
locs = parse_python_code(create_pr_response).split("\n")
locs

for loc in locs:
    print(loc)
    exec(loc)

from github import InputGitTreeElement

# Create a new branch
source_branch = 'main'
target_branch = 'language_model_code_dev'

sb = repo.get_branch(source_branch)
repo.create_git_ref(ref='refs/heads/' + target_branch, sha=sb.commit.sha)

# Create the new markdown file
file_path = "language_model_code_dev.md"
file_content = '''


SyntaxError: unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)

In [None]:
## Need to separate out the middle of ```python\n XXXX \n```
## Probably need to specify to not import any non-standard packages
## Specifying that the response needs to be in python might be confusing if the repo is in another language
## Wrap calls to gpt-4 in try/except to handle connection issues

In [32]:
exec("from github import Github\nimport os\nprint('worked')")

worked


In [36]:
exec(parse_python_code(create_pr_response))

UnknownObjectException: 404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest"}

In [26]:
create_pr_response = response["choices"][0]["message"]["content"]
create_pr_response

'```python\nfrom base64 import b64encode\n\n# Create a new branch\nsource_branch = repo.default_branch\ntarget_branch = "feature/human-level-lm"\n\nsb = repo.get_branch(source_branch)\nrepo.create_git_ref(ref="refs/heads/" + target_branch, sha=sb.commit.sha)\n\n# Create a markdown file with the requested content\nfile_content = b64encode(b\'\'\'# Human-level Large Language Model for Pull Requests\n\nA human-level large language model can significantly improve the process of code development, making it more efficient and reliable. This document provides an overview of how such a model can be employed to create pull requests with high-quality code.\n\n## Creating Pull Requests\n\nA large language model can be used to:\n\n1. **Analyze the codebase**: The model can be designed to understand the repository\'s code structure, modules, functions, and logic.\n2. **Identify modifications**: The model should be capable of detecting necessary changes, such as bug fixes, new feature additions, or 

In [24]:
create_pr_response = response["choices"][0]["message"]["content"]
create_pr_response


'```python\nfrom github import Github\nimport os\n\n# Replace with your own Github access token\naccess_token = os.environ.get("GITHUB_ACCESS_TOKEN")\ng = Github(access_token)\n\n# Get the repo object\nrepo_owner = "your_username"\nrepo_name = "your_repo_name"\nrepo = g.get_user(repo_owner).get_repo(repo_name)\n\n# Create a new branch\nsource_branch = repo.default_branch\ntarget_branch = \'new_branch_name\'\n\nsb = repo.get_branch(source_branch)\nrepo.create_git_ref(ref=\'refs/heads/\' + target_branch, sha=sb.commit.sha)\n\n# Create a new markdown file\nfile_content = b"""\n# Human-Level Large Language Model for Code Development\n\nA human-level large language model such as OpenAI GPT-3 can bring significant improvements to the process of code development. We\'ll focus on some key benefits and ideas:\n\n## Creating pull requests with high-quality code\n\nA large language model can leverage pretrained knowledge and in-built code understanding capabilities to generate high-quality code s

In [None]:
exec()