This notebook reads a project — either cloned from a GitHub URL or taken from an existing local directory — and concatenates all of its code files into a single text file. The consolidated file is intended as input for large language models (LLMs), so that the entire project can be read and understood in one pass.

In [1]:
import os
import subprocess
import fnmatch

def clone_github_repo(github_url, local_dir):
    """Clone ``github_url`` into ``local_dir`` unless that path already exists.

    Args:
        github_url: Repository URL handed to ``git clone``.
        local_dir: Destination path for the working copy.

    Returns:
        None. When ``local_dir`` already exists, a notice is printed and
        no clone is attempted.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a
            non-zero status (the command is run with ``check=True``).
    """
    if not os.path.exists(local_dir):
        clone_command = ["git", "clone", github_url, local_dir]
        subprocess.run(clone_command, check=True)
    else:
        # Any existing path (file or directory) short-circuits the clone.
        print(f"Directory {local_dir} already exists.")

def gather_code_files(project_path, output_file):
    """Concatenate every matching source file under ``project_path`` into ``output_file``.

    The tree is walked recursively and each file whose name matches one of
    the patterns in ``code_file_extensions`` is appended to the output as::

        File: <path>
        <file contents>
        <blank line>
        <blank line>

    Directories and file names are visited in sorted order so that repeated
    runs over the same tree produce the same output. The output file itself
    is never included, even if it lives inside ``project_path`` and matches
    one of the patterns.

    Args:
        project_path: Root directory to scan.
        output_file: Path of the text file to create (overwritten if present).

    Returns:
        None. The result is written to ``output_file``.
    """
    code_file_extensions = ['*.py', '*.cpp', '*.c', '*.h', '*.java', '*.js', '*.md', '*.ipynb']
    # Normalised absolute path of the target, used to keep the bundle from
    # ingesting itself when it already exists inside the scanned tree.
    output_abs = os.path.normcase(os.path.abspath(output_file))
    code_files = []

    for root, dirs, files in os.walk(project_path):
        # Sorting dirs in place fixes the traversal order of the top-down walk.
        dirs.sort()
        for file in sorted(files):
            if not any(fnmatch.fnmatch(file, ext) for ext in code_file_extensions):
                continue
            file_path = os.path.join(root, file)
            if os.path.normcase(os.path.abspath(file_path)) == output_abs:
                continue
            code_files.append(file_path)

    with open(output_file, 'w', encoding='utf-8') as f:
        for file_path in code_files:
            f.write(f"File: {file_path}\n")
            # errors='ignore' drops undecodable bytes instead of aborting the run.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as code_file:
                f.write(code_file.read())
            f.write("\n\n")




In [3]:
def main(
    choice='2',
    github_url="https://github.com/Silicon-Life/Nova.git",
    local_dir="temp_repo",
    local_project_path=r'C:\Users\ps\Documents\Academic\Papers\Agents\Nova',
    output_file="project_code.txt",
):
    """Bundle one project's code files into a single text file.

    Calling ``main()`` with no arguments uses the defaults below; pass
    keyword arguments to target a different project without editing the
    function body.

    Args:
        choice: ``'1'`` to clone ``github_url`` into ``local_dir`` and scan
            the clone; ``'2'`` to scan ``local_project_path`` directly.
            Any other value prints "Invalid choice" and returns.
        github_url: Repository URL used when ``choice == '1'``.
        local_dir: Clone destination used when ``choice == '1'``.
        local_project_path: Existing directory scanned when ``choice == '2'``.
        output_file: Path of the text file that receives the bundled code.

    Returns:
        None.
    """
    if choice == '1':
        clone_github_repo(github_url, local_dir)
        project_path = local_dir
    elif choice == '2':
        project_path = local_project_path
    else:
        print("Invalid choice")
        return

    gather_code_files(project_path, output_file)
    print(f"Code files and their contents have been written to {output_file}")


main()

Code files and their contents have been written to project_code.txt


In [3]:
def main(
    choice='1',
    github_url="https://github.com/microsoft/autogen.git",
    local_dir="autogen",
    local_project_path=r'C:\Users\ps\Documents\Academic\Papers\Agents\Nova',
    output_file="autogen.txt",
):
    """Bundle one project's code files into a single text file.

    Calling ``main()`` with no arguments uses the defaults below (clone the
    autogen repository and write ``autogen.txt``); pass keyword arguments to
    target a different project without editing the function body.

    Args:
        choice: ``'1'`` to clone ``github_url`` into ``local_dir`` and scan
            the clone; ``'2'`` to scan ``local_project_path`` directly.
            Any other value prints "Invalid choice" and returns.
        github_url: Repository URL used when ``choice == '1'``.
        local_dir: Clone destination used when ``choice == '1'``.
        local_project_path: Existing directory scanned when ``choice == '2'``.
        output_file: Path of the text file that receives the bundled code.

    Returns:
        None.
    """
    if choice == '1':
        clone_github_repo(github_url, local_dir)
        project_path = local_dir
    elif choice == '2':
        project_path = local_project_path
    else:
        print("Invalid choice")
        return

    gather_code_files(project_path, output_file)
    print(f"Code files and their contents have been written to {output_file}")


main()

Code files and their contents have been written to autogen.txt


In [4]:
def main(
    choice='1',
    github_url="https://github.com/geekan/MetaGPT.git",
    local_dir="MetaGPT",
    local_project_path=r'C:\Users\ps\Documents\Academic\Papers\Agents\Nova',
    output_file="MetaGPT.txt",
):
    """Bundle one project's code files into a single text file.

    Calling ``main()`` with no arguments uses the defaults below (clone the
    MetaGPT repository and write ``MetaGPT.txt``); pass keyword arguments to
    target a different project without editing the function body.

    Args:
        choice: ``'1'`` to clone ``github_url`` into ``local_dir`` and scan
            the clone; ``'2'`` to scan ``local_project_path`` directly.
            Any other value prints "Invalid choice" and returns.
        github_url: Repository URL used when ``choice == '1'``.
        local_dir: Clone destination used when ``choice == '1'``.
        local_project_path: Existing directory scanned when ``choice == '2'``.
        output_file: Path of the text file that receives the bundled code.

    Returns:
        None.
    """
    if choice == '1':
        clone_github_repo(github_url, local_dir)
        project_path = local_dir
    elif choice == '2':
        project_path = local_project_path
    else:
        print("Invalid choice")
        return

    gather_code_files(project_path, output_file)
    print(f"Code files and their contents have been written to {output_file}")


main()

Code files and their contents have been written to MetaGPT.txt


In [5]:
def main(
    choice='1',
    github_url="https://github.com/camel-ai/camel.git",
    local_dir="camel",
    local_project_path=r'C:\Users\ps\Documents\Academic\Papers\Agents\Nova',
    output_file="camel.txt",
):
    """Bundle one project's code files into a single text file.

    Calling ``main()`` with no arguments uses the defaults below (clone the
    camel repository and write ``camel.txt``); pass keyword arguments to
    target a different project without editing the function body.

    Args:
        choice: ``'1'`` to clone ``github_url`` into ``local_dir`` and scan
            the clone; ``'2'`` to scan ``local_project_path`` directly.
            Any other value prints "Invalid choice" and returns.
        github_url: Repository URL used when ``choice == '1'``.
        local_dir: Clone destination used when ``choice == '1'``.
        local_project_path: Existing directory scanned when ``choice == '2'``.
        output_file: Path of the text file that receives the bundled code.

    Returns:
        None.
    """
    if choice == '1':
        clone_github_repo(github_url, local_dir)
        project_path = local_dir
    elif choice == '2':
        project_path = local_project_path
    else:
        print("Invalid choice")
        return

    gather_code_files(project_path, output_file)
    print(f"Code files and their contents have been written to {output_file}")


main()

Code files and their contents have been written to camel.txt
