In [1]:
import os

# User configuration for the export; edit these values before running the next cell.
# URL of the Git repository that will be cloned.
repo_url = "https://github.com/CellMigrationLab/CellTracksColab"  # Replace with your repository URL
# Directory the clone is created in; the next cell also makes it the working directory.
repo_name = "CellTracksColab"  # Name of the repository folder after cloning
# Destination of the combined text file.
# NOTE(review): "/content" is outside the "/content/drive" mount point shown in the cell
# output, so this file is presumably not saved to Google Drive -- confirm and point the
# path below the Drive mount if the export must persist.
output_file_path = "/content/CellTracksColab.txt"  # Replace with your desired output file path
# Optional text file whose contents replace the default preamble written at the top of the export.
preamble_file = None  # Replace with the path to your preamble file if any, or set to None



In [2]:
import os
import fnmatch
from google.colab import drive


# Function to load the ignore patterns stored in a .gptignore file
def get_ignore_list(ignore_file_path):
    """Return the patterns listed in an ignore file, one entry per line.

    Args:
        ignore_file_path: Path of the ignore file to read.

    Returns:
        A list with every line of the file, stripped of surrounding
        whitespace and kept in file order (blank lines become empty
        strings). An empty list is returned when the file does not exist.
    """
    if not os.path.exists(ignore_file_path):
        return []
    with open(ignore_file_path, 'r') as handle:
        return [raw_line.strip() for raw_line in handle]

# Function to decide whether a path is excluded (inside .git or matched by an ignore pattern)
def should_ignore(file_path, ignore_list):
    """Tell whether ``file_path`` must be left out of the export.

    Args:
        file_path: Path to test; it is split on ``os.sep`` to inspect its
            components and matched as a whole against each pattern.
        ignore_list: Iterable of ``fnmatch``-style patterns.

    Returns:
        True when any path component is exactly ``.git`` or when at least
        one pattern matches the full path, otherwise False.
    """
    path_components = file_path.split(os.sep)
    if '.git' in path_components:
        return True
    for pattern in ignore_list:
        if fnmatch.fnmatch(file_path, pattern):
            return True
    return False

# Function to detect files skipped as images, judged by extension only
def is_image_file(file_name):
    """Report whether ``file_name`` carries one of the skipped extensions.

    The comparison is case-insensitive and looks only at the final
    extension as computed by ``os.path.splitext``. Note that ``.pdf`` is
    part of the list alongside the usual image formats.

    Args:
        file_name: File name (or path) to classify.

    Returns:
        True when the extension is in the list, otherwise False.
    """
    skipped_suffixes = (
        '.png', '.jpg', '.jpeg', '.gif', '.bmp',
        '.tiff', '.tif', '.svg', '.webp', '.pdf',
    )
    suffix = os.path.splitext(file_name.lower())[1]
    return suffix in skipped_suffixes

# Function to emit the introductory text that opens the output file
def write_preamble(output_file, preamble_file=None):
    """Write the preamble that explains the layout of the export.

    Args:
        output_file: Writable text stream receiving the preamble.
        preamble_file: Optional path to a text file holding a custom
            preamble. When it is falsy or does not exist, the built-in
            description of the ``----`` / ``--END--`` layout is written.

    The custom preamble is written followed by a single newline.
    """
    has_custom_preamble = bool(preamble_file) and os.path.exists(preamble_file)
    if not has_custom_preamble:
        output_file.write("The following text is a Git repository with code. The structure of the text begins with ----, followed by the file path and file name, then the file contents. The text ends with --END--.\n")
        return
    with open(preamble_file, 'r') as source:
        custom_text = source.read()
    output_file.write(f"{custom_text}\n")

# Helper shared by the two functions below so they always agree on which README goes first
def _find_top_readme(repo_path):
    """Return the repo-relative path of the README to place first, or None.

    The tree is walked top-down with directories and files in sorted order,
    so the choice does not depend on filesystem listing order. Directories
    named ``.git`` are never entered. The first file whose lower-cased name
    contains ``"readme"`` wins.
    """
    for root, dirs, files in os.walk(repo_path):
        # Editing ``dirs`` in place is how os.walk is told what to descend into.
        dirs[:] = sorted(d for d in dirs if d != '.git')
        for file in sorted(files):
            if "readme" in file.lower():
                return os.path.relpath(os.path.join(root, file), repo_path)
    return None

# Function to write the README file content at the top of the output file
def write_readme_at_top(repo_path, output_file):
    """Write the repository's main README as the first file section.

    Args:
        repo_path: Root directory of the repository.
        output_file: Writable text stream receiving the section.

    Returns:
        The repo-relative path of the README that was written, or None when
        the repository contains no README-like file (nothing is written).
    """
    relative_file_path = _find_top_readme(repo_path)
    if relative_file_path is None:
        return None
    with open(os.path.join(repo_path, relative_file_path), 'r', errors='ignore') as f:
        contents = f.read()
    output_file.write("----\n")
    output_file.write(f"{relative_file_path}\n")
    output_file.write(f"{contents}\n")
    return relative_file_path

# Function to process the repository and write all non-image files to a single output file
def process_repository(repo_path, ignore_list, output_file):
    """Write every remaining file of the repository as a ``----`` section.

    Files are skipped when ``should_ignore`` rejects their repo-relative
    path, when ``is_image_file`` flags their name, or when they are the
    README selected by ``write_readme_at_top`` (to avoid writing it twice).
    Other README-like files, e.g. in sub-directories, are kept; previously
    every file with "readme" in its name was silently dropped.

    Args:
        repo_path: Root directory of the repository.
        ignore_list: ``fnmatch``-style patterns forwarded to ``should_ignore``.
        output_file: Writable text stream receiving the sections.
    """
    top_readme = _find_top_readme(repo_path)
    for root, dirs, files in os.walk(repo_path):
        # Skip .git wholesale instead of visiting and rejecting each of its
        # files; sorting keeps the section order reproducible between runs.
        dirs[:] = sorted(d for d in dirs if d != '.git')
        for file in sorted(files):
            file_path = os.path.join(root, file)
            relative_file_path = os.path.relpath(file_path, repo_path)

            if relative_file_path == top_readme:
                continue
            if should_ignore(relative_file_path, ignore_list) or is_image_file(file):
                continue

            # errors='ignore' drops undecodable bytes so binary-ish files do not abort the export.
            with open(file_path, 'r', errors='ignore') as f:
                contents = f.read()
            output_file.write("----\n")
            output_file.write(f"{relative_file_path}\n")
            output_file.write(f"{contents}\n")

# Function to close the export by appending the end marker
def finalize_output(output_file_path):
    """Append the ``--END--`` marker line to the file at ``output_file_path``.

    The file is opened in append mode, so existing content is preserved
    and the file is created when it does not exist yet.

    Args:
        output_file_path: Path of the export file to terminate.
    """
    end_marker = "--END--\n"
    with open(output_file_path, 'a') as handle:
        handle.write(end_marker)

# Clone the GitHub repository
!git clone {repo_url} {repo_name}

# Change the working directory to the cloned repository
os.chdir(repo_name)

# Get the ignore list from .gptignore
ignore_file_path = os.path.join(repo_name, ".gptignore")
ignore_list = get_ignore_list(ignore_file_path)

# Write the repository content to a single output file, with the README at the top
with open(output_file_path, 'w') as output_file:
    write_preamble(output_file, preamble_file)
    write_readme_at_top(".", output_file)  # Write the README file at the top
    process_repository(".", ignore_list, output_file)  # Process the rest of the repository
    finalize_output(output_file_path)

# Confirm the output file creation
print(f"Repository contents written to {output_file_path}.")


Mounted at /content/drive
Cloning into 'CellTracksColab'...
remote: Enumerating objects: 1332, done.[K
remote: Counting objects: 100% (708/708), done.[K
remote: Compressing objects: 100% (343/343), done.[K
remote: Total 1332 (delta 493), reused 510 (delta 364), pack-reused 624 (from 1)[K
Receiving objects: 100% (1332/1332), 27.70 MiB | 20.26 MiB/s, done.
Resolving deltas: 100% (839/839), done.
Repository contents written to /content/CellTracksColab.txt.
