In [1]:
from IPython.display import display, Markdown, Code
import tempfile
from pathlib import Path, PurePath
import os
import subprocess
import shlex
import hashlib
import zlib
import time
import shutil
from typing import Mapping, Tuple, Literal, Optional, List

In [2]:
base = tempfile.mkdtemp(prefix='git-mock-')

display(Code(f'cd {base}'))

In [3]:
def run_cmd(cmd):
    """Run a command line without a shell and display what it printed.

    The string is tokenised with ``shlex.split`` and executed through
    ``subprocess.run``. The echoed command followed by the captured output is
    rendered as a ``Code`` cell output.

    Args:
        cmd: The command line as a single string, e.g. ``'git --version'``.

    Returns:
        None. The result is only displayed.
    """
    args = shlex.split(cmd)
    try:
        proc = subprocess.run(args, capture_output=True, encoding='utf-8')
    except FileNotFoundError as exc:
        # A tool that is not installed should be reported in place rather than
        # aborting the rest of the cell with a traceback.
        display(Code(f'>>> {shlex.join(args)}\n{exc}'))
        return

    # Show both streams: a command may write to stderr even when it also
    # produced stdout, and that part would otherwise be silently dropped.
    output = proc.stdout + proc.stderr
    display(Code(f'>>> {shlex.join(proc.args)}\n{output}'))

In [4]:
run_cmd('git --version')

### git init

In [5]:
# Directories created empty inside the new repository's .git directory.
init_empty_dirs = [
    'objects/info',
    'refs/heads',
]

git_dir = PurePath(base).joinpath('.git')
# exist_ok keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError the second time the cell is executed.
os.makedirs(git_dir, exist_ok=True)

for d in init_empty_dirs:
    os.makedirs(git_dir.joinpath(d), exist_ok=True)

In [6]:
with open(git_dir.joinpath('config'), 'wt', encoding='utf-8') as f:
    f.write('''\
[core]
    repositoryformatversion = 0
    filemode = true
    bare = false
    logallrefupdates = true
''')

In [7]:
with open(git_dir.joinpath('HEAD'), 'wt', encoding='utf-8') as f:
    f.write('ref: refs/heads/master')

In [8]:
run_cmd(f'tree {base} -a')
run_cmd(f'git -C {base} status')

### git add

In [9]:
py_v1_text = 'print("hello")\n'
py_file_name = 'hello.py'
with open(os.path.join(base, py_file_name), 'wt', encoding='utf-8') as f:
    f.write(py_v1_text)

In [10]:
run_cmd(f'tree {base} -a')
run_cmd(f'git -C {base} status')

In [11]:
def write_object(raw_content: bytes, sha1: str, git_dir: PurePath) -> None:
    """Store a zlib-compressed object under ``<git_dir>/objects``.

    The object lands in ``objects/<first two hex chars>/<remaining chars>``;
    the two-character directory is created when it does not exist yet.

    Args:
        raw_content: Complete object bytes to compress and store.
        sha1: Hex digest used to derive the directory and file name.
        git_dir: Path of the repository's ``.git`` directory.
    """
    deflated = zlib.compress(raw_content)

    prefix, remainder = sha1[:2], sha1[2:]
    target_dir = git_dir / 'objects' / prefix
    os.makedirs(target_dir, exist_ok=True)

    with open(target_dir / remainder, 'wb') as object_file:
        object_file.write(deflated)

def write_blob_object(file_content: str) -> str:
    """Write ``file_content`` as a blob object and return its SHA-1.

    The object is ``blob <size>\\0<content>`` where ``<size>`` is the length of
    the UTF-8 encoded content in bytes.

    Args:
        file_content: Text of the file to store.

    Returns:
        The hex SHA-1 of the blob object, which is also its object id.
    """
    payload = file_content.encode('utf-8')
    # The size in the header must count bytes, not characters; the two differ
    # as soon as the text contains a non-ASCII character.
    raw_content = f'blob {len(payload)}\0'.encode('utf-8') + payload
    sha1 = hashlib.sha1(raw_content).hexdigest()

    write_object(raw_content, sha1, git_dir)

    return sha1

In [12]:
with open(os.path.join(base, py_file_name), 'rt', encoding='utf-8') as f:
    file_content = f.read()
    
py_v1_blob_sha = write_blob_object(file_content)
print(py_v1_blob_sha)

11b15b1a4584b08fa423a57964bdbf018b0da0d5


In [13]:
run_cmd(f'git -C {base} cat-file -p {py_v1_blob_sha}')
run_cmd(f'tree {base} -a')
run_cmd(f'git -C {base} status')

In [14]:
class IndexEntry:
    """A single file entry of a version-2 git index file.

    ref: https://git-scm.com/docs/index-format/2.25.0

    Attributes:
        path: Path of the working-tree file; it is ``os.stat``-ed each time
            the entry is serialised.
        blob_sha: Hex SHA-1 of the blob object holding the file content.
        base_path: Directory the entry name is computed relative to.
    """

    def __init__(self, path: str, blob_sha: str, base_path: str):
        self.path = path
        self.blob_sha = blob_sha
        self.base_path = base_path

    @staticmethod
    def _u32(value) -> bytes:
        """Encode ``value`` as a big-endian 32-bit field, keeping the low 32 bits."""
        # Masking avoids OverflowError for stat values wider than 32 bits
        # (device or inode numbers can be).
        return (int(value) & 0xFFFFFFFF).to_bytes(4, byteorder='big')

    def _entry_name(self) -> bytes:
        """Return ``path`` relative to ``base_path`` as UTF-8 bytes with '/' separators."""
        relative = os.path.relpath(self.path, self.base_path)
        return relative.replace(os.sep, '/').encode('utf-8')

    def to_bytes(self):
        """Serialise the entry in index-format version 2 layout.

        Layout: ten 32-bit stat fields (ctime s/ns, mtime s/ns, dev, ino,
        mode, uid, gid, size), the 20-byte blob SHA-1, a 16-bit flags field,
        the entry name, then 1-8 NUL bytes so the length is a multiple of 8.

        Returns:
            The encoded entry as ``bytes``.

        Raises:
            OSError: If ``path`` cannot be stat-ed.
        """
        stat = os.stat(self.path)
        u32 = self._u32
        ns_per_second = 1_000_000_000

        # Seconds and nanoseconds are split with integer arithmetic: a float
        # modulo on a ~1e18 nanosecond count cannot represent the result exactly.
        b = u32(stat.st_ctime_ns // ns_per_second)
        b += u32(stat.st_ctime_ns % ns_per_second)
        b += u32(stat.st_mtime_ns // ns_per_second)
        b += u32(stat.st_mtime_ns % ns_per_second)

        b += u32(stat.st_dev)
        b += u32(stat.st_ino)
        # The mode is fixed to a regular, non-executable file.
        b += u32(0o100644)
        b += u32(stat.st_uid)
        b += u32(stat.st_gid)
        b += u32(stat.st_size)

        b += bytes.fromhex(self.blob_sha)

        entry_name = self._entry_name()

        # High 4 bits: assume-valid, extended, 2-bit merge stage (all zero here).
        # Low 12 bits: byte length of the entry name, capped at 0xFFF.
        assume_valid_flag = 0 << 3
        extended_flag = 0 << 2
        merge_stage_flag = 0
        name_length = min(len(entry_name), 0xfff)
        flags = (
            ((assume_valid_flag | extended_flag | merge_stage_flag) << 12)
            | name_length
        ).to_bytes(2, byteorder='big')
        b += flags

        b += entry_name

        # Always at least one NUL so the name stays NUL-terminated.
        padding_size = 8 - (len(b) % 8)
        b += (b'\0' * padding_size)

        return b

In [15]:
def write_index_file(entries: List[IndexEntry]) -> None:
    """Serialise ``entries`` into ``<git_dir>/index``.

    The file consists of a 12-byte header (``DIRC`` signature, version 2,
    entry count), the entries sorted by their ``path``, and a trailing SHA-1
    computed over everything written before it.

    Args:
        entries: Index entries to store; the list itself is not modified.
    """
    header = b''.join((
        b'DIRC',
        (2).to_bytes(4, byteorder='big'),
        len(entries).to_bytes(4, byteorder='big'),
    ))

    ordered = list(entries)
    ordered.sort(key=lambda entry: entry.path)

    payload = header
    for entry in ordered:
        payload += entry.to_bytes()

    # The checksum covers the header and every entry.
    checksum = hashlib.sha1(payload).digest()

    with open(git_dir.joinpath('index'), 'wb') as index_file:
        index_file.write(payload + checksum)

In [16]:
py_v1_index_entry = IndexEntry(path=os.path.join(base, py_file_name), blob_sha=py_v1_blob_sha, base_path=base)
write_index_file([py_v1_index_entry])

In [17]:
run_cmd(f'tree {base} -a')
run_cmd(f'git -C {base} status')

### git commit

In [18]:
class TreeEntry:
    """One entry (mode, name, object id) of a git tree object.

    Attributes:
        object_type: One of ``'tree'``, ``'blob'``, ``'commit'``, ``'tag'``.
        name: Entry name inside the tree.
        sha: Hex SHA-1 of the referenced object.
        mode: Octal mode string written in front of the name.
    """

    # Default mode per object type. 'commit' is a gitlink (submodule) entry.
    # 'tag' keeps the value it always had here so existing callers see no
    # change -- NOTE(review): git tree entries do not normally reference tags.
    _DEFAULT_MODES = {
        'blob': '100644',
        'tree': '40000',
        'commit': '160000',
        'tag': '40000',
    }

    def __init__(self, object_type: str, name: str, sha: str, mode: Optional[str] = None):
        """Create an entry.

        Args:
            object_type: Kind of object the entry points to.
            name: Entry name inside the tree.
            sha: Hex SHA-1 of the referenced object.
            mode: Optional explicit mode (e.g. ``'100755'``); when omitted the
                default for ``object_type`` is used.

        Raises:
            AssertionError: If ``object_type`` is not a supported value.
        """
        assert(object_type in ('tree', 'blob', 'commit', 'tag'))
        self.object_type = object_type
        self.name = name
        self.sha = sha
        self.mode = mode if mode is not None else self._DEFAULT_MODES[object_type]

In [19]:
def write_tree_object(entries: List['TreeEntry']) -> str:
    """Write a tree object listing ``entries`` and return its SHA-1.

    Each entry is encoded as ``<mode> <name>\\0`` followed by the 20 raw bytes
    of its SHA-1; the concatenation is prefixed with ``tree <size>\\0``.

    Args:
        entries: Entries of the tree, in any order.

    Returns:
        The hex SHA-1 of the tree object.
    """
    def git_sort_key(entry) -> bytes:
        # Git orders tree entries by name bytes, comparing a sub-tree as if
        # its name ended with '/'. So 'foo.txt' sorts before the tree 'foo'.
        name = entry.name.encode('utf-8')
        return name + b'/' if entry.object_type == 'tree' else name

    sorted_entries = sorted(entries, key=git_sort_key)

    entries_content = b''.join(
        f'{e.mode} {e.name}\0'.encode('utf-8') + bytes.fromhex(e.sha)
        for e in sorted_entries
    )
    raw_content = f'tree {len(entries_content)}\0'.encode('utf-8') + entries_content
    sha1 = hashlib.sha1(raw_content).hexdigest()

    write_object(raw_content, sha1, git_dir)

    return sha1

In [20]:
py_tree_entry = TreeEntry(object_type='blob', name=py_file_name, sha=py_v1_blob_sha)
first_tree_sha = write_tree_object([py_tree_entry])
print(first_tree_sha)

30ffe02680eefd02f7ada864196baaade119243b


In [21]:
run_cmd(f'git -C {base} cat-file -p {first_tree_sha}')
run_cmd(f'tree {base} -a')
run_cmd(f'git -C {base} status')

In [22]:
my_name = 'Soros Liu'
my_email = 'soros.liu1029@gmail.com'

def write_commit_object(
    tree_sha: str,
    parent_commmit_sha: Optional[str],
    msg: str,
    timestamp: Optional[int] = None,
    tz_offset: str = '+0800',
) -> str:
    """Write a commit object and return its SHA-1.

    The author and committer are both ``my_name <my_email>``.

    Args:
        tree_sha: Hex SHA-1 of the tree the commit snapshots.
        parent_commmit_sha: Hex SHA-1 of the parent commit, or a falsy value
            for a commit without parent.
        msg: Commit message; a newline is appended to it.
        timestamp: Seconds since the epoch used for both author and committer.
            Defaults to the current time; pass a fixed value to get a
            reproducible commit id.
        tz_offset: Timezone offset written after the timestamp.

    Returns:
        The hex SHA-1 of the commit object.
    """
    # Read the clock once so author and committer can never disagree.
    when = int(time.time()) if timestamp is None else int(timestamp)
    identity = f'{my_name} <{my_email}> {when} {tz_offset}'

    commit = f'tree {tree_sha}\n' + \
        (f'parent {parent_commmit_sha}\n' if parent_commmit_sha else '') + \
        f'author {identity}\n' + \
        f'committer {identity}\n' + \
        '\n' + \
        msg + \
        '\n'

    commit_content = commit.encode('utf-8')
    # The header carries the byte length of the encoded commit text.
    raw_content = f'commit {len(commit_content)}\0'.encode('utf-8') + commit_content
    sha1 = hashlib.sha1(raw_content).hexdigest()

    write_object(raw_content, sha1, git_dir)

    return sha1

In [23]:
first_commit_sha = write_commit_object(tree_sha=first_tree_sha, parent_commmit_sha=None, msg='first commit')
print(first_commit_sha)

6f6ce60ddfd8c76ab532882e0175b2551e6de461


In [24]:
run_cmd(f'git -C {base} cat-file -p {first_commit_sha}')
run_cmd(f'tree {base} -a')
run_cmd(f'git -C {base} status')
run_cmd(f'git -C {base} log')

### bookmark

In [25]:
# Store the first commit's SHA in refs/heads/master, the ref that HEAD was
# set to name earlier in this notebook.
with open(git_dir.joinpath('refs', 'heads', 'master'), 'wt', encoding='utf-8') as f:
    f.write(first_commit_sha)

In [26]:
run_cmd(f'tree {base} -a')
run_cmd(f'git -C {base} status')
run_cmd(f'git -C {base} log')
run_cmd(f'git -C {base} fsck --verbose')

### add more files

In [27]:
md_text = '## Explore Git\n'
md_file_name = 'README.md'
with open(os.path.join(base, md_file_name), 'wt', encoding='utf-8') as f:
    f.write(md_text)

In [28]:
run_cmd(f'git -C {base} status')

In [29]:
with open(os.path.join(base, md_file_name), 'rt', encoding='utf-8') as f:
    file_content = f.read()
    
md_blob_sha = write_blob_object(file_content)
print(md_blob_sha)

29ed52ec5fa9b6631a198103754707735d637187


In [30]:
run_cmd(f'git -C {base} cat-file -p {md_blob_sha}')
run_cmd(f'git -C {base} status')
run_cmd(f'tree {base} -a')

In [31]:
md_index_entry = IndexEntry(path=os.path.join(base, md_file_name), blob_sha=md_blob_sha, base_path=base)
write_index_file([py_v1_index_entry, md_index_entry])

In [32]:
run_cmd(f'git -C {base} status')

In [33]:
md_tree_entry = TreeEntry(object_type='blob', name=md_file_name, sha=md_blob_sha)
second_tree_sha = write_tree_object([py_tree_entry, md_tree_entry])
print(second_tree_sha)

c2cde5feba9af8b3f19713822794aeda07725ddd


In [34]:
run_cmd(f'git -C {base} cat-file -p {second_tree_sha}')
run_cmd(f'tree {base} -a')

In [35]:
second_commit_sha = write_commit_object(tree_sha=second_tree_sha, parent_commmit_sha=first_commit_sha, msg='second commit')
print(second_commit_sha)

24df06436d220df557b94b74d1194287919257bc


In [36]:
run_cmd(f'git -C {base} cat-file -p {second_commit_sha}')
run_cmd(f'tree {base} -a')
run_cmd(f'git -C {base} status')

In [37]:
with open(git_dir.joinpath('refs', 'heads', 'master'), 'wt', encoding='utf-8') as f:
    f.write(second_commit_sha)

In [38]:
run_cmd(f'git -C {base} status')
run_cmd(f'git -C {base} log')
run_cmd(f'git -C {base} fsck --verbose')

### git checkout -b

In [39]:
run_cmd(f'cat {base}/.git/HEAD')
run_cmd(f'git -C {base} branch --show-current')
run_cmd(f'git -C {base} rev-parse HEAD')

In [40]:
# Create refs/heads/new-idea as a copy of master's ref file, so both hold the
# same content.
shutil.copy(git_dir.joinpath('refs', 'heads', 'master'), git_dir.joinpath('refs', 'heads', 'new-idea'))

# Rewrite HEAD so that it names the new ref instead of master.
with open(git_dir.joinpath('HEAD'), 'wt', encoding='utf-8') as f:
    f.write('ref: refs/heads/new-idea')

In [41]:
run_cmd(f'git -C {base} branch --show-current')
run_cmd(f'git -C {base} rev-parse HEAD')
run_cmd(f'git -C {base} log --oneline --decorate --graph')

### git merge

In [42]:
py_v2_text = 'print("hello world")\n'
with open(os.path.join(base, py_file_name), 'wt', encoding='utf-8') as f:
    f.write(py_v2_text)

In [43]:
run_cmd(f'git -C {base} status')
run_cmd(f'git -C {base} diff')

In [44]:
with open(os.path.join(base, py_file_name), 'rt', encoding='utf-8') as f:
    file_content = f.read()
    
py_v2_blob_sha = write_blob_object(file_content)
print(py_v2_blob_sha)

8cde7829c178ede96040e03f17c416d15bdacd01


In [45]:
py_v2_index_entry = IndexEntry(path=os.path.join(base, py_file_name), blob_sha=py_v2_blob_sha, base_path=base)
write_index_file([py_v2_index_entry, md_index_entry])

In [46]:
run_cmd(f'git -C {base} status')

In [47]:
py_tree_entry = TreeEntry(object_type='blob', name=py_file_name, sha=py_v2_blob_sha)
third_tree_sha = write_tree_object([py_tree_entry, md_tree_entry])
print(third_tree_sha)

third_commit_sha = write_commit_object(tree_sha=third_tree_sha, parent_commmit_sha=second_commit_sha, msg='third commit')
print(third_commit_sha)

with open(git_dir.joinpath('refs', 'heads', 'new-idea'), 'wt', encoding='utf-8') as f:
    f.write(third_commit_sha)

e640331ae7c9ca36815f713519571799a2276ff7
42cba3fa0318db4694d65953b90b6f6a33d5351f


In [48]:
run_cmd(f'git -C {base} status')
run_cmd(f'git -C {base} log --oneline --decorate --graph')

In [49]:
# Overwrite master's ref file with new-idea's, so master now holds the same
# commit SHA as new-idea; no new object is written.
shutil.copy(git_dir.joinpath('refs', 'heads', 'new-idea'), git_dir.joinpath('refs', 'heads', 'master'))

run_cmd(f'git -C {base} log --oneline --decorate --graph')

A real recursive `git merge` is skipped here: copying the `new-idea` ref onto `master` only reproduces a fast-forward.