# Git internals study session, 22 February 2023

## 0. The `.git/` directory

In [1]:
import hashlib
import os
import pathlib
import zlib

In [2]:
# Locations inside the repository that the rest of the notebook reads from.
# pathlib.Path selects the concrete path class for the current OS; instantiating
# PosixPath directly raises on non-POSIX systems.
GIT_DIRECTORY = pathlib.Path(".git/")

# Root of Git's object database.
OBJECTS_ROOT_DIR = GIT_DIRECTORY / "objects"

# Object ID passed to `git cat-file` in the "Immutable object database!" section.
OLD_FILE_ID = "2bfc82f7e00838e0e39dde7b77c4df3b4817f091"

# Confirm the notebook is running from a repository root, then list .git/ one entry per line.
print(GIT_DIRECTORY.exists())
print("\n".join(str(entry) for entry in GIT_DIRECTORY.iterdir()))

True
.git/config
.git/objects
.git/HEAD
.git/info
.git/logs
.git/description
.git/hooks
.git/refs
.git/index
.git/COMMIT_EDITMSG


In [3]:
# Same directory listing via the shell: -a includes dotfiles, -h prints
# human-readable sizes, -l uses the long format (permissions, owner, size, mtime).
!ls -ahl .git/

total 40
drwxr-xr-x  12 eric.fulmer  FARFETCH\Domain Users   384B Feb 20 16:04 [1m[36m.[39;49m[0m
drwxr-xr-x   9 eric.fulmer  FARFETCH\Domain Users   288B Feb 20 16:07 [1m[36m..[39;49m[0m
-rw-r--r--   1 eric.fulmer  FARFETCH\Domain Users    41B Feb 20 16:04 COMMIT_EDITMSG
-rw-r--r--   1 eric.fulmer  FARFETCH\Domain Users    21B Feb 20 09:36 HEAD
-rw-r--r--   1 eric.fulmer  FARFETCH\Domain Users   137B Feb 20 09:36 config
-rw-r--r--   1 eric.fulmer  FARFETCH\Domain Users    73B Feb 20 09:36 description
drwxr-xr-x  15 eric.fulmer  FARFETCH\Domain Users   480B Feb 20 09:36 [1m[36mhooks[39;49m[0m
-rw-r--r--   1 eric.fulmer  FARFETCH\Domain Users   305B Feb 20 16:04 index
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users    96B Feb 20 09:36 [1m[36minfo[39;49m[0m
drwxr-xr-x   4 eric.fulmer  FARFETCH\Domain Users   128B Feb 20 09:36 [1m[36mlogs[39;49m[0m
drwxr-xr-x  14 eric.fulmer  FARFETCH\Domain Users   448B Feb 20 16:04 [1m[36mobjects[39;49m[0m
drwxr-xr-

In [4]:
# HEAD is a small text file; print it verbatim to see which ref it points at.
head_path = GIT_DIRECTORY / "HEAD"
with head_path.open("r") as f:
    head_text = f.read()
print(head_text)

ref: refs/heads/main



## 1. Git Objects I - Blobs

In [5]:
# List every file that sits one directory level below the object store.
# The paths are derived from OBJECTS_ROOT_DIR rather than a hard-coded
# ".git/objects" prefix, so the listing stays correct if GIT_DIRECTORY changes.
# Order is whatever iterdir() yields; it is not sorted.
[
    str(object_file)
    for fanout_dir in OBJECTS_ROOT_DIR.iterdir() if fanout_dir.is_dir()
    for object_file in fanout_dir.iterdir() if object_file.is_file()
]

['.git/objects/68/bc17f9ff2104a9d7b6777058bb4c343ca72609',
 '.git/objects/b5/a99e2b88b7fdb8fb3e54a1df2ce801acecfeb5',
 '.git/objects/d8/e0be6dc085079b872c14999a1783f47ca1311f',
 '.git/objects/ab/1964713b60fdfbe235a362b3ec0f2ef9c066c0',
 '.git/objects/c6/266c7a5b2b5825ff627216e8133135ff0ec496',
 '.git/objects/64/ba6ed86228c76b5090cef85c65d91a770c7a99',
 '.git/objects/46/c51474515d3bc9c9388040206e58db2f2ff080',
 '.git/objects/41/dd0bdb604c7732ef6f9eb4b887a9bd5088b7df',
 '.git/objects/70/7b7d108cf0d5801e58574dfc84ce8d013961f1',
 '.git/objects/2b/fc82f7e00838e0e39dde7b77c4df3b4817f091']

In [6]:
# Long listing of the object store; -t sorts by modification time, newest first,
# so the directories created by the most recent commit appear at the top.
!ls -alt .git/objects

total 0
drwxr-xr-x  14 eric.fulmer  FARFETCH\Domain Users  448 Feb 20 16:04 [1m[36m.[39;49m[0m
drwxr-xr-x  12 eric.fulmer  FARFETCH\Domain Users  384 Feb 20 16:04 [1m[36m..[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 16:04 [1m[36md8[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 16:04 [1m[36m70[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 16:04 [1m[36mb5[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 09:38 [1m[36m46[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 09:38 [1m[36mc6[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 09:38 [1m[36m68[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 09:36 [1m[36m41[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 09:36 [1m[36m64[39;49m[0m
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 

In [7]:
# Look inside one two-character object directory.
# The explicit `%` runs IPython's `ls` alias as a line magic, so the cell no longer
# depends on automagic being enabled (automagic also only applies to single-line cells).
%ls -alt .git/objects/d8

total 8
drwxr-xr-x  14 eric.fulmer  FARFETCH\Domain Users  448 Feb 20 16:04 [1m[36m..[39;49m[0m/
drwxr-xr-x   3 eric.fulmer  FARFETCH\Domain Users   96 Feb 20 16:04 [1m[36m.[39;49m[0m/
-r--r--r--   1 eric.fulmer  FARFETCH\Domain Users  197 Feb 20 16:04 e0be6dc085079b872c14999a1783f47ca1311f


In [8]:
# Recompute by hand the object ID Git assigns to pyproject.toml.
# A blob's ID is the SHA-1 of b"blob <size>\0" followed by the raw file bytes,
# where <size> is the length in BYTES. The file is therefore read in binary mode:
# text mode would count characters (wrong for non-ASCII content) and may translate
# newlines, both of which change the hash.
with open("pyproject.toml", "rb") as f:
    contents = f.read()

header = f"blob {len(contents)}\0".encode("ascii")
git_object = header + contents

hasher = hashlib.sha1(git_object)
head_pyproject_id = hasher.hexdigest()
print(head_pyproject_id)

b5a99e2b88b7fdb8fb3e54a1df2ce801acecfeb5


In [10]:
# The first two hex digits of the ID computed above (b5) name the directory;
# the file inside it is named with the remaining digits.
!ls .git/objects/b5

a99e2b88b7fdb8fb3e54a1df2ce801acecfeb5


In [11]:
# Locate the object on disk from its ID (two-digit directory + remainder as file name)
# and load the stored bytes exactly as Git wrote them.
object_path = OBJECTS_ROOT_DIR / head_pyproject_id[:2] / head_pyproject_id[2:]
with object_path.open("rb") as file_contents:
    git_pyproject = file_contents.read()
print(git_pyproject)

b"x\x01M\x90OK\x031\x14\xc4=\xe7S<r\x95\r\xdd\xaa\x85\x82\x8a\x82[O\x82\xe8\xb1\xac\x90M^m4\xff|I\x84|{\xd3n)\xde\xc2\xe41\xf3\x9b\x99l\x98\xe0\xba_]ls\x08V\xc4\x80\x99\xea\xc8\xbct\x08w\xc0?M\xee\x8c\xcfH^\xda\xd4\xa5\\t\xed\x12\xa6d\x82\xe7\xec\x17\xe9\xf08\xdc\xf5b!\x16\x9ciL\x8aL\xcc'\xf5\xddIkA\xa3\x0b\x10v\xf0l2\x9c\xbd`\x17\x08\xa4?\x0b\xb0\x91\xb4\xc3\xac\xf6p\x0c\x81s\x88,y\x1f(\xb5\x90-\x1f\xc8(\xd8\x14\xeb\x90\xe0v\xb9\xbeY\xae\xd7\xab\xcba\x16\x1eJj8\xc2\x07\xc2h\xabh\xe0\xfb2\t\x15\xdc=\x1f\x995\n}:6z\xa5\x10\xc9`\x96T9#\x94z.\xfa6<>\xbd\x0c\xc2i\xce\xd8\xff)\x84\xc6\x88^\xa3W\x06\xd3\xc8bm4\xc7\xc6\x1fW\xa2\xef9\xfb*Mj8m\x83\x8f\xd3\x08\xcd`*\xc6\xea.\xd5\x94\xd1\x8d-\xe5\xa7\x18\xc2\xb9\xc3\xbcp\xa7\x1ag\x03\x9b\x0f'\xa9\xbe[\xc8\xc1d\xfen\xdc\x84\xc2\xc9\x14<U!\xa3\xe1\xec\x0f\x05\\\x8fM"


In [12]:
# The stored bytes are zlib-compressed; inflating them shows the "blob <size>" header
# followed by the file content.
inflated_object = zlib.decompress(git_pyproject)
print(inflated_object.decode("utf-8"))

blob 416 [tool.poetry]
name = "git-internals-study-session"
version = "1.0.0"
description = "Small demo of Git internals for an internal Farfetch study session"
authors = ["Eric Fulmer <2952996+EFulmer@users.noreply.github.com>"]
license = "Proprietary"
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.11"
jupyter = "^1.0.0"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"



### Sanity check

In [13]:
# Ask Git itself for the blob ID of pyproject.toml; it should equal the SHA-1
# computed by hand earlier in the notebook.
!git hash-object -t blob pyproject.toml

b5a99e2b88b7fdb8fb3e54a1df2ce801acecfeb5


In [15]:
# `git cat-file -p` pretty-prints an object's content given its ID; here, the blob
# for the current pyproject.toml.
!git cat-file -p b5a99e2b88b7fdb8fb3e54a1df2ce801acecfeb5

[tool.poetry]
name = "git-internals-study-session"
version = "1.0.0"
description = "Small demo of Git internals for an internal Farfetch study session"
authors = ["Eric Fulmer <2952996+EFulmer@users.noreply.github.com>"]
license = "Proprietary"
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.11"
jupyter = "^1.0.0"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


### Immutable object database!

In [16]:
# IPython substitutes the Python variable OLD_FILE_ID into the shell command.
# The blob it names is still in the object store, and its content differs from the
# current pyproject.toml blob (it has an extra `packages` line).
!git cat-file -p $OLD_FILE_ID

[tool.poetry]
name = "git-internals-study-session"
version = "1.0.0"
description = "Small demo of Git internals for an internal Farfetch study session"
authors = ["Eric Fulmer <2952996+EFulmer@users.noreply.github.com>"]
license = "Proprietary"
readme = "README.md"
packages = [{include = "git_internals_study_session"}]

[tool.poetry.dependencies]
python = "^3.11"
jupyter = "^1.0.0"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


## 1. Git Objects II - Trees

Read more: https://git-scm.com/book/en/v2/Git-Internals-Plumbing-and-Porcelain