LabStrangeLoop
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.python-version‎
Lines changed: 1 addition & 0 deletions b/‎.python-version‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 9 additions & 0 deletions b/‎README.md‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎docker/into_torch_with_sm120_5090.sh‎
Lines changed: 8 additions & 0 deletions b/‎docker/into_torch_with_sm120_5090.sh‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎main.py‎
Lines changed: 6 additions & 0 deletions b/‎main.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎poetry.lock‎
Lines changed: 0 additions & 1453 deletions b/‎poetry.lock‎
Lines changed: 0 additions & 1453 deletions
diff --git a/‎poetry.toml‎
Lines changed: 0 additions & 2 deletions b/‎poetry.toml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 30 additions & 24 deletions b/‎pyproject.toml‎
Lines changed: 30 additions & 24 deletions
diff --git a/‎scratch_gpt.yaml.sample‎
Lines changed: 12 additions & 0 deletions b/‎scratch_gpt.yaml.sample‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎scratchgpt/config.py‎
Lines changed: 72 additions & 0 deletions b/‎scratchgpt/config.py‎
Lines changed: 72 additions & 0 deletions
@@ -3,3 +3,4 @@ karpathy*
 __pycache__
 *.pyc
 experiments
+solutions
@@ -0,0 +1 @@
+3.12
@@ -15,6 +15,15 @@ repo is educational, so the aim is to keep the code as legible as possible.
 - Flexible tokenization using TikToken
 - Command-line interfaces for training and inference
 
+## Roadmap
+
+- [x] Switch to uv
+- [x] Make it easy to modify with a config file
+- [ ] Make it into a package
+- [ ] Create an easy-to-use interface
+- [ ] Create or check tokenizer interface
+- [ ] Apply SOTA optimizations
+
 ## Requirements
 
 - Python 3.12+
 
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Start an interactive bash shell in the vllm-sm120 image: all GPUs exposed, host IPC shared, current directory mounted at /app.
+docker run -it \
+  --gpus all \
+  --ipc=host \
+  -v "$(pwd)":/app \
+  --entrypoint bash \
+  vllm-sm120:latest
@@ -0,0 +1,6 @@
+def main() -> None:  # Entry point: prints a fixed greeting to stdout.
+    print("Hello from scratchgpt!")
+
+
+if __name__ == "__main__":  # Call main() only when run as a script, not on import.
+    main()
@@ -1,27 +1,33 @@
-[tool.poetry]
+[project]
 name = "scratchgpt"
-version = "0.1.0"
-description = ""
-authors = ["Aleksandr Yeganov <ayeganov@gmail.com>", "Dario Cazzani <dariocazzani@gmail.com"]
+version = "0.2.0"
+description = "Add your description here"
+authors = [
+  { name = "Aleksandr Yeganov", email = "ayeganov@gmail.com" },
+  { name = "Dario Cazzani", email = "dariocazzani@gmail.com" }
+]
 readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "numpy>=2.3.2",
+    "ptflops>=0.7.5",
+    "pydantic-settings>=2.10.1",
+    "pydantic-yaml>=1.6.0",
+    "tiktoken>=0.11.0",
+    "torch>=2.8.0",
+    "tqdm>=4.67.1",
+    "types-tqdm>=4.67.0.20250809",
+]
 
-[tool.poetry.dependencies]
-python = "^3.12"
-torch = "^2.4"
-tqdm = "^4.66"
-types-tqdm = "^4.66"
-ptflops = "^0.7"
-numpy = "^2.1"
-tiktoken = "^0.7"
-
-[tool.poetry.group.dev.dependencies]
-pylint = "^3.0.3"
-pytest = "^8.3"
-bandit = "^1.7.7"
-mypy = "^1.8.0"
-pytest-cov = "^4.1.0"
-isort = "^5.13.2"
-black = "^24.2.0"
+[dependency-groups]
+dev = [
+    "bandit>=1.8.6",
+    "black>=25.1.0",
+    "isort>=6.0.1",
+    "mypy>=1.17.1",
+    "pylint>=3.3.8",
+    "pytest>=8.4.1",
+]
 
 [tool.isort]
 profile = "black"
@@ -56,10 +62,10 @@ asyncio_mode = "auto"
 python_version = "3.12"
 
 [build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+requires = ["hatchling"]
+build-backend = "hatchling.build"
 
-[tool.poetry.scripts]
+[project.scripts]
 train = "scratchgpt.main:main"
 infer = "scratchgpt.infer:main"
 tiktoken = "scratchgpt.tokenizer.tiktoken:main"
@@ -0,0 +1,12 @@
+architecture:
+  block_size: 256
+  embedding_size: 256
+  num_heads: 4
+  num_blocks: 4
+
+training:
+  max_epochs: 50
+  learning_rate: 3.0e-4
+  batch_size: 48
+  dropout_rate: 0.2
+  random_seed: 1337
@@ -0,0 +1,72 @@
+from pydantic import Field
+from pydantic_settings import (
+    BaseSettings,
+    PydanticBaseSettingsSource,
+    SettingsConfigDict,
+    YamlConfigSettingsSource,
+)
+
+
+class ScratchGPTArchitecture(BaseSettings):
+    """
+    Model architecture settings; fields may also be read from ARCHITECTURE_-prefixed environment variables.
+    """
+
+    block_size: int = 256
+    embedding_size: int = 384
+    """ Size of the individual embeddings vector """
+    num_heads: int = 6
+    num_blocks: int = 6
+    vocab_size: int | None = None  # left unset (None) by default
+
+    model_config = SettingsConfigDict(
+        env_prefix="ARCHITECTURE_",
+        extra="allow",  # unknown keys are accepted rather than rejected
+    )
+
+
+class ScratchGPTTraining(BaseSettings):
+    """
+    Training settings; fields may also be read from TRAINING_-prefixed environment variables.
+    """
+
+    max_epochs: int = 50
+    learning_rate: float = 3e-4
+    batch_size: int = 32
+    dropout_rate: float = 0.2
+    random_seed: int = 1337
+
+    model_config = SettingsConfigDict(
+        env_prefix="TRAINING_",
+        extra="allow",  # unknown keys are accepted rather than rejected
+    )
+
+
+class ScratchGPTConfig(BaseSettings):
+    """
+    Top-level config grouping the architecture and training sections; also loads scratch_gpt.yaml.
+    """
+
+    architecture: ScratchGPTArchitecture = Field(default_factory=ScratchGPTArchitecture)
+    training: ScratchGPTTraining = Field(default_factory=ScratchGPTTraining)
+
+    model_config = SettingsConfigDict(
+        env_prefix="SCRATCH_GPT_",
+        extra="allow",  # unknown keys are accepted rather than rejected
+    )
+
+    @classmethod
+    def settings_customise_sources(
+        cls,
+        settings_cls: type[BaseSettings],
+        init_settings: PydanticBaseSettingsSource,
+        env_settings: PydanticBaseSettingsSource,
+        dotenv_settings: PydanticBaseSettingsSource,  # accepted but not included in the returned sources
+        file_secret_settings: PydanticBaseSettingsSource,
+    ) -> tuple[PydanticBaseSettingsSource, ...]:
+        return (  # pydantic-settings gives earlier entries higher priority
+            env_settings,
+            init_settings,  # NOTE(review): ranked below env, so env vars override constructor kwargs - confirm intended
+            file_secret_settings,
+            YamlConfigSettingsSource(settings_cls, yaml_file="scratch_gpt.yaml"),  # last entry; relative path
+        )