diff --git a/.github/markdown-links-config.json b/.github/markdown-links-config.json new file mode 100644 index 000000000..76bbc794a --- /dev/null +++ b/.github/markdown-links-config.json @@ -0,0 +1,27 @@ +{ + "ignorePatterns": [ + { + "pattern": "^https://github.com/Lightning-AI/litData/pull/" + }, + { + "pattern": "^https://codecov.io/gh/Lightning-AI/litData/graph/badge.svg" + } + ], + "httpHeaders": [ + { + "urls": [ + "https://github.com/", + "https://guides.github.com/", + "https://help.github.com/", + "https://docs.github.com/" + ], + "headers": { + "Accept-Encoding": "zstd, br, gzip, deflate" + } + } + ], + "timeout": "20s", + "retryOn429": true, + "retryCount": 5, + "fallbackRetryDelay": "20s" +} diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml index 7552db466..4fe3fe837 100644 --- a/.github/workflows/ci-checks.yml +++ b/.github/workflows/ci-checks.yml @@ -37,3 +37,10 @@ jobs: uses: Lightning-AI/utilities/.github/workflows/check-docs.yml@main with: requirements-file: "requirements/docs.txt" + + check-md-links: + uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@main + with: + config-file: ".github/markdown-links-config.json" + base-branch: "main" + force-check-all: "yes" diff --git a/README.md b/README.md index aebe2058a..422f003b8 100644 --- a/README.md +++ b/README.md @@ -571,7 +571,7 @@ Stream data during long training, if interrupted, pick up right where you left o LitData provides a stateful `Streaming DataLoader` e.g. you can `pause` and `resume` your training whenever you want. -Info: The `Streaming DataLoader` was used by [Lit-GPT](https://github.com/Lightning-AI/lit-gpt/blob/main/pretrain/tinyllama.py) to pretrain LLMs. Restarting from an older checkpoint was critical to get to pretrain the full model due to several failures (network, CUDA Errors, etc..). 
+Info: The `Streaming DataLoader` was used by [LitGPT](https://github.com/Lightning-AI/litgpt/blob/main/tutorials/pretrain_tinyllama.md) to pretrain LLMs. Restarting from an older checkpoint was critical to pretrain the full model due to several failures (network, CUDA errors, etc.). ```python import os diff --git a/examples/multi_modal/README.md b/examples/multi_modal/README.md index 97c2999d4..d192b2bda 100644 --- a/examples/multi_modal/README.md +++ b/examples/multi_modal/README.md @@ -126,7 +126,7 @@ pip install -r requirements.txt ## License -This project is licensed under the APACHE 2.0 License. See the [LICENSE](LICENSE) file for details. +This project is licensed under the [Apache 2.0 License](https://github.com/Lightning-AI/litData/blob/main/LICENSE). ______________________________________________________________________