From c76535c76565e2cbec8695b9024742a055705919 Mon Sep 17 00:00:00 2001
From: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>
Date: Tue, 21 May 2024 09:43:09 +0200
Subject: [PATCH 1/3] docs: Added licenses

---
 docs/datasets.md | 80 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 54 insertions(+), 26 deletions(-)

diff --git a/docs/datasets.md b/docs/datasets.md
index ffe92e00..fd989ada 100644
--- a/docs/datasets.md
+++ b/docs/datasets.md
@@ -7,35 +7,63 @@ The following tables contains description of all the dataset in the benchmark al
 <!-- This dataset is autogenerated. If you want to update it please update the dataset metadata instead. -->
 
 <!--START_TABLE-->
-| Dataset                                                                                                                                                  | Description                                                                                                                                                                                                                                                                            | Main Score   | Languages              | Type           | Domains                                                              |   Number of Documents | Mean Length of Documents (characters)   |
-|:---------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------|:-----------------------|:---------------|:---------------------------------------------------------------------|----------------------:|:----------------------------------------|
-| [Angry Tweets](https://aclanthology.org/2021.nodalida-main.53/)                                                                                          | A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets                                                                                                                                                                                                       | Accuracy     | da                     | Classification | social                                                               |                  1047 | 156.15 (std: 82.02)                     |
-| [Bornholm Parallel](https://aclanthology.org/W19-6138/)                                                                                                  | Danish Bornholmsk Parallel Corpus. Bornholmsk is a Danish dialect spoken on the island of Bornholm, Denmark. Historically it is a part of east Danish which was also spoken in Scania and Halland, Sweden.                                                                             | F1           | da, da-bornholm        | BitextMining   | poetry, wiki, fiction, web, social                                   |                  1000 | 44.36 (std: 41.22)                      |
-| [DKHate](https://aclanthology.org/2020.lrec-1.430/)                                                                                                      | Danish Tweets annotated for Hate Speech either being Offensive or not                                                                                                                                                                                                                  | Accuracy     | da                     | Classification | social                                                               |                   329 | 88.18 (std: 168.30)                     |
-| [Da Political Comments](https://huggingface.co/datasets/danish_political_comments)                                                                       | A dataset of Danish political comments rated for sentiment                                                                                                                                                                                                                             | Accuracy     | da                     | Classification | social                                                               |                  7206 | 69.60 (std: 62.85)                      |
-| [DaLAJ](https://spraakbanken.gu.se/en/resources/superlim)                                                                                                | A Swedish dataset for linguistic acceptability. Available as a part of Superlim.                                                                                                                                                                                                       | Accuracy     | sv                     | Classification | fiction, non-fiction                                                 |                   888 | 120.77 (std: 67.95)                     |
-| [DanFEVER](https://aclanthology.org/2021.nodalida-main.47/)                                                                                              | A Danish dataset intended for misinformation research. It follows the same format as the English FEVER dataset.                                                                                                                                                                        | Ndcg_at_10   | da                     | Retrieval      | wiki, non-fiction                                                    |                  8897 | 124.84 (std: 168.53)                    |
-| [LCC](https://github.com/fnielsen/lcc-sentiment)                                                                                                         | The leipzig corpora collection, annotated for sentiment                                                                                                                                                                                                                                | Accuracy     | da                     | Classification | legal, web, news, social, fiction, non-fiction, academic, government |                   150 | 118.73 (std: 57.82)                     |
-| [Language Identification](https://aclanthology.org/2021.vardial-1.8/)                                                                                    | A dataset for Nordic language identification.                                                                                                                                                                                                                                          | Accuracy     | da, sv, nb, nn, is, fo | Classification | wiki                                                                 |                  3000 | 78.23 (std: 48.54)                      |
-| [Massive Intent](https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.)   | MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages                                                                                                                                                                      | Accuracy     | da, nb, sv             | Classification | spoken                                                               |                 15021 | 34.65 (std: 16.99)                      |
-| [Massive Scenario](https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.) | MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages                                                                                                                                                                      | Accuracy     | da, nb, sv             | Classification | spoken                                                               |                 15021 | 34.65 (std: 16.99)                      |
-| [NoReC](https://aclanthology.org/L18-1661/)                                                                                                              | A Norwegian dataset for sentiment classification on review                                                                                                                                                                                                                             | Accuracy     | nb                     | Classification | reviews                                                              |                  2048 | 89.62 (std: 61.21)                      |
-| [NorQuad](https://aclanthology.org/2023.nodalida-1.17/)                                                                                                  | Human-created question for Norwegian wikipedia passages.                                                                                                                                                                                                                               | Ndcg_at_10   | nb                     | Retrieval      | non-fiction, wiki                                                    |                  2602 | 502.19 (std: 875.23)                    |
-| [Norwegian courts](https://opus.nlpl.eu/ELRC-Courts_Norway-v1.php)                                                                                       | Nynorsk and Bokmål parallel corpus from Norwegian courts. Norway has two standardised written languages. Bokmål is a variant closer to Danish, while Nynorsk was created to resemble regional dialects of Norwegian.                                                                   | F1           | nb, nn                 | BitextMining   | legal, non-fiction                                                   |                   456 | 82.11 (std: 49.48)                      |
-| [Norwegian parliament](https://huggingface.co/datasets/NbAiLab/norwegian_parliament)                                                                     | Norwegian parliament speeches annotated with the party of the speaker (`Sosialistisk Venstreparti` vs `Fremskrittspartiet`)                                                                                                                                                            | Accuracy     | nb                     | Classification | spoken                                                               |                  2400 | 1897.51 (std: 1988.62)                  |
-| [SNL Clustering](https://huggingface.co/datasets/navjordj/SNL_summarization)                                                                             | Webscrabed articles from the Norwegian lexicon 'Det Store Norske Leksikon'. Uses articles categories as clusters.                                                                                                                                                                      | V_measure    | nb                     | Clustering     | non-fiction, wiki                                                    |                  2048 | 1101.30 (std: 2168.35)                  |
-| [SNL Retrieval](https://huggingface.co/datasets/navjordj/SNL_summarization)                                                                              | Webscrabed articles and ingresses from the Norwegian lexicon 'Det Store Norske Leksikon'.                                                                                                                                                                                              | Ndcg_at_10   | nb                     | Retrieval      | non-fiction, wiki                                                    |                  2600 | 1001.43 (std: 2537.83)                  |
-| [ScaLA](https://aclanthology.org/2023.nodalida-1.20/)                                                                                                    | A linguistic acceptability task for Danish, Norwegian Bokmål Norwegian Nynorsk and Swedish.                                                                                                                                                                                            | Accuracy     | da, nb, sv, nn         | Classification | fiction, news, non-fiction, spoken, blog                             |                  8192 | 102.45 (std: 55.49)                     |
-| [SweFAQ](https://spraakbanken.gu.se/en/resources/superlim)                                                                                               | A Swedish QA dataset derived from FAQ                                                                                                                                                                                                                                                  | Ndcg_at_10   | sv                     | Retrieval      | non-fiction, web                                                     |                  1024 | 195.44 (std: 209.33)                    |
-| [SweReC](https://aclanthology.org/2023.nodalida-1.20/)                                                                                                   | A Swedish dataset for sentiment classification on review                                                                                                                                                                                                                               | Accuracy     | sv                     | Classification | reviews                                                              |                  2048 | 318.83 (std: 499.57)                    |
-| [SwednClustering](https://spraakbanken.gu.se/en/resources/swedn)                                                                                         | The SWE-DN corpus is based on 1,963,576 news articles from the Swedish newspaper Dagens Nyheter (DN) during the years 2000--2020. The articles are filtered to resemble the CNN/DailyMail dataset both regarding textual structure. This dataset uses the category labels as clusters. | V_measure    | sv                     | Clustering     | non-fiction, news                                                    |                  2048 | 1619.71 (std: 2220.36)                  |
-| [SwednRetrieval](https://spraakbanken.gu.se/en/resources/swedn)                                                                                          | News Article Summary Semantic Similarity Estimation.                                                                                                                                                                                                                                   | Ndcg_at_10   | sv                     | Retrieval      | non-fiction, news                                                    |                  3070 | 1946.35 (std: 3071.98)                  |
-| [TV2Nord Retrieval](https://huggingface.co/datasets/alexandrainst/nordjylland-news-summarization)                                                        | News Article and corresponding summaries extracted from the Danish newspaper TV2 Nord.                                                                                                                                                                                                 | Ndcg_at_10   | da                     | Retrieval      | news, non-fiction                                                    |                  4096 | 784.11 (std: 982.97)                    |
-| [Twitterhjerne](https://huggingface.co/datasets/sorenmulli/da-hashtag-twitterhjerne)                                                                     | Danish question asked on Twitter with the Hashtag #Twitterhjerne ('Twitter brain') and their corresponding answer.                                                                                                                                                                     | Ndcg_at_10   | da                     | Retrieval      | social                                                               |                   340 | 138.23 (std: 82.41)                     |
-| [VG Clustering](https://huggingface.co/datasets/navjordj/VG_summarization)                                                                               | Articles and their classes (e.g. sports) from VG news articles extracted from Norsk Aviskorpus.                                                                                                                                                                                        | V_measure    | nb                     | Clustering     | non-fiction, news                                                    |                  2048 | 1009.65 (std: 1597.60)                  |
+| Dataset                                                                                                                                                  | Description                                                                                                                                                                                                                                                                            | Main Score | Languages              | Type           | Domains                                                              | Number of Documents | Mean Length of Documents (characters) |
+| :------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------- | :--------------------- | :------------- | :------------------------------------------------------------------- | ------------------: | :------------------------------------ |
+| [Angry Tweets](https://aclanthology.org/2021.nodalida-main.53/)                                                                                          | A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets                                                                                                                                                                                                       | Accuracy   | da                     | Classification | social                                                               |                1047 | 156.15 (std: 82.02)                   |
+| [Bornholm Parallel](https://aclanthology.org/W19-6138/)                                                                                                  | Danish Bornholmsk Parallel Corpus. Bornholmsk is a Danish dialect spoken on the island of Bornholm, Denmark. Historically it is a part of east Danish which was also spoken in Scania and Halland, Sweden.                                                                             | F1         | da, da-bornholm        | BitextMining   | poetry, wiki, fiction, web, social                                   |                1000 | 44.36 (std: 41.22)                    |
+| [DKHate](https://aclanthology.org/2020.lrec-1.430/)                                                                                                      | Danish Tweets annotated for Hate Speech either being Offensive or not                                                                                                                                                                                                                  | Accuracy   | da                     | Classification | social                                                               |                 329 | 88.18 (std: 168.30)                   |
+| [Da Political Comments](https://huggingface.co/datasets/danish_political_comments)                                                                       | A dataset of Danish political comments rated for sentiment                                                                                                                                                                                                                             | Accuracy   | da                     | Classification | social                                                               |                7206 | 69.60 (std: 62.85)                    |
+| [DaLAJ](https://spraakbanken.gu.se/en/resources/superlim)                                                                                                | A Swedish dataset for linguistic acceptability. Available as a part of Superlim.                                                                                                                                                                                                       | Accuracy   | sv                     | Classification | fiction, non-fiction                                                 |                 888 | 120.77 (std: 67.95)                   |
+| [DanFEVER](https://aclanthology.org/2021.nodalida-main.47/)                                                                                              | A Danish dataset intended for misinformation research. It follows the same format as the English FEVER dataset.                                                                                                                                                                        | Ndcg_at_10 | da                     | Retrieval      | wiki, non-fiction                                                    |                8897 | 124.84 (std: 168.53)                  |
+| [LCC](https://github.com/fnielsen/lcc-sentiment)                                                                                                         | The leipzig corpora collection, annotated for sentiment                                                                                                                                                                                                                                | Accuracy   | da                     | Classification | legal, web, news, social, fiction, non-fiction, academic, government |                 150 | 118.73 (std: 57.82)                   |
+| [Language Identification](https://aclanthology.org/2021.vardial-1.8/)                                                                                    | A dataset for Nordic language identification.                                                                                                                                                                                                                                          | Accuracy   | da, sv, nb, nn, is, fo | Classification | wiki                                                                 |                3000 | 78.23 (std: 48.54)                    |
+| [Massive Intent](https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.)   | MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages                                                                                                                                                                      | Accuracy   | da, nb, sv             | Classification | spoken                                                               |               15021 | 34.65 (std: 16.99)                    |
+| [Massive Scenario](https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.) | MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages                                                                                                                                                                      | Accuracy   | da, nb, sv             | Classification | spoken                                                               |               15021 | 34.65 (std: 16.99)                    |
+| [NoReC](https://aclanthology.org/L18-1661/)                                                                                                              | A Norwegian dataset for sentiment classification on review                                                                                                                                                                                                                             | Accuracy   | nb                     | Classification | reviews                                                              |                2048 | 89.62 (std: 61.21)                    |
+| [NorQuad](https://aclanthology.org/2023.nodalida-1.17/)                                                                                                  | Human-created questions for Norwegian Wikipedia passages.                                                                                                                                                                                                                              | Ndcg_at_10 | nb                     | Retrieval      | non-fiction, wiki                                                    |                2602 | 502.19 (std: 875.23)                  |
+| [Norwegian courts](https://opus.nlpl.eu/ELRC-Courts_Norway-v1.php)                                                                                       | Nynorsk and Bokmål parallel corpus from Norwegian courts. Norway has two standardised written languages. Bokmål is a variant closer to Danish, while Nynorsk was created to resemble regional dialects of Norwegian.                                                                   | F1         | nb, nn                 | BitextMining   | legal, non-fiction                                                   |                 456 | 82.11 (std: 49.48)                    |
+| [Norwegian parliament](https://huggingface.co/datasets/NbAiLab/norwegian_parliament)                                                                     | Norwegian parliament speeches annotated with the party of the speaker (`Sosialistisk Venstreparti` vs `Fremskrittspartiet`)                                                                                                                                                            | Accuracy   | nb                     | Classification | spoken                                                               |                2400 | 1897.51 (std: 1988.62)                |
+| [SNL Clustering](https://huggingface.co/datasets/navjordj/SNL_summarization)                                                                             | Webscraped articles from the Norwegian lexicon 'Det Store Norske Leksikon'. Uses article categories as clusters.                                                                                                                                                                       | V_measure  | nb                     | Clustering     | non-fiction, wiki                                                    |                2048 | 1101.30 (std: 2168.35)                |
+| [SNL Retrieval](https://huggingface.co/datasets/navjordj/SNL_summarization)                                                                              | Webscraped articles and ingresses from the Norwegian lexicon 'Det Store Norske Leksikon'.                                                                                                                                                                                              | Ndcg_at_10 | nb                     | Retrieval      | non-fiction, wiki                                                    |                2600 | 1001.43 (std: 2537.83)                |
+| [ScaLA](https://aclanthology.org/2023.nodalida-1.20/)                                                                                                    | A linguistic acceptability task for Danish, Norwegian Bokmål, Norwegian Nynorsk and Swedish.                                                                                                                                                                                           | Accuracy   | da, nb, sv, nn         | Classification | fiction, news, non-fiction, spoken, blog                             |                8192 | 102.45 (std: 55.49)                   |
+| [SweFAQ](https://spraakbanken.gu.se/en/resources/superlim)                                                                                               | A Swedish QA dataset derived from FAQ                                                                                                                                                                                                                                                  | Ndcg_at_10 | sv                     | Retrieval      | non-fiction, web                                                     |                1024 | 195.44 (std: 209.33)                  |
+| [SweReC](https://aclanthology.org/2023.nodalida-1.20/)                                                                                                   | A Swedish dataset for sentiment classification on review                                                                                                                                                                                                                               | Accuracy   | sv                     | Classification | reviews                                                              |                2048 | 318.83 (std: 499.57)                  |
+| [SwednClustering](https://spraakbanken.gu.se/en/resources/swedn)                                                                                         | The SWE-DN corpus is based on 1,963,576 news articles from the Swedish newspaper Dagens Nyheter (DN) during the years 2000--2020. The articles are filtered to resemble the CNN/DailyMail dataset both regarding textual structure. This dataset uses the category labels as clusters. | V_measure  | sv                     | Clustering     | non-fiction, news                                                    |                2048 | 1619.71 (std: 2220.36)                |
+| [SwednRetrieval](https://spraakbanken.gu.se/en/resources/swedn)                                                                                          | News Article Summary Semantic Similarity Estimation.                                                                                                                                                                                                                                   | Ndcg_at_10 | sv                     | Retrieval      | non-fiction, news                                                    |                3070 | 1946.35 (std: 3071.98)                |
+| [TV2Nord Retrieval](https://huggingface.co/datasets/alexandrainst/nordjylland-news-summarization)                                                        | News Article and corresponding summaries extracted from the Danish newspaper TV2 Nord.                                                                                                                                                                                                 | Ndcg_at_10 | da                     | Retrieval      | news, non-fiction                                                    |                4096 | 784.11 (std: 982.97)                  |
+| [Twitterhjerne](https://huggingface.co/datasets/sorenmulli/da-hashtag-twitterhjerne)                                                                     | Danish questions asked on Twitter with the hashtag #Twitterhjerne ('Twitter brain') and their corresponding answers.                                                                                                                                                                   | Ndcg_at_10 | da                     | Retrieval      | social                                                               |                 340 | 138.23 (std: 82.41)                   |
+| [VG Clustering](https://huggingface.co/datasets/navjordj/VG_summarization)                                                                               | Articles and their classes (e.g. sports) from VG news articles extracted from Norsk Aviskorpus.                                                                                                                                                                                        | V_measure  | nb                     | Clustering     | non-fiction, news                                                    |                2048 | 1009.65 (std: 1597.60)                |
 <!--END_TABLE-->
 
 
+## Dataset Licenses
+
+
+| Dataset                                                                                                                                                  | License      |
+| :------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------- |
+| [Angry Tweets](https://aclanthology.org/2021.nodalida-main.53/)                                                                                          | CC-BY-4.0    |
+| [Bornholm Parallel](https://aclanthology.org/W19-6138/)                                                                                                  | CC-BY-4.0    |
+| [DKHate](https://aclanthology.org/2020.lrec-1.430/)                                                                                                      | CC-BY-4.0    |
+| [Da Political Comments](https://huggingface.co/datasets/danish_political_comments)                                                                       |              |
+| [DaLAJ](https://spraakbanken.gu.se/en/resources/superlim)                                                                                                | CC-BY-4.0    |
+| [DanFEVER](https://aclanthology.org/2021.nodalida-main.47/)                                                                                              | CC-BY-4.0    |
+| [LCC](https://github.com/fnielsen/lcc-sentiment)                                                                                                         | CC-BY-4.0    |
+| [Massive Scenario](https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.) | CC-BY-4.0    |
+| [NoReC](https://aclanthology.org/L18-1661/)                                                                                                              | CC-BY-NC-4.0 |
+| [NorQuad](https://aclanthology.org/2023.nodalida-1.17/)                                                                                                  | CC0-1.0      |
+| [Norwegian courts](https://opus.nlpl.eu/ELRC-Courts_Norway-v1.php)                                                                                       | MIT          |
+| [Norwegian parliament](https://huggingface.co/datasets/NbAiLab/norwegian_parliament)                                                                     | CC-BY-4.0    |
+| [SNL Clustering](https://huggingface.co/datasets/navjordj/SNL_summarization)                                                                             | CC-BY-NC     |
+| [SNL Retrieval](https://huggingface.co/datasets/navjordj/SNL_summarization)                                                                              | CC-BY-NC     |
+| [ScaLA](https://aclanthology.org/2023.nodalida-1.20/)                                                                                                    | CC-BY-SA-4.0 |
+| [SweFAQ](https://spraakbanken.gu.se/en/resources/superlim)                                                                                               | CC-BY-4.0    |
+| [SweReC](https://aclanthology.org/2023.nodalida-1.20/)                                                                                                   | CC-BY-4.0    |
+| [SwednClustering](https://spraakbanken.gu.se/en/resources/swedn)                                                                                         | CC-BY-4.0    |
+| [SwednRetrieval](https://spraakbanken.gu.se/en/resources/swedn)                                                                                          | CC-BY-4.0    |
+| [TV2Nord Retrieval](https://huggingface.co/datasets/alexandrainst/nordjylland-news-summarization)                                                        | Apache-2.0   |
+| [Twitterhjerne](https://huggingface.co/datasets/sorenmulli/da-hashtag-twitterhjerne)                                                                     | Upcoming     |
+| [VG Clustering](https://huggingface.co/datasets/navjordj/VG_summarization)                                                                               | CC-BY-NC     |
+
 ## Dataset Disclaimer
 
 - We do not own or host any of the datasets which we use for this benchmark.

From 452bfe25d2b7e3531c6fddb80dea0314b79895d3 Mon Sep 17 00:00:00 2001
From: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>
Date: Tue, 21 May 2024 10:48:12 +0200
Subject: [PATCH 2/3] ci: remove macos due to it being slow

---
 .github/workflows/tests.yml             | 2 +-
 src/seb/cache/all-MiniLM-L6-v2/LCC.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ea939d8c..dd5c434f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -20,7 +20,7 @@ jobs:
       pull-requests: write
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest]
+        os: [ubuntu-latest]
         python-version: ["3.9"] # , "3.10"]
 
     # This allows a subsequently queued workflow run to interrupt previous runs
diff --git a/src/seb/cache/all-MiniLM-L6-v2/LCC.json b/src/seb/cache/all-MiniLM-L6-v2/LCC.json
index 044e52d7..6611ebfa 100644
--- a/src/seb/cache/all-MiniLM-L6-v2/LCC.json
+++ b/src/seb/cache/all-MiniLM-L6-v2/LCC.json
@@ -1 +1 @@
-{"task_name":"LCC","task_description":"The leipzig corpora collection, annotated for sentiment","task_version":"1.1.1","time_of_run":"2024-04-08T19:18:32.717984","scores":{"da":{"accuracy":0.3846666666666666,"f1":0.3650136884557438,"accuracy_stderr":0.03664241622309678,"f1_stderr":0.03540233062350939,"main_score":0.3846666666666666}},"main_score":"accuracy"}
\ No newline at end of file
+{"task_name":"LCC","task_description":"The leipzig corpora collection, annotated for sentiment","task_version":"1.1.1","time_of_run":"2024-05-21T09:44:03.564974","scores":{"da":{"accuracy":0.3846666666666666,"f1":0.3650136884557438,"accuracy_stderr":0.03664241622309678,"f1_stderr":0.03540233062350939,"main_score":0.3846666666666666}},"main_score":"accuracy"}
\ No newline at end of file

From c3519357dd4eb13121e9b102c6d0e45bc2563e94 Mon Sep 17 00:00:00 2001
From: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>
Date: Tue, 21 May 2024 10:52:52 +0200
Subject: [PATCH 3/3] fix docs dependencies

---
 pyproject.toml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 639ba5bd..a032fa7f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ name = "MIT"
 dev = ["cruft>=2.0.0", "pyright==1.1.348", "ruff>=0.3.0"]
 tests = ["pytest>=7.1.3", "pytest-cov>=3.0.0"]
 docs = [
+  "mkdocs==1.5.1",
   "mkdocs-jupyter==0.24.6",
   "mkdocs-material==9.1.21",
   "mkdocstrings[python]==0.22.0",
@@ -50,7 +51,7 @@ mistral = [
 openai = ["openai>=0.27.4"]
 cohere = ["cohere>=4.34"]
 sonar = [
-  "fairseq2>=0.1.0",  # requires sudo apt-get update -y; sudo apt install libsndfile1
+  "fairseq2>=0.1.0",    # requires sudo apt-get update -y; sudo apt install libsndfile1
   "sonar-space>=0.2.1",
 ] # fairseq2 only works for linux at the moment
 
@@ -167,5 +168,3 @@ build_command = "python -m pip install build; python -m build"
 
 [tool.setuptools]
 include-package-data = true
-
-