Skip to content

Commit

Permalink
Merge pull request #948 from glerzing/licenses
Browse files Browse the repository at this point in the history
Dataset licenses - PR 2
  • Loading branch information
haileyschoelkopf committed Nov 3, 2023
2 parents cc9778f + b0686dd commit 382af8c
Show file tree
Hide file tree
Showing 12 changed files with 23 additions and 23 deletions.
4 changes: 2 additions & 2 deletions lm_eval/datasets/asdiv/asdiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@

_HOMEPAGE = "https://github.com/chaochun/nlu-asdiv-dataset"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
# License declared at https://github.com/chaochun/nlu-asdiv-dataset/blob/master/README.md
_LICENSE = "CC BY-NC 4.0"

_URLS = "https://github.com/chaochun/nlu-asdiv-dataset/archive/55790e5270bb91ccfa5053194b25732534696b50.zip"

Expand Down
3 changes: 1 addition & 2 deletions lm_eval/datasets/coqa/coqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@

_HOMEPAGE = "https://stanfordnlp.github.io/coqa/"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
_LICENSE = "Different licenses depending on the content (see https://stanfordnlp.github.io/coqa/ for details)"

_URLS = {
"train": "https://nlp.stanford.edu/data/coqa/coqa-train-v1.0.json",
Expand Down
4 changes: 2 additions & 2 deletions lm_eval/datasets/drop/drop.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@

_HOMEPAGE = "https://allenai.org/data/drop"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
# License declared at https://allenai.org/data/drop
_LICENSE = "CC BY"

_URLS = {
"drop": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip",
Expand Down
5 changes: 4 additions & 1 deletion lm_eval/datasets/headqa/headqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@

_HOMEPAGE = "https://aghie.github.io/head-qa/"

_LICENSE = "MIT License"
# The Spanish data comes from the "Ministerio de Sanidad, Consumo y Bienestar Social", as indicated here: https://github.com/aghie/head-qa
# This Spanish data appears to be subject to the intellectual property rights stated here: https://www.sanidad.gob.es/avisoLegal/home.htm
# The English data was translated by the authors of head-qa (https://arxiv.org/pdf/1906.04701.pdf).
_LICENSE = "Custom license"

_URL = "https://drive.google.com/uc?export=download&confirm=t&id=1a_95N5zQQoUCq8IBNVZgziHbeM-QxG2t"

Expand Down
6 changes: 4 additions & 2 deletions lm_eval/datasets/hendrycks_ethics/hendrycks_ethics.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@

_HOMEPAGE = "https://github.com/hendrycks/ethics"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
# The authors declared that the dataset is not distributed under a copyright or other intellectual property license (https://arxiv.org/pdf/2008.02275.pdf)
# On Hugging Face, the dataset is distributed under the MIT license (https://huggingface.co/datasets/hendrycks/ethics)
# The common sense portion is from Reddit and might incur some licensing complications.
_LICENSE = "Ambiguous"

_URLS = "https://people.eecs.berkeley.edu/~hendrycks/ethics.tar"

Expand Down
4 changes: 2 additions & 2 deletions lm_eval/datasets/hendrycks_math/hendrycks_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@

_HOMEPAGE = "https://github.com/hendrycks/math"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
# License declared at https://arxiv.org/pdf/2103.03874.pdf
_LICENSE = "MIT License"

_URLS = "https://people.eecs.berkeley.edu/~hendrycks/MATH.tar"

Expand Down
3 changes: 1 addition & 2 deletions lm_eval/datasets/logiqa/logiqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@

_HOMEPAGE = "https://github.com/lgw863/LogiQA-dataset"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
_LICENSE = "No license found"

_URLS = {
"train": "https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Train.txt",
Expand Down
3 changes: 1 addition & 2 deletions lm_eval/datasets/mutual/mutual.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@

_HOMEPAGE = "https://github.com/Nealcly/MuTual"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
_LICENSE = "No license found"

_URLS = "https://github.com/Nealcly/MuTual/archive/master.zip"

Expand Down
4 changes: 2 additions & 2 deletions lm_eval/datasets/pile/pile.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@

_HOMEPAGE = "https://pile.eleuther.ai/"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
# More details at https://arxiv.org/pdf/2201.07311.pdf
_LICENSE = "Multiple licenses"

_URLS = {
"validation": "https://the-eye.eu/public/AI/pile/val.jsonl.zst",
Expand Down
4 changes: 2 additions & 2 deletions lm_eval/datasets/quac/quac.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@

_HOMEPAGE = "https://quac.ai/"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
# License declared at https://quac.ai/
_LICENSE = "CC BY-SA 4.0"

_URLS = {
"train": "https://s3.amazonaws.com/my89public/quac/train_v0.2.json",
Expand Down
3 changes: 1 addition & 2 deletions lm_eval/datasets/sat_analogies/sat_analogies.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@

_HOMEPAGE = "https://aclweb.org/aclwiki/SAT_Analogy_Questions_(State_of_the_art)"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
_LICENSE = "No license found"


class SatAnalogies(datasets.GeneratorBasedBuilder):
Expand Down
3 changes: 1 addition & 2 deletions lm_eval/datasets/unscramble/unscramble.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@

_HOMEPAGE = "https://github.com/openai/gpt-3/tree/master/data"

# TODO: Add the licence for the dataset here if you can find it
_LICENSE = ""
_LICENSE = "No license found"

_BASE_URL = "https://raw.githubusercontent.com/openai/gpt-3/master/data"

Expand Down

0 comments on commit 382af8c

Please sign in to comment.