# Creating lexibank CLDF for a dataset

In [32]:
from pylexibank.dataset import Dataset, CldfDataset, REQUIRED_FIELDS
from clldutils.path import TemporaryDirectory
from clldutils.dsv import reader

In [33]:
# Load the lexibank dataset from its directory (path is relative to this notebook).
dataset = Dataset('../datasets/galuciotupi')

In [34]:
# Show the column names that REQUIRED_FIELDS contains.
print(REQUIRED_FIELDS)

(u'ID', u'Language_ID', u'Parameter_ID', u'Value')


In [35]:
# Column layout for the CLDF table: the required fields plus a 'Source' column.
fieldnames = list(REQUIRED_FIELDS)
fieldnames = fieldnames + ['Source']
cldf = CldfDataset(fieldnames, dataset)

## Adding sources

In [36]:
# Register a source from a BibTeX record; 'meier2000' is the record's cite key.
cldf.sources.add("""
@book{meier2000,
    author = {Max Meier},
    title = {The Book},
    year = {2000}
}""")

In [37]:
# The source can now be looked up by its cite key.
assert 'meier2000' in cldf.sources

In [38]:
# Write the dataset to a throw-away directory and show the generated BibTeX file.
with TemporaryDirectory() as tmp:
    cldf.write(outdir=tmp)
    bib_path = tmp.joinpath('galuciotupi.bib')
    assert bib_path.exists()
    # Read through a context manager so the file handle is closed before the
    # temporary directory is cleaned up (an open handle can block removal on
    # some platforms and otherwise lingers until garbage collection).
    with bib_path.open(encoding='utf8') as fp:
        bib = fp.read()
    print(bib)

@book{meier2000,
    author = {Meier, Max},
    title = {The Book},
    year = {2000}
}



Adding sources is idempotent: if a source with the same cite key already exists, it is not added again.

In [39]:
# Re-adding the BibTeX that was just written out must not create a second entry.
n_sources = len(cldf.sources)
assert n_sources == 1
cldf.sources.add(bib)
n_sources = len(cldf.sources)
assert n_sources == 1

## Adding data

In [40]:
# Values are positional and follow the column order given when `cldf` was created:
# ID, Language_ID, Parameter_ID, Value, Source.
row = cldf.add_row(['wid', 'abcd1234', '1234', 'word', 'meier2000[12]'])

In [41]:
# Inspect the references attached to the row (presumably parsed from its Source value).
row.refs

[<Reference meier2000[12]>]

In [42]:
# Write the dataset again and print the first data row of the generated CSV.
with TemporaryDirectory() as tmp:
    cldf.write(outdir=tmp)
    csv_path = tmp.joinpath('galuciotupi.csv')
    assert csv_path.exists()
    # dicts=True yields each row as a mapping of column name to value.
    rows = list(reader(csv_path, dicts=True))
    print(rows[0])

OrderedDict([(u'ID', u'wid'), (u'Language_ID', u'abcd1234'), (u'Parameter_ID', u'1234'), (u'Value', u'word'), (u'Source', u'meier2000[12]')])


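## Metadata

Besides the CSV data file and the BibTeX file, writing the dataset also creates a `-metadata.json` file alongside the CSV: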

In [43]:
# List every file that writing the dataset produces.
with TemporaryDirectory() as tmp:
    cldf.write(outdir=tmp)
    # Print only the file names, not the temporary directory's full path.
    for entry in tmp.iterdir():
        print(entry.name)

galuciotupi.csv
galuciotupi.csv-metadata.json
galuciotupi.bib
