Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update csvw metadata for custom datasets #83

Merged
merged 4 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,5 @@ See [CONTRIBUTING](CONTRIBUTING.md) for details.

Copyright © 2021, Office for National Statistics (https://www.ons.gov.uk)

Released under MIT license, see [LICENSE](LICENSE.md) for details.
Released under MIT license, see [LICENSE](LICENSE.md) for details

157 changes: 91 additions & 66 deletions csvw/csvw.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"fmt"
"net/url"
"regexp"
"strings"
"time"

"github.com/ONSdigital/dp-api-clients-go/v2/dataset"
"github.com/ONSdigital/log.go/v2/log"
Expand All @@ -23,8 +25,8 @@ type CSVW struct {
Title string `json:"dct:title"`
Description string `json:"dct:description,omitempty"`
Issued string `json:"dct:issued,omitempty"`
Publisher Publisher `json:"dct:publisher"`
Contact []Contact `json:"dcat:contactPoint"`
Publisher Publisher `json:"dct:publisher,omitempty"`
Contact []Contact `json:"dcat:contactPoint,omitempty"`
TableSchema Columns `json:"tableSchema"`
Theme string `json:"dcat:theme,omitempty"`
License string `json:"dct:license,omitempty"`
Expand All @@ -34,8 +36,8 @@ type CSVW struct {
Downloads map[string]Download `json:"dcat:distribution,omitempty"`
Keywords []string `json:"dcat:keyword,omitempty"`
UnitOfMeasure string `json:"dcat:conformsTo,omitempty"`
Version int `json:"dcat:version"`
IsBasedOn string `json:"prov:wasGeneratedBy,omitempty`
Version int `json:"dcat:version,omitempty"`
IsBasedOn string `json:"prov:wasGeneratedBy,omitempty"`
DatasetLinks *DatasetLinks `json:"dcat:record,omitempty"`
}

Expand Down Expand Up @@ -74,7 +76,7 @@ type Publisher struct {
// Columns provides the nested structure expected within the tableSchema of a CSVW
type Columns struct {
C []Column `json:"columns"`
About string `json:"aboutUrl"`
About string `json:"aboutUrl,omitempty"`
}

// Column provides the ability to define the JSON tags required specific
Expand All @@ -93,76 +95,93 @@ type Note struct {
var errMissingDimensions = errors.New("no dimensions in provided metadata")

// New returns a CSVW with its top-level fields populated from the provided metadata
func New(m *dataset.Metadata, csvURL, externalPrefixURL string) *CSVW {
csvw := &CSVW{
Context: "http://www.w3.org/ns/csvw",
Title: m.Title,
Description: m.Description,
Issued: m.ReleaseDate,
Theme: m.Theme,
License: m.License,
Frequency: m.ReleaseFrequency,
URL: csvURL,
}
func New(m *dataset.Metadata, csvURL, externalPrefixURL, filterOutputID, downloadServiceURL string, isCustom bool) *CSVW {
if isCustom {
var titleDims []string
dt := time.Now()
issuedDate := dt.Format("01-02-2006 15:04:05")

for _, d := range m.Version.Dimensions {
titleDims = append(titleDims, d.Label)
}
csvw := &CSVW{
Title: strings.Join(titleDims, " "),
Issued: issuedDate,
URL: fmt.Sprintf("\tURL: %s\n\n", fmt.Sprintf("%s/downloads/filter-outputs/%s.csvw", downloadServiceURL, filterOutputID)),
}
csvw.UnitOfMeasure = m.UnitOfMeasure
return csvw
} else {
csvw := &CSVW{
Context: "http://www.w3.org/ns/csvw",
Title: m.Title,
Description: m.Description,
Issued: m.ReleaseDate,
Theme: m.Theme,
License: m.License,
Frequency: m.ReleaseFrequency,
URL: csvURL,
}

if m.Contacts != nil {
for _, c := range *m.Contacts {
csvw.Contact = append(csvw.Contact, Contact{
Name: c.Name,
Telephone: c.Telephone,
Email: c.Email,
})
if m.Contacts != nil {
for _, c := range *m.Contacts {
csvw.Contact = append(csvw.Contact, Contact{
Name: c.Name,
Telephone: c.Telephone,
Email: c.Email,
})
}
}
}

if m.Publisher != nil {
csvw.Publisher = Publisher{
Name: m.Publisher.Name,
Type: m.Publisher.Type,
ID: m.Publisher.URL,
if m.Publisher != nil {
csvw.Publisher = Publisher{
Name: m.Publisher.Name,
Type: m.Publisher.Type,
ID: m.Publisher.URL,
}
}
}

if m.Downloads != nil {
csvw.Downloads = make(map[string]Download)
for k, v := range m.Downloads {
csvw.Downloads[k] = Download{
HREF: v.URL,
Size: v.Size,
if m.Downloads != nil {
csvw.Downloads = make(map[string]Download)
for k, v := range m.Downloads {
csvw.Downloads[k] = Download{
HREF: v.URL,
Size: v.Size,
}
}
}
}

if m.Keywords != nil {
csvw.Keywords = *m.Keywords
}
if m.Keywords != nil {
csvw.Keywords = *m.Keywords
}

csvw.UnitOfMeasure = m.UnitOfMeasure
csvw.Version = m.Version.Version
if m.Version.IsBasedOn != nil {
csvw.IsBasedOn = m.Version.IsBasedOn.ID
}
csvw.UnitOfMeasure = m.UnitOfMeasure
csvw.Version = m.Version.Version
if m.Version.IsBasedOn != nil {
csvw.IsBasedOn = m.Version.IsBasedOn.ID
}

re := regexp.MustCompile("https?://([^/]+)")
csvw.DatasetLinks = &DatasetLinks{
Editions: Link{
HREF: re.ReplaceAllString(m.DatasetLinks.Editions.URL, externalPrefixURL),
ID: m.DatasetLinks.Editions.ID,
},
LatestVersion: Link{
HREF: re.ReplaceAllString(m.DatasetLinks.LatestVersion.URL, externalPrefixURL),
ID: m.DatasetLinks.LatestVersion.ID,
},
Self: Link{
HREF: re.ReplaceAllString(m.DatasetLinks.Self.URL, externalPrefixURL),
ID: m.DatasetLinks.Editions.ID,
},
re := regexp.MustCompile("https?://([^/]+)")
csvw.DatasetLinks = &DatasetLinks{
Editions: Link{
HREF: re.ReplaceAllString(m.DatasetLinks.Editions.URL, externalPrefixURL),
ID: m.DatasetLinks.Editions.ID,
},
LatestVersion: Link{
HREF: re.ReplaceAllString(m.DatasetLinks.LatestVersion.URL, externalPrefixURL),
ID: m.DatasetLinks.LatestVersion.ID,
},
Self: Link{
HREF: re.ReplaceAllString(m.DatasetLinks.Self.URL, externalPrefixURL),
ID: m.DatasetLinks.Editions.ID,
},
}
return csvw
}
return csvw
}

// Generate builds the CSVW structured metadata file that describes a CSV
func Generate(ctx context.Context, metadata *dataset.Metadata, downloadURL, aboutURL, apiDomain, externalPrefixURL string) ([]byte, error) {
func Generate(ctx context.Context, metadata *dataset.Metadata, downloadURL, aboutURL, apiDomain, externalPrefixURL, filterOutputID, downloadServiceURL string, isCustom bool) ([]byte, error) {
logData := log.Data{
"dataset_id": metadata.DatasetDetails.ID,
"csv_header": metadata.CSVHeader,
Expand All @@ -186,7 +205,7 @@ func Generate(ctx context.Context, metadata *dataset.Metadata, downloadURL, abou

h := metadata.CSVHeader

csvw := New(metadata, downloadURL, externalPrefixURL)
csvw := New(metadata, downloadURL, externalPrefixURL, filterOutputID, downloadServiceURL, isCustom)

var list []Column
obs := newObservationColumn(h[0], metadata.UnitOfMeasure)
Expand All @@ -213,10 +232,16 @@ func Generate(ctx context.Context, metadata *dataset.Metadata, downloadURL, abou
logData: logData,
}
}

csvw.TableSchema = Columns{
About: aboutURL,
C: list,
if !isCustom {
csvw.TableSchema = Columns{
About: aboutURL,
C: list,
}
} else {
csvw.TableSchema = Columns{
About: "",
C: list,
}
}

csvw.AddNotes(metadata, downloadURL)
Expand Down
9 changes: 6 additions & 3 deletions csvw/csvw_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import (
var fileURL = "ons/file.csv"
var apiURL = "api.example.com"
var externalPrefixURL = "external.prefixurl.com"
var filterOutputID = "filter-output-id"
var downloadServiceURL = "download-service-url"
var isCustom = false

var ctx = context.Background()

Expand All @@ -28,7 +31,7 @@ func TestNew(t *testing.T) {
}

Convey("When the New csvw function is called", func() {
csvw := New(m, fileURL, externalPrefixURL)
csvw := New(m, fileURL, externalPrefixURL, filterOutputID, downloadServiceURL, isCustom)

Convey("Then the values should be set to the expected fields", func() {
So(csvw.Context, ShouldEqual, "http://www.w3.org/ns/csvw")
Expand Down Expand Up @@ -79,10 +82,10 @@ func TestGenerate(t *testing.T) {
}

Convey("When the Generate csvw function is called", func() {
data, err := Generate(ctx, m, fileURL, fileURL, apiURL, externalPrefixURL)
data, err := Generate(ctx, m, fileURL, fileURL, apiURL, externalPrefixURL, filterOutputID, downloadServiceURL, isCustom)

Convey("Then results should be returned with no errors", func() {
So(data, ShouldHaveLength, 593)
So(data, ShouldHaveLength, 536)
So(err, ShouldBeNil)
})
})
Expand Down
6 changes: 3 additions & 3 deletions features/dp-cantabular-metadata-exporter.feature
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ Feature: Cantabular-Metadata-Exporter
"downloads": {
"CSVW": {
"href": "http://localhost:23600/downloads/datasets/cantabular-example-2/editions/2021/versions/1.csv-metadata.json",
"size": "1137",
"size": "1080",
"private": "http://minio:9000/dp-cantabular-metadata-exporter-priv/datasets/cantabular-example-2-2021-1.csvw"
},
"TXT": {
Expand Down Expand Up @@ -813,7 +813,7 @@ Feature: Cantabular-Metadata-Exporter
"downloads": {
"CSVW": {
"href": "http://localhost:23600/downloads/filter-outputs/filter-output-2.csv-metadata.json",
"size": "1137",
"size": "1080",
"private": "http://minio:9000/dp-cantabular-metadata-exporter-priv/datasets/filter-output-2/cantabular-example-2-2021-1-2022-01-26T12:27:04Z.csvw"
},
"TXT": {
Expand All @@ -833,7 +833,7 @@ Feature: Cantabular-Metadata-Exporter
"downloads": {
"CSVW": {
"href": "http://localhost:23600/downloads/datasets/cantabular-example-1/editions/2021/versions/1.csv-metadata.json",
"size": "1135",
"size": "1078",
"public": "http://public-bucket/datasets/cantabular-example-1-2021-1.csvw"
},
"TXT": {
Expand Down
2 changes: 1 addition & 1 deletion handler/cantabular_metadata_export.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ func (h *CantabularMetadataExport) exportCSVW(ctx context.Context, e *event.CSVC
downloadURL := h.generateDownloadURL(e, "csv-metadata.json")
aboutURL := h.dataset.GetMetadataURL(e.DatasetID, e.Edition, e.Version)

f, err := csvw.Generate(ctx, &m, downloadURL, aboutURL, h.apiDomainURL, h.cfg.ExternalPrefixURL)
f, err := csvw.Generate(ctx, &m, downloadURL, aboutURL, h.apiDomainURL, h.cfg.ExternalPrefixURL, e.FilterOutputID, h.cfg.DownloadServiceURL, isCustom)
if err != nil {
return nil, fmt.Errorf("failed to generate csvw: %w", err)
}
Expand Down