diff --git a/libgin/datacite.go b/libgin/datacite.go index b27c1eb..8549b67 100644 --- a/libgin/datacite.go +++ b/libgin/datacite.go @@ -167,10 +167,11 @@ func NewDataCite() DataCite { } func parseAuthorID(authorID string) *NameIdentifier { - if authorID == "" { + lowerID := strings.ToLower(authorID) + if lowerID == "" || lowerID == "orcid:" || lowerID == "researcherid:" { return nil } - lowerID := strings.ToLower(authorID) + if strings.HasPrefix(lowerID, "orcid") { // four blocks of four numbers separated by dash; last character can be X // https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier @@ -266,25 +267,28 @@ func (dc *DataCite) AddFunding(fundstr string) { // AddReference is a convenience function for appending a RelatedIdentifier // that describes a referenced work. The RelatedIdentifier includes the -// identifier, relation type, and identifier type. A full citation string is -// also added to the Descriptions list. +// identifier, relation type, and identifier type. +// The RelatedIdentifier is not appended if either the identifier or the +// identifier type cannot be determined. +// A full citation string is also added to the Descriptions list. 
func (dc *DataCite) AddReference(ref *Reference) { // Add info as RelatedIdentifier refIDParts := strings.SplitN(ref.ID, ":", 2) var relIDType, relID string + // Only add a related identifier, if the type and id can be separated and exist + if len(refIDParts) == 2 { relIDType = strings.TrimSpace(refIDParts[0]) if ridt, ok := relIDTypeMap[strings.ToLower(relIDType)]; ok { relIDType = ridt } relID = strings.TrimSpace(refIDParts[1]) - } else { - // No colon, add to ID as is - relID = ref.ID - } - relatedIdentifier := RelatedIdentifier{Identifier: relID, Type: relIDType, RelationType: ref.RefType} - dc.RelatedIdentifiers = append(dc.RelatedIdentifiers, relatedIdentifier) + if relID != "" && relIDType != "" { + relatedIdentifier := RelatedIdentifier{Identifier: relID, Type: relIDType, RelationType: ref.RefType} + dc.RelatedIdentifiers = append(dc.RelatedIdentifiers, relatedIdentifier) + } + } // Add citation string as Description var namecitation string diff --git a/libgin/datacite_test.go b/libgin/datacite_test.go index f00b338..d028568 100644 --- a/libgin/datacite_test.go +++ b/libgin/datacite_test.go @@ -79,6 +79,8 @@ func Test_DataCiteMarshal(t *testing.T) { example.AddReference(&Reference{ID: "doi:10.1111/example.doi", RefType: "IsDescribedBy", Name: "Manuscript title for reference."}) example.AddReference(&Reference{ID: "arxiv:10.2222/example.doi", RefType: "IsSupplementTo", Name: "Some other work"}) example.AddReference(&Reference{ID: "doi:10.3333/example.doi", RefType: "IsReferencedBy", Name: "A work that references this dataset."}) + example.AddReference(&Reference{ID: "10.3333/example.doi", RefType: "IsReferencedBy", Name: "A reference without the reqired type - should not be added"}) + example.AddReference(&Reference{ID: "doi:", RefType: "IsReferencedBy", Name: "A reference without the reqired id - should not be added"}) _, err := xml.MarshalIndent(example, "", "\t") if err != nil { @@ -157,6 +159,14 @@ func Test_parseAuthorID(t *testing.T) { t.Fatal("Empty 
author ID should return nil") } + if ident := parseAuthorID("orcID:"); ident != nil { + t.Fatal("Stunted AuthorID 'orcID:' should return nil") + } + + if ident := parseAuthorID("researcherID:"); ident != nil { + t.Fatal("Stunted AuthorID 'researcherID:' should return nil") + } + validORCIDs := []string{ // valid, all 0s (different delimiters) "orcid.0000-0000-0000-0000", @@ -301,21 +311,6 @@ func Test_parseAuthorID(t *testing.T) { } -func Test_GetArchiveSize(t *testing.T) { - // URL is earliest archive with the new name format, so wont change. - // Older archives might be renamed to the new format soon. - const archiveURL = "https://doi.gin.g-node.org/10.12751/g-node.4bdb22/10.12751_g-node.4bdb22.zip" - const expSize = 1559190240 - size, err := GetArchiveSize(archiveURL) - if err != nil { - t.Fatalf("Failed to retrieve archive size for %q: %v", archiveURL, err.Error()) - } - - if size != expSize { - t.Fatalf("Incorrect archive size: %d (expected) != %d", expSize, size) - } -} - func Test_MarshalUnmarshal(t *testing.T) { example := NewDataCite() example.Creators = []Creator{ diff --git a/libgin/doi.go b/libgin/doi.go index 819ad63..d2ac4b2 100644 --- a/libgin/doi.go +++ b/libgin/doi.go @@ -14,6 +14,7 @@ import ( // RepositoryYAML is used to read the information provided by a GIN user // through the datacite.yml file. This data is usually used to populate the // DataCite and RepositoryMetadata types. +// This struct is used in the G-Node gin-doi project. type RepositoryYAML struct { Authors []Author `yaml:"authors"` Title string `yaml:"title"` @@ -26,6 +27,8 @@ type RepositoryYAML struct { ResourceType string `yaml:"resourcetype"` } +// Author holds information about a DOI Author. +// This struct is used in the G-Node gin-doi project. type Author struct { FirstName string `yaml:"firstname"` LastName string `yaml:"lastname"` @@ -33,11 +36,16 @@ type Author struct { ID string `yaml:"id,omitempty"` } +// License holds information about a DOI license. 
+// The struct is used in the G-Node gogs and gin-doi projects. type License struct { Name string `yaml:"name"` URL string `yaml:"url"` } +// Reference holds information about a DOI reference. +// The "Name" field has been deprecated. +// This struct is used in the G-Node gin-doi project. type Reference struct { ID string `yaml:"id,omitempty"` RefType string `yaml:"reftype,omitempty"` @@ -45,6 +53,8 @@ type Reference struct { Citation string `yaml:"citation,omitempty"` // meant to replace Name } +// GINUser holds basic information about a user on GIN. +// This struct is used in the G-Node gin-doi project. type GINUser struct { Username string Email string @@ -52,7 +62,8 @@ type GINUser struct { } // RepositoryMetadata can contain all known metadata for a registered (or -// to-be-registered) repository. To do this, it embeds the +// to-be-registered) repository. +// This struct is used in the G-Node gin-doi project. type RepositoryMetadata struct { // YAMLData is the original data coming from the repository YAMLData *RepositoryYAML @@ -78,6 +89,7 @@ type RepositoryMetadata struct { // This map is required because the current method of computing UUIDs differs // from the older method and this lookup is used to handle the old-method // UUIDs. +// This map is used in the G-Node gin-doi project. var UUIDMap = map[string]string{ "INT/multielectrode_grasp": "f83565d148510fede8a277f660e1a419", "ajkumaraswamy/HB-PAC_disinhibitory_network": "1090f803258557299d287c4d44a541b2", @@ -87,6 +99,7 @@ var UUIDMap = map[string]string{ } // RepoPathToUUID computes a UUID from a repository path. +// This function is used in the G-Node gogs project. func RepoPathToUUID(URI string) string { if doi, ok := UUIDMap[URI]; ok { return doi @@ -97,6 +110,7 @@ func RepoPathToUUID(URI string) string { // DOIRequestData is used to transmit data from GIN to DOI when a registration // request is triggered. +// This struct is used in the G-Node gogs and gin-doi projects. 
type DOIRequestData struct { Username string Realname string @@ -105,7 +119,8 @@ type DOIRequestData struct { } // DOIRegInfo holds all the metadata and information necessary for a DOI registration request. -// Deprecated and obsolete: Marked for removal +// Deprecated: Marked for removal +// This struct is used in the G-Node gogs project. type DOIRegInfo struct { Missing []string DOI string @@ -124,6 +139,9 @@ type DOIRegInfo struct { TemplateVersion string } +// GetType returns the ResourceType entry of a DOIRegInfo +// or the string "Dataset" if no ResourceType entry was found. +// This method is currently used in the G-Node gogs project. func (c *DOIRegInfo) GetType() string { if c.ResourceType != "" { return c.ResourceType @@ -131,6 +149,10 @@ func (c *DOIRegInfo) GetType() string { return "Dataset" } +// GetCitation returns a formatted string of a DOIRegInfo content +// containing Authors, Year, Title and DOI link. +// This method is currently not used in any project and should be +// considered deprecated. func (c *DOIRegInfo) GetCitation() string { var authors string for _, auth := range c.Authors { @@ -143,18 +165,28 @@ func (c *DOIRegInfo) GetCitation() string { return fmt.Sprintf("%s (%s) %s. G-Node. https://doi.org/%s", authors, c.Year(), c.Title, c.DOI) } +// Year is used in the unused GetCitation DOIRegInfo method +// and should be considered deprecated. func (c *DOIRegInfo) Year() string { return fmt.Sprintf("%d", c.DateTime.Year()) } +// ISODate is currently not used in any project and should be +// considered deprecated. func (c *DOIRegInfo) ISODate() string { return c.DateTime.Format("2006-01-02") } +// PrettyDate is currently not used in any project and should be +// considered deprecated. func PrettyDate(dt *time.Time) string { return dt.Format("02 Jan. 2006") } +// GetValidID returns a NamedIdentifier struct for an Author, if +// the Author.ID contains a valid ORCID entry. 
+// The method is currently not used in any project and should be +// considered deprecated. func (c *Author) GetValidID() *NamedIdentifier { if c.ID == "" { return nil @@ -169,17 +201,30 @@ func (c *Author) GetValidID() *NamedIdentifier { } return nil } + +// RenderAuthor returns a string of the Author content in the format +// 'Lastname, Firstname; Affiliation; ID'. Empty entries are omitted. +// This method is used in the G-Node gogs project. func (a *Author) RenderAuthor() string { - auth := fmt.Sprintf("%s,%s;%s;%s", a.LastName, a.FirstName, a.Affiliation, a.ID) - return strings.Replace(strings.TrimRight(auth, ";"), ";;", ";", -1) + auth := fmt.Sprintf("%s, %s; %s; %s", a.LastName, a.FirstName, a.Affiliation, a.ID) + + return strings.Replace(strings.TrimRight(auth, "; "), "; ;", ";", -1) } +// NamedIdentifier is used in the unused GetValidID Author method +// and should be considered deprecated. type NamedIdentifier struct { SchemeURI string Scheme string ID string } +// GetURL splits the ID string of a Reference at the ":" char +// into prefix and value and returns a full URL dependent on +// the provided prefix. Supported prefixes are "doi", "arxiv", +// "pmid" and "url". If no prefix can be identified, an empty +// string is returned. +// This method is used in the G-Node gin-doi project. func (ref Reference) GetURL() string { idparts := strings.SplitN(ref.ID, ":", 2) if len(idparts) != 2 { @@ -210,6 +255,10 @@ func (ref Reference) GetURL() string { return fmt.Sprintf("%s%s", prefix, idnum) } +// IsRegisteredDOI tries to http.Get a DOI via a provided +// DOI ID and returns a boolean value according to success +// or failure. +// This function is used in the G-Node gin-doi and gogs projects. 
func IsRegisteredDOI(doi string) bool { url := fmt.Sprintf("https://doi.org/%s", doi) resp, err := http.Get(url) diff --git a/libgin/doi_test.go b/libgin/doi_test.go index 58323ff..d547118 100644 --- a/libgin/doi_test.go +++ b/libgin/doi_test.go @@ -2,6 +2,7 @@ package libgin import ( "fmt" + "strings" "testing" ) @@ -22,6 +23,22 @@ func TestRepoPathToUUID(t *testing.T) { } } +func TestGetType(t *testing.T) { + var res = DOIRegInfo{} + + check := res.GetType() + if check != "Dataset" { + t.Fatalf("Expected 'Dataset' but got %q", check) + } + + val := "Datapaper" + res.ResourceType = val + check = res.GetType() + if check != val { + t.Fatalf("Expected %q but got %q", val, check) + } +} + func TestAuthor(t *testing.T) { lname := "ln" fname := "fn" @@ -39,7 +56,7 @@ func TestAuthor(t *testing.T) { // Test RenderAuthor; make all expected combinations explicit // No omit test - validate = fmt.Sprintf("%s,%s;%s;%s", lname, fname, aff, id) + validate = fmt.Sprintf("%s, %s; %s; %s", lname, fname, aff, id) auth = Author{ FirstName: fname, @@ -50,7 +67,7 @@ func TestAuthor(t *testing.T) { check("No omit") // Omit ID test - validate = fmt.Sprintf("%s,%s;%s", lname, fname, aff) + validate = fmt.Sprintf("%s, %s; %s", lname, fname, aff) auth = Author{ FirstName: fname, @@ -60,7 +77,7 @@ func TestAuthor(t *testing.T) { check("Omit ID") // Omit affiliation test - validate = fmt.Sprintf("%s,%s;%s", lname, fname, id) + validate = fmt.Sprintf("%s, %s; %s", lname, fname, id) auth = Author{ FirstName: fname, @@ -70,7 +87,7 @@ func TestAuthor(t *testing.T) { check("Omit affiliation") // Omit ID and affiliation test - validate = fmt.Sprintf("%s,%s", lname, fname) + validate = fmt.Sprintf("%s, %s", lname, fname) auth = Author{ FirstName: fname, @@ -78,3 +95,78 @@ func TestAuthor(t *testing.T) { } check("Omit ID/affiliation") } + +func TestGetURL(t *testing.T) { + // check empty or malformed ID + var ref = Reference{} + + checkstr := ref.GetURL() + if checkstr != "" { + t.Fatalf("Expected 
empty URL on empty ID but got %q", checkstr) + } + ref.ID = "IamNotOK" + checkstr = ref.GetURL() + if checkstr != "" { + t.Fatalf("Expected empty URL on invalid ID but got %q", checkstr) + } + + // check empty on empty or unknown prefix + ref.ID = ":value" + checkstr = ref.GetURL() + if checkstr != "" { + t.Fatalf("Expected empty URL on missing prefix but got %q", checkstr) + } + ref.ID = "you:dontKnowMe" + checkstr = ref.GetURL() + if checkstr != "" { + t.Fatalf("Expected empty URL on unknown ID prefix but got %q", checkstr) + } + + // check url prefix behavior + idval := "reference" + ref.ID = fmt.Sprintf("uRl:%s", idval) + checkstr = ref.GetURL() + if checkstr != idval { + t.Fatalf("Expected %q on URL prefix but got %q", idval, checkstr) + } + + // check doi prefix behavior + ref.ID = fmt.Sprintf("DOI:%s", idval) + resstr := fmt.Sprintf("doi.org/%s", idval) + checkstr = ref.GetURL() + if !strings.Contains(checkstr, resstr) { + t.Fatalf("Got unexpected DOI URL string %q", checkstr) + } + + // check arxiv prefix behavior + ref.ID = fmt.Sprintf("arXiv:%s", idval) + checkstr = ref.GetURL() + resstr = fmt.Sprintf("arxiv.org/abs/%s", idval) + if !strings.Contains(checkstr, resstr) { + t.Fatalf("Got unexpected arxiv URL string %q", checkstr) + } + + // check pmid prefix behavior + ref.ID = fmt.Sprintf("pmID:%s", idval) + checkstr = ref.GetURL() + resstr = fmt.Sprintf("www.ncbi.nlm.nih.gov/pubmed/%s", idval) + if !strings.Contains(checkstr, resstr) { + t.Fatalf("Got unexpected pmid URL string %q", checkstr) + } +} + +func TestIsRegisteredDOI(t *testing.T) { + invalid := "idonotexist" + valid := "10.12751/g-node.5b08du" + + // check false on non-existing DOI + ok := IsRegisteredDOI(invalid) + if ok { + t.Fatal("Expected check to fail on invalid DOI") + } + + ok = IsRegisteredDOI(valid) + if !ok { + t.Fatal("Expected check to succeed on valid DOI") + } +} diff --git a/libgin/util.go b/libgin/util.go index 20842f6..a07981a 100644 --- a/libgin/util.go +++ b/libgin/util.go 
@@ -30,14 +30,14 @@ func ReadConf(key string) string { func GetArchiveSize(archiveURL string) (uint64, error) { resp, err := http.Get(archiveURL) if err != nil { - return 0, fmt.Errorf("Request for archive %q failed: %s\n", archiveURL, err.Error()) + return 0, fmt.Errorf("request for archive %q failed: %s", archiveURL, err.Error()) } if resp.StatusCode != http.StatusOK { - return 0, fmt.Errorf("Request for archive %q failed: %s\n", archiveURL, resp.Status) + return 0, fmt.Errorf("request for archive %q failed: %s", archiveURL, resp.Status) } if resp.ContentLength < 0 { // returns -1 when size is unknown; let's turn it into an error - return 0, fmt.Errorf("Unable to determine size of %q", archiveURL) + return 0, fmt.Errorf("unable to determine size of %q", archiveURL) } return uint64(resp.ContentLength), nil } diff --git a/libgin/util_test.go b/libgin/util_test.go new file mode 100644 index 0000000..b8f7761 --- /dev/null +++ b/libgin/util_test.go @@ -0,0 +1,61 @@ +package libgin + +import ( + "os" + "testing" +) + +func Test_ReadConfDefault(t *testing.T) { + oskey := "test" + val := "tmp" + defval := "default" + + testval := ReadConfDefault(oskey, defval) + if testval != defval { + t.Fatalf("Expected default value %q but got %q", defval, testval) + } + + os.Setenv(oskey, val) + testval = ReadConfDefault(oskey, defval) + if testval != val { + t.Fatalf("Expected default value %q but got %q", val, testval) + } +} + +func Test_ReadConf(t *testing.T) { + oskey := "test" + val := "tmp" + os.Setenv(oskey, val) + + testval := ReadConf(oskey) + if val != testval { + t.Fatal("Could not read environment variable") + } +} + +func Test_GetArchiveSize(t *testing.T) { + // URL is earliest archive with the new name format, so wont change. + // Older archives might be renamed to the new format soon. 
+ const archiveURL = "https://doi.gin.g-node.org/10.12751/g-node.4bdb22/10.12751_g-node.4bdb22.zip" + const expSize = 1559190240 + size, err := GetArchiveSize(archiveURL) + if err != nil { + t.Fatalf("Failed to retrieve archive size for %q: %v", archiveURL, err.Error()) + } + + if size != expSize { + t.Fatalf("Incorrect archive size: %d (expected) != %d", expSize, size) + } + + // Check status not ok + _, err = GetArchiveSize("https://doi.gin.g-node.org/idonotexist") + if err == nil { + t.Fatalf("Expected error on invalid URL") + } + + // Check fail + _, err = GetArchiveSize("I do not exist") + if err == nil { + t.Fatal("Expected error on non URL") + } +}