Skip to content

Commit

Permalink
Merge pull request #29 from mpsonntag/orcid
Browse files Browse the repository at this point in the history
Minor changes in datacite handling
  • Loading branch information
achilleas-k committed Mar 22, 2021
2 parents b377a12 + bdce4c3 commit f776c7c
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 36 deletions.
24 changes: 14 additions & 10 deletions libgin/datacite.go
Expand Up @@ -167,10 +167,11 @@ func NewDataCite() DataCite {
}

func parseAuthorID(authorID string) *NameIdentifier {
if authorID == "" {
lowerID := strings.ToLower(authorID)
if lowerID == "" || lowerID == "orcid:" || lowerID == "researcherid:" {
return nil
}
lowerID := strings.ToLower(authorID)

if strings.HasPrefix(lowerID, "orcid") {
// four blocks of four numbers separated by dash; last character can be X
// https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
Expand Down Expand Up @@ -266,25 +267,28 @@ func (dc *DataCite) AddFunding(fundstr string) {

// AddReference is a convenience function for appending a RelatedIdentifier
// that describes a referenced work. The RelatedIdentifier includes the
// identifier, relation type, and identifier type. A full citation string is
// also added to the Descriptions list.
// identifier, relation type, and identifier type.
// The RelatedIdentifier is not appended if either the identifier or the
// identifier type is missing.
// A full citation string is also added to the Descriptions list.
func (dc *DataCite) AddReference(ref *Reference) {
// Add info as RelatedIdentifier
refIDParts := strings.SplitN(ref.ID, ":", 2)
var relIDType, relID string
// Only add a related identifier, if the type and id can be separated and exist

if len(refIDParts) == 2 {
relIDType = strings.TrimSpace(refIDParts[0])
if ridt, ok := relIDTypeMap[strings.ToLower(relIDType)]; ok {
relIDType = ridt
}
relID = strings.TrimSpace(refIDParts[1])
} else {
// No colon, add to ID as is
relID = ref.ID
}

relatedIdentifier := RelatedIdentifier{Identifier: relID, Type: relIDType, RelationType: ref.RefType}
dc.RelatedIdentifiers = append(dc.RelatedIdentifiers, relatedIdentifier)
if relID != "" && relIDType != "" {
relatedIdentifier := RelatedIdentifier{Identifier: relID, Type: relIDType, RelationType: ref.RefType}
dc.RelatedIdentifiers = append(dc.RelatedIdentifiers, relatedIdentifier)
}
}

// Add citation string as Description
var namecitation string
Expand Down
25 changes: 10 additions & 15 deletions libgin/datacite_test.go
Expand Up @@ -79,6 +79,8 @@ func Test_DataCiteMarshal(t *testing.T) {
example.AddReference(&Reference{ID: "doi:10.1111/example.doi", RefType: "IsDescribedBy", Name: "Manuscript title for reference."})
example.AddReference(&Reference{ID: "arxiv:10.2222/example.doi", RefType: "IsSupplementTo", Name: "Some other work"})
example.AddReference(&Reference{ID: "doi:10.3333/example.doi", RefType: "IsReferencedBy", Name: "A work that references this dataset."})
example.AddReference(&Reference{ID: "10.3333/example.doi", RefType: "IsReferencedBy", Name: "A reference without the reqired type - should not be added"})
example.AddReference(&Reference{ID: "doi:", RefType: "IsReferencedBy", Name: "A reference without the reqired id - should not be added"})

_, err := xml.MarshalIndent(example, "", "\t")
if err != nil {
Expand Down Expand Up @@ -157,6 +159,14 @@ func Test_parseAuthorID(t *testing.T) {
t.Fatal("Empty author ID should return nil")
}

if ident := parseAuthorID("orcID:"); ident != nil {
t.Fatal("Stunted AuthorID 'orcID:' should return nil")
}

if ident := parseAuthorID("researcherID:"); ident != nil {
t.Fatal("Stunted AuthorID 'researcherID:' should return nil")
}

validORCIDs := []string{
// valid, all 0s (different delimiters)
"orcid.0000-0000-0000-0000",
Expand Down Expand Up @@ -301,21 +311,6 @@ func Test_parseAuthorID(t *testing.T) {

}

// Test_GetArchiveSize requests the size of one fixed, published archive via
// GetArchiveSize and compares it with the known value.
func Test_GetArchiveSize(t *testing.T) {
	// The URL points to the earliest archive using the new name format, so it
	// is not expected to change. Older archives might be renamed to the new
	// format soon.
	const (
		archiveURL = "https://doi.gin.g-node.org/10.12751/g-node.4bdb22/10.12751_g-node.4bdb22.zip"
		expSize    = 1559190240
	)

	got, err := GetArchiveSize(archiveURL)
	switch {
	case err != nil:
		t.Fatalf("Failed to retrieve archive size for %q: %v", archiveURL, err.Error())
	case got != expSize:
		t.Fatalf("Incorrect archive size: %d (expected) != %d", expSize, got)
	}
}

func Test_MarshalUnmarshal(t *testing.T) {
example := NewDataCite()
example.Creators = []Creator{
Expand Down
57 changes: 53 additions & 4 deletions libgin/doi.go
Expand Up @@ -14,6 +14,7 @@ import (
// RepositoryYAML is used to read the information provided by a GIN user
// through the datacite.yml file. This data is usually used to populate the
// DataCite and RepositoryMetadata types.
// This struct is used in the G-Node gin-doi project.
type RepositoryYAML struct {
Authors []Author `yaml:"authors"`
Title string `yaml:"title"`
Expand All @@ -26,33 +27,43 @@ type RepositoryYAML struct {
ResourceType string `yaml:"resourcetype"`
}

// Author holds information about a DOI Author. RepositoryYAML carries a
// list of these under its "authors" YAML key.
// This struct is used in the G-Node gin-doi project.
type Author struct {
// FirstName maps to the "firstname" YAML key.
FirstName string `yaml:"firstname"`
// LastName maps to the "lastname" YAML key.
LastName string `yaml:"lastname"`
// Affiliation maps to the "affiliation" YAML key (tagged omitempty).
Affiliation string `yaml:"affiliation,omitempty"`
// ID maps to the "id" YAML key (tagged omitempty).
ID string `yaml:"id,omitempty"`
}

// License holds information about a DOI license.
// The struct is used in the G-Node gogs and gin-doi projects.
type License struct {
// Name maps to the "name" YAML key.
Name string `yaml:"name"`
// URL maps to the "url" YAML key.
URL string `yaml:"url"`
}

// Reference holds information about a DOI reference.
// The "Name" field has been deprecated.
// This struct is used in the G-Node gin-doi project.
type Reference struct {
// ID maps to the "id" YAML key (tagged omitempty). Consumers in this
// package split it at the first ":" into a type prefix and a value,
// e.g. "doi:10.1111/example.doi".
ID string `yaml:"id,omitempty"`
// RefType maps to the "reftype" YAML key (tagged omitempty). It is passed
// on as the relation type when a related identifier is built.
RefType string `yaml:"reftype,omitempty"`
Name string `yaml:"name,omitempty"` // deprecated, but still read for older versions
Citation string `yaml:"citation,omitempty"` // meant to replace Name
}

// GINUser holds basic information about a user on GIN.
// This struct is used in the G-Node gin-doi project.
// The fields carry no struct tags.
type GINUser struct {
// NOTE(review): presumably the account login name — confirm.
Username string
Email string
// NOTE(review): presumably the user's full display name — confirm.
RealName string
}

// RepositoryMetadata can contain all known metadata for a registered (or
// to-be-registered) repository. To do this, it embeds the
// to-be-registered) repository.
// This struct is used in the G-Node gin-doi project.
type RepositoryMetadata struct {
// YAMLData is the original data coming from the repository
YAMLData *RepositoryYAML
Expand All @@ -78,6 +89,7 @@ type RepositoryMetadata struct {
// This map is required because the current method of computing UUIDs differs
// from the older method and this lookup is used to handle the old-method
// UUIDs.
// This map is used in the G-Node gin-doi project.
var UUIDMap = map[string]string{
"INT/multielectrode_grasp": "f83565d148510fede8a277f660e1a419",
"ajkumaraswamy/HB-PAC_disinhibitory_network": "1090f803258557299d287c4d44a541b2",
Expand All @@ -87,6 +99,7 @@ var UUIDMap = map[string]string{
}

// RepoPathToUUID computes a UUID from a repository path.
// This function is used in the G-Node gogs project.
func RepoPathToUUID(URI string) string {
if doi, ok := UUIDMap[URI]; ok {
return doi
Expand All @@ -97,6 +110,7 @@ func RepoPathToUUID(URI string) string {

// DOIRequestData is used to transmit data from GIN to DOI when a registration
// request is triggered.
// This struct is used in the G-Node gogs and gin-doi projects.
type DOIRequestData struct {
Username string
Realname string
Expand All @@ -105,7 +119,8 @@ type DOIRequestData struct {
}

// DOIRegInfo holds all the metadata and information necessary for a DOI registration request.
// Deprecated and obsolete: Marked for removal
// Deprecated: Marked for removal
// This struct is used in the G-Node gogs project.
type DOIRegInfo struct {
Missing []string
DOI string
Expand All @@ -124,13 +139,20 @@ type DOIRegInfo struct {
TemplateVersion string
}

// GetType reports the resource type recorded in a DOIRegInfo. When the
// ResourceType field is empty, the string "Dataset" is returned instead.
// This method is currently used in the G-Node gogs project.
func (c *DOIRegInfo) GetType() string {
	// Fall back to the default when nothing was recorded.
	if resourceType := c.ResourceType; resourceType == "" {
		return "Dataset"
	}
	return c.ResourceType
}

// GetCitation returns a formatted string of a DOIRegInfo content
// containing Authors, Year, Title and DOI link.
// This method is currently not used in any project and should be
// considered deprecated.
func (c *DOIRegInfo) GetCitation() string {
var authors string
for _, auth := range c.Authors {
Expand All @@ -143,18 +165,28 @@ func (c *DOIRegInfo) GetCitation() string {
return fmt.Sprintf("%s (%s) %s. G-Node. https://doi.org/%s", authors, c.Year(), c.Title, c.DOI)
}

// Year returns the year of the DateTime field as a decimal string.
// It is used by the otherwise unused GetCitation DOIRegInfo method
// and should be considered deprecated.
func (c *DOIRegInfo) Year() string {
	year := c.DateTime.Year()
	return fmt.Sprint(year)
}

// ISODate returns the DateTime field formatted as year-month-day using the
// layout "2006-01-02".
// It is currently not used in any project and should be considered
// deprecated.
func (c *DOIRegInfo) ISODate() string {
	const isoLayout = "2006-01-02"
	return c.DateTime.Format(isoLayout)
}

// PrettyDate formats dt with the layout "02 Jan. 2006", i.e. zero-padded
// day, abbreviated month followed by a period, and four-digit year.
// dt is dereferenced, so it must not be nil.
// It is currently not used in any project and should be considered
// deprecated.
func PrettyDate(dt *time.Time) string {
	const prettyLayout = "02 Jan. 2006"
	return dt.Format(prettyLayout)
}

// GetValidID returns a NamedIdentifier struct for an Author, if
// the Author.ID contains a valid ORCID entry.
// The Method is currently not used in any project and should be
// considered deprecated.
func (c *Author) GetValidID() *NamedIdentifier {
if c.ID == "" {
return nil
Expand All @@ -169,17 +201,30 @@ func (c *Author) GetValidID() *NamedIdentifier {
}
return nil
}

// RenderAuthor returns a string of the Author content in the format
// 'Lastname, Firstname; Affiliation; ID'. Empty Affiliation and ID entries
// are omitted together with their separators.
// This method is used in the G-Node gogs project.
func (a *Author) RenderAuthor() string {
	auth := fmt.Sprintf("%s, %s; %s; %s", a.LastName, a.FirstName, a.Affiliation, a.ID)

	// Drop the dangling separators left behind by empty trailing fields.
	// TrimRight takes a cutset, so any trailing run of ';' and ' ' is removed.
	auth = strings.TrimRight(auth, "; ")

	// An empty Affiliation followed by a non-empty ID leaves "; ;" in the
	// middle of the string; collapse it to a single separator.
	return strings.Replace(auth, "; ;", ";", -1)
}

// NamedIdentifier is the return type of the unused GetValidID Author method
// and should be considered deprecated.
type NamedIdentifier struct {
// NOTE(review): presumably the URI of the identifier scheme — confirm.
SchemeURI string
// NOTE(review): presumably the scheme name, e.g. ORCID — confirm.
Scheme string
// NOTE(review): presumably the identifier value within the scheme — confirm.
ID string
}

// GetURL splits the ID string of a Reference at the ":" char
// into prefix and value and returns a full URL dependent on
// the provided prefix. Supported prefixes are "doi", "arxiv",
// "pmid" and "url". If no prefix can be identified, an empty
// string is returned.
// This method is used in the G-Node gin-doi project.
func (ref Reference) GetURL() string {
idparts := strings.SplitN(ref.ID, ":", 2)
if len(idparts) != 2 {
Expand Down Expand Up @@ -210,6 +255,10 @@ func (ref Reference) GetURL() string {
return fmt.Sprintf("%s%s", prefix, idnum)
}

// IsRegisteredDOI tries to http.Get a DOI via a provided
// DOI ID and returns a boolean value according to success
// or failure.
// This function is used in the G-Node gin-doi and gogs projects.
func IsRegisteredDOI(doi string) bool {
url := fmt.Sprintf("https://doi.org/%s", doi)
resp, err := http.Get(url)
Expand Down

0 comments on commit f776c7c

Please sign in to comment.