# Publications & Datasets: checking Licenses/Terms of use using the Data Citation Service

This notebook uses the Data Citation Service to check the **licenses** and **terms of use** of the publications and datasets stored in SSHOMP that have a _DOI_ or a _Handle_ as PID.




## 0 Requirements to run this notebook

This section gives all the relevant information to "interact" with the MP data.

### 0.1 libraries
*There are a number of external libraries needed to run the notebook* 

*Furthermore, a dedicated SSH Open Marketplace library - sshmarketplacelib - with customised functions has been created and can be imported using the python import commands.* 

*The cell below imports the libraries needed to run this notebook.*

In [None]:
import numpy as np
import pandas as pd
import requests
#import the MarketPlace Library 
from sshmarketplacelib import MPData as mpd
from sshmarketplacelib import  eval as eva, helper as hel

### 0.2 Get the data



Get the MarketPlace data

In [None]:
# Entry point of the SSH Open Marketplace helper library.
mpdata = mpd()

# Download both item categories with the same call; the second positional
# argument (True) is passed through unchanged
# (presumably it requests the flattened representation -- TODO confirm).
df_publication_flat, df_datasets_flat = (
    mpdata.getMPItems(category, True)
    for category in ("publications", "datasets")
)

# Stack publications and datasets into one frame (original row labels are kept).
df_data = pd.concat((df_publication_flat, df_datasets_flat))

Select all Publications/Datasets that have a _handle_ or a _doi_ as identifier

In [None]:
# For every item keep only the access links whose text contains 'doi' or 'handle'.
pid_links_per_item = [
    [link for link in links if 'doi' in link or 'handle' in link]
    for links in df_data['accessibleAt']
]
df_data['accessibleAt'] = pid_links_per_item

# Discard items left without any such link, then renumber the rows
# (the previous row labels are preserved in a new 'index' column).
has_pid_link = df_data['accessibleAt'].str.len() > 0
df_data = df_data[has_pid_link].reset_index()

    Iterate over the dataframe with the publications/datasets and search for citation metadata
    Inspect the retrieved metadata to search for licensing info
    Create a dataframe with resulting information

In [None]:
import json
import urllib.parse
import urllib.request

# Data Citation Service endpoint; the URL-encoded PID is appended to it.
CITATION_SERVICE_URL = 'https://v4e-lab.isti.cnr.it/citationservice/citharvester/getcitationmetadata?pid='
# Token sent with every request to the service.
CITATION_SERVICE_TOKEN = 'test'
# Only the first MAX_ITEMS rows of df_data are checked.
MAX_ITEMS = 100
# Seconds to wait for the service before giving up on a single PID.
REQUEST_TIMEOUT = 30


def _licenses_in_sshomp(properties):
    """Collect the license / terms-of-use properties of one Marketplace item.

    Parameters
    ----------
    properties : iterable of dict, None or NaN
        The item's `properties` cell. Each property is expected to carry a
        `type.code` and either a `concept` (with a `code`) or a free-text `value`.

    Returns
    -------
    list of dict
        One `{type code: concept code or value}` entry per matching property.
    """
    found = []
    # A missing cell (None / NaN) simply means "nothing recorded".
    if properties is None or isinstance(properties, float):
        return found
    for prope in properties:
        if 'concept' not in prope and 'value' not in prope:
            continue
        type_code = prope['type']['code']
        if 'concept' in prope and (type_code == 'license' or 'terms-of-use' in type_code):
            found.append({type_code: prope['concept']['code']})
        # Checked independently of 'concept': free-text terms of use carry only a value.
        if 'value' in prope and 'terms-of-use' in type_code:
            found.append({type_code: prope['value']})
    return found


def _fetch_citation_metadata(pid_url):
    """Ask the Data Citation Service for the citation metadata of one PID.

    Returns the decoded JSON answer, or None (after printing a notice) when the
    service cannot be reached or does not answer with valid JSON, so that one
    failing PID does not abort the whole harvest.
    """
    request_url = (CITATION_SERVICE_URL + urllib.parse.quote_plus(pid_url)
                   + '&token=' + CITATION_SERVICE_TOKEN)
    try:
        with urllib.request.urlopen(request_url, timeout=REQUEST_TIMEOUT) as response:
            return json.load(response)
    except (OSError, ValueError) as err:
        # OSError covers URLError/HTTPError and timeouts; ValueError covers invalid JSON.
        print(f"Skipping {pid_url}: citation metadata not available ({err})")
        return None


def _licenses_from_citation(metadata):
    """Extract licensing information from a Data Citation Service answer.

    Returns
    -------
    (list, list)
        `from_repository`: `jsonld_properties.license` and the `dc.rights` entry
        of `properties` (key matched case-insensitively);
        `from_ra`: `ra_properties.copyright` and `ra_properties.license`.
    """
    from_repository, from_ra = [], []
    if not isinstance(metadata, dict):
        return from_repository, from_ra

    jsonld_props = metadata.get('jsonld_properties') or {}
    if 'license' in jsonld_props:
        from_repository.append(jsonld_props['license'])

    ra_props = metadata.get('ra_properties') or {}
    for key in ('copyright', 'license'):
        if key in ra_props:
            from_ra.append(ra_props[key])

    # Property names are compared in lower case.
    repo_props = {k.lower(): v for k, v in (metadata.get('properties') or {}).items()}
    if 'dc.rights' in repo_props:
        from_repository.append(repo_props['dc.rights'])

    return from_repository, from_ra


# One record per PID for which the service returned some licensing information.
d = []

for inde, row in df_data[0:MAX_ITEMS].iterrows():
    # The Marketplace-side licenses depend on the item only, not on the PID.
    inshomp = _licenses_in_sshomp(row.properties)

    for aait in row.accessibleAt:
        print (f"{aait}, {inde}")

        res_j = _fetch_citation_metadata(aait)
        indcsrepo, indcsra = _licenses_from_citation(res_j)

        if indcsrepo or indcsra:
            d.append(
                {
                    'item': f"{mpdata.MPserver}/{row.category}/{row.persistentId}",
                    'accessibleAt': aait,
                    'license_in_sshomp': list(inshomp),
                    'license_from_repository': indcsrepo,
                    'license_from_ra': indcsra
                }
            )

res=pd.DataFrame(d)

In [None]:
# Preview the first ten rows of the license comparison table.
res.head(n=10)

In [None]:
# Save the comparison table. The output folder is created first because
# to_csv does not create missing directories and would fail on a fresh checkout.
import os

os.makedirs('data', exist_ok=True)
res.to_csv('data/licenses.csv')