### Implementation of [FAIRsoft evaluation tool](https://openebench.bsc.es/observatory/Evaluation/) on CSDMS model repository.

This notebook is intended to provide an example adaptation of the FAIRsoft evaluation workflow using the Observatory API and a custom GitHub metadta extraction routine to evaluate the FAIRsoft scores of research software in the CSDMS model repository.

In [None]:
# import libaries to handle making Observatory API requests
import evaluation_client
import jsonld_client
import cff_client
# import custom libary for extracting GitHub metadata and mapping to observatory metadata schema
import get_repository_metadata
# import custom library for extracting GitHub URLs and metadata from CSDMS model repo
import collect_csdms_urls
# import standard Python libraries
import pandas as pd
import matplotlib.pyplot as plt
import requests

The GitHub repository URLs of the research software from CSDMS can now be imported (this step will be scripted with API query later):

In [2]:
df_csdms_repos = pd.read_csv('241113_csdms_repos.csv')
github_urls = df_csdms_repos['Repository URL']
print(github_urls)

0      https://github.com/anyadoc/MOOvCWD_InfectionMo...
1      https://github.com/rossgore/ABM_Ethnocentrism_...
2      https://github.com/SOSIEL/The-SOSIEL-Platform.git
3               https://github.com/efyoungud/stationfire
4       https://github.com/mathieuBourgais/KissNightClub
                             ...                        
192    https://git.wur.nl/Oort001/darts/-/tags/DARTSS...
193                 https://github.com/ateeuw/3spire_GSA
194              https://github.com/doodem/majorincident
195    https://github.com/danielTorren/The_cultural_m...
196                https://github.com/isaacullah/AgModel
Name: Repository URL, Length: 197, dtype: object


The GitHub repository URLs can be input into the FAIRsoft evaluation workflow:

In [3]:
# initalize dataframe for FAIRsoft scores
indicators = [
              'F1','F2','F3',
              'A1','A2','A3',
              'I1','I2','I3',
              'R1','R2','R3','R4'
]
df_scores = pd.DataFrame(columns=indicators, dtype='object')

for url in github_urls:
    try:
        # The GitHub repository URL can be input into the routine for metadata extraction and mapping to the observatory metadata schema
        metadata = get_repository_metadata.main(url)
        # Run scores and get evaluation results
        fairsoft_scores,_ = evaluation_client.get_fairsoft_scores_and_evaluation(metadata)
        # Return scores and turn into dataframe
        df = pd.DataFrame([{indicator: fairsoft_scores[indicator] for indicator in indicators}], index=[url], dtype='object')
    except:
        # Report scores as none if the evaluation did not proceed
        df = pd.DataFrame([{indicator: None for indicator in indicators}], index=[url], dtype='object')
        print(url)
        #break
    # add row to summary dataframe
    df_scores = pd.concat([df_scores,df])


# save scores to file
df_scores.index.name = 'GitHub URL'
df_scores.to_csv('fairsoft_csdms_evaluation.csv')
print(df_scores)

https://github.com/SOSIEL/The-SOSIEL-Platform.git
                                                     F1   F2   F3   A1   A2  \
GitHub URL                                                                    
https://github.com/anyadoc/MOOvCWD_InfectionMod...  0.8  0.6  0.7  0.0  0.0   
https://github.com/rossgore/ABM_Ethnocentrism_I...  0.8  0.6  0.7  0.0  0.0   

                                                     A3   I1   I2   I3   R1  \
GitHub URL                                                                    
https://github.com/anyadoc/MOOvCWD_InfectionMod...  0.0  0.0  0.0  0.0  0.0   
https://github.com/rossgore/ABM_Ethnocentrism_I...  0.0  0.0  0.0  0.0  0.0   

                                                     R2   R3   R4  
GitHub URL                                                         
https://github.com/anyadoc/MOOvCWD_InfectionMod...  0.0  1.0  0.0  
https://github.com/rossgore/ABM_Ethnocentrism_I...  0.0  1.0  0.0  


In [3]:
# initalize dataframe for FAIRsoft scores
indicators = [
              'F1','F2','F3',
              'A1','A2','A3',
              'I1','I2','I3',
              'R1','R2','R3','R4'
]
df_scores = pd.DataFrame(columns=indicators, dtype='object')

for url in github_urls:
    try:
        # The GitHub repository URL can be input into the routine for metadata extraction and mapping to the observatory metadata schema
        metadata = get_repository_metadata.main(url)
        # Run scores and get evaluation results
        fairsoft_scores,_ = evaluation_client.get_fairsoft_scores_and_evaluation(metadata)
        # Return scores and turn into dataframe
        df = pd.DataFrame([{indicator: fairsoft_scores[indicator] for indicator in indicators}], index=[url], dtype='object')
    except:
        # Report scores as none if the evaluation did not proceed
        df = pd.DataFrame([{indicator: None for indicator in indicators}], index=[url], dtype='object')
        print(url)
        break
    # add row to summary dataframe
    df_scores = pd.concat([df_scores,df])
    # print(df_scores)


# save scores to file
df_scores.index.name = 'GitHub URL'
df_scores.to_csv('fairsoft_csdms_evaluation.csv')
print(df_scores)

https://github.com/SOSIEL/The-SOSIEL-Platform.git
                                                     F1   F2   F3   A1   A2  \
GitHub URL                                                                    
https://github.com/anyadoc/MOOvCWD_InfectionMod...  0.8  0.6  0.7  0.0  0.0   
https://github.com/rossgore/ABM_Ethnocentrism_I...  0.8  0.6  0.7  0.0  0.0   

                                                     A3   I1   I2   I3   R1  \
GitHub URL                                                                    
https://github.com/anyadoc/MOOvCWD_InfectionMod...  0.0  0.0  0.0  0.0  0.0   
https://github.com/rossgore/ABM_Ethnocentrism_I...  0.0  0.0  0.0  0.0  0.0   

                                                     R2   R3   R4  
GitHub URL                                                         
https://github.com/anyadoc/MOOvCWD_InfectionMod...  0.0  1.0  0.0  
https://github.com/rossgore/ABM_Ethnocentrism_I...  0.0  1.0  0.0  


In [7]:
url = 'https://github.com/SOSIEL/The-SOSIEL-Platform'

# The GitHub repository URL can be input into the routine for metadata extraction and mapping to the observatory metadata schema
metadata = get_repository_metadata.main(url)
# Run scores and get evaluation result
#fairsoft_scores,_ = evaluation_client.get_fairsoft_scores_and_evaluation(metadata)

In [8]:
metadata

{'name': 'Algorithm-SOSIEL',
 'label': ['Algorithm-SOSIEL'],
 'description': [{'term': "A repository with the SOSIEL (Self-Organizing Social & Inductive Evolutionary Learning) Algorithm's source files.",
   'id': 0}],
 'links': [],
 'webpage': [{'term': '', 'id': 0}],
 'isDisabled': False,
 'isEmpty': False,
 'isLocked': False,
 'isPrivate': False,
 'isTemplate': False,
 'license': [{'term': {'name': 'GNU Lesser General Public License v3.0',
    'url': 'http://choosealicense.com/licenses/lgpl-3.0/'},
   'id': 0}],
 'repository': [{'term': 'https://github.com/SOSIEL/Algorithm-SOSIEL',
   'id': 0}],
 'topics': [],
 'operations': [],
 'authors': [{'term': {'name': 'dependabot[bot]',
    'type': 'person',
    'email': '49699333+dependabot[bot]@users.noreply.github.com',
    'maintainer': False},
   'id': 0},
  {'term': {'name': 'Ivan Pizhenko',
    'type': 'person',
    'email': '11859904+IvanPizhenko@users.noreply.github.com',
    'maintainer': False},
   'id': 1},
  {'term': {'name': 'Iv

In [8]:
metadata

{'name': 'Algorithm-SOSIEL',
 'label': ['Algorithm-SOSIEL'],
 'description': [{'term': "A repository with the SOSIEL (Self-Organizing Social & Inductive Evolutionary Learning) Algorithm's source files.",
   'id': 0}],
 'links': [],
 'webpage': [{'term': '', 'id': 0}],
 'isDisabled': False,
 'isEmpty': False,
 'isLocked': False,
 'isPrivate': False,
 'isTemplate': False,
 'license': [{'term': {'name': 'GNU Lesser General Public License v3.0',
    'url': 'http://choosealicense.com/licenses/lgpl-3.0/'},
   'id': 0}],
 'repository': [{'term': 'https://github.com/SOSIEL/Algorithm-SOSIEL',
   'id': 0}],
 'topics': [],
 'operations': [],
 'authors': [{'term': {'name': 'dependabot[bot]',
    'type': 'person',
    'email': '49699333+dependabot[bot]@users.noreply.github.com',
    'maintainer': False},
   'id': 0},
  {'term': {'name': 'Ivan Pizhenko',
    'type': 'person',
    'email': '11859904+IvanPizhenko@users.noreply.github.com',
    'maintainer': False},
   'id': 1},
  {'term': {'name': 'Iv

In [10]:
evaluation_client.get_fairsoft_scores_and_evaluation(metadata)

HTTP error occurred: 400 Client Error: Bad Request for url: https://observatory.openebench.bsc.es/api/fair/evaluate


In [13]:
if 'version' in metadata:
    del metadata['version']

endpoint_url = 'https://observatory.openebench.bsc.es/api/fair/evaluate'

# define payload and content type headers
payload = {
    'tool_metadata': metadata
}
headers = {
    'Content-Type': 'application/json'
}


# get the response
response = requests.post(endpoint_url,json=payload,headers=headers)
response.raise_for_status()

if response.status_code == 200:
    # extract the json response
    json_response = response.json()

    if json_response['result'] is not None:
        # retrieve the results and logs
        results = json_response['result']
        logs = json_response['logs']
        return results,logs
    
    else:
        print('Error in obtaining FAIRsoft evaluation results.')


HTTPError: 400 Client Error: Bad Request for url: https://observatory.openebench.bsc.es/api/fair/evaluate

In [14]:
response

<Response [400]>

In [15]:
response.text

'{"error":"Instance creation failed: 2 validation errors for Instance\\nauthors.0.email\\n  value is not a valid email address: The email address contains invalid characters before the @-sign: \'[\', \']\'. [type=value_error, input_value=\'49699333+dependabot[bot]...sers.noreply.github.com\', input_type=str]\\nwebpage.0\\n  Input should be a valid URL, input is empty [type=url_parsing, input_value=\'\', input_type=str]\\n    For further information visit https://errors.pydantic.dev/2.8/v/url_parsing"}'

In [23]:
metadata_2 = {'name': 'Algorithm-SOSIEL',
 'label': ['Algorithm-SOSIEL'],
 'description': [{'term': "A repository with the SOSIEL (Self-Organizing Social & Inductive Evolutionary Learning) Algorithm's source files.",
   'id': 0}],
 'links': [],
 'webpage': [{'term': None, 'id': 0}],
 'isDisabled': False,
 'isEmpty': False,
 'isLocked': False,
 'isPrivate': False,
 'isTemplate': False,
 'license': [{'term': {'name': 'GNU Lesser General Public License v3.0',
    'url': 'http://choosealicense.com/licenses/lgpl-3.0/'},
   'id': 0}],
 'repository': [{'term': 'https://github.com/SOSIEL/Algorithm-SOSIEL',
   'id': 0}],
 'topics': [],
 'operations': [],
 'authors': [
  {'term': {'name': 'Ivan Pizhenko',
    'type': 'person',
    'email': '11859904+IvanPizhenko@users.noreply.github.com',
    'maintainer': False},
   'id': 1},
  {'term': {'name': 'Ivan Pizhenko',
    'type': 'person',
    'email': 'IvanPizhenko@users.noreply.github.com',
    'maintainer': False},
   'id': 2},
  {'term': {'name': 'Garry Sotnik',
    'type': 'person',
    'email': 'gsotnik@gmail.com',
    'maintainer': False},
   'id': 3},
  {'term': {'name': 'Garry Sotnik',
    'type': 'person',
    'email': 'gsotnik@users.noreply.github.com',
    'maintainer': False},
   'id': 4},
  {'term': {'name': 'Eugene Lobach',
    'type': 'person',
    'email': 'elobach@gmail.com',
    'maintainer': False},
   'id': 5},
  {'term': {'name': 'aleksandr.karifanov',
    'type': 'person',
    'email': 'aleksandr.karifanov@effective-soft.com',
    'maintainer': False},
   'id': 6},
  {'term': {'name': 'Vadim Moskvin',
    'type': 'person',
    'email': 'vadim.moskvin@yandex.by',
    'maintainer': False},
   'id': 7},
  {'term': {'name': 'Petr Sobolev',
    'type': 'person',
    'email': 'petrsobolev1432@gmail.com',
    'maintainer': False},
   'id': 8}],
 'bioschemas': False,
 'contribPolicy': [],
 'dependencies': [],
 'documentation': [],
 'download': [],
 'edam_operations': [],
 'edam_topics': [],
 'https': True,
 'input': [],
 'inst_instr': False,
 'operational': False,
 'os': [],
 'output': [],
 'publication': [],
 'semantics': {'inputs': [], 'outputs': [], 'topics': [], 'operations': []},
 'source': ['github'],
 'src': [],
 'ssl': True,
 'tags': [],
 'test': [],
 'type': ''}

In [24]:
if 'version' in metadata_2:
    del metadata_2['version']

endpoint_url = 'https://observatory.openebench.bsc.es/api/fair/evaluate'

# define payload and content type headers
payload = {
    'tool_metadata': metadata_2
}
headers = {
    'Content-Type': 'application/json'
}


# get the response
response = requests.post(endpoint_url,json=payload,headers=headers)
response.raise_for_status()

if response.status_code == 200:
    # extract the json response
    json_response = response.json()

    if json_response['result'] is not None:
        # retrieve the results and logs
        results = json_response['result']
        logs = json_response['logs']
        return results,logs
    
    else:
        print('Error in obtaining FAIRsoft evaluation results.')

HTTPError: 400 Client Error: Bad Request for url: https://observatory.openebench.bsc.es/api/fair/evaluate

In [25]:
response.text

'{"error":"Instance creation failed: 1 validation error for Instance\\nwebpage.0\\n  URL input should be a string or URL [type=url_type, input_value=None, input_type=NoneType]\\n    For further information visit https://errors.pydantic.dev/2.8/v/url_type"}'