# Imports

In [22]:
import numpy as np
import pandas as pd

# Fetching data

In [1]:
# Function to extract values from the downloaded entries
def extract_values(entry):
    """Append one entry's solar-cell values to the module-level result lists.

    Reads ``hole_transport_layer``, ``electron_transport_layer`` and
    ``efficiency`` from
    ``entry['results']['properties']['optoelectronic']['solar_cell']`` and
    appends them to the global lists ``htl``, ``etl`` and ``pce``.  A value
    whose path is not present is recorded as the string ``'None'``, so each
    call appends exactly one item to every list and the lists stay aligned.

    Parameters
    ----------
    entry : dict
        One element of the ``data`` list returned by the entries query.

    Returns
    -------
    tuple
        ``(htl, etl, pce)`` -- the same global lists, after appending.
    """
    try:
        solar_cell = entry['results']['properties']['optoelectronic']['solar_cell']
    except (KeyError, TypeError):
        # Only a missing/non-subscriptable path counts as "no data".  Anything
        # else (e.g. KeyboardInterrupt during a long fetch) must propagate
        # instead of being recorded as a missing value.
        solar_cell = {}

    targets = (
        (htl, 'hole_transport_layer'),
        (etl, 'electron_transport_layer'),
        (pce, 'efficiency'),
    )
    for values, key in targets:
        try:
            values.append(solar_cell[key])
        except (KeyError, TypeError):
            values.append('None')
    return htl, etl, pce

In [2]:
# gets the data from NOMAD

import requests

# Result lists filled by extract_values(); one item per accepted entry.
htl = []
etl = []
pce = []
# Pagination cursor: None requests the first page.
page_after_value = None
base_url = 'https://nomad-lab.eu/prod/v1/api/v1/'

while True:
    response = requests.post(
        f'{base_url}entries/query',
        json={
            "owner": "visible",
            "aggregations": {},
            "query": {
                "and": [
                    {"results.material.elements:all": ["*"]},
                    {"sections:all": ["nomad.datamodel.results.SolarCell"]},
                ]
            },
            "required": {
                "results": {
                    "material": {
                        "chemical_formula_reduced": "*",
                        "structural_type": "*",
                    },
                    "properties": {
                        "optoelectronic": {
                            "band_gap": "*",
                            "solar_cell": {
                                "open_circuit_voltage": "*",
                                "short_circuit_current_density": "*",
                                "fill_factor": "*",
                                "efficiency": "*",
                            },
                        },
                    },
                },
            },
            # NOTE(review): a larger page_size would mean fewer round trips --
            # confirm the limit the API accepts before changing it.
            "pagination": {
                "page_size": 10,
                "page_after_value": page_after_value,
            },
        },
        # requests never times out by default; fail instead of hanging forever.
        timeout=60,
    )
    # Surface HTTP errors here rather than as a confusing KeyError below.
    response.raise_for_status()
    data = response.json()
    if not data['data']:
        break

    # Process the page first so the last page is handled by the same code
    # path as every other page.
    for entry in data['data']:
        # `or {}` also covers 'results'/'material' being present but None.
        material = (entry.get('results') or {}).get('material') or {}
        if 'chemical_formula_reduced' in material:
            extract_values(entry)

    # No cursor (absent or None) means this was the last page.  Treating None
    # as "done" avoids restarting from the first page in an endless loop.
    next_page_after_value = data['pagination'].get('next_page_after_value')
    if next_page_after_value is None:
        break
    page_after_value = next_page_after_value

In [7]:
# Look at the raw structure of one entry: `entry` is the loop variable left
# over from the fetch loop, i.e. the last entry that was iterated.
entry


{'upload_id': 'FT8UX98FS5KtDbNBw-dU3A',
 'references': ['https://doi.org/10.1016/j.matchemphys.2019.122310',
  'https://doi.org/10.1038/s41560-021-00941-3',
  'https://www.perovskitedatabase.com/'],
 'origin': 'The Perovskite Database Project',
 'quantities': ['',
  'data',
  'data.add',
  'data.add.lay_back',
  'data.add.lay_back_deposition_aggregation_state_of_reactants',
  'data.add.lay_back_deposition_procedure',
  'data.add.lay_back_deposition_reaction_solutions_age',
  'data.add.lay_back_deposition_reaction_solutions_compounds_purity',
  'data.add.lay_back_deposition_reaction_solutions_compounds_supplier',
  'data.add.lay_back_deposition_reaction_solutions_temperature',
  'data.add.lay_back_deposition_reaction_solutions_volumes',
  'data.add.lay_back_deposition_solvents',
  'data.add.lay_back_deposition_solvents_purity',
  'data.add.lay_back_deposition_solvents_supplier',
  'data.add.lay_back_deposition_substrate_temperature',
  'data.add.lay_back_deposition_synthesis_atmosphere'

# Pickle the result
Fetching all those entries took ca. 30 mins, so I pickle them here.

In [6]:
# One row per fetched entry; columns hold the electron/hole transport layers.
df_all_ctls = pd.DataFrame(dict(etl=etl, htl=htl))

In [8]:
import pickle
# Persist the table so the slow download does not have to be repeated.
with open('df_all_ctl.pkl', mode='wb') as f:
    pickle.dump(df_all_ctls, file=f)

In [12]:
# Display the ETL/HTL table built from all fetched entries.
df_all_ctls

Unnamed: 0,etl,htl
0,"[PCBM-60, LiF]","[PEDOT:PSS, PDPP-DTT]"
1,"[PCBM-60, Bphen]",[PEDOT:PSS]
2,"[TiO2-c, TiO2-mp]",[none]
3,"[PCBM-60, Bphen]",[PEDOT:PSS]
4,[TiO2-c],[MeO-PheDOT]
...,...,...
43114,"[TiO2-c, TiO2-mp]",[Spiro-MeOTAD]
43115,"[ZnO-c, ZnO-nw]",[Spiro-MeOTAD]
43116,"[PCBM-60, BCP]",[PEDOT:PSS]
43117,[SnO2-c],[Spiro-MeOTAD]


# Only get a couple entries for experimentation
To have fewer entries for experimentation, we fetch only entries that contain
sulfur (S), which reduces the 43119 entries to about 1000.

In [9]:
# Function to extract values from the downloaded entries
def extract_values(entry):
    """Append one entry's solar-cell values and references to the result lists.

    Reads ``hole_transport_layer``, ``electron_transport_layer`` and
    ``efficiency`` from
    ``entry['results']['properties']['optoelectronic']['solar_cell']`` and
    ``entry['references']``, and appends them to the global lists ``htl``,
    ``etl``, ``pce`` and ``ref``.  A value whose path is not present is
    recorded as the string ``'None'``, so each call appends exactly one item
    to every list and the lists stay aligned.

    Parameters
    ----------
    entry : dict
        One element of the ``data`` list returned by the entries query.

    Returns
    -------
    tuple
        ``(htl, etl, pce, ref)`` -- the same global lists, after appending.
    """
    try:
        solar_cell = entry['results']['properties']['optoelectronic']['solar_cell']
    except (KeyError, TypeError):
        # Only a missing/non-subscriptable path counts as "no data".  Anything
        # else (e.g. KeyboardInterrupt during a long fetch) must propagate
        # instead of being recorded as a missing value.
        solar_cell = {}

    targets = (
        (htl, 'hole_transport_layer'),
        (etl, 'electron_transport_layer'),
        (pce, 'efficiency'),
    )
    for values, key in targets:
        try:
            values.append(solar_cell[key])
        except (KeyError, TypeError):
            values.append('None')

    # References live at the top level of the entry, not under solar_cell.
    try:
        ref.append(entry['references'])
    except (KeyError, TypeError):
        ref.append('None')
    return htl, etl, pce, ref

In [10]:
# Same query as the full download above; only the
# "results.material.elements:all": ["S"] filter was changed.

import requests

# Result lists filled by extract_values(); one item per accepted entry.
htl = []
etl = []
pce = []
ref = []
# Pagination cursor: None requests the first page.
page_after_value = None
base_url = 'https://nomad-lab.eu/prod/v1/api/v1/'

while True:
    response = requests.post(
        f'{base_url}entries/query',
        json={
            "owner": "visible",
            "aggregations": {},
            "query": {
                "and": [
                    {"results.material.elements:all": ["S"]},  # change was made here
                    {"sections:all": ["nomad.datamodel.results.SolarCell"]},
                ]
            },
            "required": {
                "results": {
                    "material": {
                        "chemical_formula_reduced": "*",
                        "structural_type": "*",
                    },
                    "properties": {
                        "optoelectronic": {
                            "band_gap": "*",
                            "solar_cell": {
                                "open_circuit_voltage": "*",
                                "short_circuit_current_density": "*",
                                "fill_factor": "*",
                                "efficiency": "*",
                            },
                        },
                    },
                },
            },
            # NOTE(review): a larger page_size would mean fewer round trips --
            # confirm the limit the API accepts before changing it.
            "pagination": {
                "page_size": 10,
                "page_after_value": page_after_value,
            },
        },
        # requests never times out by default; fail instead of hanging forever.
        timeout=60,
    )
    # Surface HTTP errors here rather than as a confusing KeyError below.
    response.raise_for_status()
    data = response.json()
    if not data['data']:
        break

    # Process the page first so the last page is handled by the same code
    # path as every other page.
    for entry in data['data']:
        # `or {}` also covers 'results'/'material' being present but None.
        material = (entry.get('results') or {}).get('material') or {}
        if 'chemical_formula_reduced' in material:
            extract_values(entry)

    # No cursor (absent or None) means this was the last page.  Treating None
    # as "done" avoids restarting from the first page in an endless loop.
    next_page_after_value = data['pagination'].get('next_page_after_value')
    if next_page_after_value is None:
        break
    page_after_value = next_page_after_value
In [13]:
def _first_reference(refs):
    """Return the first item of a reference list, or a string unchanged.

    Entries without references carry the string sentinel 'None' (appended by
    extract_values); indexing into it would yield 'N', so strings are passed
    through as-is.  Empty or non-sequence values also map to 'None'.
    """
    if isinstance(refs, str):
        return refs
    if isinstance(refs, (list, tuple)) and len(refs) > 0:
        return refs[0]
    return 'None'


# One row per fetched entry: transport layers, efficiency and references.
df_some_test_ctls = pd.DataFrame({'etl': etl, 'htl': htl, 'pce': pce, 'ref': ref})
# clean up ref so it shows only the first listed reference
# (NOTE(review): assumed to be the paper DOI, as in the rows displayed below -- confirm)
df_some_test_ctls['ref'] = df_some_test_ctls['ref'].apply(_first_reference)

In [14]:
# Preview the first 20 rows of the sulfur-only subset.
df_some_test_ctls.iloc[:20]

Unnamed: 0,etl,htl,pce,ref
0,"[TiO2-mp, D35]",[none],1.12,https://doi.org/10.1016/j.matchemphys.2019.122310
1,"[TiO2-c, TiO2-mp]",[Spiro-MeOTAD],0.11,https://doi.org/10.1039/c8ra00639c
2,[C60; PEIE],[PTAA],20.7,https://doi.org/10.1126/science.aba3433
3,"[PCBM-60, BCP]",[NiO-c],0.01,https://doi.org/10.1186/s40580-017-0120-3
4,"[PCBM-60, BCP]",[PEDOT:PSS],,https://doi.org/10.1021/jacs.0c03363
5,"[PCBM-60, Bphen]",[PEDOT:PSS],11.32,https://doi.org/10.1002/advs.201900548
6,"[PCBM-60, BCP]",[PEDOT:PSS],15.93,https://doi.org/10.1002/adma.202001470
7,"[TiO2-c, TiO2-mp]",[Spiro-MeOTAD],1.69,https://doi.org/10.1021/acsami.8b15578
8,"[PCBM-60, BCP]",[PEDOT:PSS],15.0,https://doi.org/10.1021/jacs.8b04604
9,"[PCBM-60, BCP]",[PEDOT:PSS],18.01,https://doi.org/10.1002/adma.202001470


In [17]:
import pickle
# Persist the sulfur-only subset for later experimentation.
with open('df_some_test_ctls.pkl', mode='wb') as f:
    pickle.dump(df_some_test_ctls, file=f)