# Imports

In [1]:
import numpy as np
import pandas as pd

# Fetching data

In [8]:
# Function to extract values from the downloaded entries
def extract_values(entry):
    try:
        htl.append(entry['results']['properties']['optoelectronic']['solar_cell']['hole_transport_layer'])
    except:
        htl.append('None')
    try:
        etl.append(entry['results']['properties']['optoelectronic']['solar_cell']['electron_transport_layer'])
    except:
        etl.append('None')
    try:
        pce.append(entry['results']['properties']['optoelectronic']['solar_cell']['efficiency'])
    except:
        pce.append('None')
    try:
        ref.append(entry['references'])
    except:
        ref.append('None')
    return htl, etl, pce, ref

In [13]:
# gets all ~43000 PSCs from NOMAD

import requests

htl = []
etl = []
pce = []
ref = []
page_after_value = None
base_url = 'https://nomad-lab.eu/prod/v1/api/v1/'

while True:
    data = requests.post(f'{base_url}entries/query', json={
        "owner": "visible",
        "aggregations": {},
        "query": {
            "and": [
                {"sections:all": ["nomad.datamodel.results.SolarCell"]},
                ]},
        "required": {
            "results":{
                "material": {
                    "chemical_formula_reduced":"*",
                    "structural_type":"*"},
                "properties": {
                   "optoelectronic":{
                      "band_gap":"*",
                      "solar_cell":{
                          "open_circuit_voltage":"*",
                          "short_circuit_current_density":"*",
                          "fill_factor":"*",
                          "efficiency":"*",
                          }}},},
        },
        "pagination": {"page_size": 10,
                       "page_after_value": page_after_value}
        }).json()
    if not data['data']:
        print('debug: no data found')
        break
    if 'next_page_after_value' not in data['pagination'].keys():
      # make sure to grasp the entries of the last page before breaking
      for entry in data['data']:
        if 'results' not in entry.keys():
          continue
        elif 'chemical_formula_reduced' not in entry['results']['material'].keys():
          continue
        else:
          extract_values(entry)
      break
    page_after_value = data['pagination']['next_page_after_value']

    for entry in data['data']:
      if 'results' not in entry.keys():
        continue
      else:
        extract_values(entry)

In [19]:
# put result of query into a pandas dataframe
df_all_ctls = pd.DataFrame({'etl': etl, 'htl': htl, 'pce': pce, 'ref': ref})

# clean up ref so it shows only the reference to the paper
df_all_ctls['ref'] = df_all_ctls['ref'].apply(lambda x: x[0] if x else None)

In [7]:
import pickle
with open('df_all_ctls.pkl', 'rb') as f:
    df_all_ctls = pickle.load(f)


df_all_ctls.head(10)

Unnamed: 0,etl,htl,pce,ref
0,"[PCBM-60, LiF]","[PEDOT:PSS, PDPP-DTT]",9.8,https://doi.org/10.1038/NPHOTON.2014.284
1,"[PCBM-60, Bphen]",[PEDOT:PSS],14.1,https://doi.org/10.1016/j.orgel.2019.105430
2,"[TiO2-c, TiO2-mp]",[none],5.5,https://doi.org/10.1021/ja307789s
3,"[PCBM-60, Bphen]",[PEDOT:PSS],11.82,https://doi.org/10.1016/j.electacta.2018.07.029
4,[TiO2-c],[MeO-PheDOT],9.46,https://doi.org/10.1002/asia.201501423
5,[TiO2-c],[Spiro-MeOTAD],9.9,https://doi.org/10.1021/acsaem.9b00531
6,[TiO2-c],[Spiro-MeOTAD],18.67,https://doi.org/10.1016/j.jpowsour.2018.04.086
7,"[TiO2-c, TiO2-mp]",[none],6.17,https://doi.org/10.1016/j.mssp.2019.104798
8,[PCBM-60],[PEDOT:PSS],12.7,https://doi.org/10.1016/j.nanoen.2015.06.010
9,"[TiO2-c, TiO2-mp]",[none],7.09,https://doi.org/10.1016/j.jechem.2016.03.021


# Pickle the result
Fetching all those entries took ca. 30 mins, so I pickle them here.

In [21]:
import pickle
with open('df_all_ctls.pkl', 'wb') as f:
    pickle.dump(df_all_ctls, f)

# Only get a couple entries for experimentation
In order to have less entries for experimentation, we fetch only entries with
Sulfur in them, which limits the 43119 to about 1000.

In [11]:
# the results.material.elements:all": ["S"] part was inserted
# also, a check in the end checks if there is a chemical_formula_reduced key

import requests

htl = []
etl = []
pce = []
ref = []
page_after_value = None
base_url = 'https://nomad-lab.eu/prod/v1/api/v1/'

while True:
    data = requests.post(f'{base_url}entries/query', json={
        "owner": "visible",
        "aggregations": {},
        "query": {
            "and": [
                {"results.material.elements:all": ["S"]}, # change was made here
                {"sections:all": ["nomad.datamodel.results.SolarCell"]},
                ]},
        "required": {
            "results":{
                "material": {
                    "chemical_formula_reduced":"*",
                    "structural_type":"*"},
                "properties": {
                   "optoelectronic":{
                      "band_gap":"*",
                      "solar_cell":{
                          "open_circuit_voltage":"*",
                          "short_circuit_current_density":"*",
                          "fill_factor":"*",
                          "efficiency":"*",
                          }}},},
        },
        "pagination": {"page_size": 10,
                       "page_after_value": page_after_value}
        }).json()
    if not data['data']:
        break
    if 'next_page_after_value' not in data['pagination'].keys():
      # make sure to grasp the entries of the last page before breaking
      for entry in data['data']:
        if 'results' not in entry.keys():
          continue
        elif 'chemical_formula_reduced' not in entry['results']['material'].keys():
          continue
        else:
          extract_values(entry)
      break
    page_after_value = data['pagination']['next_page_after_value']

    for entry in data['data']:
      if 'results' not in entry.keys():
        continue
      # this check here only works if a material is specified
      elif 'chemical_formula_reduced' not in entry['results']['material'].keys():
        continue
      else:
        extract_values(entry)

KeyError: 'data'

In [13]:
df_some_test_ctls = pd.DataFrame({'etl': etl, 'htl': htl, 'pce': pce, 'ref': ref})
# clean up ref so it shows only the reference to the paper
df_some_test_ctls['ref'] = df_some_test_ctls['ref'].apply(lambda x: x[0])

In [14]:
df_some_test_ctls[0:20]

Unnamed: 0,etl,htl,pce,ref
0,"[TiO2-mp, D35]",[none],1.12,https://doi.org/10.1016/j.matchemphys.2019.122310
1,"[TiO2-c, TiO2-mp]",[Spiro-MeOTAD],0.11,https://doi.org/10.1039/c8ra00639c
2,[C60; PEIE],[PTAA],20.7,https://doi.org/10.1126/science.aba3433
3,"[PCBM-60, BCP]",[NiO-c],0.01,https://doi.org/10.1186/s40580-017-0120-3
4,"[PCBM-60, BCP]",[PEDOT:PSS],,https://doi.org/10.1021/jacs.0c03363
5,"[PCBM-60, Bphen]",[PEDOT:PSS],11.32,https://doi.org/10.1002/advs.201900548
6,"[PCBM-60, BCP]",[PEDOT:PSS],15.93,https://doi.org/10.1002/adma.202001470
7,"[TiO2-c, TiO2-mp]",[Spiro-MeOTAD],1.69,https://doi.org/10.1021/acsami.8b15578
8,"[PCBM-60, BCP]",[PEDOT:PSS],15.0,https://doi.org/10.1021/jacs.8b04604
9,"[PCBM-60, BCP]",[PEDOT:PSS],18.01,https://doi.org/10.1002/adma.202001470


In [17]:
import pickle
with open('df_some_test_ctls.pkl','wb') as f:
    pickle.dump(df_some_test_ctls, f)