In [2]:
import asyncio
import getpass
import json
import os
from collections import Counter

import aiohttp
import requests

In [2]:
base_url = 'https://nomad-lab.eu/prod/v1/oasis/api/v1'

# Credentials are read from the environment when set; otherwise the user is
# prompted (getpass keeps the password out of the notebook output).
username = os.getenv('NOMAD_USERNAME') or input('Username: ')
password = os.getenv('NOMAD_PASSWORD') or getpass.getpass('Password: ')

response = requests.post(
    f'{base_url}/auth/token',
    data={'username': username, 'password': password},
    timeout=30,  # do not let a stalled connection hang the kernel forever
)
# Fail with the real HTTP error (e.g. 401 on bad credentials) instead of a
# confusing KeyError / JSON decode error on the next line.
response.raise_for_status()
token = response.json()['access_token']

In [7]:
semaphore = asyncio.Semaphore(5)  # Limit to 5 concurrent requests


async def fetch_page(session, url, json_body, headers):
    """POST ``json_body`` to ``url`` and return the decoded JSON response.

    The request runs while holding the module-level ``semaphore``, so at most
    five such requests are in flight at once.

    Args:
        session: Object whose ``post(url, json=..., headers=...)`` returns an
            async context manager yielding the response (callers in this
            notebook pass an ``aiohttp.ClientSession``).
        url: Endpoint to POST to.
        json_body: JSON-serialisable request body.
        headers: HTTP headers to send with the request.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Whatever ``resp.raise_for_status()`` raises for a 4xx/5xx response.
    """
    async with semaphore:
        async with session.post(url, json=json_body, headers=headers) as resp:
            # Surface HTTP errors explicitly; otherwise an HTML error page is
            # fed to the JSON decoder or the caller trips over a missing key.
            resp.raise_for_status()
            return await resp.json()


async def fetch_entry_ids_paginated_semaphore(token, max_entries=70):
    """Collect entry IDs from the ``entries/query`` endpoint, page by page.

    The query is restricted to one hard-coded upload ID and to entries that
    contain the ``LLMExtractedPerovskiteSolarCell`` section definition. Pages
    are requested sequentially; each response's ``next_page_after_value`` is
    sent back as ``page_after_value`` for the following request.

    Args:
        token: Bearer token sent in the ``Authorization`` header.
        max_entries: Upper bound on the number of IDs returned.

    Returns:
        A list of unique entry IDs in the order the API returned them,
        containing at most ``max_entries`` items.
    """
    url = f'{base_url}/entries/query'
    json_body = {
        'owner': 'visible',
        'pagination': {
            'page_size': 100,
        },
        'required': {'include': ['entry_id']},
        'query': {
            'and': [
                {'upload_id:any': ['C4ynD5c9RXesSXOJf6oq_Q']},
                {
                    'section_defs.definition_qualified_name:all': [
                        'perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell'
                    ]
                },
            ]
        },
    }
    headers = {'Authorization': f'Bearer {token}'}
    # A dict is used as an insertion-ordered set so the result is
    # deterministic and can be truncated meaningfully.
    entry_ids = {}
    async with aiohttp.ClientSession() as session:
        while len(entry_ids) < max_entries:
            response_json = await fetch_page(session, url, json_body, headers)
            for data in response_json['data']:
                entry_ids[data['entry_id']] = None
            next_value = response_json.get('pagination', {}).get(
                'next_page_after_value'
            )
            if not next_value:
                break
            json_body['pagination']['page_after_value'] = next_value
    # Whole pages are added above, so the last page may overshoot the cap.
    return list(entry_ids)[:max_entries]


# Request body for the per-entry archive query sent by fetch_entry: asks only
# for each layer's name, functionality and deposition method.
payload = dict(
    required=dict(
        data=dict(
            layers=dict(
                name='*',
                functionality='*',
                deposition=dict(method='*'),
            )
        )
    )
)


async def fetch_entry(session, entry_id, token, retries=3, delay=2):
    """Fetch the archive data of one entry, retrying on transient HTTP errors.

    The module-level ``payload`` is POSTed to
    ``{base_url}/entries/{entry_id}/archive/query``. The whole call, including
    back-off sleeps, runs while holding the module-level ``semaphore``.

    Args:
        session: Object whose ``post(url, json=..., headers=...)`` returns an
            async context manager yielding the response.
        entry_id: ID of the entry to fetch.
        token: Bearer token sent in the ``Authorization`` header.
        retries: Maximum number of attempts.
        delay: Base back-off in seconds; attempt ``k`` (1-based) waits
            ``delay * k`` before the next try.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (after a
        non-retryable status or once all attempts are used up).
    """
    async with semaphore:
        url = f'{base_url}/entries/{entry_id}/archive/query'
        headers = {'Authorization': f'Bearer {token}'}
        for attempt in range(retries):
            async with session.post(url, json=payload, headers=headers) as resp:
                if resp.status == 200:
                    return await resp.json()
                text = await resp.text()
                print(
                    f'Error {resp.status} for entry {entry_id} (attempt {attempt + 1}): {text}'
                )
                retryable = resp.status in [429, 500, 502, 503, 504]
            # The response context is closed before backing off, and no sleep
            # happens after the final attempt since nothing follows it.
            if not retryable or attempt == retries - 1:
                break
            await asyncio.sleep(delay * (attempt + 1))
        return None


async def fetch_entry_data(entry_ids, token):
    """Download all entries and bucket their layer names by functionality.

    Every ID in ``entry_ids`` is fetched with ``fetch_entry`` (all requests are
    scheduled at once via ``asyncio.gather``). Entries whose fetch returned a
    falsy result are skipped.

    Args:
        entry_ids: Iterable of entry IDs to fetch.
        token: Bearer token forwarded to ``fetch_entry``.

    Returns:
        A tuple ``(layer_names, deposition_methods)``: a dict mapping each
        functionality bucket to the list of layer names seen (with
        repetitions), and a flat list of deposition method names.
    """
    buckets = (
        'Substrate',
        'Absorber',
        'Contact',
        'Electron-transport',
        'Hole-transport',
        'Other',
        'Unknown',
    )
    layer_names = {bucket: [] for bucket in buckets}
    deposition_methods = []
    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(
            *[fetch_entry(session, eid, token) for eid in entry_ids]
        )
        for entry_response in results:
            if entry_response:
                outer = entry_response.get('data', {})
                archive = outer.get('archive', {})
                archive_data = archive.get('data', {})
                for layer in archive_data.get('layers', []):
                    name = layer.get('name', 'Unknown')
                    bucket = layer.get('functionality', 'Unknown')
                    # Any functionality outside the known buckets is filed
                    # under 'Other'.
                    if bucket not in layer_names:
                        bucket = 'Other'
                    for deposition in layer.get('deposition', []):
                        deposition_methods.append(
                            deposition.get('method', 'Unknown')
                        )
                    layer_names[bucket].append(name)
    return layer_names, deposition_methods

In [18]:
# Collect the IDs of all matching entries; 20000 is the cap handed to the
# paginated fetch. Top-level `await` works here because the cell runs inside
# the notebook kernel.
entry_ids = await fetch_entry_ids_paginated_semaphore(token, max_entries=20000)
print(f'Fetched {len(entry_ids)} unique entry IDs.')

Fetched 11395 unique entry IDs.


In [19]:
# Fetch every collected entry and aggregate layer names per functionality plus
# the flat list of deposition methods. Failed requests are reported by
# fetch_entry via print, which is what produces the error output below.
layer_names, deposition_methods = await fetch_entry_data(entry_ids, token)

Error 503 for entry h5bQ8JkbK1KwOAm8uUzTKW6qdmfB (attempt 1): <html>
<head><title>503 Service Temporarily Unavailable</title></head>
<body>
<center><h1>503 Service Temporarily Unavailable</h1></center>
<hr><center>nginx</center>
</body>
</html>

Error 503 for entry V0ivRmP2jC6JKhK2EoBjA1ueLclV (attempt 1): <html>
<head><title>503 Service Temporarily Unavailable</title></head>
<body>
<center><h1>503 Service Temporarily Unavailable</h1></center>
<hr><center>nginx</center>
</body>
</html>

Error 503 for entry 6UbsL2GAbmx3RPltzlN0SX_LV8Pe (attempt 1): <html>
<head><title>503 Service Temporarily Unavailable</title></head>
<body>
<center><h1>503 Service Temporarily Unavailable</h1></center>
<hr><center>nginx</center>
</body>
</html>

Error 503 for entry QDx5DMAikmfeVSRqqKVFmf4m5pmF (attempt 1): <html>
<head><title>503 Service Temporarily Unavailable</title></head>
<body>
<center><h1>503 Service Temporarily Unavailable</h1></center>
<hr><center>nginx</center>
</body>
</html>

Error 503 for en

In [20]:
# For each functionality, rank the distinct layer names by how often they
# occur (most frequent first) and keep the (name, count) pairs. The deposition
# methods are ranked the same way and stored under their own key.
unique_names = {}
for func, names in layer_names.items():
    ranked_names = Counter(names).most_common()
    print(f'{func} ({len(ranked_names)}): {[name for name, _ in ranked_names]}')
    unique_names[func] = ranked_names
ranked_methods = Counter(deposition_methods).most_common()
print(f'Deposition methods ({len(ranked_methods)}): {[m for m, _ in ranked_methods]}')
unique_names['Deposition methods'] = ranked_methods

Substrate (101): ['FTO', 'ITO', 'Glass', 'PET', 'PEN', 'PEN/ITO', 'PET/ITO', 'FTO glass', 'Glass/ITO', 'ITO/PET', 'ITO/PEN', 'Quartz', 'FTO/glass', 'IZO', 'PEN-ITO', 'ITO-glass', 'Glass/FTO', 'ITO-PET', 'PDMS', 'TCO', 'p-type silicon', 'ITO glass', 'MgF2/Glass', 'c-Si', 'Ti', 'FTO-30', 'FTO-70', 'Flexible substrate', 'glass', 'HPMC', 'Silicon', 'Kapton', 'FTO-50', 'FTO-glass', 'ITO-PEN', 'Parylene', 'Silicon heterojunction', 'PSi', 'Ni', 'PET-IZO', 'n-type silicon', 'TCO/glass', 'Al-foil', 'Sapphire', 'Stainless steel', 'ITO/glass', 'Cellophane paper', 'PI-SWNT', 'FTO-A', 'SiO2', 'SWNT-PI', 'NOA 63', 'PET:ITO', 'CFRP', 'ITP-coated Glass', 'FTO-HA', 'FTO-N', 'PI', 'PET/IMI', 'MgF2/PET/ITO', 'Cu foil', 'Polymer', 'Si', 'SiO2/Si', 'FTO Glass', 'SLG', 'Ti metal-coated glass', 'LiF/FTO', 'FTO substrate with Sm3+-Ce3+ co-doped glass-ceramic waveguides', 'Textured silicon', 'Graphene', 'n-Si', 'OPV8', 'sm-ITO', 'Willow glass', 'ITO-coated PEN', 'Silk-Ag nanowires', 'Carbon cloth', 'TLHA-coate

In [None]:
# Compact summary: number of distinct layer names per functionality.
# The per-bucket set uses its own name so that the global `unique_names` dict
# (dumped to JSON in a later cell) is not overwritten by this loop.
print('\nSummary of unique names per functionality:')
for func, names in layer_names.items():
    distinct_names = set(names)
    print(f'{func} ({len(distinct_names)})')


Summary of unique names per functionality:
Substrate (71)
Absorber (2128)
Contact (178)
Electron-transport (598)
Hole-transport (789)
Other (492)
Unknown (0)


In [21]:
# Cache the ranked names and deposition methods on disk (path is relative to
# the current working directory). The next cell reads this file back.
with open('unique_names_and_methods.json', 'w') as f:
    json.dump(unique_names, f, indent=2)

In [11]:
# Reload the cached result. After the JSON round trip each (name, count) pair
# is a two-element list rather than a tuple.
with open('unique_names_and_methods.json') as f:
    unique_names = json.load(f)

In [12]:
# Deposition method names only (counts dropped), in the order stored in the
# loaded JSON.
synthesis_methods = [m for m, _ in unique_names.get('Deposition methods', [])]
print(f'Synthesis methods: {synthesis_methods}')

# Hand-curated synonym groups: canonical method name -> list of spellings that
# should be treated as that method. Groups with fewer than two spellings are
# skipped when the lookup map is built further down in this cell.
manual_synthesis_method_synonyms = {
    'Spin-coating': [
        'Spin-coating',
        'Spin coating',
        'One-step spin coating',
        'One-step spin-coating',
        'Intermittent spin-coating',
    ],
    'Thermal-annealing': ['Thermal-annealing', 'Thermal annealing'],
    'Doctor blading': [
        'Blade coating',
        'Blade-coating',
        'Doctor blading',
        'Doctor blade',
        'Doctor-blading',
        'Meniscus coating',
        'Meniscus-coating',
        'Meniscus-modulated blade coating',
    ],
    'CBD': [
        'Chemical bath deposition',
        'Chemical Bath Deposition',
        'Chemical-bath deposition',
        'Chemical bath',
        'Chemical bath co-deposition',
        'CBD',
    ],
    'Antisolvent-quenching': [
        'Antisolvent-quenching',
        'Antisolvent technique',
        'one-step anti-solvent method',
        'Antisolvent-fumigated',
    ],
    'Screen printing': ['Screen-printing', 'Screen printing'],
    'Slot-die coating': ['Slot-die coating', 'Slot die coating', 'Slot-die printing'],
    'Drop-infiltration': [
        'Drop-infiltration',
    ],
    'Sputtering': [
        'Sputtering',
    ],
    'Unknown': [
        'Unknown',
    ],
    'Spray coating': [
        'Spray coating',
    ],
    'Spray pyrolysis': [
        'Spray pyrolysis',
    ],
    'Atomic layer deposition': [
        'Atomic layer deposition',
        'ALD',
        'Atom layer deposition',
        'Atomic Layer Deposition',
    ],
    'Thermal evaporation': ['Thermal evaporation'],
    'Co-evaporation': ['Co-evaporation'],
    'Magnetron sputtering': ['Magnetron sputtering'],
    'Drop-casting': [
        'Drop-casting',
        'Drop casting',
        'Drop-coating',
        'Drop coating',
        'Dropcasting',
    ],
    'SILAR': ['SILAR'],
    'Gas quenching': ['Gas quenching'],
    'Sol-gel': ['Sol-gel'],
    'RF magnetron sputtering': ['RF magnetron sputtering'],
    'Inkjet printing': ['Inkjet printing'],
    'Hydrothermal': ['Hydrothermal'],
    'TiCl4 treatment': ['TiCl4 treatment'],
    'RF sputtering': ['RF sputtering'],
}

# Flatten the synonym groups into a lookup of spelling -> canonical method.
# Groups with a single spelling contribute nothing to the lookup.
synthesis_method_map = {
    spelling: canonical
    for canonical, spellings in manual_synthesis_method_synonyms.items()
    if len(spellings) >= 2
    for spelling in spellings
}

# Canonicalise every observed method; dict.fromkeys keeps the first occurrence
# of each result and preserves the original ordering.
unique_synthesis_methods = list(
    dict.fromkeys(synthesis_method_map.get(raw, raw) for raw in synthesis_methods)
)

print(
    f'Unique synthesis methods after mapping: {json.dumps(unique_synthesis_methods, indent=2)}'
)

Synthesis methods: ['Spin-coating', 'Thermal-annealing', 'Evaporation', 'Doctor blading', 'Chemical bath deposition', 'Antisolvent-quenching', 'Screen-printing', 'Screen printing', 'Slot-die coating', 'Drop-infiltration', 'Blade coating', 'Sputtering', 'Unknown', 'Spray coating', 'Spray pyrolysis', 'Blade-coating', 'Atomic layer deposition', 'Thermal evaporation', 'Co-evaporation', 'Magnetron sputtering', 'Drop-casting', 'ALD', 'SILAR', 'Gas quenching', 'Sol-gel', 'RF magnetron sputtering', 'Inkjet printing', 'Hydrothermal', 'TiCl4 treatment', 'RF sputtering', 'Two-step sequential deposition', 'Two-step deposition', 'Sequential deposition', 'Thermal annealing', 'Immersion', 'Drop-coating', 'Chemical vapor deposition', 'Gravure printing', 'Electrospraying', 'Two-step method', 'Ligand exchange', 'Post-treatment', 'Dip-coating', 'Blading', 'Spin coating', 'Vapor-solid reaction', 'Vacuum flash', 'Electrospinning', 'Dip coating', 'DC magnetron sputtering', 'Wet transfer', 'Press transfer', 

In [13]:
# Substrate names only (counts dropped), in the order stored in the loaded JSON.
substrates = [m for m, _ in unique_names.get('Substrate', [])]
print(f'Substrates: {substrates}')

# Hand-curated synonym groups: canonical substrate name -> spellings that
# should be treated as that substrate.
manual_substrate_synonyms = {
    'FTO': [
        'FTO',
        'FTO glass',
        'FTO/glass',
        'Glass/FTO',
        'FTO-30',
        'FTO-70',
        'FTO-50',
        'FTO-glass',
    ],
    'ITO': ['ITO', 'ITO glass', 'ITO/glass', 'Glass/ITO', 'ITO-glass'],
    'SLG': ['Glass', 'glass', 'SLG'],
    'Silicon': ['Silicon', 'Si', 'Si substrate', 'Silicon wafer', 'c-Si'],
    'p-type silicon': ['p-type silicon', 'p-type Cz silicon', 'p-Si', 'PSi'],
    'n-type silicon': ['n-type silicon', 'n-Si', 'n+Si'],
    'PEN/ITO': ['PEN/ITO', 'PEN-ITO', 'ITO-PEN', 'PEN/ITO strip'],
    'PET/ITO': ['PET/ITO', 'PET-ITO', 'ITO-PET'],
}

# Flatten the synonym groups into a lookup of spelling -> canonical substrate.
# Groups with a single spelling contribute nothing to the lookup.
substrate_map = {
    spelling: canonical
    for canonical, spellings in manual_substrate_synonyms.items()
    if len(spellings) >= 2
    for spelling in spellings
}

# Canonicalise every observed substrate, keeping first occurrences in order.
unique_substrates = list(
    dict.fromkeys(substrate_map.get(raw, raw) for raw in substrates)
)

print(f'Unique substrates after mapping: {json.dumps(unique_substrates, indent=2)}')

Substrates: ['FTO', 'ITO', 'Glass', 'PET', 'PEN', 'PEN/ITO', 'PET/ITO', 'FTO glass', 'Glass/ITO', 'ITO/PET', 'ITO/PEN', 'Quartz', 'FTO/glass', 'IZO', 'PEN-ITO', 'ITO-glass', 'Glass/FTO', 'ITO-PET', 'PDMS', 'TCO', 'p-type silicon', 'ITO glass', 'MgF2/Glass', 'c-Si', 'Ti', 'FTO-30', 'FTO-70', 'Flexible substrate', 'glass', 'HPMC', 'Silicon', 'Kapton', 'FTO-50', 'FTO-glass', 'ITO-PEN', 'Parylene', 'Silicon heterojunction', 'PSi', 'Ni', 'PET-IZO', 'n-type silicon', 'TCO/glass', 'Al-foil', 'Sapphire', 'Stainless steel', 'ITO/glass', 'Cellophane paper', 'PI-SWNT', 'FTO-A', 'SiO2', 'SWNT-PI', 'NOA 63', 'PET:ITO', 'CFRP', 'ITP-coated Glass', 'FTO-HA', 'FTO-N', 'PI', 'PET/IMI', 'MgF2/PET/ITO', 'Cu foil', 'Polymer', 'Si', 'SiO2/Si', 'FTO Glass', 'SLG', 'Ti metal-coated glass', 'LiF/FTO', 'FTO substrate with Sm3+-Ce3+ co-doped glass-ceramic waveguides', 'Textured silicon', 'Graphene', 'n-Si', 'OPV8', 'sm-ITO', 'Willow glass', 'ITO-coated PEN', 'Silk-Ag nanowires', 'Carbon cloth', 'TLHA-coated gla

In [14]:
# Contact names only (counts dropped), in the order stored in the loaded JSON.
contacts = [m for m, _ in unique_names.get('Contact', [])]
print(f'Contacts: {contacts}')

# Hand-curated synonym groups: canonical contact name -> spellings that should
# be treated as that contact. Single-entry groups (e.g. 'ITO') are skipped when
# the lookup map is built further down in this cell.
manual_contact_synonyms = {
    'Au': [
        'Au',
        'Gold',
    ],
    'Ag': [
        'Ag',
        'Silver',
    ],
    'ITO': [
        'ITO',
    ],
    'Carbon': [
        'Carbon',
        'C',
        'carbon',
        'Carbon paste',
        'Carbon electrode',
        'Carbon black electrode',
    ],
    'Carbon-mp': [
        'Carbon-mp',
        'mp-Carbon',
        'Carbon mesoporous',
        'm-Carbon',
        'm-carbon electrode',
        'porous carbon',
    ],
    'Cu': [
        'Cu',
        'Copper',
    ],
}

# Flatten the synonym groups into a lookup of spelling -> canonical contact.
# Groups with a single spelling contribute nothing to the lookup.
contact_map = {
    spelling: canonical
    for canonical, spellings in manual_contact_synonyms.items()
    if len(spellings) >= 2
    for spelling in spellings
}

# Canonicalise every observed contact, keeping first occurrences in order.
unique_contacts = list(
    dict.fromkeys(contact_map.get(raw, raw) for raw in contacts)
)

print(f'Unique contacts after mapping: {json.dumps(unique_contacts, indent=2)}')

Contacts: ['Au', 'Ag', 'ITO', 'Carbon', 'Cu', 'MoO3', 'Al', 'FTO', 'Ni', 'MoOx', 'IZO', 'LiF', 'C', 'Pt', 'Cr', 'carbon', 'Carbon paste', 'Ta-WOx', 'Silver', 'TCO', 'IO:H', 'AgNWs', 'Carbon electrode', 'Ti wire', 'Back contact', 'Graphite', 'AZO', 'C-rCP', 'Mo', 'Gold', 'Cathode', 'Anode', 'Au/Ag', 'Ag-grid', 'Silver paste', 'Cu2S', 'rGO', 'Metal', 'Se', 'SWCNT@85%', 'Pd', 'TCE', 'Graphene', 'H2PtCl6', 'TETA-Gr', 'ZrAcac', 'Au:Pd', 'Ag grid', 'WO3', 'Ta4C3F2', 'Ti4N3', 'mp-Carbon', 'VOx', 'BCP', 'RGO', 'ITO/Au gold finger', 'Platinum', 'nanocarbon', 'NiO/Ag/NiO', 'Carbon mesoporous', 'C-G', 'AgAl', 'Carbon black electrode', 'ALD ITO', 'Ag/Au', 'SWCNT', 'Pt coated FTO', 'CNT sheet', 'LIG@NiOX', 'AgNW', 'DMD', 'Back electrode', 'Copper', 'Metal electrode', 'DWNT', 'ICO:H', 'W', 'MoOx/Au/MoOx', 'Ca', 'Carbon/conductive cloth', 'm-Carbon', 'F-CE', 'm-carbon electrode', 'WO3/Ag/WO3', 'Gr/CB', 'PEDOT:F', 'Carbon w/ 30 wt% Mn3O4', 'TiO2/ultrathin Ag/TiO2', 'Ag paste', 'CNTs', 'CNT', 'Cu2S/bra

In [15]:
# Electron-transport layer names only (counts dropped), in the order stored in
# the loaded JSON.
etls = [m for m, _ in unique_names.get('Electron-transport', [])]
print(f'Electron transport layers ({len(etls)}): {etls}')

# Hand-curated synonym groups: canonical ETL name -> spellings that should be
# treated as that layer. Single-entry groups are skipped when the lookup map is
# built further down in this cell.
etl_synonyms = {
    'SnO2-c': [
        'SnO2',
        'SnO2-c',
        'SnOx',
        'c-SnO2',
        'ALD-SnO2',
        'SnO2-SnOx',
        'SnO2-x',
        'ALD-SnOx',
        'CBD-SnO2',
        'ALD SnOx',
        'Crystalline SnO2',
    ],
    'TiO2-c': [
        'TiO2-c',
        'TiO2',
        'c-TiO2',
        'Compact TiO2',
        'compact-TiO2',
        'cp-TiO2',
        'C-TiO2',
        'compact TiO2',
        'compacted TiO2',
        'TiO2 compact',
        'cTiO2',
        'TiO2 compact layer',
        'TiO2 CL',
        'Compact-TiO2',
    ],
    'C60': ['C60'],
    'PCBM-60': ['PCBM', 'PC60BM', 'PCBM-60'],
    'BCP': ['BCP'],
    'PC61BM': ['PC61BM'],
    'TiO2-mp': [
        'TiO2-mp',
        'mp-TiO2',
        'm-TiO2',
        'mesoporous TiO2',
        'TiO2 mesoporous',
        'Mesoporous TiO2',
        'mesoporous-TiO2',
        'meso-TiO2',
        'Mesoscopic TiO2',
        'mp TiO2',
        'ms-TiO2',
        'Meso.TiO2',
        'mpTiO2',
        'TiO2 scaffold',
        'M-TiO2',
        'Thin mp-TiO2',
    ],
    'TiO2-np': ['TiO2-np', 'TiO2-NPs'],
    'SnO2-np': [
        'SnO2-np',
        'SnO2 QD',
        'SnO2 QDs',
        'SnO2 NP',
        'SnO2 NPs',
        'np-SnO2',
        'QD-SnO2',
        'NP-SnO2',
        'SnO2 QDs-TQDs',
    ],
    'TiO2-bl': ['TiO2-bl', 'bl-TiO2', 'b-TiO2'],
}

# Flatten the synonym groups into a lookup of spelling -> canonical ETL name.
# Groups with a single spelling contribute nothing to the lookup.
etl_map = {
    spelling: canonical
    for canonical, spellings in etl_synonyms.items()
    if len(spellings) >= 2
    for spelling in spellings
}

# Canonicalise every observed ETL name, keeping first occurrences in order.
unique_etls = list(dict.fromkeys(etl_map.get(raw, raw) for raw in etls))

print(
    f'Unique electron transport layers after mapping ({len(unique_etls)}): {json.dumps(unique_etls, indent=2)}'
)

Electron transport layers (811): ['SnO2', 'TiO2', 'C60', 'PCBM', 'c-TiO2', 'BCP', 'PC61BM', 'm-TiO2', 'mp-TiO2', 'ZnO', 'ICBA', 'CdS', 'TiO2 mesoporous', 'SnOx', 'PC60BM', 'bl-TiO2', 'mesoporous TiO2', 'WS2', 'SnS2', 'ZnSe', 'Compact TiO2', 'TPBi', 'compact-TiO2', 'SnO2 QD', 'AZO', 'IGZO', 'ZnO NPs', 'Mesoporous TiO2', 'mesoporous-TiO2', 'TiO2-NPs', 'In2S3', 'ZnOS', 'C60-ETPA', 'cp-TiO2', 'C-TiO2', 'ZnO nanorods', 'Bphen', 'meso-TiO2', 'PC71BM', 'PFN-Br', 'ALD-SnO2', 'ETL', 'c-SnO2', 'compact TiO2', 'TiO2 nanorods', 'WO3', 'LiF', 'SnO2-KBr', 'PEI', 'PC61BM + C60', 'SnO2 QDs', 'TiOx', 'GO', 'SnO2-SnOx', 'b-TiO2', 'bathocuproine', 'CeO2', 'Mesoscopic TiO2', 'C60-SAM', 'ZnS', 'F8', 'PC61BM-C60', 'CdZnS', 'compacted TiO2', 'ZnMgO', 'SnO2-Cl', 'SnO2 NP', 'mp TiO2', 'LBSO', 'ZnO NP', 'SnO2 w/ urea', 'RbF-SnO2', 'NDP', 'TiO2/ZnO', 'Fe2O3', 'SnO2 NPs', 'TiO2 compact', 'CPTA', 'Li-TiO2', 'n-CsGeI3', 'Cd0.5Zn0.5S', 'i-ZnO', 'Bathocuproine', 'Meo-2PACz', 'ZnO@TiO2 NAs', 'PC61BM + TBABF4', 'AT-TiO

In [16]:
# Hole-transport layer names only (counts dropped), in the order stored in the
# loaded JSON.
htls = [m for m, _ in unique_names.get('Hole-transport', [])]
print(f'Hole transport layers ({len(htls)}): {htls}')

# Hand-curated synonym groups: canonical HTL name -> spellings that should be
# treated as that layer. Single-entry groups are skipped when the lookup map is
# built further down in this cell.
# NOTE(review): 'spiro-SMeTAD', 'spiro-OSMeTAD' and '4PACz' look like distinct
# compounds rather than spelling variants of their group's canonical name —
# confirm these merges are intended.
htl_synonyms = {
    'Spiro-OMeTAD': [
        'Spiro-OMeTAD',
        'spiro-OMeTAD',
        'Spiro-MeOTAD',
        'spiro-MeOTAD',
        'SpiroOMeTAD',
        'spiroOMeTAD',
        'spiroMeOTAD',
        'SpiroMeOTAD',
        'spiro-OMETAD',
        'spiro-OSMeTAD',
        'spiro-MEOTAD',
        'spiro-OMeTAD (doped)',
        'Commercial spiro-OMeTAD',
        'spiro-SMeTAD',
        'Spiro',
    ],
    'NiOx': ['NiOx', 'NiO', 'NiOX'],
    'PTAA': ['PTAA'],
    'PEDOT:PSS': ['PEDOT:PSS', 'PEDOT', 'PEDOT/PSS', 'PEDOT-PSS'],
    'MeO-2PACz': ['MeO-2PACz', 'MeO-2PACZ', 'MeO-2PACz SAM', 'Meo-2PACz'],
    '2PACz': ['2PACz', '2PACZ'],
    'P3HT': ['P3HT'],
    'Me-4PACz': ['Me-4PACz', '4PACz', 'Me-4PACZ'],
    'CuSCN': ['CuSCN'],
    'Cu2O': ['Cu2O'],
    'CuI': ['CuI'],
    'SAM': ['SAM'],
    'MeO-4PACz': ['MeO-4PACz'],
}

# Flatten the synonym groups into a lookup of spelling -> canonical HTL name.
# Groups with a single spelling contribute nothing to the lookup.
htl_map = {
    spelling: canonical
    for canonical, spellings in htl_synonyms.items()
    if len(spellings) >= 2
    for spelling in spellings
}

# Canonicalise every observed HTL name, keeping first occurrences in order.
unique_htls = list(dict.fromkeys(htl_map.get(raw, raw) for raw in htls))

print(
    f'Unique hole transport layers after mapping ({len(unique_htls)}): {json.dumps(unique_htls, indent=2)}'
)

Hole transport layers (1080): ['Spiro-OMeTAD', 'spiro-OMeTAD', 'NiOx', 'PTAA', 'PEDOT:PSS', 'Spiro-MeOTAD', 'MeO-2PACz', 'P3HT', 'NiO', 'Me-4PACz', 'CuSCN', 'Cu2O', '2PACz', 'CuI', 'spiro-MeOTAD', 'SAM', 'MeO-4PACz', 'NiOX', 'MoO3', 'P3CT-N', 'CBTS', 'HTL', '4PADCB', 'CuInS2', 'TaTm', 'PDCBT', 'SpiroOMeTAD', 'CuPc', 'CFTS', 'CuO', 'PEDOT', 'CdTe', 'PTAA:F4-TCNQ', 'Cu:NiOx', 'V2O5', 'Carbon', 'CZTS', 'Spiro-TTB', 'CuSbS2', 'PTB7', 'PBDB-T', 'GO', 'CuOx', 'poly-TPD', 'CuAlO2', 'PolyTPD', 'SAMs', 'CbzNaph', 'Me-4PAcz', 'Spiro-OMETAD', 'Spiro-OmeTAD', 'MoOx', 'Me4PACz', 'Spiro', 'MASnBr3', 'PTAA:F4TCNQ', 'TFB', 'Spiro-OMeOTAD', 'VNPB', 'cp-NiOx', 'LaNiO3', 'CZTSe', 'PANI-PAMPSA', 'MoS2', 'MPA-CPA', 'MoTe2', '4PACz', 'MeO-2PACz:Me-4PACz', 'Ph-4PACz', 'Cu:NiOX', 'TAPC', 'PTA', 'PM6', 'BDT-C8-3O', 'rGO', 'PVA + F-127 + KI + I2', 'A-PEDOT:PSS', 'poly(triarylamine)', 'NiOx:Sr', 'CuCrO2', 'NiO NPs', 'SAM (2PACz and Me-4PACz)', '2PACZ', 'spiroOMeTAD', 'D18-Cl', 'Spiro-OMeTAD w/ TCPO', 'p-CsGeI3',

In [17]:
# Merge the per-category lookups into a single spelling -> canonical name map.
# With `|`, a key present in several maps takes its value from the right-most
# one, so the order of the operands decides any collision.
combined_map = synthesis_method_map | substrate_map | contact_map | etl_map | htl_map
# Written into the package source tree; the path is relative to the current
# working directory.
with open('../src/perovskite_solar_cell_database/synonym_map.json', 'w') as f:
    json.dump(combined_map, f, indent=2)