In [None]:
# ruff: noqa: E402

<div style="
    background-color: #f7f7f7;
    background-image: url(''), url('') ;
    background-position: left bottom, right top;
    background-repeat: no-repeat,  no-repeat;
    background-size: auto 60px, auto 160px;
    border-radius: 5px;
    box-shadow: 0px 3px 1px -2px rgba(0, 0, 0, 0.2), 0px 2px 2px 0px rgba(0, 0, 0, 0.14), 0px 1px 5px 0px rgba(0,0,0,.12);">

<h1 style="
    color: #2a4cdf;
    font-style: normal;
    font-size: 2.25rem;
    line-height: 1.4em;
    font-weight: 600;
    padding: 30px 200px 0px 30px;"> 
        Performance Evolution Analysis of Perovskite Solar Cells</h1>

<p style="
    line-height: 1.4em;
    padding: 30px 200px 0px 30px;">
    This notebook analyses some aspects of how device performance in the field has evolved over time, using data from the <a href="https://nomad-lab.eu/prod/v1/staging/gui/search/perovskite-solar-cells-database" target="_blank">Perovskite database in NOMAD</a>.
</p>

<p style="font-size: 1.25em; font-style: italic; padding: 5px 200px 30px 30px;">
</p>
</div>

In [1]:
from plotly_theme import register_template, set_defaults

# Run the plotting setup from the local plotly_theme module before any figure is
# built.
# NOTE(review): presumably register_template() registers a custom Plotly template
# and set_defaults() makes it the default — confirm in plotly_theme.
register_template()
set_defaults()

In [2]:
# Load the perovskite solar cell database from the parquet file into a DataFrame.
# The path is relative, so the file must be in the notebook's working directory.
import pandas as pd

df = pd.read_parquet('perovskite_solar_cell_database.parquet')

In [3]:
# Add a source_database column to df: rows whose
# data.ref.name_of_person_entering_the_data equals 'LLM Extraction' are tagged
# 'LLM Extracted'; every other value is tagged 'Manual Entry'.
def _label_source(entered_by):
    """Return the source_database label for a single entered-by value."""
    if entered_by == 'LLM Extraction':
        return 'LLM Extracted'
    return 'Manual Entry'


df['source_database'] = df['data.ref.name_of_person_entering_the_data'].map(
    _label_source
)
from plotly_theme import DEFAULT_COLORWAY

# Canonical ordering of the two source_database labels.
SOURCE_ORDER = ['Manual Entry', 'LLM Extracted']

# Pair each label with the colorway entry at the same position.
COLOR_MAP = {
    source: color for source, color in zip(SOURCE_ORDER, DEFAULT_COLORWAY)
}

In [None]:
import matplotlib.colors as mcolors


def darken_color(hex_color, factor=0.7):
    """
    Scale the RGB channels of a color by ``factor`` and return a hex string.

    Parameters
    ----------
    hex_color : str
        Color to scale, e.g. ``'#1f77b4'``.
    factor : float, default 0.7
        Multiplier applied to each RGB channel.
        factor < 1 → darker
        factor = 1 → same color
        factor > 1 → lighter (channels saturate at 1.0)

    Returns
    -------
    str
        The scaled color in ``'#rrggbb'`` form.
    """
    rgb = mcolors.to_rgb(hex_color)  # (r, g, b) floats in [0, 1]; alpha is dropped
    # Clamp every channel to [0, 1]: to_hex raises ValueError for values outside
    # that range, which an unclamped product would hit whenever factor > 1.
    scaled_rgb = tuple(min(1.0, max(0.0, channel * factor)) for channel in rgb)
    return mcolors.to_hex(scaled_rgb)

In [None]:
import numpy as np
import plotly.graph_objects as go

# columns we REQUIRE to be present
required_cols = [
    'results.properties.optoelectronic.solar_cell.fill_factor',
    'results.properties.optoelectronic.solar_cell.short_circuit_current_density',
    'results.properties.optoelectronic.solar_cell.open_circuit_voltage',
    'results.properties.optoelectronic.solar_cell.efficiency',
]

# Drop rows where ANY required value is missing, so the check below never sees
# NaN; .copy() lets us add columns without modifying df.
df_clean = df.dropna(subset=required_cols).copy()

# alias for readability
ff = df_clean['results.properties.optoelectronic.solar_cell.fill_factor']
jsc = df_clean[
    'results.properties.optoelectronic.solar_cell.short_circuit_current_density'
]
voc = df_clean['results.properties.optoelectronic.solar_cell.open_circuit_voltage']
pce = df_clean['results.properties.optoelectronic.solar_cell.efficiency']

# unit correction
# NOTE(review): the 0.1 factor presumably converts J_SC from A/m² to mA/cm² so
# that FF × V_OC × J_SC is on the same scale as the stored efficiency — confirm
# against the database schema.
JSC_UNIT_FACTOR = 0.1
df_clean['jsc_corrected'] = jsc * JSC_UNIT_FACTOR

# compute expected PCE
df_clean['pce_calc'] = ff * voc * df_clean['jsc_corrected']

# isclose check (absolute tolerance only). np.isclose defaults to rtol=1e-05,
# so rtol=0 is passed explicitly: the test is exactly |pce - pce_calc| <= atol.
PCE_ABS_TOLERANCE = 0.2
df_clean['pce_isclose'] = np.isclose(
    pce,
    df_clean['pce_calc'],
    rtol=0,
    atol=PCE_ABS_TOLERANCE,
)

# Per-source share of entries passing the PCE check ('fraction') and the number
# of entries in each group ('n'). Rows follow SOURCE_ORDER, the same list that
# keys COLOR_MAP. A source with no rows gets fraction=NaN and n=0; filling and
# casting n keeps it an integer instead of letting reindex turn it into
# float/NaN.
summary = (
    df_clean.groupby('source_database')['pce_isclose']
    .agg(fraction='mean', n='size')
    .reindex(SOURCE_ORDER)
    .fillna({'n': 0})
    .astype({'n': int})
)

# One bar per row of summary. Fill colors are looked up in COLOR_MAP and each
# outline is that fill passed through darken_color.
bar_colors = [COLOR_MAP[source] for source in summary.index]
bar_outlines = [darken_color(fill, factor=0.7) for fill in bar_colors]

fig = go.Figure(
    data=[
        go.Bar(
            x=summary.index,
            y=summary['fraction'],
            text=[f'n={count}' for count in summary['n']],  # group size per bar
            marker_color=bar_colors,
            marker_line_color=bar_outlines,
            marker_line_width=2,
        )
    ]
)

# The y values are fractions, so the axis is pinned to [0, 1].
fig.update_layout(
    yaxis=dict(
        title=dict(
            text='Fraction passing PCE ≈ FF × V<sub>OC</sub> × J<sub>SC</sub>'
        ),
        range=[0, 1],
    ),
)

fig.show()