In [1]:
import os
import re
import numpy as np
import pandas as pd
from scipy.stats import linregress

In [2]:
# Input folders handed to load_folder() further down. Both are relative paths,
# so they resolve against the notebook's working directory.
# NOTE(review): names suggest multi-intensity vs. dark JV exports — confirm.
RAW_MULTISUN = "raw_multisun_csv"
RAW_DARK = "raw_dark_csv"

In [3]:
def parse_name(fname):
    """Derive ``(dev, inten, scan)`` labels from a measurement file name.

    All matching is done on the lower-cased name.

    Returns
    -------
    dev : str or None
        ``"device<digits>_<cell>"`` when both a ``device<digits>`` token and a
        ``_<1-8>_`` cell token are present, the bare ``device<digits>`` token
        when only that one is present, and ``None`` when no device token exists.
    inten : str or None
        ``"dark"`` if the name contains "dark"; otherwise the first
        ``<digits>p<digits>sun`` token, or ``None`` if there is none.
    scan : str
        ``"LIV1"`` if the name contains "liv1", ``"LIV2"`` for everything else.
    """
    lowered = fname.lower()

    # Device label: sample token, optionally suffixed with the cell number.
    sample_match = re.search(r"device\d+", lowered)
    cell_match = re.search(r"_([1-8])_", lowered)
    if sample_match is None:
        dev = None
    elif cell_match is None:
        dev = sample_match.group()
    else:
        dev = f"{sample_match.group()}_{cell_match.group(1)}"

    # Intensity label: "dark" wins over any "<n>p<m>sun" token in the name.
    if "dark" in lowered:
        inten = "dark"
    else:
        sun_match = re.search(r"\d+p\d+sun", lowered)
        inten = None if sun_match is None else sun_match.group()

    # Scan label: anything that is not explicitly "liv1" is reported as LIV2.
    scan = "LIV2"
    if "liv1" in lowered:
        scan = "LIV1"

    return dev, inten, scan

In [12]:
def load_folder(folder):
    """Load every JV ``.csv`` file in ``folder`` into one long-format DataFrame.

    Each file is read without a header. Rows whose first cell is one of the
    summary labels ``jsc:``, ``voc:``, ``ff:`` or ``eff:`` (case-insensitive,
    surrounding whitespace ignored) contribute a per-file summary value taken
    from the second cell. Rows whose first two cells are both numeric are kept
    as JV points.

    Parameters
    ----------
    folder : str
        Directory to scan. Files not ending in ``.csv`` and files for which
        ``parse_name`` returns no device label are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per JV point with columns ``V``, ``J``, ``Device``,
        ``Intensity``, ``Scan`` plus one constant column per summary value
        found in the file (``JSC``, ``VOC``, ``FF``, ``EFF``).

    Raises
    ------
    ValueError
        If the folder yields no usable file, or a summary value cannot be
        converted to ``float``.
    """
    summary_labels = ("jsc:", "voc:", "ff:", "eff:")
    recs = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the result (and any downstream "first row of the group" pick)
    # reproducible across runs and machines.
    for f in sorted(os.listdir(folder)):
        if not f.endswith(".csv"):
            continue

        dev, inten, scan = parse_name(f)
        if dev is None:
            continue

        raw = pd.read_csv(os.path.join(folder, f), header=None)

        # ---- Extract summary values ----
        summary = {}
        for label, value in zip(raw[0], raw[1]):
            if isinstance(label, str):
                k = label.strip().lower()
                if k in summary_labels:
                    # Drop the trailing ":" and upper-case: "voc:" -> "VOC".
                    summary[k[:-1].upper()] = float(value)

        # ---- Keep numeric JV rows only ----
        # Build the frame from columns 0 and 1 explicitly so a file with
        # extra trailing columns does not break the V/J naming.
        num = pd.DataFrame({
            "V": pd.to_numeric(raw[0], errors="coerce"),
            "J": pd.to_numeric(raw[1], errors="coerce"),
        }).dropna(subset=["V", "J"])

        num["Device"] = dev
        num["Intensity"] = inten
        num["Scan"] = scan

        # Attach summary values as constant columns on every JV row.
        for k, v in summary.items():
            num[k] = v

        recs.append(num)

    if not recs:
        # pd.concat([]) would raise a ValueError with no hint of which folder
        # was empty; keep the exception type but say what went wrong.
        raise ValueError(f"No usable JV .csv files found in {folder!r}")

    return pd.concat(recs, ignore_index=True)

In [13]:
# Read both measurement folders, then stack them into one long JV table.
multi, dark = load_folder(RAW_MULTISUN), load_folder(RAW_DARK)
alljv = pd.concat([multi, dark], ignore_index=True)

# Quick sanity check on how much data was picked up.
n_points = alljv.shape[0]
n_devices = alljv["Device"].nunique()
print("Total JV points:", n_points)
print("Unique devices:", n_devices)

Total JV points: 26592
Unique devices: 24


In [14]:
def extract_jv_features(df):
    """Return ``(Voc, Jsc, FF)`` read from the summary columns of ``df``.

    Each value is the first-row entry of the ``"VOC"``, ``"JSC"`` and ``"FF"``
    column respectively. A column that is missing from ``df`` yields ``np.nan``
    in its position.
    """
    def first_or_nan(column):
        # Summary columns are constant within a file, so the first row suffices.
        if column not in df.columns:
            return np.nan
        return df[column].iloc[0]

    return tuple(first_or_nan(name) for name in ("VOC", "JSC", "FF"))

In [15]:
def extract_dark_features(df, min_points=5):
    """Return ``(slope, J0)`` for one dark JV curve.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain numeric-convertible ``"V"`` and ``"J"`` columns.
    min_points : int, optional
        Minimum number of points with ``V < 0`` required to attempt the fit
        (default 5). At least two points are always required.

    Returns
    -------
    slope : float
        Slope of a linear least-squares fit of J against V over the points
        with ``V < 0``.
    J0 : float
        ``J`` at the sample whose voltage is closest to zero.

    Both values are ``np.nan`` when the fit cannot be made: too few points
    with ``V < 0``, or all of those points share a single voltage.
    """
    df = df.sort_values("V")
    V = df["V"].to_numpy(dtype=float)
    J = df["J"].to_numpy(dtype=float)

    reverse = V < 0
    # A line needs two points regardless of what the caller asks for.
    if reverse.sum() < max(min_points, 2):
        return np.nan, np.nan

    V_rev = V[reverse]
    # linregress raises ValueError when every x value is identical; report
    # "no fit" the same way as the too-few-points case instead of aborting
    # the caller's loop over all devices.
    if V_rev.min() == V_rev.max():
        return np.nan, np.nan

    slope = linregress(V_rev, J[reverse]).slope
    J0 = J[np.argmin(np.abs(V))]

    return slope, J0


In [16]:
# One record per (device, intensity, scan) curve. Dark curves carry the
# leakage-fit features; illuminated curves carry the Voc/Jsc/FF summary values.
rows = []

for (dev, inten, scan), g in alljv.groupby(["Device", "Intensity", "Scan"]):
    record = {"Device": dev, "Intensity": inten, "Scan": scan}

    if inten == "dark":
        slope, J0 = extract_dark_features(g)
        record["leakage_slope"] = slope
        record["J0"] = J0
    else:
        Voc, Jsc, FF = extract_jv_features(g)
        record["Voc"] = Voc
        record["Jsc"] = Jsc
        record["FF"] = FF

    rows.append(record)

features = pd.DataFrame(rows)
features.head()


Unnamed: 0,Device,Intensity,Scan,Voc,Jsc,FF,leakage_slope,J0
0,device001_1,0p1sun,LIV1,0.9617,1.511891,0.624211,,
1,device001_1,0p1sun,LIV2,1.0213,1.596917,0.724675,,
2,device001_1,0p2sun,LIV1,1.05105,3.169148,0.643677,,
3,device001_1,0p2sun,LIV2,1.05105,3.210573,0.677653,,
4,device001_1,0p3sun,LIV1,1.05105,5.30517,0.635142,,


In [17]:
# Separate illuminated and dark curve records.
is_dark = features["Intensity"] == "dark"
light = features[~is_dark].copy()
darkf = features[is_dark].copy()

# Numeric intensity from the label, e.g. "0p1sun" -> 0.1.
sun_labels = light["Intensity"].str.replace("sun","").str.replace("p",".")
light["Sun"] = sun_labels.astype(float)

# Per-device statistics over all illuminated curves.
light_agg = light.groupby("Device").agg({
    "Voc": ["mean","max"],
    "Jsc": ["mean","max"],
    "FF":  ["mean","std"]
})
# Flatten the (column, statistic) pairs into names such as "Voc_mean".
light_agg.columns = [f"{col}_{stat}" for col, stat in light_agg.columns]
light_agg = light_agg.reset_index()

# Per-device mean of the dark-curve features.
dark_agg = darkf.groupby("Device")[["leakage_slope", "J0"]].mean().reset_index()

# Left join: only devices that have illuminated data appear in the result;
# devices without dark data get NaN in the dark-feature columns.
device_features = light_agg.merge(dark_agg, on="Device", how="left")

print("Devices:", len(device_features))
device_features.head()

Devices: 23


Unnamed: 0,Device,Voc_mean,Voc_max,Jsc_mean,Jsc_max,FF_mean,FF_std,leakage_slope,J0
0,device001_1,1.098735,1.14045,10.393274,19.918979,0.688449,0.038685,-0.005102,0.003786
1,device001_2,1.082345,1.11065,9.770149,18.563928,0.667659,0.022543,-0.00652,0.00457
2,device001_3,1.033197,1.11065,10.134377,19.455737,0.735948,0.208638,-0.004887,0.004208
3,device001_4,1.040643,1.11065,10.03564,19.258098,0.682396,0.053346,-0.004235,0.004003
4,device001_5,0.92447,1.11065,10.275039,20.005364,0.46746,0.290469,-7.885415,-0.057738


In [18]:
# Persist the per-device feature table. The output directory is created on
# demand (exist_ok=True makes re-running this cell safe), and the DataFrame
# index is not written to the file.
os.makedirs("datasets", exist_ok=True)
device_features.to_csv("datasets/week06_device_features.csv", index=False)
print("Saved → datasets/week06_device_features.csv")

Saved → datasets/week06_device_features.csv
