In [1]:
import os
import re
import numpy as np
import pandas as pd

In [2]:
# Folder that is scanned for raw LIV measurement files.
RAW_DIR = "raw_LIV"

# Sorted directory listing, then keep only names that end in LIV1.csv or
# LIV2.csv (the match is case-insensitive).
files = sorted(os.listdir(RAW_DIR))
liv_files = [
    name
    for name in files
    if re.search(r"LIV[12]\.csv$", name, re.IGNORECASE) is not None
]

# Quick sanity check: how many files matched, and what the first few look like.
print("Total LIV files:", len(liv_files))
print("First 10 files:")
for f in liv_files[:10]:
    print(" ", f)

Total LIV files: 212
First 10 files:
  device001_LIV1.csv
  device001_LIV2.csv
  device002_LIV1.csv
  device002_LIV2.csv
  device003_LIV1.csv
  device003_LIV2.csv
  device004_LIV1.csv
  device004_LIV2.csv
  device005_LIV1.csv
  device005_LIV2.csv


In [3]:
def load_liv(filepath):
    """Read one LIV file into an all-numeric DataFrame.

    The delimiter is auto-detected (``sep=None`` with the python engine) and the
    file is decoded as latin1. Every cell that cannot be parsed as a number
    becomes NaN; rows that are entirely NaN are then removed, followed by
    columns that are entirely NaN.

    Parameters
    ----------
    filepath : path or file-like accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame containing only the rows/columns that hold at least one
    numeric value.
    """
    raw = pd.read_csv(filepath, sep=None, engine="python", encoding="latin1")
    numeric = raw.apply(pd.to_numeric, errors="coerce")
    non_empty_rows = numeric.dropna(axis=0, how="all")
    return non_empty_rows.dropna(axis=1, how="all")

In [4]:
def standardize_vj(df):
    """Return a two-column voltage / current-density frame.

    Takes the first two columns of ``df`` by position (on a copy, so the caller's
    frame is not modified), names them ``"Voltage_V"`` and
    ``"Current density_mAcm2"``, and drops every row with a missing value in
    either column.
    """
    vj = df.iloc[:, :2].copy()
    vj.columns = ["Voltage_V", "Current density_mAcm2"]
    return vj.dropna()

In [5]:
def _zero_crossing(x, y):
    """Return the x value at which y first crosses zero, or NaN if it never does.

    ``x`` and ``y`` are equal-length 1-D float arrays ordered by ascending ``x``.
    The first adjacent pair of samples whose ``y`` values differ in sign (or
    where one is exactly zero) brackets the crossing, which is then refined by
    linear interpolation. Unlike ``np.interp`` on the swapped axes, this does
    not require ``y`` to be monotonic and never clamps to an end sample.
    """
    sign = np.sign(y)
    brackets = np.flatnonzero(sign[:-1] * sign[1:] <= 0)
    if brackets.size == 0:
        return np.nan
    k = brackets[0]
    if y[k] == 0:
        return x[k]
    if y[k + 1] == 0:
        return x[k + 1]
    return x[k] - y[k] * (x[k + 1] - x[k]) / (y[k + 1] - y[k])


def extract_params(df_vj, pin=100, npts=8):
    """Extract summary parameters from one voltage / current-density sweep.

    Parameters
    ----------
    df_vj : DataFrame with columns ``"Voltage_V"`` and ``"Current density_mAcm2"``.
    pin : incident power used to normalise ``Pmax`` into a percentage
        (presumably mW/cm2, matching V * mA/cm2 -- confirm against the setup).
    npts : number of samples closest to V = 0 (for Rsh) and to J = 0 (for Rs)
        used in the straight-line fits.

    Returns
    -------
    dict with keys ``Voc_V``, ``Jsc_mAcm2``, ``FF``, ``PCE_percent``,
    ``Rs_ohm_cm2`` and ``Rsh_ohm_cm2``.

    * ``Jsc`` is J linearly interpolated at V = 0; NaN when the sweep does not
      span V = 0 (previously the nearest end sample was silently returned).
    * ``Voc`` is the voltage of the first zero crossing of J; NaN when J never
      changes sign (previously the last voltage was silently returned).
    * ``FF`` is NaN whenever Voc or Jsc is zero or undefined.
    * With fewer than two samples every value is NaN instead of raising, so a
      single empty file does not abort a batch run.
    """
    V = df_vj["Voltage_V"].to_numpy(dtype=float)
    J = df_vj["Current density_mAcm2"].to_numpy(dtype=float)

    # Interpolation and line fits need at least two samples.
    if V.size < 2:
        return dict(Voc_V=np.nan, Jsc_mAcm2=np.nan, FF=np.nan,
                    PCE_percent=np.nan, Rs_ohm_cm2=np.nan, Rsh_ohm_cm2=np.nan)

    # Work on the sweep ordered by ascending voltage, whatever the scan direction.
    idx = np.argsort(V)
    V, J = V[idx], J[idx]

    # left/right=NaN: do not extrapolate by clamping when V = 0 is out of range.
    Jsc = np.interp(0.0, V, J, left=np.nan, right=np.nan)
    Voc = _zero_crossing(V, J)

    P = V * J
    i_mpp = np.argmax(P)
    Pmax = P[i_mpp]

    denom = Voc * Jsc
    FF = Pmax / denom if np.isfinite(denom) and denom != 0 else np.nan
    PCE = (Pmax / pin) * 100

    # Slope dV/dJ of a straight-line fit; the factor 1000 turns
    # V per (mA/cm2) into ohm*cm2.
    idx_v0 = np.argsort(np.abs(V))[:npts]
    Rsh = abs(np.polyfit(J[idx_v0], V[idx_v0], 1)[0]) * 1000

    idx_j0 = np.argsort(np.abs(J))[:npts]
    Rs = abs(np.polyfit(J[idx_j0], V[idx_j0], 1)[0]) * 1000

    return dict(Voc_V=Voc, Jsc_mAcm2=Jsc, FF=FF, PCE_percent=PCE,
                Rs_ohm_cm2=Rs, Rsh_ohm_cm2=Rsh)

In [6]:
# Create the two output folders if they are not there yet; existing folders
# are left untouched.
for out_dir in ("summary", "datasets"):
    os.makedirs(out_dir, exist_ok=True)

In [7]:
# Run the load -> standardize -> extract pipeline on every LIV file. Each
# file contributes one row to `rows` and also gets its own one-row summary CSV.
rows = []

for f in liv_files:
    path = os.path.join(RAW_DIR, f)

    df_raw = load_liv(path)
    df_vj = standardize_vj(df_raw)
    params = extract_params(df_vj)

    # File name without its extension. splitext removes only the final
    # extension and works for any letter case, whereas replace('.csv', '')
    # left ".CSV" in place and also stripped ".csv" from the middle of a name.
    stem = os.path.splitext(f)[0]

    params["Device"] = f.split("_")[0]
    # Decide the scan from the end of the stem so that a device id which
    # happens to contain "LIV1" cannot mislabel an LIV2 file.
    params["Scan"] = "Reverse" if stem.upper().endswith("LIV1") else "Forward"
    params["File"] = f

    rows.append(params)

    pd.DataFrame([params]).to_csv(
        f"summary/{stem}_summary.csv",
        index=False
    )

print("Processed files:", len(rows))

Processed files: 212


In [8]:
# Combine the per-file parameter dicts into one table (one row per file),
# report its size and preview the first five rows.
df_day4 = pd.DataFrame(rows)
print("Rows in dataset:", df_day4.shape[0])
df_day4.head(5)

Rows in dataset: 212


Unnamed: 0,Voc_V,Jsc_mAcm2,FF,PCE_percent,Rs_ohm_cm2,Rsh_ohm_cm2,Device,Scan,File
0,1.113055,20.178596,0.640161,14.377952,8.304344,4474.454512,device001,Reverse,device001_LIV1.csv
1,1.104883,20.177047,0.650051,14.491762,8.245944,5106.92862,device001,Forward,device001_LIV2.csv
2,1.16248,20.154208,0.758766,17.777031,7.594258,7605.396653,device002,Reverse,device002_LIV1.csv
3,1.144383,20.051937,0.746704,17.134683,6.118376,3341.636255,device002,Forward,device002_LIV2.csv
4,1.152423,20.797109,0.767192,18.38734,5.584745,8414.256139,device003,Reverse,device003_LIV1.csv


In [9]:
# Write the combined parameter table to CSV; index=False keeps the row index
# out of the file. The datasets/ folder must already exist at this point.
df_day4.to_csv("datasets/perovai_devices_day4_rawLIV.csv", index=False)
print("Saved: datasets/perovai_devices_day4_rawLIV.csv")

Saved: datasets/perovai_devices_day4_rawLIV.csv
