In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans


In [2]:
df = pd.read_csv("../Week06/datasets/week06_device_features.csv")
df.head()

Unnamed: 0,Device,Voc_mean,Voc_max,Jsc_mean,Jsc_max,FF_mean,FF_std,leakage_slope,J0
0,device001_1,1.098735,1.14045,10.393274,19.918979,0.688449,0.038685,-0.005102,0.003786
1,device001_2,1.082345,1.11065,9.770149,18.563928,0.667659,0.022543,-0.00652,0.00457
2,device001_3,1.033197,1.11065,10.134377,19.455737,0.735948,0.208638,-0.004887,0.004208
3,device001_4,1.040642,1.11065,10.03564,19.258098,0.682396,0.053346,-0.004235,0.004003
4,device001_5,0.92447,1.11065,10.275039,20.005364,0.46746,0.290469,-7.885415,-0.057738


In [3]:
X = df.drop(columns=["Device"])
X = X.select_dtypes(include=[np.number])
X = X.fillna(X.median())

In [4]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [5]:
kmeans = KMeans(n_clusters=3, random_state=42)
df["Cluster"] = kmeans.fit_predict(X_scaled)
df[["Device","Cluster"]]



Unnamed: 0,Device,Cluster
0,device001_1,2
1,device001_2,2
2,device001_3,2
3,device001_4,2
4,device001_5,0
5,device001_6,2
6,device001_7,2
7,device003_1,1
8,device003_2,1
9,device003_3,1


In [7]:
# make sure Cluster is numeric
df["Cluster"] = pd.to_numeric(df["Cluster"], errors="coerce")

# average ONLY numeric columns
cluster_means = df.groupby("Cluster").mean(numeric_only=True)
cluster_means

Unnamed: 0_level_0,Voc_mean,Voc_max,Jsc_mean,Jsc_max,FF_mean,FF_std,leakage_slope,J0
Cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.92447,1.11065,10.275039,20.005364,0.46746,0.290469,-7.885415,-0.057738
1,1.049873,1.108663,10.977152,22.154996,0.706393,0.046283,-0.370737,0.0028
2,1.036176,1.114907,10.127737,19.569909,0.717958,0.216764,-0.005839,0.004739


In [8]:
df.to_csv("week07_results.csv", index=False)
print("Saved → week07_results.csv")

Saved → week07_results.csv


In [9]:
df.groupby("Cluster").mean(numeric_only=True)

Unnamed: 0_level_0,Voc_mean,Voc_max,Jsc_mean,Jsc_max,FF_mean,FF_std,leakage_slope,J0
Cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.92447,1.11065,10.275039,20.005364,0.46746,0.290469,-7.885415,-0.057738
1,1.049873,1.108663,10.977152,22.154996,0.706393,0.046283,-0.370737,0.0028
2,1.036176,1.114907,10.127737,19.569909,0.717958,0.216764,-0.005839,0.004739


In [10]:
cluster_labels = {
    0: "Shunt limited",
    1: "Recombination limited",
    2: "Transport/interface limited"
}

df["Failure_Mode"] = df["Cluster"].map(cluster_labels)
df[["Device","Failure_Mode"]]

Unnamed: 0,Device,Failure_Mode
0,device001_1,Transport/interface limited
1,device001_2,Transport/interface limited
2,device001_3,Transport/interface limited
3,device001_4,Transport/interface limited
4,device001_5,Shunt limited
5,device001_6,Transport/interface limited
6,device001_7,Transport/interface limited
7,device003_1,Recombination limited
8,device003_2,Recombination limited
9,device003_3,Recombination limited


In [11]:
cluster_labels = {
    0: "Shunt-limited",
    1: "Recombination-limited",
    2: "Transport/interface-limited"
}

df["Failure_Mode"] = df["Cluster"].map(cluster_labels)
df[["Device","Cluster","Failure_Mode"]]

Unnamed: 0,Device,Cluster,Failure_Mode
0,device001_1,2,Transport/interface-limited
1,device001_2,2,Transport/interface-limited
2,device001_3,2,Transport/interface-limited
3,device001_4,2,Transport/interface-limited
4,device001_5,0,Shunt-limited
5,device001_6,2,Transport/interface-limited
6,device001_7,2,Transport/interface-limited
7,device003_1,1,Recombination-limited
8,device003_2,1,Recombination-limited
9,device003_3,1,Recombination-limited


In [12]:
def suggest(row):
    if row["Failure_Mode"] == "Recombination-limited":
        return "Reduce traps, improve bulk and interface passivation, optimize crystallization."
    if row["Failure_Mode"] == "Shunt-limited":
        return "Improve film coverage, reduce pinholes, improve substrate cleaning."
    if row["Failure_Mode"] == "Transport/interface-limited":
        return "Improve contacts, tune ETL/HTL thickness, reduce interface barriers."
    return "Stable device"

df["Suggestion"] = df.apply(suggest, axis=1)
df[["Device","Failure_Mode","Suggestion"]]

Unnamed: 0,Device,Failure_Mode,Suggestion
0,device001_1,Transport/interface-limited,"Improve contacts, tune ETL/HTL thickness, redu..."
1,device001_2,Transport/interface-limited,"Improve contacts, tune ETL/HTL thickness, redu..."
2,device001_3,Transport/interface-limited,"Improve contacts, tune ETL/HTL thickness, redu..."
3,device001_4,Transport/interface-limited,"Improve contacts, tune ETL/HTL thickness, redu..."
4,device001_5,Shunt-limited,"Improve film coverage, reduce pinholes, improv..."
5,device001_6,Transport/interface-limited,"Improve contacts, tune ETL/HTL thickness, redu..."
6,device001_7,Transport/interface-limited,"Improve contacts, tune ETL/HTL thickness, redu..."
7,device003_1,Recombination-limited,"Reduce traps, improve bulk and interface passi..."
8,device003_2,Recombination-limited,"Reduce traps, improve bulk and interface passi..."
9,device003_3,Recombination-limited,"Reduce traps, improve bulk and interface passi..."


In [13]:
df.to_csv("week07_with_guidance.csv", index=False)
print("Saved → week07_with_guidance.csv")

Saved → week07_with_guidance.csv


In [14]:
df.groupby("Failure_Mode").mean(numeric_only=True)

Unnamed: 0_level_0,Voc_mean,Voc_max,Jsc_mean,Jsc_max,FF_mean,FF_std,leakage_slope,J0,Cluster
Failure_Mode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Recombination-limited,1.049873,1.108663,10.977152,22.154996,0.706393,0.046283,-0.370737,0.0028,1.0
Shunt-limited,0.92447,1.11065,10.275039,20.005364,0.46746,0.290469,-7.885415,-0.057738,0.0
Transport/interface-limited,1.036176,1.114907,10.127737,19.569909,0.717958,0.216764,-0.005839,0.004739,2.0


In [15]:
df["Risk"] = df[["J0","leakage_slope","FF_std"]].rank(pct=True).mean(axis=1)
df.sort_values("Risk", ascending=False)[["Device","Failure_Mode","Risk","Suggestion"]].head(10)

Unnamed: 0,Device,Failure_Mode,Risk,Suggestion
13,device003_7,Transport/interface-limited,0.768116,"Improve contacts, tune ETL/HTL thickness, redu..."
14,device003_8,Recombination-limited,0.681159,"Reduce traps, improve bulk and interface passi..."
12,device003_6,Recombination-limited,0.666667,"Reduce traps, improve bulk and interface passi..."
17,device004_3,Recombination-limited,0.652174,"Reduce traps, improve bulk and interface passi..."
2,device001_3,Transport/interface-limited,0.652174,"Improve contacts, tune ETL/HTL thickness, redu..."
18,device004_4,Recombination-limited,0.637681,"Reduce traps, improve bulk and interface passi..."
5,device001_6,Transport/interface-limited,0.623188,"Improve contacts, tune ETL/HTL thickness, redu..."
3,device001_4,Transport/interface-limited,0.623188,"Improve contacts, tune ETL/HTL thickness, redu..."
11,device003_5,Recombination-limited,0.608696,"Reduce traps, improve bulk and interface passi..."
7,device003_1,Recombination-limited,0.594203,"Reduce traps, improve bulk and interface passi..."


In [16]:
df[["Device","Failure_Mode","Risk","Suggestion"]].sort_values("Risk", ascending=False)\
  .to_csv("perovai_lab_report.csv", index=False)

print("Saved → perovai_lab_report.csv")

Saved → perovai_lab_report.csv
