In [4]:
import time
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error # Added for more metrics

# --- Configuration ---
# Upper bound on how many test-set devices are pushed through the model.
NUM_DEVICES = 10000  # Adjust as needed
# Floor applied to predicted KP values ("bottom clipping").
KP_MIN = 50e-6

# --- Load data ---
# Device parameters first, then the per-device measurements (level3_v11 set).
params_df, measurements_df = map(
    pd.read_csv,
    ('mosfet_params_level3_v11.csv', 'measurements_level3_v11.csv'),
)

# --- Preprocess data (KP model) ---
# Build one feature row per device: the (VGS, VDS, ID) measurements ordered by
# meas_index and flattened row by row, followed by the device's W and L.
# The regression target is log1p(KP).
#
# Only devices with exactly this many measurement rows are used, which yields
# 561 * 3 + 2 = 1685 features per device.
EXPECTED_POINTS_PER_DEVICE = 561

X_list = []
y_list = []  # log1p(KP) targets, one per kept device
mosfet_ids = []  # MOSFET_ID of each kept device, aligned with X_list / y_list

# Index the parameter table by MOSFET_ID for per-device lookups.
params_lookup = params_df.set_index('MOSFET_ID')
known_ids = params_lookup.index
n_wrong_length = 0

grouped = measurements_df.groupby('MOSFET_ID')
for mosfet_id, group in grouped:
    if len(group) != EXPECTED_POINTS_PER_DEVICE:
        n_wrong_length += 1
        continue

    # Test membership explicitly rather than catching KeyError around the
    # whole body: a missing *column* (e.g. 'KP' or 'VGS') must surface as its
    # own error instead of being reported as an unknown MOSFET_ID.
    if mosfet_id not in known_ids:
        print(f"Warning: MOSFET_ID {mosfet_id} not found in params_df. Skipping.")
        continue

    W = params_lookup.loc[mosfet_id, 'W']
    L = params_lookup.loc[mosfet_id, 'L']
    kp = params_lookup.loc[mosfet_id, 'KP']
    y_kp_log = np.log1p(kp)

    # Sort only the groups that are actually kept.
    group_sorted = group.sort_values('meas_index')
    measurement_features = group_sorted[['VGS', 'VDS', 'ID']].values.flatten()

    # Append the geometry (W, L) after the measurement features.
    features = np.concatenate((measurement_features, [W, L]))

    X_list.append(features)
    y_list.append(y_kp_log)
    mosfet_ids.append(mosfet_id)

if n_wrong_length:
    # Previously these devices were dropped without any indication.
    print(
        f"Warning: skipped {n_wrong_length} device(s) that do not have exactly "
        f"{EXPECTED_POINTS_PER_DEVICE} measurement rows."
    )

if not X_list:
    # Fail here with a clear message instead of a confusing error later on.
    raise ValueError(
        "No usable devices: every MOSFET_ID was skipped "
        "(wrong measurement count or missing from params_df)."
    )

X = np.array(X_list)
y = np.array(y_list).reshape(-1, 1)  # column vector of log1p(KP)

print(f"Input shape (X): {X.shape}")  # Expected: (num_devices, 1685)
print(f"Target shape (y - log-transformed KP): {y.shape}")

# --- Split data ---
# KP on its original scale (inverse of the log1p applied to the targets),
# kept for the final evaluation.
y_original_kp = np.expm1(y)

# One call splits all three arrays with the same shuffled indices, so the
# original-scale test targets stay aligned with X_test / y_test_log.
# Keep random_state fixed when comparing against a specific training run.
(
    X_train, X_test,
    y_train_log, y_test_log,
    _, y_test_original_kp,
) = train_test_split(X, y, y_original_kp, test_size=0.2, random_state=42)

# --- Scale inputs ---
# Statistics come from the training split only; the test split reuses them.
input_scaler = StandardScaler().fit(X_train)
X_train_scaled = input_scaler.transform(X_train)
X_test_scaled = input_scaler.transform(X_test)

# --- Scale outputs (log-transformed KP) ---
# The fitted output_scaler is needed later to undo the scaling of the
# model's predictions.
output_scaler = StandardScaler().fit(y_train_log)
y_train_log_scaled = output_scaler.transform(y_train_log)

# --- Load model ---
# Load the saved Keras model used for KP prediction. Later in this script it
# receives the scaled test features, and its output is treated as scaled
# log1p(KP) (it is inverse-scaled and passed through expm1).
# NOTE(review): the original comment says the training write-up (PDF) saves
# the model as "v2" -- confirm this file is the one produced by that run.
model_filename = 'mosfet_level3_kp_model_v2.keras'
print(f"Loading model: {model_filename}")
model = load_model(model_filename)

# --- Adjust test set to NUM_DEVICES ---
# Keep at most NUM_DEVICES devices from the test split. If the split is
# smaller than requested, use all of it and lower NUM_DEVICES so the timing
# statistics further down divide by the number of devices actually predicted.
num_test_devices = X_test_scaled.shape[0]
print(f"Number of devices in original test set: {num_test_devices}")

if num_test_devices == 0:
    # Raise SystemExit instead of calling exit(): exit() is the interactive
    # helper installed by the `site` module and would end the process with
    # status 0. SystemExit with a message reports it on stderr and exits
    # with a non-zero status.
    raise SystemExit("Error: No devices in the test set.")

if num_test_devices >= NUM_DEVICES:
    print(f"Using first {NUM_DEVICES} devices from the test set")
else:
    print(f"Warning: Test set has fewer than {NUM_DEVICES} devices ({num_test_devices}). Using all available.")
    NUM_DEVICES = num_test_devices

# A single slice covers both cases because NUM_DEVICES <= num_test_devices here.
X_test_scaled_adjusted = X_test_scaled[:NUM_DEVICES]
y_test_original_kp_adjusted = y_test_original_kp[:NUM_DEVICES]

# --- Measure prediction time ---
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted during the run.
# NOTE(review): this times a single predict() call, so any one-time setup
# cost of the first call is presumably included -- confirm if that matters.
start_time = time.perf_counter()
predictions_log_scaled = model.predict(X_test_scaled_adjusted)
end_time = time.perf_counter()

# --- Inverse transform predictions ---
# Undo the output scaling, then undo the log1p applied to the targets.
predictions_log = output_scaler.inverse_transform(predictions_log_scaled)
predictions_unclipped = np.expm1(predictions_log)

# --- Apply bottom clipping ---
# Any prediction below KP_MIN is raised to KP_MIN.
predictions_clipped = np.maximum(predictions_unclipped, KP_MIN)

# --- Calculate time metrics ---
time_taken = end_time - start_time
# Guard against a zero-length interval.
predictions_per_second = NUM_DEVICES / time_taken if time_taken > 0 else 0

# --- Print timing results ---
print(f"\nTime taken to predict {NUM_DEVICES} devices: {time_taken:.3f} seconds")
print(f"Average predictions per second: {predictions_per_second:.2f}")
if NUM_DEVICES > 0:
    print(f"Average time per prediction: {(time_taken / NUM_DEVICES) * 1000:.3f} milliseconds")
else:
    print("N/A")

# --- Calculate and print accuracy metrics ---
epsilon = 1e-10  # Added to the denominator to avoid division by zero


def _score_kp_predictions(kp_true, kp_pred, eps):
    """Return (relative errors in %, their mean, R², MSE, RMSE) for one prediction set."""
    rel_err_pct = np.abs((kp_pred - kp_true) / (kp_true + eps)) * 100
    mean_pct = np.mean(rel_err_pct)
    r2 = r2_score(kp_true, kp_pred)
    mse = mean_squared_error(kp_true, kp_pred)
    return rel_err_pct, mean_pct, r2, mse, np.sqrt(mse)


# Metrics without clipping
print("\n--- Accuracy Statistics (Without Clipping) ---")
(
    relative_errors_unclipped,
    mpe_unclipped,
    r2_unclipped,
    mse_unclipped,
    rmse_unclipped,
) = _score_kp_predictions(y_test_original_kp_adjusted, predictions_unclipped, epsilon)

# The printed "Mean Percentage Error" is the mean of absolute relative errors.
print(f"Mean Percentage Error: {mpe_unclipped:.2f}%")
print(f"R² Score: {r2_unclipped:.4f}")
print(f"MSE: {mse_unclipped:.15f}")
print(f"RMSE: {rmse_unclipped:.10f}")

# Metrics with bottom clipping
print("\n--- Accuracy Statistics (With Bottom Clipping at KP_MIN={:.1e}) ---".format(KP_MIN))
(
    relative_errors_clipped,
    mpe_clipped,
    r2_clipped,
    mse_clipped,
    rmse_clipped,
) = _score_kp_predictions(y_test_original_kp_adjusted, predictions_clipped, epsilon)

print(f"Mean Percentage Error: {mpe_clipped:.2f}%")
print(f"R² Score: {r2_clipped:.4f}")
print(f"MSE: {mse_clipped:.15f}")
print(f"RMSE: {rmse_clipped:.10f}")


# --- Clipping statistics ---
# Count the raw predictions that fell below the floor and were therefore
# raised to KP_MIN in predictions_clipped.
n_samples = len(y_test_original_kp_adjusted)
n_clipped_below = (predictions_unclipped < KP_MIN).sum()
if n_samples > 0:
    pct_clipped_below = (n_clipped_below / n_samples) * 100
else:
    pct_clipped_below = 0

print(f"\n--- Clipping Details (out of {n_samples} samples) ---")
print(f"Values clipped below KP_MIN ({KP_MIN:.1e}): {n_clipped_below} ({pct_clipped_below:.2f}%)")

# --- Value range summary ---
# min / max / mean of the true values and of both prediction variants,
# for a quick sanity check of the output range.
print("\n--- Value Range Summary ---")
print(f"True KP min/max/mean:           {y_test_original_kp_adjusted.min():.4e} / {y_test_original_kp_adjusted.max():.4e} / {y_test_original_kp_adjusted.mean():.4e}")
print(f"Unclipped Predictions min/max/mean: {predictions_unclipped.min():.4e} / {predictions_unclipped.max():.4e} / {predictions_unclipped.mean():.4e}")
print(f"Clipped Predictions min/max/mean:   {predictions_clipped.min():.4e} / {predictions_clipped.max():.4e} / {predictions_clipped.mean():.4e}")

Input shape (X): (55000, 1685)
Target shape (y - log-transformed KP): (55000, 1)
Loading model: mosfet_level3_kp_model_v2.keras
Number of devices in original test set: 11000
Using first 10000 devices from the test set
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

Time taken to predict 10000 devices: 1.273 seconds
Average predictions per second: 7853.12
Average time per prediction: 0.127 milliseconds

--- Accuracy Statistics (Without Clipping) ---
Mean Percentage Error: 6.81%
R² Score: 0.9700
MSE: 0.000000000508705
RMSE: 0.0000225545

--- Accuracy Statistics (With Bottom Clipping at KP_MIN=5.0e-05) ---
Mean Percentage Error: 6.81%
R² Score: 0.9700
MSE: 0.000000000508705
RMSE: 0.0000225545

--- Clipping Details (out of 10000 samples) ---
Values clipped below KP_MIN (5.0e-05): 0 (0.00%)

--- Value Range Summary ---
True KP min/max/mean:           5.0007e-05 / 4.9994e-04 / 2.7556e-04
Unclipped Predictions min/max/mean: 5.7704e-05 / 5.0096e-04 / 2.7345e-04
Clip