In [1]:
import time
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error # Added for more metrics

# Configuration
# Upper bound on how many test-set devices are timed and scored further down.
NUM_DEVICES = 10_000  # Adjust as needed

# --- Load data ---
# The v9 CSV files: one with per-device parameters, one with the
# per-device measurement rows.
PARAMS_CSV = 'mosfet_params_v9.csv'
MEASUREMENTS_CSV = 'measurements_v9.csv'

params_df = pd.read_csv(PARAMS_CSV)
measurements_df = pd.read_csv(MEASUREMENTS_CSV)

# --- Preprocess data (Specific to KP model level1_NN_v2_kp.pdf) ---
# Builds one feature row per device: the (VGS, VDS, ID) rows ordered by
# meas_index and flattened, followed by W and L. The target is log1p(KP).
POINTS_PER_DEVICE = 561  # rows a device must have: 561 * 3 + 2 = 1685 features

X_list = []
y_list = [] # Target list for KP
mosfet_ids = []
skipped_incomplete = 0  # devices dropped for not having POINTS_PER_DEVICE rows

# Create a temporary lookup for W, L and KP from params_df
params_lookup = params_df.set_index('MOSFET_ID')

grouped = measurements_df.groupby('MOSFET_ID')
for mosfet_id, group in grouped:
    if len(group) != POINTS_PER_DEVICE:
        skipped_incomplete += 1
        continue

    # Only a device ID that is absent from params_df is a skippable
    # condition. A missing *column* in either file is a schema problem and
    # is left to raise KeyError rather than being reported as a missing ID
    # for every single device.
    if mosfet_id not in params_lookup.index:
        print(f"Warning: MOSFET_ID {mosfet_id} not found in params_df. Skipping.")
        continue # Skip if MOSFET ID isn't in both files

    group_sorted = group.sort_values('meas_index')

    # Get W and L for this MOSFET ID
    W = params_lookup.loc[mosfet_id, 'W']
    L = params_lookup.loc[mosfet_id, 'L']
    # Get KP and apply log transform
    kp = params_lookup.loc[mosfet_id, 'KP']
    y_kp_log = np.log1p(kp)

    # Get measurement features, row by row: VGS, VDS, ID, VGS, VDS, ID, ...
    measurement_features = group_sorted[['VGS', 'VDS', 'ID']].values.flatten()

    # Concatenate measurement features with W and L
    features = np.concatenate((measurement_features, [W, L]))

    X_list.append(features)
    y_list.append(y_kp_log) # Append log-transformed KP
    mosfet_ids.append(mosfet_id)

if skipped_incomplete:
    print(f"Warning: skipped {skipped_incomplete} devices without exactly {POINTS_PER_DEVICE} measurements.")

X = np.array(X_list)
y = np.array(y_list).reshape(-1, 1) # y now contains log1p(KP)

print(f"Input shape (X): {X.shape}") # Should be (num_devices, 1685)
print(f"Target shape (y - log-transformed KP): {y.shape}")

# --- Split data ---
# Use the same random_state if comparing to a specific training run
X_train, X_test, y_train_log, y_test_log = train_test_split(X, y, test_size=0.2, random_state=42)

# Original-scale KP values, needed for the final evaluation.
# The test-set values are derived directly from y_test_log, so they are
# row-aligned with X_test by construction instead of depending on a second,
# separate split reproducing exactly the same shuffle.
y_original_kp = np.expm1(y) # Get original KP back from the full log-transformed y
y_test_original_kp = np.expm1(y_test_log)


# --- Scale inputs ---
# The scaler's statistics come from the training rows only; the test rows
# are then transformed with those same fitted statistics.
input_scaler = StandardScaler().fit(X_train)
X_train_scaled = input_scaler.transform(X_train)
X_test_scaled = input_scaler.transform(X_test)

# --- Scale outputs (Log-transformed KP) ---
# Fitted on the log-transformed training targets only. The fitted
# output_scaler is kept because predictions are inverse-transformed with it.
output_scaler = StandardScaler().fit(y_train_log)
y_train_log_scaled = output_scaler.transform(y_train_log)

# --- Load model ---
# Saved Keras file holding the KP model
MODEL_PATH = 'mosfet_kp_model_v2.keras'
model = load_model(MODEL_PATH)

# --- Adjust test set to NUM_DEVICES ---
# Caps the evaluated test set at NUM_DEVICES rows. When fewer rows are
# available, all of them are used and NUM_DEVICES is lowered to match so the
# timing figures computed later divide by the real count.
num_test_devices = X_test_scaled.shape[0]
print(f"Number of devices in original test set: {num_test_devices}")

if num_test_devices >= NUM_DEVICES:
    X_test_scaled_adjusted = X_test_scaled[:NUM_DEVICES]
    # IMPORTANT: Adjust the ORIGINAL KP test set for final comparison
    y_test_original_kp_adjusted = y_test_original_kp[:NUM_DEVICES]
    print(f"Using first {NUM_DEVICES} devices from the test set")
elif num_test_devices > 0:
    print(f"Warning: Test set has fewer than {NUM_DEVICES} devices ({num_test_devices}). Using all available.")
    X_test_scaled_adjusted = X_test_scaled
    y_test_original_kp_adjusted = y_test_original_kp # Use the original KP y_test
    NUM_DEVICES = num_test_devices # Update NUM_DEVICES to actual number used
else:
    # exit() is an interactive helper injected by the site module and exits
    # with status 0. Raising SystemExit with a message works everywhere,
    # prints the message to stderr and yields a non-zero exit status.
    raise SystemExit("Error: No devices in the test set.")

# --- Measure prediction time ---
# perf_counter() is a monotonic, high-resolution clock intended for
# measuring intervals; time.time() is wall-clock time and can jump if the
# system clock is adjusted while the prediction runs.
# NOTE(review): this is the only predict() call in the script, so the figure
# presumably includes any one-time setup cost; add a warm-up call first if
# steady-state throughput is what should be reported.
start_time = time.perf_counter()
# Predict using the adjusted SCALED test set
predictions_log_scaled = model.predict(X_test_scaled_adjusted)
end_time = time.perf_counter()

# --- Inverse transform predictions ---
# Step 1: Inverse scale using the output_scaler fitted on y_train_log
predictions_log = output_scaler.inverse_transform(predictions_log_scaled)
# Step 2: Inverse the log transform using np.expm1 (the inverse of np.log1p)
predictions_unscaled_kp = np.expm1(predictions_log)

# --- Calculate time metrics ---
time_taken = end_time - start_time
if time_taken > 0:
    predictions_per_second = NUM_DEVICES / time_taken
else:
    # A zero-length interval would divide by zero; report 0 instead.
    predictions_per_second = 0

# --- Print timing results ---
print(f"\nTime taken to predict {NUM_DEVICES} devices: {time_taken:.3f} seconds")
print(f"Average predictions per second: {predictions_per_second:.2f}")
if NUM_DEVICES > 0:
    print(f"Average time per prediction: {(time_taken / NUM_DEVICES) * 1000:.3f} milliseconds")
else:
    print("N/A")

# --- Calculate accuracy metrics using ORIGINAL KP values ---
# Percentage error of every prediction relative to the true KP value.
# A small epsilon in the denominator avoids division by zero.
epsilon = 1e-10
kp_true = y_test_original_kp_adjusted
signed_error = predictions_unscaled_kp - kp_true
relative_errors = np.abs(signed_error / (kp_true + epsilon)) * 100

# Summary statistics of the percentage errors
mean_error_pct = relative_errors.mean()
median_error_pct = np.median(relative_errors)
std_error_pct = relative_errors.std()
min_error_pct = relative_errors.min()
max_error_pct = relative_errors.max()

# Print accuracy results
print("\nAccuracy Statistics (KP Prediction Errors in % - Corrected):")
print(f"Mean Error: {mean_error_pct:.2f}%")
print(f"Median Error: {median_error_pct:.2f}%")
print(f"Standard Deviation: {std_error_pct:.2f}%")
print(f"Min Error: {min_error_pct:.2f}%")
print(f"Max Error: {max_error_pct:.2f}%")

# --- Calculate other metrics (Optional) ---
# Both metrics are computed on original-scale KP, not on the log/scaled values.
kp_true, kp_pred = y_test_original_kp_adjusted, predictions_unscaled_kp
mse = mean_squared_error(kp_true, kp_pred)
rmse = np.sqrt(mse)
r2 = r2_score(kp_true, kp_pred)

print(f"\nR² Score: {r2:.4f}")
# MSE/RMSE are printed in scientific notation
print(f"MSE: {mse:.4e}")
print(f"RMSE: {rmse:.4e}")

Input shape (X): (50000, 1685)
Target shape (y - log-transformed KP): (50000, 1)
Number of devices in original test set: 10000
Using first 10000 devices from the test set
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

Time taken to predict 10000 devices: 1.389 seconds
Average predictions per second: 7197.94
Average time per prediction: 0.139 milliseconds

Accuracy Statistics (KP Prediction Errors in % - Corrected):
Mean Error: 9.36%
Median Error: 5.61%
Standard Deviation: 12.85%
Min Error: 0.00%
Max Error: 197.93%

R² Score: 0.9632
MSE: 6.2052e-10
RMSE: 2.4910e-05
