In [6]:
import time
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error # Added for more metrics

# --- Settings ---
# Upper bound on how many test-set devices are pushed through the model;
# edit this value to benchmark a different number of devices.
NUM_DEVICES = 11_000

# --- Read the input tables ---
# The parameter table is read first, then the measurement table, so a missing
# file fails in the same order as before.
params_df, measurements_df = (
    pd.read_csv(csv_name)
    for csv_name in ('mosfet_params_level3_v11.csv', 'measurements_level3_v11.csv')
)

# --- Preprocess data ---
# Build one flat feature vector per device from its measurement rows.
# Every vector must have the same length, so only devices with exactly
# EXPECTED_POINTS rows are kept; the rest are counted and reported instead of
# being dropped silently.
EXPECTED_POINTS = 561
FEATURE_COLUMNS = ['VGS', 'VDS', 'ID']

X_list = []
mosfet_ids = []
num_skipped = 0
# Sort the whole table once rather than once per device: groupby preserves the
# row order inside each group, so each group arrives ordered by meas_index.
grouped = measurements_df.sort_values(['MOSFET_ID', 'meas_index']).groupby('MOSFET_ID')
for mosfet_id, group_sorted in grouped:
    if len(group_sorted) != EXPECTED_POINTS:
        num_skipped += 1
        continue
    # Row-major flatten: (VGS, VDS, ID) of the first row, then the second, ...
    X_list.append(group_sorted[FEATURE_COLUMNS].to_numpy().flatten())
    mosfet_ids.append(mosfet_id)

if num_skipped:
    print(f"Skipped {num_skipped} devices without exactly {EXPECTED_POINTS} measurement rows")
if not X_list:
    # Fail here with a clear message instead of later inside train_test_split.
    raise ValueError(f"No device has exactly {EXPECTED_POINTS} measurement rows; nothing to evaluate")
X = np.array(X_list)

params_df.set_index('MOSFET_ID', inplace=True)
# Ensure y targets VTO specifically. A single label-based lookup replaces the
# per-device .loc loop; dtype=float keeps the targets numeric even if the
# parameter table also holds non-numeric columns.
y = params_df.loc[mosfet_ids, 'VTO'].to_numpy(dtype=float).reshape(-1, 1)
if len(y) != len(X):
    # A repeated MOSFET_ID in the parameter table returns extra rows and would
    # silently misalign features and targets.
    raise ValueError(
        f"Expected {len(X)} VTO targets but found {len(y)}; "
        "check the parameter table for duplicate MOSFET_ID rows"
    )
# Kept so any later code that still refers to y_list sees the same per-device arrays.
y_list = list(y)

# --- Train/test partition ---
# Fixed seed and 80/20 ratio; keep them identical to the training run when the
# results are meant to be compared against it.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Standardise the inputs ---
# Statistics come from the training rows only; the same fitted scaler is then
# applied to both partitions.
input_scaler = StandardScaler().fit(X_train)
X_train_scaled = input_scaler.transform(X_train)
X_test_scaled = input_scaler.transform(X_test)

# --- Standardise the target (VTO) ---
# Also fitted on training targets only. The fitted output_scaler is what maps
# model outputs back to unscaled values further down.
output_scaler = StandardScaler().fit(y_train)
y_train_scaled = output_scaler.transform(y_train)

# --- Restore the trained network ---
# NOTE(review): the saved model is presumably trained on data scaled exactly
# as above -- confirm against the training script.
model = load_model('mosfet_VTO_model_level3_v11.keras')

# --- Adjust test set to NUM_DEVICES ---
# Cap the evaluation set at NUM_DEVICES rows. When the test split is smaller
# than requested, every available row is used and NUM_DEVICES is lowered to the
# number actually evaluated, because later code reads it as the sample count.
num_test_devices = X_test_scaled.shape[0]
print(f"Number of devices in original test set: {num_test_devices}")

if num_test_devices == 0:
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # injected by the site module for interactive use and is not guaranteed to
    # exist. The message goes to stderr and the exit status is non-zero.
    raise SystemExit("Error: No devices in the test set.")
if NUM_DEVICES < 1:
    # A zero or negative cap would slice to an empty or wrongly-bounded test set.
    raise ValueError(f"NUM_DEVICES must be a positive integer, got {NUM_DEVICES!r}")

if num_test_devices >= NUM_DEVICES:
    X_test_scaled_adjusted = X_test_scaled[:NUM_DEVICES]
    y_test_adjusted = y_test[:NUM_DEVICES]  # unscaled targets, same rows as the inputs
    print(f"Using first {NUM_DEVICES} devices from the test set")
else:
    print(f"Warning: Test set has fewer than {NUM_DEVICES} devices ({num_test_devices}). Using all available.")
    X_test_scaled_adjusted = X_test_scaled
    y_test_adjusted = y_test  # unscaled targets
    NUM_DEVICES = num_test_devices  # record the number of devices actually used

# --- Measure prediction time ---
# time.perf_counter() is the monotonic, highest-resolution clock meant for
# measuring intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted during the run.
# NOTE(review): this times a single cold call, so the figure presumably
# includes any one-time setup the model performs on its first predict --
# confirm, and add an untimed warm-up call if steady-state throughput is wanted.
start_time = time.perf_counter()
# Predict using the adjusted SCALED test set
predictions_scaled = model.predict(X_test_scaled_adjusted)
end_time = time.perf_counter()

# --- Inverse transform predictions ---
# Map the scaled model outputs back through the output scaler.
predictions_unscaled = output_scaler.inverse_transform(predictions_scaled)

# --- Calculate time metrics ---
time_taken = end_time - start_time
# Count the rows the model actually returned rather than trusting a global
# that other code may have changed.
num_predictions = len(predictions_scaled)
predictions_per_second = num_predictions / time_taken if time_taken > 0 else 0

# --- Print timing results ---
print(f"\nTime taken to predict {num_predictions} devices: {time_taken:.3f} seconds")
print(f"Average predictions per second: {predictions_per_second:.2f}")
if num_predictions > 0:
    print(f"Average time per prediction: {(time_taken / num_predictions) * 1000:.3f} milliseconds")
else:
    print("N/A")

# --- Calculate accuracy metrics using UNSCALED values ---
# Relative error (in percent) of each prediction against its true target.
# Targets equal to zero are excluded, since the relative error is undefined there.
non_zero_mask = y_test_adjusted != 0
# Force a float buffer: np.full_like would otherwise inherit the target dtype,
# and an integer dtype cannot hold the NaN sentinel or fractional percentages.
relative_errors = np.full_like(y_test_adjusted, np.nan, dtype=float)
signed_diff = predictions_unscaled[non_zero_mask] - y_test_adjusted[non_zero_mask]
relative_errors[non_zero_mask] = np.abs(signed_diff / y_test_adjusted[non_zero_mask]) * 100

# Keep only the entries that received a value (drops the zero-target slots).
valid_errors = relative_errors[~np.isnan(relative_errors)]

if valid_errors.size > 0:
    mean_error_pct = np.mean(valid_errors)
    median_error_pct = np.median(valid_errors)
    std_error_pct = np.std(valid_errors)
    min_error_pct = np.min(valid_errors)
    max_error_pct = np.max(valid_errors)

    # Print accuracy results
    print("\nAccuracy Statistics (VTO Prediction Errors in % - Corrected):")
    print(f"Mean Error: {mean_error_pct:.2f}%")
    print(f"Median Error: {median_error_pct:.2f}%")
    print(f"Standard Deviation: {std_error_pct:.2f}%")
    print(f"Min Error: {min_error_pct:.2f}%")
    print(f"Max Error: {max_error_pct:.2f}%")

    # --- Absolute-scale metrics, computed over every evaluated device ---
    r2 = r2_score(y_test_adjusted, predictions_unscaled)
    mse = mean_squared_error(y_test_adjusted, predictions_unscaled)
    rmse = np.sqrt(mse)

    print(f"\nR² Score: {r2:.5f}")
    print(f"MSE: {mse:.5f}")
    print(f"RMSE: {rmse:.5f}")
else:
    print("\nCould not calculate error statistics (possibly all target values were zero or test set was empty).")

Number of devices in original test set: 11000
Using first 11000 devices from the test set
[1m344/344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step

Time taken to predict 11000 devices: 0.713 seconds
Average predictions per second: 15432.54
Average time per prediction: 0.065 milliseconds

Accuracy Statistics (VTO Prediction Errors in % - Corrected):
Mean Error: 2.26%
Median Error: 1.54%
Standard Deviation: 2.61%
Min Error: 0.00%
Max Error: 71.95%

R² Score: 0.99170
MSE: 0.00056
RMSE: 0.02375
