In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm # Use tqdm.notebook for Jupyter
import time

# --- Configuration ---
MEASUREMENTS_FILE = 'measurements_level3_v11.csv'
PARAMS_FILE = 'mosfet_params_level3_v11.csv'
VDS_SATURATION = 5.0  # Vds value for saturation region data
VGS_LINEAR_THRESHOLD_LOW = 0.5 # Lower Vgs bound for linear fitting
VGS_LINEAR_THRESHOLD_HIGH = 2 # Upper Vgs bound for linear fitting

In [2]:
def load_data(measurements_file, params_file):
    """Load the measurement and parameter CSV files into DataFrames."""
    df_meas = pd.DataFrame()
    df_params = pd.DataFrame()
    try:
        df_meas = pd.read_csv(measurements_file)
        print(f"Loaded {measurements_file}")
    except FileNotFoundError:
        print(f"Error: {measurements_file} not found.")

    try:
        df_params = pd.read_csv(params_file)
        print(f"Loaded {params_file}")
    except FileNotFoundError:
        print(f"Error: {params_file} not found.")

    # Find common MOSFET IDs
    if not df_meas.empty and not df_params.empty:
        common_ids = np.intersect1d(df_meas['MOSFET_ID'].unique(), df_params['MOSFET_ID'].unique())
        print(f"\nFound {len(common_ids)} MOSFET IDs common to both files.")
        # Filter DataFrames to only include common IDs for efficiency
        df_meas = df_meas[df_meas['MOSFET_ID'].isin(common_ids)].copy()
        df_params = df_params[df_params['MOSFET_ID'].isin(common_ids)].copy()
        print(f"DataFrames filtered to common IDs.")
    else:
        common_ids = []
        print("\nCould not find common IDs due to missing files.")


    return df_meas, df_params, common_ids

# Load data once
df_meas, df_params, common_ids = load_data(MEASUREMENTS_FILE, PARAMS_FILE)

Loaded measurements_level3_v11.csv
Loaded mosfet_params_level3_v11.csv

Found 55000 MOSFET IDs common to both files.
DataFrames filtered to common IDs.


In [3]:
def process_mosfets_kp(df_meas, df_params, common_ids, num_devices, vds_saturation, vgs_low, vgs_high):
    """
    Process MOSFET data to calculate Vth and KP for a specified number of devices.
    Returns a DataFrame with results.
    """
    results = []
    processed_ids = []

    if not df_meas.empty and not df_params.empty and len(common_ids) > 0:

        # Limit the number of devices if specified
        if num_devices is not None and num_devices < len(common_ids):
            ids_to_process = common_ids[:num_devices]
            print(f"\nProcessing first {num_devices} of {len(common_ids)} common MOSFETs...")
        else:
            ids_to_process = common_ids
            print(f"\nProcessing all {len(common_ids)} common MOSFETs...")

        # Group measurements by MOSFET_ID for potentially faster access (optional optimization)
        # measurements_grouped = df_meas.groupby('MOSFET_ID')
        # params_dict = df_params.set_index('MOSFET_ID').to_dict('index') # Faster lookup

        start_time = time.time()

        for mosfet_id in tqdm(ids_to_process, desc="Calculating Vth & KP"):
            # --- Initialization for this MOSFET ---
            vth_extrapolated = np.nan
            kp_extracted = np.nan
            kp_actual = np.nan
            vth_actual = np.nan
            w_val = np.nan
            l_val = np.nan
            kp_percent_error = np.nan

            # --- Get Data for Current MOSFET ---
            # df_single_mosfet = measurements_grouped.get_group(mosfet_id) # Faster if using groupby
            df_single_mosfet = df_meas[df_meas['MOSFET_ID'] == mosfet_id] # Standard filtering
            # actual_params_dict = params_dict.get(mosfet_id) # Faster if using dict
            actual_params_df = df_params[df_params['MOSFET_ID'] == mosfet_id] # Standard filtering

            if actual_params_df.empty:
                # print(f"Warning: Parameters not found for MOSFET_ID {mosfet_id}. Skipping.") # Can be noisy
                continue # Skip if no params found

            # --- Saturation Data ---
            df_sat = df_single_mosfet[np.isclose(df_single_mosfet['VDS'], vds_saturation)].copy()
            if df_sat.empty:
                continue # Skip if no saturation data

            # --- Sqrt(Id) ---
            df_sat['SQRT_ID'] = np.sqrt(df_sat['ID'].clip(lower=1e-15)) # Clip ensures non-negative

            # --- Linear Region Data ---
            df_linear_region = df_sat[
                (df_sat['VGS'] > vgs_low) &
                (df_sat['VGS'] <= vgs_high) &
                (df_sat['ID'] > 1e-9) # Avoid points too close to zero
            ]

            # --- Linear Fit ---
            if len(df_linear_region) >= 2:
                vgs_linear = df_linear_region['VGS']
                sqrt_id_linear = df_linear_region['SQRT_ID']

                try:
                    slope, intercept = np.polyfit(vgs_linear, sqrt_id_linear, 1)

                    if slope > 1e-9: # Check for non-zero positive slope
                        # --- Calculate Vth ---
                        vth_extrapolated = -intercept / slope

                        # --- Calculate KP ---
                        kp_times_W_div_L = 2 * (slope**2)

                        w_val = actual_params_df['W'].iloc[0]
                        l_val = actual_params_df['L'].iloc[0]
                        W_div_L = w_val / l_val if l_val != 0 else np.nan

                        if W_div_L != 0 and not np.isnan(W_div_L):
                             kp_extracted = kp_times_W_div_L / W_div_L
                        else:
                             kp_extracted = np.nan # Cannot calculate KP if W/L is invalid

                    # else: Vth/KP remain NaN if slope is zero or negative

                except (np.linalg.LinAlgError, ValueError):
                    # Fit failed, Vth/KP remain NaN
                    pass # Silently skip on fit failure for bulk processing

            # --- Get Actual Values & Calculate Error ---
            vth_actual = actual_params_df['VTO'].iloc[0]
            kp_actual = actual_params_df['KP'].iloc[0]

            if not np.isnan(kp_extracted) and not np.isnan(kp_actual):
                if kp_actual != 0:
                    kp_percent_error = abs((kp_extracted - kp_actual) / kp_actual) * 100
                elif kp_extracted == 0:
                     kp_percent_error = 0.0 # Both zero -> zero error
                else:
                     kp_percent_error = float('inf') # Actual is zero, extracted is not

            # --- Store Results ---
            results.append({
                'MOSFET_ID': mosfet_id,
                'Actual_KP': kp_actual,
                'Extrapolated_KP': kp_extracted,
                'KP_Percentage_Error': kp_percent_error,
                'Actual_Vth': vth_actual,
                'Extrapolated_Vth': vth_extrapolated,
                'W': w_val,
                'L': l_val,
            })
            processed_ids.append(mosfet_id)

        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"\nTime taken to process {len(processed_ids)} MOSFETs: {elapsed_time:.2f} seconds")

        return pd.DataFrame(results)

    else:
        print("\nCannot proceed. Ensure measurement and parameter data are loaded and common IDs exist.")
        return pd.DataFrame()

In [4]:
def display_kp_results(df_kp_results):
    """Display KP results and analyze percentage errors."""
    if not df_kp_results.empty:
        print("\n--- KP Calculation and Comparison Results ---")
        # (Code to display parts of the DataFrame) ...

        # Analyze KP Percentage Error
        # Filter out non-numeric results before calculating stats
        valid_kp_errors = df_kp_results['KP_Percentage_Error'].dropna().replace([np.inf, -np.inf], np.nan).dropna()

        if not valid_kp_errors.empty: # Check if any valid errors exist
            print("\n--- Summary Statistics for KP Percentage Error ---")
            # >>> Calculations for mean and median are here <<<
            print(f"Mean Error:        {valid_kp_errors.mean():.2f}%")
            print(f"Median Error:      {valid_kp_errors.median():.2f}%")
            # >>>                                           <<<
            print(f"Standard Deviation:{valid_kp_errors.std():.2f}%")
            print(f"Min Error:         {valid_kp_errors.min():.2f}%")
            print(f"Max Error:         {valid_kp_errors.max():.2f}%")
            num_valid = len(valid_kp_errors)
            num_total = len(df_kp_results)
            num_nan_inf = num_total - num_valid
            print(f"MOSFETs Analyzed:  {num_total}")
            print(f"Valid KP Errors:   {num_valid} ({num_nan_inf} NaN/Inf errors excluded)")

            # (Code to plot histogram) ...

        else:
            # This message prints if NO valid errors were found
            print("\nNo valid KP percentage errors calculated to summarize.")
    else:
        print("\nNo results to display.")

In [7]:
# --- Main Execution ---

# Define how many devices to process (set to None to process all common IDs)
NUM_DEVICES_TO_PROCESS = 10000  # <<< MODIFY THIS VALUE OR SET TO None >>>

# Ensure data is loaded before processing
if 'df_meas' in locals() and 'df_params' in locals() and not df_meas.empty and not df_params.empty:
    # Process the MOSFETs
    df_kp_results = process_mosfets_kp(
        df_meas,
        df_params,
        common_ids, # Pass the list of common IDs
        NUM_DEVICES_TO_PROCESS,
        VDS_SATURATION,
        VGS_LINEAR_THRESHOLD_LOW,
        VGS_LINEAR_THRESHOLD_HIGH
    )

    # Display the results
    display_kp_results(df_kp_results)
else:
    print("Please ensure data is loaded successfully in Block 2 before running this block.")


Processing first 10000 of 55000 common MOSFETs...


Calculating Vth & KP:   0%|          | 0/10000 [00:00<?, ?it/s]


Time taken to process 10000 MOSFETs: 290.58 seconds

--- KP Calculation and Comparison Results ---

--- Summary Statistics for KP Percentage Error ---
Mean Error:        46.46%
Median Error:      44.49%
Standard Deviation:15.07%
Min Error:         14.04%
Max Error:         92.32%
MOSFETs Analyzed:  10000
Valid KP Errors:   10000 (0 NaN/Inf errors excluded)
