## Wafer Test Simulation, Data Analysis with Python
This Jupyter Notebook produces the following:
<ul>
    <li>test data for a  simulated wafer, 
    with X/Y coordinates & PART_ID, and data for 7 tests</li>
    <li>A test limits table</li>
    <li>A wafer map showing the PASS/FAIL result of each test</li>
    <li>heat maps by test to show variation by wafer location</li>
    <li>histogram of selected tests</li>    
    <li>correlation between selected pairs of tests, 
    with best fit and stats</li>
    <li>interactive 3-D contour maps by test </li>
</ul>

In [None]:
import polars as pl
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
import itertools
from scipy import stats


## Globals and Functions

In [None]:
%%time
# Site numbers 1..4 (presumably the tester sites -- confirm; only the count
# matches how SITE labels are generated further down).
sites = list(range(1, 5))

# Wafer geometry: the wafer is round and sits on a square grid with the same
# number of die positions along X and Y (coordinates run 1 through 51).
x_die = 51
y_die = x_die
sqr_waf_die_num = x_die * y_die          # die positions on the full square grid
wafer_center_x, wafer_center_y = x_die // 2, y_die // 2
wafer_radius = wafer_center_x

# Folder that receives the HTML figures written by the plotting cells.
os.makedirs('HTML', exist_ok=True)

def get_random(count, offset, mean, sigma, seed=1):
    ''' Generate reproducible, normally distributed pseudo-random data.

    count  : number of samples to return
    offset : constant added to every sample
    mean   : mean of the normal distribution
    sigma  : standard deviation of the normal distribution
    seed   : seed for the generator; the same seed gives the same samples

    Returns a list of `count` floats.
    '''
    # A private legacy generator yields the same sequence as seeding the
    # module-level generator, but leaves NumPy's global random state alone.
    rng = np.random.RandomState(seed)
    # One vectorized draw replaces `count` separate Python-level calls.
    return (offset + rng.normal(mean, sigma, count)).tolist()

def get_limit_item(df_limits, test, item):
    ''' Return one field of the limits table for a given test.

    df_limits : limits table with a TEST_NAME column
    test      : value to match in TEST_NAME
    item      : name of the column whose value is returned

    The value is taken from the first row whose TEST_NAME equals `test`.
    '''
    matching_rows = df_limits.filter(pl.col('TEST_NAME') == test)
    first_row = 0
    return matching_rows.item(first_row, item)


## Limit Table
Limit tables contain the test limits and units for every measurement. For a test to pass, its measurement result must fall within the boundaries of the test limits.

In [None]:
%%time
# Names of the simulated tests, in test-number order.
test_list = [
    'OFFSET_VOLTAGE',
    'BANDGAP_VOLTAGE',
    'PIN1_LEAKAGE',
    'PIN2_LEAKAGE',
    'SUPPLY_CURRENT',
    'PROP_DELAY',
    'PLL_FREQ',
]

# Per-test columns of the limits table; position i describes test_list[i].
test_numbers = list(range(100, 800, 100))      # 100, 200, ... 700
lower_limits = [-2.0, 1.175, 0.0, 0.0, 10.0, 16.5, 19.5]
upper_limits = [2.0, 1.225, 1.5, 1.5, 15.0, 17.5, 20.5]
limit_units = ['mV', 'V', 'uA', 'uA', 'mA', 'nS', 'MHz']

df_limits = pl.DataFrame(
    {
        'TEST_NUM': test_numbers,
        'TEST_NAME': test_list,
        'LO_LIMIT': lower_limits,
        'HI_LIMIT': upper_limits,
        'UNITS': limit_units,
    }
)


## Make test data
All of this data is simulated; it does not come from a real device or design.

In [None]:
# Build the simulated wafer: start from every position of the square
# x_die by y_die grid, keep only the positions inside the round wafer,
# then number the dice and add location / site dependent shifts.
df_wafer = (
    pl.LazyFrame(  # Polars dataframes are fast, lazy frames are faster
        {
            # X changes slowest, Y fastest, while walking the grid
            'X_COORD'         : 1 + (np.arange(0, sqr_waf_die_num, 1) // x_die),
            'Y_COORD'         : 1 + (np.arange(0, sqr_waf_die_num, 1) % y_die),
            # arguments: count, offset, mean, sigma, seed (one seed per test)
            'PIN1_LEAKAGE'    : get_random(sqr_waf_die_num, 0.1, 0.0, 0.1, 20),
            'PIN2_LEAKAGE'    : get_random(sqr_waf_die_num, 0.1, 0.0, 0.1, 25),
            'SUPPLY_CURRENT'  : get_random(sqr_waf_die_num, 12.0, 0.0, 1.0, 30),
            'PROP_DELAY'      : get_random(sqr_waf_die_num, 17.05, 0.0, 0.2, 35),
            'PLL_FREQ'        : get_random(sqr_waf_die_num, 20.0, 0.0, 0.2, 40),
            'OFFSET_VOLTAGE'  : get_random(sqr_waf_die_num, 0.0, 0.0, 0.8, 45),
            'BANDGAP_VOLTAGE' : get_random(sqr_waf_die_num, 1.2, 0.0, 0.01, 50),
            # every die starts as PASS; failures are flagged once limits are applied
            'DEVICE_PF'       : ['PASS' for i in range(sqr_waf_die_num)]
        }
    )
    .with_columns(
        # distance of each die from the wafer centre, in die units
        from_center = (
            (
                (pl.col('Y_COORD') - wafer_center_y).pow(2) +
                (pl.col('X_COORD') - wafer_center_x).pow(2)
            ).sqrt()
        )
    )
    # keep only the dice that lie inside the round wafer outline
    .filter(pl.col('from_center') <= (wafer_radius - 0.8))
    .filter(pl.col('Y_COORD') < 49)  # adds a flat edge
    .with_row_index('PART_ID', offset=1)
    # dice are assigned to the sites round-robin in PART_ID order: S1, S2, ...
    .with_columns(SITE = (pl.col('PART_ID') - 1).mod(len(sites)) + 1)
    .with_columns(SITE = pl.lit('S') + pl.col('SITE').cast(pl.String))
    # running count of dice within each site
    .with_columns(TOUCHDOWN = pl.col('PART_ID').cum_count().over('SITE'))
    .with_columns(
        # leakage rises with distance from the wafer centre
        PIN1_LEAKAGE = (pl.col('PIN1_LEAKAGE') + (pl.col('from_center') / wafer_radius)),
        PIN2_LEAKAGE = (pl.col('PIN2_LEAKAGE') + (pl.col('from_center') / wafer_radius)),
        # site S3 reads 0.5 higher than the other sites
        PLL_FREQ = (
            pl.when(pl.col('SITE') == 'S3')
            .then(pl.col('PLL_FREQ') + 0.5)
            .otherwise(pl.col('PLL_FREQ'))
        ),
    )
    .collect()   # this converts lazy frame to data frame by query execution
)

# add PF column for each test
for test in test_list:
    lo_limit = get_limit_item(df_limits, test, 'LO_LIMIT')
    hi_limit = get_limit_item(df_limits, test, 'HI_LIMIT')
    # a measurement fails when it lies outside [lo_limit, hi_limit]
    out_of_limits = (pl.col(test) < lo_limit) | (pl.col(test) > hi_limit)
    df_wafer = (
        df_wafer
        .with_columns(   # Calculate pass or fail result of every measurement
            pl.when(out_of_limits)
            .then(pl.lit('F'))
            .otherwise(pl.lit('P'))
            .alias(test + '_PF')
        )
        .with_columns(
            # one failing test fails the device; earlier FAILs are kept
            DEVICE_PF =
                pl.when(pl.col(test + '_PF') == 'F')
                .then(pl.lit('FAIL'))
                .otherwise(pl.col('DEVICE_PF'))
        )
    )

# Numeric copy of DEVICE_PF (1 = PASS, 0 = anything else). It depends only on
# the final DEVICE_PF, so it is computed once after all tests are evaluated.
df_wafer = df_wafer.with_columns(
    DEVICE_PF_INT =
        pl.when(pl.col('DEVICE_PF') == 'PASS')
        .then(pl.lit(1))
        .otherwise(pl.lit(0))
)
            
# Put the identification / summary columns first, followed by every test
# column and its matching _PF column in alphabetical order.
pf_cols = [f'{name}_PF' for name in test_list]
leading_cols = [
    'PART_ID', 'TOUCHDOWN', 'X_COORD', 'Y_COORD',
    'SITE', 'DEVICE_PF', 'DEVICE_PF_INT',
]
df_wafer = df_wafer.select(leading_cols + sorted(test_list + pf_cols))
# Wafer-level totals used by the statistics cell and the plot titles.
die_count = df_wafer.height
pass_count = df_wafer.filter(pl.col('DEVICE_PF') == 'PASS').height
fail_count = df_wafer.filter(pl.col('DEVICE_PF') == 'FAIL').height
wafer_yield = 100 * pass_count / die_count     # percent of dice that pass
site_list = sorted(set(df_wafer['SITE']))      # distinct site labels, sorted

# Save the simulated wafer data, then show the frame as the cell output.
df_wafer.write_csv('wafer.csv')
df_wafer

## Wafer statistics
die count and yield

In [None]:
# Print each summary value as "name = value"; counts get thousands separators.
print(f'die_count = {die_count:,d}')
print(f'pass_count = {pass_count:,d}')
print(f'fail_count = {fail_count:,d}')
print(f'wafer_yield = {wafer_yield:.1f}')
print(f'site_list = {site_list!r}')

## Pass/Fail Wafer Map (2-D Heatmap)

In [None]:
# Pass/fail wafer map: one heatmap cell per die, coloured by DEVICE_PF_INT
# (0 -> crimson, 1 -> lightgreen).
# The trace puts Y_COORD on the horizontal axis and X_COORD on the vertical
# axis, so the axis titles and the hover text below are written to match.
fig = go.Figure(
    data = go.Heatmap(
        z = df_wafer['DEVICE_PF_INT'],
        x = df_wafer['Y_COORD'],
        y = df_wafer['X_COORD'],
        colorscale = ['crimson', 'lightgreen'], # color palette for heatmap,
        xgap=0.5, ygap=0.5,
        # extra per-die values made available to the hover template
        customdata=np.stack(
            (
                df_wafer['X_COORD'],
                df_wafer['Y_COORD'],
                df_wafer['SITE'],
                df_wafer['DEVICE_PF'],
            ),
        axis=-1
        ),
    ),
)

my_grid_color = 'lightblue'

# Two-line title: heading plus a small summary line with counts and yield.
wf_title = (
    'Wafer Map (Pass/Fail) --  SIMULATED DATA<br>'.upper() +
    f'<sup>Count: {die_count:,}   ' +
    f'Pass: {pass_count:,}   ' +
    f'Fail: {fail_count:,}   ' +
    f'Yield:  <b>{wafer_yield:.1f}%</b></sup>'
)
fig.update_layout(
    template='simple_white',
    height=800,
    width=800,
    title=wf_title,
    xaxis_title='Y_COORD',   # horizontal axis carries Y_COORD
    xaxis = dict(tick0=0, dtick=5, showgrid=True, gridcolor=my_grid_color),
    yaxis_title='X_COORD',   # vertical axis carries X_COORD
    yaxis = dict(tick0=0, dtick=5, showgrid=True,  gridcolor=my_grid_color),
)
fig.update_traces(
    showscale=False,
    hovertemplate =
        # %{y} holds X_COORD and %{x} holds Y_COORD (see the trace above)
        '<br><b>X,Y</b>:  %{y}, %{x}'+
        '<br><b>SITE</b>: %{customdata[2]}'+
        '<br><b>%{customdata[3]}</b>' +
        '<extra></extra>',
)
fig.write_html('./HTML/Pass_Fail_Wafer_Map.html')
fig.show()

## Generate Wafer Map by Test

In [None]:
# One measurement heat map per test, written to ./HTML and shown inline.
# NOTE(review): the first entry of test_list is skipped, exactly as before;
# confirm whether that is intentional or left over from debugging.
my_grid_color = 'lightblue'
for i, test in enumerate(test_list):
    if i == 0:
        continue
    # per-test counts and yield for the title
    test_count = len(df_wafer.drop_nulls(test))
    test_pass = len(df_wafer.filter(pl.col(test + '_PF') == 'P'))
    test_fail = len(df_wafer.filter(pl.col(test + '_PF') == 'F'))
    test_yield = 100 * test_pass / test_count
    lo_limit = get_limit_item(df_limits, test, 'LO_LIMIT')
    hi_limit = get_limit_item(df_limits, test, 'HI_LIMIT')
    units = get_limit_item(df_limits, test, 'UNITS')
    # The trace puts Y_COORD on the horizontal axis and X_COORD on the
    # vertical axis; axis titles and hover text below are written to match.
    fig = go.Figure(
        data = go.Heatmap(
            z = df_wafer[test],
            x = df_wafer['Y_COORD'],
            y = df_wafer['X_COORD'],
            xgap=0.5, ygap=0.5,
            # extra per-die values made available to the hover template
            customdata=np.stack(
                (
                    df_wafer['X_COORD'],
                    df_wafer['Y_COORD'],
                    df_wafer['SITE'],
                    df_wafer[test],
                    df_wafer[test+'_PF'],
                ),
            axis=-1
            ),
        ),
    )
    # Two-line title: heading with units, then limits, counts and yield.
    wf_title = (
        f'{test} heat map'.upper() + ' [' + units + '] --  SIMULATED DATA<br>' +
        f'<sup>LSL: {lo_limit:.3f}   '  +
        f'USL: {hi_limit:.3f}   '  +
        f'Count: {test_count:,}   ' +
        f'Pass: {test_pass:,}   ' +
        f'Fail: {test_fail:,}   ' +
        f'Test Yield:  <b>{test_yield:.1f}%</b></sup>'
    )

    fig.update_layout(
        template='simple_white',
        height=800,
        width=800,
        title=wf_title,
        xaxis_title='Y_COORD',   # horizontal axis carries Y_COORD
        xaxis = dict(tick0=0, dtick=5, showgrid=True, gridcolor=my_grid_color),
        yaxis_title='X_COORD',   # vertical axis carries X_COORD
        yaxis = dict(tick0=0, dtick=5, showgrid=True,  gridcolor=my_grid_color),
    )
    fig.update_traces(
        showscale=True,
        hovertemplate =
            # %{y} holds X_COORD and %{x} holds Y_COORD (see the trace above)
            '<br><b>X,Y</b>:  %{y}, %{x}'+
            '<br><b>SITE</b>: %{customdata[2]}'+
            '<br><b>Meas Result = %{customdata[3]:.3f}</b>' +
            '<br><b>Test PF = %{customdata[4]}</b>' +
            '<extra></extra>',
    )
    fig.write_html(f'./HTML/Wafer_Heat_Map_{test}.html')
    fig.show()

## Generate Histogram of selected Tests

In [None]:
# Overlaid per-site histograms for the tests at positions 4 and 6 of
# test_list, with the test limits drawn as dashed vertical lines.
for i, test in enumerate(test_list):
    if i not in (4, 6):
        continue

    # counts and yield for the title
    test_count = len(df_wafer.drop_nulls(test))
    test_pass = len(df_wafer.filter(pl.col(test + '_PF') == 'P'))
    test_fail = len(df_wafer.filter(pl.col(test + '_PF') == 'F'))
    test_yield = 100 * test_pass / test_count
    lo_limit = get_limit_item(df_limits, test, 'LO_LIMIT')
    hi_limit = get_limit_item(df_limits, test, 'HI_LIMIT')
    units = get_limit_item(df_limits, test, 'UNITS')

    # reshape to one column per site, one row per touchdown
    df_test = (
        df_wafer
        .select(pl.col(test, 'SITE', 'TOUCHDOWN'))
        .pivot(on='SITE', values=test, index='TOUCHDOWN')
    )

    fig = go.Figure()
    for site in site_list:
        fig.add_trace(go.Histogram(x=df_test[site], name=site, opacity=0.5))

    hist_title = (
        test + ' --  SIMULATED DATA<br>' +
        f'<sup>Count: {test_count:,}   ' +
        f'Pass: {test_pass:,}   ' +
        f'Fail: {test_fail:,}   ' +
        f'Yield:  <b>{test_yield:.1f}%</b></sup>'
    )
    fig.update_layout(
        template='simple_white',
        barmode='overlay',
        height=600, width=900,
        title=hist_title,
        xaxis_title=test + ' [' + units + ']<br>',
        yaxis_title='COUNT',
        legend=dict(title='TEST SITE'),
    )

    # dashed line plus label for each limit; labels sit inside the limits
    for limit_value, limit_label, label_anchor in (
        (lo_limit, 'Lower<br>Limit', 'left'),
        (hi_limit, 'Upper<br>Limit', 'right'),
    ):
        fig.add_vline(x=limit_value, line_width=3, line_dash="dash", line_color='gray')
        fig.add_annotation(
            text=limit_label,
            x=limit_value, xref='x',
            y=1, yref='paper',
            showarrow=False,
            xanchor=label_anchor,
            yanchor='top',
        )

    fig.write_html(f'./HTML/Histogram_{test}.html')
    fig.show()

## Generate Correlation plots of selected Tests

In [None]:
# Scatter plot with a least-squares line for the first test pair and for
# every other pair whose correlation coefficient magnitude exceeds 0.6.
test_pairs = list(itertools.combinations(test_list, 2))
for i, (test_1, test_2) in enumerate(test_pairs, start=1):
    # linear regression of test_2 on test_1 across all dice
    res = stats.linregress(df_wafer[test_1], df_wafer[test_2])
    r = float(res.rvalue)
    slope = float(res.slope)
    y_intercept = float(res.intercept)

    wanted = (i == 1) or (abs(r) > 0.6)
    if not wanted:
        continue

    # end points of the fitted line over the observed range of test_1
    x_min = df_wafer[test_1].min()
    x_max = df_wafer[test_1].max()
    y_x_min = float(slope * x_min + y_intercept)
    y_x_max = float(slope * x_max + y_intercept)

    fig = px.scatter(
        df_wafer,
        x=test_1,
        y=test_2,
        template='simple_white',
        height=800, width=800,
        color='SITE'
    )
    corr_title = (
        f'CORRELATION: {test_1} vs. {test_2}  --  SIMULATED DATA' +
        f'<br><sup>r = {r:.3f}   slope = {slope:.3f}  intercept = {y_intercept:.3f} </sup>'
    )
    units_1 = get_limit_item(df_limits, test_1, 'UNITS')
    units_2 = get_limit_item(df_limits, test_2, 'UNITS')
    fig.update_layout(
        template='simple_white',
        barmode='overlay',
        height=600, width=800,
        title=corr_title,
        xaxis_title=test_1 + ' [' + units_1 + ']<br>',
        yaxis_title=test_2 + ' [' + units_2 + ']',
        legend=dict(title='TEST SITE'),
    )

    # best-fit line, labelled at its right-hand end
    fig.add_shape(
        type="line",
        x0=x_min, y0=y_x_min,
        x1=x_max, y1=y_x_max,
        line_width=2,
        line_dash='solid',
        line_color='gray',
        opacity=1
    )
    fig.add_annotation(
        text='Best<br>Fit',
        x=x_max, xref='x',
        y=y_x_max, yref='y',
        showarrow=False,
        xanchor='center',
        yanchor='bottom'
    )

    # test limits: vertical lines for test_1, horizontal lines for test_2
    test_1_lo_limit = get_limit_item(df_limits, test_1, 'LO_LIMIT')
    test_1_hi_limit = get_limit_item(df_limits, test_1, 'HI_LIMIT')
    test_2_lo_limit = get_limit_item(df_limits, test_2, 'LO_LIMIT')
    test_2_hi_limit = get_limit_item(df_limits, test_2, 'HI_LIMIT')
    for x_limit in (test_1_lo_limit, test_1_hi_limit):
        fig.add_vline(x=x_limit, line_width=2, line_dash="dash", line_color='gray')
    for y_limit in (test_2_lo_limit, test_2_hi_limit):
        fig.add_hline(y=y_limit, line_width=2, line_dash="dash", line_color='gray')

    fig.write_html(f'./HTML/Correlation_{test_1}_{test_2}_{str(i).zfill(2)}.html')
    fig.show()

## Generate 3-D Surface/Contour map of selected Tests

In [None]:
#-------------------------------------------------------------------------------
#    Generate 3-D Surface/Contour map of selected Tests
#-------------------------------------------------------------------------------
test = 'PIN2_LEAKAGE'
units = get_limit_item(df_limits, test, 'UNITS')

# Reshape to a grid: one row per X_COORD, one column per Y_COORD.
z_data = df_wafer.select(pl.col(['X_COORD', 'Y_COORD', test]))
z_unstacked = (
    z_data
    .pivot(
        on='Y_COORD',
        index='X_COORD'
    )
    .to_pandas()
    .set_index('X_COORD', drop=True)
    .sort_index()
)
# The pivot names its columns after the Y_COORD values as strings, in order
# of first appearance. Zero-padding makes the alphabetical sort numeric.
z_unstacked = z_unstacked.rename(columns=lambda name: name.zfill(2))
data_columns = sorted(z_unstacked.columns)
z_unstacked = z_unstacked[data_columns]
z_unstacked_columns = data_columns

# numeric Y positions, so the y axis is a number axis rather than categories
y_values = [int(name) for name in data_columns]

fig = go.Figure(
    data=[
        go.Surface(
            x=z_unstacked.index,
            y=y_values,
            # Surface reads z as z[y position][x position]; the pivoted grid
            # has X in rows and Y in columns, so it is transposed here.
            z=z_unstacked.to_numpy().T
            )
        ]
)
fig.update_traces(
    contours_z=dict(
        show=True,
        usecolormap=True,
        highlightcolor="limegreen",
        project_z=True),
    )
fig.update_layout(
    title = (
        'Contour/Surface Map  --  SIMULATED DATA' +
        f'<br><sup>{test} [ {units} ]</sup>'
    ),
    scene_camera_eye=dict(x=1.87, y=0.88, z=-0.64),
    width=800, height=800,
    margin=dict(l=10, r=10, b=30, t=30),
    template='simple_white',
    scene = dict(
        xaxis = dict(
                backgroundcolor="rgb(200, 200, 230)",
                gridcolor="white",
                showbackground=True,
                zerolinecolor="white",),
        yaxis = dict(
            backgroundcolor="rgb(230, 200,230)",
            gridcolor="white",
            showbackground=True,
            zerolinecolor="white"),
        zaxis = dict(
            backgroundcolor="rgb(230, 230,200)",
            gridcolor="white",
            showbackground=True,
            zerolinecolor="white",),
        ),
)
fig.update_layout(scene = dict(
                xaxis_title='X_COORD',
                yaxis_title='Y_COORD',
                zaxis_title= test + ' ['+ units + ']'),)
fig.write_html(f'./HTML/Surface_Contour_Map_{test}.html')
fig.show()