### Check your current path


In [1]:
pwd

'c:\\Users\\Dell\\Documents\\reduce_redundant'

In [1]:
import os
import pandas as pd

# Specify the directory path that holds the raw log files
# (replace with your own folder, e.g. 'path/to/your/folder')
directory = 'c:\\Users\\Dell\\Documents\\reduce_redundant\\log_file'

# Get the list of all regular files (sub-folders are skipped).
# os.listdir returns entries in arbitrary order, so sort them to make the
# order of the merged rows reproducible between runs.
file_list = sorted(
    f for f in os.listdir(directory)
    if os.path.isfile(os.path.join(directory, f))
)

print('Check if below file match expectation')
print(file_list)

Check if below file match expectation
['mock1.txt', 'mock2.txt', 'mock3.txt']


In [31]:
def get_decode_raw_file(file_list, col_header:str='LOT', base_dir=None)->pd.DataFrame:
    """Parse whitespace-delimited log files and stack them into one DataFrame.

    Each file is scanned for the first line that contains ``col_header``
    (substring match). That line is split on whitespace to form the column
    names, and every following non-blank line is split the same way to form
    one data row. Lines before the header are ignored. All values are kept
    as strings.

    Args:
        file_list: Names of the files to read, relative to ``base_dir``.
        col_header: Text that identifies the header line.
        base_dir: Folder containing the files. When omitted, the
            module-level ``directory`` variable is used.

    Returns:
        The rows of all files concatenated in ``file_list`` order with a
        fresh 0..n-1 index. An empty DataFrame if ``file_list`` is empty.

    Raises:
        ValueError: If a file has no header line, or a data row does not
            have the same number of fields as the header.
    """
    if base_dir is None:
        base_dir = directory

    merging_list = []
    for file_name in file_list:
        file_location = os.path.join(base_dir, file_name)
        header = None
        content = []

        with open(file_location, 'r') as file:
            for line_no, line in enumerate(file, start=1):
                fields = line.split()
                if header is None:
                    # Still in the preamble: look for the header line
                    if col_header in line.strip():
                        header = fields
                    continue
                if not fields:
                    # Blank lines carry no data and would break the row shape
                    continue
                if len(fields) != len(header):
                    raise ValueError(
                        f'Length not match! {file_location} line {line_no}: '
                        f'expected {len(header)} fields, got {len(fields)}'
                    )
                content.append(fields)

        if header is None:
            raise ValueError(
                f'Header containing {col_header!r} not found in {file_location}'
            )
        # A header with no data rows yields an empty frame with the right columns
        merging_list.append(pd.DataFrame(content, columns=header))

    if not merging_list:
        # pd.concat refuses an empty list; return an empty frame instead
        return pd.DataFrame()
    return pd.concat(merging_list, ignore_index=True)

# Identifier columns that are carried through every processing stage
column_to_keep = ['LOT','waferID','DieX','DieY']

def group_data(df:pd.DataFrame, fail_map:dict)->pd.DataFrame:
    """Sum groups of raw fail columns into new fail-type columns.

    For every ``new_col -> [raw columns]`` entry in ``fail_map`` the raw
    columns are cast to int and summed row-wise into ``new_col``. Entries
    are processed in dict order, so a later entry may reference a column
    created by an earlier one.

    Args:
        df: Input table. It is not modified; the work is done on a copy.
        fail_map: Mapping of new column name to the list of existing
            column names to sum.

    Returns:
        A DataFrame holding the ``column_to_keep`` identifier columns
        followed by the new fail-type columns, in ``fail_map`` order.

    Raises:
        ValueError: If a column listed in ``fail_map`` is not present.
    """
    print(f'You dataframe column had {list(df.columns)}')

    # Work on a copy so the caller's DataFrame does not silently gain columns
    result = df.copy()
    for new_col, cols_to_sum in fail_map.items():
        for single_col in cols_to_sum:
            if single_col not in result.columns:
                raise ValueError(f'Column name {single_col} not in column name')
        result[new_col] = result[list(cols_to_sum)].astype(int).sum(axis=1)

    # Keep wanted columns; dict.fromkeys de-duplicates while preserving order,
    # unlike set(), whose iteration order varies between runs
    fin_col_to_keep = list(dict.fromkeys(list(column_to_keep) + list(fail_map)))
    return result[fin_col_to_keep]

def pivot_wide_table_to_long(df:pd.DataFrame, fail_map:dict)->pd.DataFrame:
    """Collapse duplicate dies, then unpivot the fail-type columns.

    Rows sharing the same ``column_to_keep`` identifiers are reduced to a
    single row using the group's ``first()`` values. The columns named by
    the keys of ``fail_map`` are then melted so each fail type becomes its
    own row, labelled in ``FAIL_TYPE`` with the number in ``VALUE``.
    """
    # One row per identifier combination
    one_row_per_die = df.groupby(column_to_keep).first()
    one_row_per_die = one_row_per_die.reset_index()

    # Wide -> long: one output row per (die, fail type)
    fail_columns = list(fail_map.keys())
    return one_row_per_die.melt(
        id_vars=column_to_keep,
        value_vars=fail_columns,
        var_name='FAIL_TYPE',
        value_name='VALUE',
    )

def backfill_dummy(df:pd.DataFrame, fail_map:dict, die_x_range=range(-10, 11), die_y_range=range(-10, 11))->pd.DataFrame:
    """Expand a long fail table to a full die grid, filling gaps with 0.

    The result has one row for every combination of DieX in
    ``die_x_range``, DieY in ``die_y_range``, each waferID present in
    ``df`` and each key of ``fail_map``. Combinations missing from ``df``
    get ``VALUE`` 0. Rows of ``df`` whose coordinates or FAIL_TYPE fall
    outside that grid are not part of the result, and when several rows
    share the same combination only the first one is kept.

    Args:
        df: Long table with DieX, DieY, waferID, FAIL_TYPE and VALUE
            columns. It is not modified. Other columns are dropped.
        fail_map: Mapping whose keys are the fail types to include.
        die_x_range: DieX coordinates of the grid (default -10..10).
        die_y_range: DieY coordinates of the grid (default -10..10).

    Returns:
        DataFrame with columns DieX, DieY, waferID, FAIL_TYPE, VALUE.
    """
    # Columns that together identify one cell of the grid
    columns_to_group = ['DieX', 'DieY','waferID','FAIL_TYPE']

    # Drop unwanted columns and copy, so the casts below never touch the
    # caller's DataFrame
    trimmed = df[columns_to_group + ['VALUE']].copy()

    # Coordinates arrive as strings from the parser; the grid is integer
    trimmed['DieX'] = trimmed['DieX'].astype(int)
    trimmed['DieY'] = trimmed['DieY'].astype(int)

    # Every combination of die position, wafer and fail type
    full_index = pd.MultiIndex.from_product(
        [
            list(die_x_range),
            list(die_y_range),
            list(trimmed['waferID'].unique()),
            list(fail_map),
        ],
        names=columns_to_group,
    )

    # reindex requires a unique index, so keep the first row per combination
    trimmed = trimmed.drop_duplicates(subset=columns_to_group)
    filled = trimmed.set_index(columns_to_group).reindex(full_index).reset_index()

    # Grid cells that had no source row get VALUE 0
    filled['VALUE'] = filled['VALUE'].fillna(0)

    return filled

def gen_1st_level_to_result_checking(df:pd.DataFrame):
    """Spread the long fail table into one column per DieX value.

    Each output row is a (waferID, DieY, FAIL_TYPE) combination and each
    DieX value becomes a column holding the corresponding VALUE.
    """
    row_keys = ['waferID', 'DieY', 'FAIL_TYPE']
    wide = pd.pivot_table(df, index=row_keys, columns='DieX', values='VALUE')
    wide = wide.reset_index()

    # Remove the leftover 'DieX' label from the column axis
    wide.columns.name = None
    return wide


In [3]:
# Parse every file in file_list, using the line that contains 'LOT' as the header row
first_level_df = get_decode_raw_file(file_list,'LOT')
first_level_df.head()

Unnamed: 0,LOT,waferID,DieX,DieY,FailBit,SB,HTB,VTB,BL,partialBL,SBL,BLK,cross,others
0,TKB,TKB.02,1,1,1,30000,5000,30,30,20,1,32,12,32
1,TKB,TKB.02,1,1,1,30000,5000,30,30,0,23,4,2,32
2,TKB,TKB.02,2,2,1,30000,5000,30,30,0,23,4,2,32
3,TKB,TKB.02,3,5,1,30000,5000,30,300,0,23,4,2,32
4,TKB,TKA.01,1,1,1,35000,5000,35,30,20,1,32,12,32


In [4]:
# Map each new fail-type column to the raw columns that are summed to build it
fail_type_map = {
    'TYPE1': ['SB', 'HTB'], 'TYPE2':['VTB','BL']
}

# Add the summed fail-type columns and keep only the identifier columns plus the new ones
second_level_data = group_data(first_level_df, fail_type_map)
second_level_data.head()

You dataframe column had ['LOT', 'waferID', 'DieX', 'DieY', 'FailBit', 'SB', 'HTB', 'VTB', 'BL', 'partialBL', 'SBL', 'BLK', 'cross', 'others']


Unnamed: 0,DieX,DieY,LOT,waferID,TYPE2,TYPE1
0,1,1,TKB,TKB.02,60,35000
1,1,1,TKB,TKB.02,60,35000
2,2,2,TKB,TKB.02,60,35000
3,3,5,TKB,TKB.02,330,35000
4,1,1,TKB,TKA.01,65,40000


In [25]:
# Collapse duplicate dies and melt the fail-type columns into FAIL_TYPE / VALUE rows
long_table_raw = pivot_wide_table_to_long(second_level_data, fail_type_map)
# Expand to the full DieX/DieY grid, with VALUE 0 wherever there is no data
long_table = backfill_dummy(long_table_raw,fail_type_map)
long_table.head()

Unnamed: 0,DieX,DieY,waferID,FAIL_TYPE,VALUE
0,-10,-10,TKA.01,TYPE1,0.0
1,-10,-10,TKA.01,TYPE2,0.0
2,-10,-10,TKB,TYPE1,35000.0
3,-10,-10,TKB,TYPE2,60.0
4,-10,-10,TKB.02,TYPE1,0.0


In [75]:
# Pivot to one column per DieX value, then order the rows by fail type and DieY
result_check_1st_raw = gen_1st_level_to_result_checking(long_table)
result_check_1st_raw = result_check_1st_raw.sort_values(by=['FAIL_TYPE', 'DieY'])

In [76]:
# Display the sorted wide table
result_check_1st_raw

Unnamed: 0,waferID,DieY,FAIL_TYPE,-10,-9,-8,-7,-6,-5,-4,...,1,2,3,4,5,6,7,8,9,10
0,TKA.01,-10,TYPE1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42,TKB,-10,TYPE1,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,...,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0
84,TKB.02,-10,TYPE1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,TKA.01,-9,TYPE1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
44,TKB,-9,TYPE1,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,...,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0,35000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,TKB,9,TYPE2,60.0,60.0,60.0,60.0,60.0,60.0,60.0,...,60.0,60.0,330.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0
123,TKB.02,9,TYPE2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41,TKA.01,10,TYPE2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
83,TKB,10,TYPE2,60.0,60.0,60.0,60.0,60.0,60.0,60.0,...,60.0,60.0,330.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0


In [77]:
# Set FAIL_TYPE and DieY as index.
# Use the non-inplace form: an in-place set_index would also modify
# result_check_1st_raw (df would be the same object), so re-running this
# cell would fail because the columns had already moved into the index.
df = result_check_1st_raw.set_index(['FAIL_TYPE','DieY'])

# Group by waferID
grouped_df = df.groupby('waferID')

# One frame per wafer with the now-redundant waferID column removed
per_wafer = {name: group.drop(columns='waferID') for name, group in grouped_df}

# Place the wafers side by side. Concatenating a dict puts each wafer ID on
# the top level of the column MultiIndex, and doing it in a single call
# avoids re-copying a growing DataFrame on every loop iteration.
combined_df = pd.concat(per_wafer, axis=1) if per_wafer else pd.DataFrame()

combined_df = combined_df.reset_index()

In [81]:
# Save the side-by-side per-wafer table; the row index is not written
combined_df.to_csv('grouped.csv', index=False)

In [13]:
# Rearrange columns
# existing_cols = pivot_df.columns.tolist()
# cols_to_rearrange = list(map(str, range(-10, 11)))
# cols = ['LOT', 'waferID', 'DieY', 'FAIL_TYPE'] + [col for col in cols_to_rearrange if col in existing_cols]
# pivot_df = pivot_df[cols]

In [29]:
# NOTE(review): pivot_df is not assigned at notebook level anywhere in the
# visible cells (it only exists as a local inside gen_1st_level_to_result_checking
# and in the commented-out cell above), so this line presumably raises NameError
# on a fresh run. Possibly result_check_1st_raw was intended — confirm.
pivot_df.to_csv('raw_check.csv', index=False)

In [32]:
import pandas as pd
import numpy as np

# Die coordinates used on both axes: -10 through 10, zero included
die_range = range(-10, 11)

# One mock record per (DieX, DieY) pair, with DieX as the outer loop
data = [
    {
        'LOT': 'TKB',
        'waferID': 'TKB',
        'DieX': die_x,
        'DieY': die_y,
        'FailBit': 1,
        'SB': 30000,
        'HTB': 5000,
        'VTB': 300 if die_x == 3 else 30,  # Column DieX == 3 stands out as an example variation
        'BL': 30,
        'partialBL': np.random.choice([0, 20]),  # Either 0 or 20, picked at random
        'SBL': np.random.choice([1, 23]),        # Either 1 or 23, picked at random
        'BLK': np.random.choice([4, 32]),        # Either 4 or 32, picked at random
        'cross': np.random.choice([2, 12]),      # Either 2 or 12, picked at random
        'others': 32,
    }
    for die_x in die_range
    for die_y in die_range
]

# Build the table and write it out without the row index
df = pd.DataFrame(data)
df.to_csv('mock3.csv', index=False)
