## Specify Inputs

In [85]:
#Define Batches
batches = "UCB003A[K-N]"#|UCB003AE-US00-24|UCB003AF-US00-24|UCB003AF-US00-26|UCB003AG-US00-02|UCB003AG-US00-15|UCB003AG-US00-34|UCB003AG-US00-35|"
#batches = "UCB004AA"

# Define Layers
Layers = 3  # Number of samples in each group

# Specify any cracked cells
CrackedCells = [
    "UCB003AL-US00-23", "UCB003AL-US00-40", "UCB003AN-US00-19"
]

MissingCells = ["UCB003AL-US00-77", "UCB003AL-US00-63", "UCB003AL-US00-26"]

#Show only yielded cells
ShowOnlyYield = "yes" #"yes" only shows cells that yielded according to datahub. Change this variable to show "non-yielded" cells 

## Import Modules and Functions

In [86]:
#import modules used in this script
import pandas as pd
import numpy as np
import genealogy_v2
import query_tray_samples_V4 as query_tray_samples
import unit_cell_electrical_yield_and_metrics_v2 as uceym
import warnings
from datetime import datetime
from qsdc.client import Client
#assert version("qs-data-client") >= "0.2.15", "qsdc version must be greater than 0.2.15"

#Remove warning messages
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
#establish clients 
qs_client = Client()
conn = qs_client.get_mysql_engine()

# !pip install --upgrade qs-data-client  ## RUN THIS IF YOU NEED TO UPDATE qs-data-client
#Run in terminal if not already logged in:     gcloud auth application-default login

## Pull Cell Data From Datahub

The two datasets that we care about are:
* `MFG-60L-UC-CTQ`. This should Critical to Quality metrics per the <u>[specs](https://qs-cloud.atlassian.net/wiki/spaces/HAB/pages/1782611969/Specifications+A1+Cell+Tiering+-+Post+UCT)</u> provided by Jordi. It contains columns `disposition` and `failure_modes` which should be pretty self-explanatory.
* `MFG-60L-UC-MR`. This table should contain the same number of rows (one per cell) as the CTQ table, but the `disposition_mr` and `failure_modes_mr` columns will be replaced with values corresponding to the manual review data. If no manual review was done, the standard metro `diposition` and `failure_modes` will be reflected.This makes it much easier to do 1:1 comparisons across the datasets. We can adjust to include only the manually reviewed cells if that's the preference.


In [87]:
## Query Data from Datahub

#Pull cell metrology data from datahub, both standard/auto metrology and manual review
dfctq = qs_client.data_hub.get_dataset(dataset = 'MFG-60L-UC-CTQ') ## standard metro review of unit cells
dfmr = qs_client.data_hub.get_dataset(dataset = 'MFG-60L-UC-MR') ## manual review of unit cells
dfc = qs_client.data_hub.get_dataset(dataset = 'MFG-60L-UNIT-CELL-TEST-CYCLE') ##electrical test data of unit cells
dmlg = qs_client.data_hub.get_dataset(dataset = 'MFG-80L-6L-PRODUCTION') ##multilayer info (ML6_id)
#dfc = dfc[dfc['US_id'].str.contains(batches, na=False)]
cols = [
    "US_id",
    "TestCycleStart_datetime_first",
    "TestCycleStart_datetime",
    "UCT_Version",
    "idtest_recipe",
    "RunIndex",
    "CycleIndex",
    "dvdt",
    "recipe_dvdt_range",
    "CeilingHoldTime",
    "CE",
    "CapacityChargeFraction",
    "CeilingRestVoltage",
    "AMSDcCapacity",
    "CycleFailure",
    "AnyFailure",
    "MedDcASR",
    "DischargeCapacity"
]
dfc = dfc[dfc['US_id'].str.contains(batches, na=False)].sort_values(["RunIndex","CycleIndex"])[cols]

# Keep cells from batches of interest and remove those that failed screening
filtered_dfctq = dfctq[dfctq['US_id'].str.contains(batches, na=False)] #keep cells that are from the batches of interest
filtered_dfmr = dfmr[dfmr['US_id'].str.contains(batches, na=False)] #keep only cells that are from the batches of interest

if ShowOnlyYield == "yes":
    yielded_dfctq = filtered_dfctq[filtered_dfctq['unit_cell_test_yield'] == 1] #keep cells that yielded
    yielded_dfmr = filtered_dfmr[filtered_dfmr['unit_cell_test_yield'] == 1] #keep cells that yielded

  return pd.read_csv(bytesIO(blob_bytes))


## Generate Cell Status/Tier Spreadsheet

### Pull Automated and Manual Review Data into one dataframe

In [88]:
## Overwrite automated results with manual review results
# First, merge the DataFrames on 'US_id' to align rows
merged_df = yielded_dfctq.merge(yielded_dfmr[['US_id', 'edge_thickness_tier_us_mr', 'A1_anode_tier_top_us_mr', 'A1_anode_tier_bottom_us_mr',
                                              'cathode_alignment_custom_model_tier_us_mr', 'median_contour_catholyte_pct_us_mr', 'max_f2f_distance_us','disposition_mr', 'failure_modes_mr']], on='US_id', how='left')
# Then, overwrite 'edge_thickness_tier_us' in 'filtered_dfctq' where 'edge_thickness_tier_us_mr' has a value
merged_df['edge_thickness_tier_us'] = merged_df['edge_thickness_tier_us_mr'].combine_first(merged_df['edge_thickness_tier_us'])
# Then, overwrite 'A1_anode_tier_top_us' in 'filtered_dfctq' where 'A1_anode_tier_top_us_mr' has a value
merged_df['A1_anode_tier_top_us'] = merged_df['A1_anode_tier_top_us_mr'].combine_first(merged_df['A1_anode_tier_top_us'])
# Then, overwrite 'A1_anode_tier_bottom_us' in 'filtered_dfctq' where 'A1_anode_tier_bottom_us_mr' has a value
merged_df['A1_anode_tier_bottom_us'] = merged_df['A1_anode_tier_bottom_us_mr'].combine_first(merged_df['A1_anode_tier_bottom_us'])
# Then, overwrite 'A1_anode_tier_bottom_us' in 'filtered_dfctq' where 'A1_anode_tier_bottom_us_mr' has a value
merged_df['cathode_alignment_custom_model_tier_us'] = merged_df['cathode_alignment_custom_model_tier_us_mr'].combine_first(merged_df['cathode_alignment_custom_model_tier_us'])
#merged_df.loc[merged_df['cathode_alignment_custom_model_tier_us_mr'].notna(), 'cathode_alignment_custom_model_tier_us'] = \
    #merged_df['cathode_alignment_custom_model_tier_us_mr']

# Then, overwrite 'A1_anode_tier_bottom_us' in 'filtered_dfctq' where 'A1_anode_tier_bottom_us_mr' has a value
merged_df['median_contour_catholyte_pct_us'] = merged_df['median_contour_catholyte_pct_us_mr'].combine_first(merged_df['median_contour_catholyte_pct_us'])
# Then, overwrite 'disposition_us' in 'disposition_mr' has a value
merged_df['disposition'] = merged_df['disposition_mr'].combine_first(merged_df['disposition'])
# Then, overwrite 'disposition_us' in 'disposition_mr' has a value
merged_df['failure_modes'] = merged_df['failure_modes_mr'].combine_first(merged_df['failure_modes'])
# Drop the 'edge_thickness_tier_us_mr' column if you don't need it
dfctq_updated = merged_df.drop(columns=['edge_thickness_tier_us_mr', 'A1_anode_tier_top_us_mr', 'A1_anode_tier_bottom_us_mr',
                                              'cathode_alignment_custom_model_tier_us_mr', 'median_contour_catholyte_pct_us_mr', 'disposition_mr','failure_modes_mr' ])


#Update Final Tier of Cells
conditions = [
    dfctq_updated['disposition'] == 'Tier 1',
    dfctq_updated['disposition'] == 'Tier 2',
    dfctq_updated['disposition'] == 'Fail',
    dfctq_updated['disposition'] == 'Scrap',
    dfctq_updated['disposition'] == 'Missing Data',
]
choices = ['1', '2', '3','Scrapped', 'TBD']
dfctq_updated['Tier'] = np.select(conditions, choices)

### Assign Tiering Metrics, Cell Status, Electrical Metrics, and Tray Location

In [101]:
### Fill in tiering metrics based on data pulled from datahub

## Rename Spreadsheet and sample/batch columns
CellTiering = dfctq_updated[['US_id']].rename(columns={'US_id': 'Cell ID'}) 
CellTiering['Batch'] = dfctq_updated['US_process_flow'] #Create Tiering Spreadsheet
# Create columns for final spreadsheet
new_columns = ["Cell Status", "Cell Tier", "Edge Wetting", "Thickness", "Alignment", "Anode","Film-to-Film Distance"]
for col in new_columns:
    CellTiering[col] = np.nan


##Update Cell Tiering metrics in final spreadsheet
# Merge the dfctq_updated with the CellTiering
merged_df = CellTiering.merge(
    dfctq_updated[['US_id', 'Tier', 'median_contour_catholyte_pct_us', 'edge_thickness_tier_us', 'center_normalized_0_5mm_eroded_rect_outside_median_us', 
                   'cathode_alignment_custom_model_tier_us', 'A1_anode_tier_top_us','A1_anode_tier_bottom_us', 'max_f2f_distance_us']],
    left_on='Cell ID',
    right_on='US_id',
    how='left'
)
# Update final 'Cell Tier' columns with data from dfctq_updated
merged_df['Cell Tier'] = merged_df['Tier']
#Update 'Edge Wetting' column
EWconditions = [
    merged_df['median_contour_catholyte_pct_us'] < 80,
    (merged_df['median_contour_catholyte_pct_us'] >= 80) & (merged_df['median_contour_catholyte_pct_us'] <= 98),
    merged_df['median_contour_catholyte_pct_us'] > 98
]
EWchoices = [3, 2, 1]
merged_df['Edge Wetting'] = np.select(EWconditions, EWchoices)
#Update 'Thickness' Column
merged_df['Thickness'] = merged_df['edge_thickness_tier_us']
#Update Alignment' Column
merged_df['Alignment'] = merged_df['cathode_alignment_custom_model_tier_us']
#Update 'Anode' Column
merged_df['Anode'] = np.maximum(merged_df['A1_anode_tier_top_us'], merged_df['A1_anode_tier_bottom_us'])
#Update 'Film-to-Film Distance Column
#merged_df['Film-to-Film Distance'] = 'Low'  # Default value
merged_df.loc[merged_df['max_f2f_distance_us'] > 0.40, 'Film-to-Film Distance'] = 'High'
# Drop the extra columns from dfctq_updated
CellTiering = merged_df.drop(columns=['US_id', 'Tier', 'edge_thickness_tier_us','cathode_alignment_custom_model_tier_us',
                                      'A1_anode_tier_top_us','A1_anode_tier_bottom_us','max_f2f_distance_us'])

## Asign Final Tier to every cell
# Select columns that we are considering for tiering
columns_to_consider = [
    'Alignment',
    'Anode',
    'Thickness', 
    'Edge Wetting'
]
CellTiering['Alignment'] = CellTiering['Alignment'].fillna(0)  # Replace NaN with 0 (or another placeholder)
CellTiering['Alignment'] = CellTiering['Alignment'].astype(int) #convert to integer
CellTiering['Anode'] = CellTiering['Anode'].astype(int) #convert to integer
CellTiering['Thickness'] = CellTiering['Thickness'].astype(int) #convert to integer
CellTiering['Edge Wetting'] = CellTiering['Edge Wetting'].astype(int) #convert to integer
#Fill out Cell Tier
CellTiering['Cell Tier'] = CellTiering[columns_to_consider].max(axis=1) #Update cell tier based on avaialable tiering metrics
missing_or_zero = (CellTiering[columns_to_consider].isnull() | (CellTiering[columns_to_consider] == 0)).any(axis=1) #Identify cells that are missing data
CellTiering.loc[missing_or_zero, 'Cell Tier'] = 0 # Update 'Cell Status' for these rows


##Update 'Cell Status' if yielded cell is on already in a multilayer pouch, has high F2F Misalignment, or is Cracked/Scrapped
#Update Cell if it is already in ML
gen = genealogy_v2.get_genealogy_2L('APD|ML|UC|QSC', conn) # Query dataframe from database
us_to_ps_mapping = dict(zip(gen['2L_cell_id'], gen['6L_cell_id'])) # Map the 'US_id' to '6L_cell_id' using '2L_cell_id' in the 'gen' dataframe
CellTiering['Cell Status'] = CellTiering['Cell ID'].map(us_to_ps_mapping)
#Update Cell if it has high F2F Misalignment
CellTiering.loc[CellTiering['Film-to-Film Distance'] == 'High', 'Cell Status'] = 'High F2F Alignment' 
#Update Cell if it is cracked
CellTiering.loc[CellTiering['Cell ID'].isin(CrackedCells), 'Cell Status'] = 'Cracked/Scrapped'
#Update Cell if it is missing
CellTiering.loc[CellTiering['Cell ID'].isin(MissingCells), 'Cell Status'] = 'Missing/Lost'
# Identify rows where any of the specified columns are missing or contain 0
columns_to_check = ['Edge Wetting', 'Thickness', 'Alignment', 'Anode']
missing_or_zero = (CellTiering[columns_to_check].isnull() | (CellTiering[columns_to_check] == 0)).any(axis=1)
CellTiering.loc[missing_or_zero, 'Cell Status'] = 'Missing Tiering Metrics' # Update 'Cell Status' for these rows
#Fill in default status of 'Waiting'
CellTiering['Cell Status'] = CellTiering['Cell Status'].fillna('Waiting')

## Update cell electrical metrics, currently using python scripts to query but will need to transition to datahub using dataframe dfc
df_electrical_yield_metrics = uceym.get_electrical_yield_and_metrics(CellTiering["Cell ID"])
df_electrical_yield_metrics = df_electrical_yield_metrics[['US_id','MedDischargeASR_1C', 'DischargeCapacity_Co3', 'dVdt_delta_fastcharge']]
df_electrical_yield_metrics = df_electrical_yield_metrics.rename(columns={'US_id': 'Cell ID'}) #Create Tiering Spreadsheet
CellTiering = pd.merge(CellTiering, df_electrical_yield_metrics, left_on='Cell ID', right_on='Cell ID', how='left')


## Update tray location ##
sample_names_group = CellTiering["Cell ID"]
df_tray = query_tray_samples.get_sample_tray(sample_names_group)
CellTiering = pd.merge(CellTiering, df_tray[['sample_name', 'tray_id', 'row_index', 'col_index', 'modified']], left_on='Cell ID', right_on='sample_name', how='left')
CellTiering = CellTiering.drop(columns=['sample_name', 'modified'])

#Update if yielded cell is on 2L Reliability
#data = data[["Cell ID","Reliability Test Count"]]
# Merge the two dataframes on the 'Cell ID' column
#merged_df = pd.merge(CellTiering, data, on='Cell ID', how='right')
# Update 'Cell Status' where 'Reliability Test Count' is 1 in merged dataframe
#merged_df.loc[merged_df['Reliability Test Count'] == 1, 'Cell Status'] = '2L Reliability'
# Drop the added columns to keep only the original columns from YieldedCells
#YieldedCells = merged_df.drop(columns=['Reliability Test Count'])



### Save spreadsheet showing 'Cell Status' and 'Tier' for all cells in the batches

In [102]:
##save Tiering spreadsheet
current_date = datetime.now().strftime('%Y-%m-%d')
output_name = f'{current_date}_{batches}_Tier.xlsx'
CellTiers = CellTiering.copy()
CellTiers = CellTiers.drop(columns=['median_contour_catholyte_pct_us', 'center_normalized_0_5mm_eroded_rect_outside_median_us']) #drop rows that we don't need to show in the spreadsheet
CellTiers['Cell Tier'] = pd.to_numeric(CellTiers['Cell Tier'], errors='coerce')
CellTiers = CellTiers.sort_values(by=['Cell Tier', 'MedDischargeASR_1C'], ascending=[True, True])
CellTiers.to_excel(output_name, index=False)

## Group Eligible Cells

### Filter cells that are eligible for grouping

In [110]:
## Format and prepare dataframes for TOPSIS Rank and Grouping

#Rename 
TieredCells = CellTiering

#Keep Cells that have not been grouped
TieredCells = TieredCells[TieredCells["Cell Status"] == "Waiting"]

#Keep Tier 1 Cells to commence pairing of Tier 1 groups
TieredOneCells = TieredCells[TieredCells['Cell Tier'] == 1].copy()
TieredTwoCells = TieredCells[TieredCells['Cell Tier'] == 2].copy()
TieredThreeCells = TieredCells[TieredCells['Cell Tier'] == 3].copy()

# Remove duplicate rows across all columns
TieredOneCells = TieredOneCells.drop_duplicates()
TieredTwoCells = TieredTwoCells.drop_duplicates()
TieredThreeCells = TieredThreeCells.drop_duplicates()

### Tier 1: Calculate Cell Score and Ranking using TOPSIS Model

In [127]:
##Calculate Cell Score and Ranking for Tier 1 Cells using TOPSIS method
# Define the criteria and their corresponding optimization directions
criteria = ['median_contour_catholyte_pct_us', 'center_normalized_0_5mm_eroded_rect_outside_median_us', 'MedDischargeASR_1C', 'DischargeCapacity_Co3'] 
weights = [0.15, 0.25, 0.35, 0.25]  # Equal weights (adjust if needed)
optimization_directions = ['High', 'Proximity', 'Low', 'High']  # 'Proximity' for 'Thickness'
target_criteria = ['center_normalized_0_5mm_eroded_rect_outside_median_us']  # Criteria optimized for proximity
targets = [1.0]  # Target value for 'Thickness'


def topsis_with_target_Tier(df, criteria, weights, optimization_directions, target_criteria=None, targets=None):
    
    # Ensure numeric data for criteria
    df[criteria] = df[criteria].apply(pd.to_numeric, errors='coerce')

    normalized_matrix = df[criteria].copy()
    
    # Step 1: Normalize the Decision Matrix
    for col in criteria:
        if target_criteria and col in target_criteria:
            # Calculate absolute deviation from target
            target = targets[target_criteria.index(col)]
            normalized_matrix[col] = np.abs(df[col] - target)  # Deviation
        else:
            # Standard normalization
            normalized_matrix[col] = normalized_matrix[col] / np.sqrt((normalized_matrix[col]**2).sum())
    
    # Step 2: Weight the Criteria
    weighted_matrix = normalized_matrix * weights
    
    # Step 3: Identify Ideal and Negative Ideal Solutions
    ideal_solution = []
    negative_ideal_solution = []
    for col, direction in zip(criteria, optimization_directions):
        if target_criteria and col in target_criteria:
            # For deviation-based criteria, minimize the deviation
            ideal_solution.append(weighted_matrix[col].min())
            negative_ideal_solution.append(weighted_matrix[col].max())
        else:
            if direction == 'High':
                ideal_solution.append(weighted_matrix[col].max())
                negative_ideal_solution.append(weighted_matrix[col].min())
            elif direction == 'Low':
                ideal_solution.append(weighted_matrix[col].min())
                negative_ideal_solution.append(weighted_matrix[col].max())
    
    ideal_solution = np.array(ideal_solution)
    ideal_solution = np.array([
        weighted_matrix.iloc[:, idx].max() if direction == "maximize" else weighted_matrix.iloc[:, idx].min()
        for idx, direction in enumerate(optimization_directions)
    ])

    negative_ideal_solution = np.array(negative_ideal_solution)
    negative_ideal_solution = np.array([
        weighted_matrix.iloc[:, idx].min() if direction == "maximize" else weighted_matrix.iloc[:, idx].max()
        for idx, direction in enumerate(optimization_directions)
    ])
    
    # Step 4: Calculate Distances
    distances_to_ideal = np.sqrt(((weighted_matrix - ideal_solution)**2).sum(axis=1))
    distances_to_negative = np.sqrt(((weighted_matrix - negative_ideal_solution)**2).sum(axis=1))
    
    # Step 5: Calculate TOPSIS Score
    topsis_score = distances_to_negative / (distances_to_ideal + distances_to_negative)
    return topsis_score

# Calculate TOPSIS scores
TieredOneCells['total_rank_score'] = topsis_with_target_Tier(TieredOneCells, criteria, weights, optimization_directions, target_criteria, targets)
# Rank alternatives by TOPSIS score (higher score is better)
TieredOneCells['Rank'] = TieredOneCells['total_rank_score'].rank(ascending=False)
# Arrange Cells by best ranking at the top
TieredOneCells.sort_values(by='Rank', ascending=True, inplace=True)


### Tier 1: Group Cells using ASR Criteria

In [128]:
df = TieredOneCells
df = df.reset_index(drop=True)
df['Rank'] = (df.index + 1).astype(int)  # Add 1-based rank as integers
remainder = df.copy()  # Start with a copy of the input DataFrame
retries = 0  # Track retries for reprocessing the remainder

n = Layers

#Initialize variable and dataframe 
groupnumber = 1  # Initialize group number
df_final = pd.DataFrame()  # Initialize an empty DataFrame for the final result
grouped_list = [] # List to collect all the groups for final concatenation
cellstart = 0

# Keep track of processed indices
processed_indices = set()

while len(remainder) >= n + cellstart:
    # Find the next candidate group by slicing remainder
    candidate_group = remainder.iloc[cellstart:cellstart+n].copy()
    print(f"Processing group starting at rank {candidate_group['Rank'].tolist()}: {candidate_group['Cell ID'].tolist()}")

    # Calculate criteria for the group
    asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
    asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()

    print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")

    # Check if group satisfies criteria and replace cells if necessary
    if asr_range >= 2.0:
        # Replace the highest ASR cell
        max_idx = candidate_group['MedDischargeASR_1C'].idxmax()
        problem_cell_id = candidate_group.loc[max_idx, 'Cell ID']
        print(f"ASR range is too high at {asr_range}, replacing problem cell: {problem_cell_id}")
        # Replace the highest ASR cell here (you can replace it based on your logic)
        i_range = 0

        while asr_range >=2.0:
            if n + i_range >= len(remainder):  # Check if the index is valid
                print(f"No more rows in remainder to replace {problem_cell_id}.")
                cellstart = cellstart +1
                break
            candidate_group.loc[max_idx] = remainder.iloc[n + i_range] # Replace the highest ASR cell
            replacement_cell_id = remainder.iloc[n + i_range]['Cell ID']  # Cell ID of the replacement cell
            print(f"Replacing {problem_cell_id} with {replacement_cell_id}")

            # Check for duplicates in 'Cell ID' within the candidate group
            duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
            while duplicate_count > 0:
                print(f"Found {duplicate_count} duplicate(s) of 'Cell ID' in candidate_group. Incrementing i_range.")
                i_range += 1  # Increment if there are duplicates
                if n + i_range < len(remainder):
                    candidate_group.loc[max_idx] = remainder.iloc[n + i_range]
                    replacement_cell_id = remainder.iloc[n + i_range]['Cell ID']  # Replacement cell ID
                    print(f"Replacing {problem_cell_id} with {replacement_cell_id}")
                    duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
                else:
                    break
            asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
            asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()
            print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")
            i_range = i_range+1

    if asr_mean_min >= 0.5:
        # Replace the lowest ASR cell
        min_idx = candidate_group['MedDischargeASR_1C'].idxmin()
        problem_cell_id = candidate_group.loc[min_idx, 'Cell ID']
        print(f"Minimum ASR is too low at difference of {asr_mean_min}, replacing problem cell: {problem_cell_id}")
        # Replace the lowest ASR cell here (you can replace it based on your logic)
        i_mean_min = 0

        while asr_mean_min >= 0.5:
            if n + i_mean_min >= len(remainder):  # Check if the index is valid
                print(f"No more rows in remainder to replace {problem_cell_id}.")
                cellstart = cellstart +1
                break
            candidate_group.loc[min_idx] = remainder.iloc[n + i_mean_min]
            replacement_cell_id = remainder.iloc[n + i_mean_min]['Cell ID']  # Replacement cell ID
            print(f"Replacing {problem_cell_id} with {replacement_cell_id}")

            # Check for duplicates in 'Cell ID' within the candidate group
            duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
            while duplicate_count > 0:
                print(f"Found {duplicate_count} duplicate(s) of 'Cell ID' in candidate_group. Incrementing i_mean_min.")
                i_mean_min += 1  # Increment if there are duplicates
                if n + i_mean_min < len(remainder):
                    candidate_group.loc[min_idx] = remainder.iloc[n + i_mean_min]
                    replacement_cell_id = remainder.iloc[n + i_mean_min]['Cell ID']  # Replacement cell ID
                    print(f"Replacing {problem_cell_id} with {replacement_cell_id}")
                    duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
                else:
                    break
            
            asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
            asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()
            print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")
            i_mean_min = i_mean_min+1


    if asr_range < 2.0 and asr_mean_min < 0.5:
        print(f"Established group starting at rank {candidate_group['Rank'].tolist()}: {candidate_group['Cell ID'].tolist()}")
        # Assign the group number
        candidate_group['Group Number'] = groupnumber
        # Add the rows to the final list
        grouped_list.append(candidate_group)
        # Drop rows in "remainder" where "Cell ID" matches any in "candidate_group"
        remainder = remainder[~remainder['Cell ID'].isin(candidate_group['Cell ID'])]
        # Update processed indices
        processed_indices.update(candidate_group.index)
        # Update group number for the next group
        groupnumber += 1
        # Print status
        print(f"Rows in remainder after processing: {len(remainder)}")


# After loop, concatenate all the groups into a single DataFrame
# Check if grouped_list contains valid DataFrames
if not grouped_list:
    print("No valid groups were formed. Creating an empty DataFrame.")
    df_final = pd.DataFrame()  # Handle case where no groups are created
else:
    df_final = pd.concat(grouped_list, ignore_index=True)
# Add 'Remainder' label to remaining rows, if applicable
if not remainder.empty:
    remainder['Group Number'] = 'Remainder'
    df_final = pd.concat([df_final, remainder], ignore_index=True)


if not df_final.empty:
    # Reset the index
    df_final = df_final.reset_index(drop=True)    
    # Drop unnecessary columns
    columns_to_drop = ['median_contour_catholyte_pct_us', 'center_normalized_0_5mm_eroded_rect_outside_median_us', 'total_rank_score']
    df_final = df_final.drop(columns=columns_to_drop, errors='ignore')  # Use 'errors="ignore"' to avoid errors if columns are missing
    # Move 'Group Number' to the first column
    df_final = df_final[['Group Number'] + [col for col in df_final.columns if col != 'Group Number']]


# Format final DataFrame
#df_final = df_final.reset_index(drop=True)
#df_final = df_final.drop(columns=['median_contour_catholyte_pct_us', 'center_normalized_0_5mm_eroded_rect_outside_median_us']) #drop rows that we don't need to show in the spreadsheet
#df_final = df_final[['Group Number'] + [col for col in df_final.columns if col != 'Group Number']] #move 'Group Number' to first column

# Print the final DataFrame
print(f"Final Tier 1 DataFrame has {df_final.shape[0]} rows.")

# Save the ranked cells to an Excel file
current_date = datetime.now().strftime('%Y-%m-%d')
output_name = f'{current_date}_{batches}_Tier_1_Grouped.xlsx'
df_final.to_excel(output_name, index=False)

Processing group starting at rank [1, 2, 3]: ['UCB003AN-US01-11', 'UCB003AM-US00-35', 'UCB003AN-US01-31']
ASR range: 0.19709999999999894, ASR mean-min: 0.0813666666666677
Established group starting at rank [1, 2, 3]: ['UCB003AN-US01-11', 'UCB003AM-US00-35', 'UCB003AN-US01-31']
Rows in remainder after processing: 3
Processing group starting at rank [4, 5, 6]: ['UCB003AN-US01-14', 'UCB003AL-US00-70', 'UCB003AM-US00-07']
ASR range: 3.3887, ASR mean-min: 1.9090333333333334
ASR range is too high at 3.3887, replacing problem cell: UCB003AM-US00-07
No more rows in remainder to replace UCB003AM-US00-07.
Minimum ASR is too low at difference of 1.9090333333333334, replacing problem cell: UCB003AN-US01-14
No more rows in remainder to replace UCB003AN-US01-14.
Final Tier 1 DataFrame has 6 rows.


### Tier 1: Arrange cells in each group by "Pyramid" order of ASR

In [136]:
# Function to rearrange rows in pyramid order within each group
def pyramid_order(group):
    # Sort values within the group
    sorted_group = group.sort_values(by='MedDischargeASR_1C', ascending=True).reset_index(drop=True)
    
    # Rearrange in pyramid order
    pyramid = []
    while len(sorted_group) > 0:
        if len(pyramid) % 2 == 0:  # Add largest remaining value to the middle
            pyramid.append(sorted_group.iloc[-1])
            sorted_group = sorted_group.iloc[:-1]
        else:  # Add smallest remaining value to the sides
            pyramid.insert(0, sorted_group.iloc[0])
            sorted_group = sorted_group.iloc[1:]
    
    # Reconstruct DataFrame
    pyramid_df = pd.DataFrame(pyramid).reset_index(drop=True)
    
    # Update Group Number to include position in group
    pyramid_df['Group Number'] = group['Group Number'].iloc[0]  # Keep the group number
    pyramid_df['Group Number'] = pyramid_df['Group Number'].astype(str) + "." + (pyramid_df.index + 1).astype(str)
    
    return pyramid_df

if not df_final.empty:
    # Apply the function to each group
    df_pyramid = df_final.groupby('Group Number', group_keys=False).apply(pyramid_order)

    # Reset the index if needed
    df_pyramid = df_pyramid.reset_index(drop=True)

    # Save the ranked cells to an Excel file
    current_date = datetime.now().strftime('%Y-%m-%d')
    output_name = f'{current_date}_{batches}_Tier_1_Grouped_Pyramid_Order.xlsx'
    df_pyramid.to_excel(output_name, index=False)
else:
    print("No Tier 1 dataframe to rearrange")


### Tier 2: Calculate Cell Score and Ranking using TOPSIS Model

In [130]:
# Define scoring rules as dictionaries
alignment_scores = {1: 0, 2: 0}  # 1->0, 2->0
anode_scores = {1: 0, 2: 0.39}   # 1->0, 2->0.39
thickness_scores = {1: 0, 2: 0}  # 1->0, 2->0
edge_wetting_scores = {1: 0, 2: 0.06}  # 1->0, 2->0.06

# Calculate individual scores for each column
TieredTwoCells['AlignmentScore'] = TieredTwoCells['Alignment'].map(alignment_scores)
TieredTwoCells['AnodeScore'] = TieredTwoCells['Anode'].map(anode_scores)
TieredTwoCells['ThicknessScore'] = TieredTwoCells['Thickness'].map(thickness_scores)
TieredTwoCells['EdgeWettingScore'] = TieredTwoCells['Edge Wetting'].map(edge_wetting_scores)

# Calculate the total score
TieredTwoCells['SumOfSpecPoints'] = (
    TieredTwoCells['AlignmentScore'] +
    TieredTwoCells['AnodeScore'] +
    TieredTwoCells['ThicknessScore'] +
    TieredTwoCells['EdgeWettingScore']
)

# Drop intermediate score columns (optional)
TieredTwoCells = TieredTwoCells.drop(
    ['AlignmentScore', 'AnodeScore', 'ThicknessScore', 'EdgeWettingScore'], axis=1
)


##Calculate Cell Score and Ranking for Tier 2 Cells using TOPSIS method
# Define the criteria and their corresponding optimization directions
criteria = ['SumOfSpecPoints', 'MedDischargeASR_1C', 'DischargeCapacity_Co3'] 
weights = [0.70, 0.20, 0.10]  # Equal weights (adjust if needed)
optimization_directions = ['Low', 'Low', 'High']  # 'Proximity' for 'Thickness'
# Calculate TOPSIS scores
TieredTwoCells['total_rank_score'] = topsis_with_target_Tier(TieredTwoCells, criteria, weights, optimization_directions, target_criteria, targets)

# Rank alternatives by TOPSIS score (higher score is better)
TieredTwoCells['Rank'] = TieredTwoCells['total_rank_score'].rank(ascending=False)
# Arrange Cells by best ranking at the top
TieredTwoCells.sort_values(by='Rank', ascending=True, inplace=True)


### Tier 2: Group Cells using ASR Criteria

In [131]:
df = TieredTwoCells
df = df.reset_index(drop=True)
df['Rank'] = (df.index + 1).astype(int)  # Add 1-based rank as integers
remainder = df.copy()  # Start with a copy of the input DataFrame
retries = 0  # Track retries for reprocessing the remainder

n = Layers

#Initialize variable and dataframe 
groupnumber = 1  # Initialize group number
df_final_Two = pd.DataFrame()  # Initialize an empty DataFrame for the final result
grouped_list = [] # List to collect all the groups for final concatenation
cellstart = 0

# Keep track of processed indices
processed_indices = set()

while len(remainder) >= n + cellstart:
    # Find the next candidate group by slicing remainder
    candidate_group = remainder.iloc[cellstart:cellstart+n].copy()
    print(f"Processing group starting at rank {candidate_group['Rank'].tolist()}: {candidate_group['Cell ID'].tolist()}")

    # Calculate criteria for the group
    asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
    asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()

    print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")

    # Check if group satisfies criteria and replace cells if necessary
    if asr_range >= 2.0:
        # Replace the highest ASR cell
        max_idx = candidate_group['MedDischargeASR_1C'].idxmax()
        problem_cell_id = candidate_group.loc[max_idx, 'Cell ID']
        print(f"ASR range is too high at {asr_range}, replacing problem cell: {problem_cell_id}")
        # Replace the highest ASR cell here (you can replace it based on your logic)
        i_range = 0

        while asr_range >=2.0:
            if n + i_range >= len(remainder):  # Check if the index is valid
                print(f"No more rows in remainder to replace {problem_cell_id}.")
                cellstart = cellstart +1
                break
            candidate_group.loc[max_idx] = remainder.iloc[n + i_range] # Replace the highest ASR cell
            replacement_cell_id = remainder.iloc[n + i_range]['Cell ID']  # Cell ID of the replacement cell
            print(f"Replacing {problem_cell_id} with {replacement_cell_id}")

            # Check for duplicates in 'Cell ID' within the candidate group
            duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
            while duplicate_count > 0:
                print(f"Found {duplicate_count} duplicate(s) of 'Cell ID' in candidate_group. Incrementing i_range.")
                i_range += 1  # Increment if there are duplicates
                if n + i_range < len(remainder):
                    candidate_group.loc[max_idx] = remainder.iloc[n + i_range]
                    replacement_cell_id = remainder.iloc[n + i_range]['Cell ID']  # Replacement cell ID
                    print(f"Replacing {problem_cell_id} with {replacement_cell_id}")
                    duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
                else:
                    break
            asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
            asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()
            print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")
            i_range = i_range+1

    if asr_mean_min >= 0.5:
        # Replace the lowest ASR cell
        min_idx = candidate_group['MedDischargeASR_1C'].idxmin()
        problem_cell_id = candidate_group.loc[min_idx, 'Cell ID']
        print(f"Minimum ASR is too low at difference of {asr_mean_min}, replacing problem cell: {problem_cell_id}")
        # Replace the lowest ASR cell here (you can replace it based on your logic)
        i_mean_min = 0

        while asr_mean_min >= 0.5:
            if n + i_mean_min >= len(remainder):  # Check if the index is valid
                print(f"No more rows in remainder to replace {problem_cell_id}.")
                cellstart = cellstart +1
                break
            candidate_group.loc[min_idx] = remainder.iloc[n + i_mean_min]
            replacement_cell_id = remainder.iloc[n + i_mean_min]['Cell ID']  # Replacement cell ID
            print(f"Replacing {problem_cell_id} with {replacement_cell_id}")

            # Check for duplicates in 'Cell ID' within the candidate group
            duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
            while duplicate_count > 0:
                print(f"Found {duplicate_count} duplicate(s) of 'Cell ID' in candidate_group. Incrementing i_mean_min.")
                i_mean_min += 1  # Increment if there are duplicates
                if n + i_mean_min < len(remainder):
                    candidate_group.loc[min_idx] = remainder.iloc[n + i_mean_min]
                    replacement_cell_id = remainder.iloc[n + i_mean_min]['Cell ID']  # Replacement cell ID
                    print(f"Replacing {problem_cell_id} with {replacement_cell_id}")
                    duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
                else:
                    break
            
            asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
            asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()
            print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")
            i_mean_min = i_mean_min+1


    if asr_range < 2.0 and asr_mean_min < 0.5:
        print(f"Established group starting at rank {candidate_group['Rank'].tolist()}: {candidate_group['Cell ID'].tolist()}")
        # Assign the group number
        candidate_group['Group Number'] = groupnumber
        # Add the rows to the final list
        grouped_list.append(candidate_group)
        # Drop rows in "remainder" where "Cell ID" matches any in "candidate_group"
        remainder = remainder[~remainder['Cell ID'].isin(candidate_group['Cell ID'])]
        # Update processed indices
        processed_indices.update(candidate_group.index)
        # Update group number for the next group
        groupnumber += 1
        # Print status
        print(f"Rows in remainder after processing: {len(remainder)}")


# After loop, concatenate all the groups into a single DataFrame
# Check if grouped_list contains valid DataFrames
if not grouped_list:
    print("No valid groups were formed. Creating an empty DataFrame.")
    df_final_Two = pd.DataFrame()  # Handle case where no groups are created
else:
    df_final_Two = pd.concat(grouped_list, ignore_index=True)
# Add 'Remainder' label to remaining rows, if applicable
if not remainder.empty:
    remainder['Group Number'] = 'Remainder'
    df_final_Two = pd.concat([df_final_Two, remainder], ignore_index=True)


if not df_final_Two.empty:
    # Reset the index
    df_final_Two = df_final_Two.reset_index(drop=True)    
    # Drop unnecessary columns
    columns_to_drop = ['median_contour_catholyte_pct_us', 'center_normalized_0_5mm_eroded_rect_outside_median_us', 'SumOfSpecPoints', 'total_rank_score']
    df_final_Two = df_final_Two.drop(columns=columns_to_drop, errors='ignore')  # Use 'errors="ignore"' to avoid errors if columns are missing
    # Move 'Group Number' to the first column
    df_final_Two = df_final_Two[['Group Number'] + [col for col in df_final_Two.columns if col != 'Group Number']]

# Print the final DataFrame
print(f"Final Tier 2 DataFrame has {df_final_Two.shape[0]} rows.")

# Save the ranked cells to an Excel file
current_date = datetime.now().strftime('%Y-%m-%d')
output_name = f'{current_date}_{batches}_Tier_2_Grouped.xlsx'
df_final_Two.to_excel(output_name, index=False)

Processing group starting at rank [1, 2, 3]: ['UCB003AK-US00-26', 'UCB003AM-US00-10', 'UCB003AN-US01-22']
ASR range: 4.271700000000003, ASR mean-min: 1.4684333333333335
ASR range is too high at 4.271700000000003, replacing problem cell: UCB003AM-US00-10
Replacing UCB003AM-US00-10 with UCB003AN-US01-08
ASR range: 0.2134999999999998, ASR mean-min: 0.11570000000000036
Established group starting at rank [1, 4, 3]: ['UCB003AK-US00-26', 'UCB003AN-US01-08', 'UCB003AN-US01-22']
Rows in remainder after processing: 2
Final Tier 2 DataFrame has 5 rows.


### Tier 2: Arrange cells in each group by "Pyramid" order of ASR

In [132]:
# Function to rearrange rows in pyramid order
if not df_final_Two.empty:
    # Apply the function to each group
    df_pyramid_Two = df_final_Two.groupby('Group Number', group_keys=False).apply(pyramid_order)

    # Reset the index if needed
    df_pyramid_Two = df_pyramid_Two.reset_index(drop=True)

    # Save the ranked cells to an Excel file
    current_date = datetime.now().strftime('%Y-%m-%d')
    output_name = f'{current_date}_{batches}_Tier_2_Grouped_Pyramid_Order.xlsx'
    df_pyramid_Two.to_excel(output_name, index=False)
else:
    print("No Tier 2 dataframe to rearrange")


### Tier 3: Calculate Cell Score and Ranking using TOPSIS Model

In [133]:
# Define scoring rules as dictionaries
alignment_scores = {1: 0, 2: 0, 3: 0.26}  # scores for Alignment
anode_scores = {1: 0, 2: 0.39, 3: 0.70}   # scores for Anode
thickness_scores = {1: 0, 2: 0, 3: 0.33}  # scores for Thickness
edge_wetting_scores = {1: 0, 2: 0.06, 3: 0.26}  # scores for EdgeWetting

# Calculate individual scores for each column
TieredThreeCells['AlignmentScore'] = TieredThreeCells['Alignment'].map(alignment_scores)
TieredThreeCells['AnodeScore'] = TieredThreeCells['Anode'].map(anode_scores)
TieredThreeCells['ThicknessScore'] = TieredThreeCells['Thickness'].map(thickness_scores)
TieredThreeCells['EdgeWettingScore'] = TieredThreeCells['Edge Wetting'].map(edge_wetting_scores)

# Calculate the total score
TieredThreeCells['SumOfSpecPoints'] = (
    TieredThreeCells['AlignmentScore'] +
    TieredThreeCells['AnodeScore'] +
    TieredThreeCells['ThicknessScore'] +
    TieredThreeCells['EdgeWettingScore']
)

# Drop intermediate score columns (optional)
TieredThreeCells = TieredThreeCells.drop(
    ['AlignmentScore', 'AnodeScore', 'ThicknessScore', 'EdgeWettingScore'], axis=1
)


##Calculate Cell Score and Ranking for Tier 2 Cells using TOPSIS method
# Define the criteria and their corresponding optimization directions
criteria = ['SumOfSpecPoints', 'MedDischargeASR_1C', 'DischargeCapacity_Co3'] 
weights = [0.70, 0.20, 0.10]  # Equal weights (adjust if needed)
optimization_directions = ['Low', 'Low', 'High']  # 'Proximity' for 'Thickness'
# Calculate TOPSIS scores
TieredThreeCells['total_rank_score'] = topsis_with_target_Tier(TieredThreeCells, criteria, weights, optimization_directions, target_criteria, targets)

# Rank alternatives by TOPSIS score (higher score is better)
TieredThreeCells['Rank'] = TieredThreeCells['total_rank_score'].rank(ascending=False)
# Arrange Cells by best ranking at the top
TieredThreeCells.sort_values(by='Rank', ascending=True, inplace=True)


### Tier 3: Group Cells using ASR Criteria

In [134]:
df = TieredThreeCells
df = df.reset_index(drop=True)
df['Rank'] = (df.index + 1).astype(int)  # Add 1-based rank as integers
remainder = df.copy()  # Start with a copy of the input DataFrame
retries = 0  # Track retries for reprocessing the remainder

n = Layers

#Initialize variable and dataframe 
groupnumber = 1  # Initialize group number
df_final_Three = pd.DataFrame()  # Initialize an empty DataFrame for the final result
grouped_list = [] # List to collect all the groups for final concatenation
cellstart = 0

# Keep track of processed indices
processed_indices = set()

while len(remainder) >= n + cellstart:
    # Find the next candidate group by slicing remainder
    candidate_group = remainder.iloc[cellstart:cellstart+n].copy()
    print(f"Processing group starting at rank {candidate_group['Rank'].tolist()}: {candidate_group['Cell ID'].tolist()}")

    # Calculate criteria for the group
    asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
    asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()

    print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")

    # Check if group satisfies criteria and replace cells if necessary
    if asr_range >= 2.0:
        # Replace the highest ASR cell
        max_idx = candidate_group['MedDischargeASR_1C'].idxmax()
        problem_cell_id = candidate_group.loc[max_idx, 'Cell ID']
        print(f"ASR range is too high at {asr_range}, replacing problem cell: {problem_cell_id}")
        # Replace the highest ASR cell here (you can replace it based on your logic)
        i_range = 0

        while asr_range >=2.0:
            if n + i_range >= len(remainder):  # Check if the index is valid
                print(f"No more rows in remainder to replace {problem_cell_id}.")
                cellstart = cellstart +1
                break
            candidate_group.loc[max_idx] = remainder.iloc[n + i_range] # Replace the highest ASR cell
            replacement_cell_id = remainder.iloc[n + i_range]['Cell ID']  # Cell ID of the replacement cell
            print(f"Replacing {problem_cell_id} with {replacement_cell_id}")

            # Check for duplicates in 'Cell ID' within the candidate group
            duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
            while duplicate_count > 0:
                print(f"Found {duplicate_count} duplicate(s) of 'Cell ID' in candidate_group. Incrementing i_range.")
                i_range += 1  # Increment if there are duplicates
                if n + i_range < len(remainder):
                    candidate_group.loc[max_idx] = remainder.iloc[n + i_range]
                    replacement_cell_id = remainder.iloc[n + i_range]['Cell ID']  # Replacement cell ID
                    print(f"Replacing {problem_cell_id} with {replacement_cell_id}")
                    duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
                else:
                    break
            asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
            asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()
            print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")
            i_range = i_range+1

    if asr_mean_min >= 0.5:
        # Replace the lowest ASR cell
        min_idx = candidate_group['MedDischargeASR_1C'].idxmin()
        problem_cell_id = candidate_group.loc[min_idx, 'Cell ID']
        print(f"Minimum ASR is too low at difference of {asr_mean_min}, replacing problem cell: {problem_cell_id}")
        # Replace the lowest ASR cell here (you can replace it based on your logic)
        i_mean_min = 0

        while asr_mean_min >= 0.5:
            if n + i_mean_min >= len(remainder):  # Check if the index is valid
                print(f"No more rows in remainder to replace {problem_cell_id}.")
                cellstart = cellstart +1
                break
            candidate_group.loc[min_idx] = remainder.iloc[n + i_mean_min]
            replacement_cell_id = remainder.iloc[n + i_mean_min]['Cell ID']  # Replacement cell ID
            print(f"Replacing {problem_cell_id} with {replacement_cell_id}")

            # Check for duplicates in 'Cell ID' within the candidate group
            duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
            while duplicate_count > 0:
                print(f"Found {duplicate_count} duplicate(s) of 'Cell ID' in candidate_group. Incrementing i_mean_min.")
                i_mean_min += 1  # Increment if there are duplicates
                if n + i_mean_min < len(remainder):
                    candidate_group.loc[min_idx] = remainder.iloc[n + i_mean_min]
                    replacement_cell_id = remainder.iloc[n + i_mean_min]['Cell ID']  # Replacement cell ID
                    print(f"Replacing {problem_cell_id} with {replacement_cell_id}")
                    duplicate_count = candidate_group['Cell ID'].duplicated(keep=False).sum()
                else:
                    break
            
            asr_range = candidate_group['MedDischargeASR_1C'].max() - candidate_group['MedDischargeASR_1C'].min()
            asr_mean_min = candidate_group['MedDischargeASR_1C'].mean() - candidate_group['MedDischargeASR_1C'].min()
            print(f"ASR range: {asr_range}, ASR mean-min: {asr_mean_min}")
            i_mean_min = i_mean_min+1


    if asr_range < 2.0 and asr_mean_min < 0.5:
        print(f"Established group starting at rank {candidate_group['Rank'].tolist()}: {candidate_group['Cell ID'].tolist()}")
        # Assign the group number
        candidate_group['Group Number'] = groupnumber
        # Add the rows to the final list
        grouped_list.append(candidate_group)
        # Drop rows in "remainder" where "Cell ID" matches any in "candidate_group"
        remainder = remainder[~remainder['Cell ID'].isin(candidate_group['Cell ID'])]
        # Update processed indices
        processed_indices.update(candidate_group.index)
        # Update group number for the next group
        groupnumber += 1
        # Print status
        print(f"Rows in remainder after processing: {len(remainder)}")


# After loop, concatenate all the groups into a single DataFrame
# Check if grouped_list contains valid DataFrames
if not grouped_list:
    print("No valid groups were formed. Creating an empty DataFrame.")
    df_final_Three = pd.DataFrame()  # Handle case where no groups are created
else:
    df_final_Three = pd.concat(grouped_list, ignore_index=True)
# Add 'Remainder' label to remaining rows, if applicable
if not remainder.empty:
    remainder['Group Number'] = 'Remainder'
    df_final_Three = pd.concat([df_final_Three, remainder], ignore_index=True)


if not df_final_Three.empty:
    # Reset the index
    df_final_Three = df_final_Three.reset_index(drop=True)    
    # Drop unnecessary columns
    columns_to_drop = ['median_contour_catholyte_pct_us', 'center_normalized_0_5mm_eroded_rect_outside_median_us', 'SumOfSpecPoints', 'total_rank_score']
    df_final_Three = df_final_Three.drop(columns=columns_to_drop, errors='ignore')  # Use 'errors="ignore"' to avoid errors if columns are missing
    # Move 'Group Number' to the first column
    df_final_Three = df_final_Three[['Group Number'] + [col for col in df_final_Three.columns if col != 'Group Number']]

# Print the final DataFrame
print(f"Final Tier 2 DataFrame has {df_final_Three.shape[0]} rows.")

# Save the ranked cells to an Excel file
current_date = datetime.now().strftime('%Y-%m-%d')
output_name = f'{current_date}_{batches}_Tier_3_Grouped.xlsx'
df_final_Three.to_excel(output_name, index=False)

Processing group starting at rank [1, 2, 3]: ['UCB003AN-US00-13', 'UCB003AN-US00-21', 'UCB003AN-US00-08']
ASR range: 0.9673000000000016, ASR mean-min: 0.4542333333333346
Established group starting at rank [1, 2, 3]: ['UCB003AN-US00-13', 'UCB003AN-US00-21', 'UCB003AN-US00-08']
Rows in remainder after processing: 22
Processing group starting at rank [4, 5, 6]: ['UCB003AL-US00-42', 'UCB003AN-US00-01', 'UCB003AL-US00-19']
ASR range: 0.18459999999999965, ASR mean-min: 0.09219999999999828
Established group starting at rank [4, 5, 6]: ['UCB003AL-US00-42', 'UCB003AN-US00-01', 'UCB003AL-US00-19']
Rows in remainder after processing: 19
Processing group starting at rank [7, 8, 9]: ['UCB003AL-US00-38', 'UCB003AK-US00-10', 'UCB003AL-US00-33']
ASR range: 0.4576000000000029, ASR mean-min: 0.3044000000000011
Established group starting at rank [7, 8, 9]: ['UCB003AL-US00-38', 'UCB003AK-US00-10', 'UCB003AL-US00-33']
Rows in remainder after processing: 16
Processing group starting at rank [10, 11, 12]: ['

### Tier 3: Arrange cells in each group by "Pyramid" order of ASR

In [135]:
# Function to rearrange rows in pyramid order
if not df_final_Three.empty:
    # Apply the function to each group
    df_pyramid_Three = df_final_Three.groupby('Group Number', group_keys=False).apply(pyramid_order)

    # Reset the index if needed
    df_pyramid_Three = df_pyramid_Three.reset_index(drop=True)

    # Save the ranked cells to an Excel file
    current_date = datetime.now().strftime('%Y-%m-%d')
    output_name = f'{current_date}_{batches}_Tier_3_Grouped_Pyramid_Order.xlsx'
    df_pyramid_Three.to_excel(output_name, index=False)
else:
    print("No Tier 3 dataframe to rearrange")
