# In [None]:
import pandas as pd

# Helper: median of an already-sorted, non-empty sequence
def _median_of_sorted(sorted_vals):
    """Return the median of a non-empty sequence that is already sorted.

    For an odd number of items this is the middle item itself; for an even
    number it is the mean of the two middle items.
    """
    mid = len(sorted_vals) // 2
    if len(sorted_vals) % 2:
        return sorted_vals[mid]
    return (sorted_vals[mid - 1] + sorted_vals[mid]) / 2


# Function to compute the box-and-whisker statistics
def finding_points(list_pct):
    """Return ``(min, max, Q1, Q2, Q3)`` for a box-and-whisker plot.

    Quartiles use the "median of halves" method: Q2 is the median of all
    values, Q1 the median of the values below the midpoint and Q3 the median
    of the values above it (the middle value is excluded from both halves
    when the number of values is odd).  With a single value, all five
    statistics equal that value.

    ``min`` and ``max`` are the whisker ends: the smallest and largest values
    that lie within ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``; values outside that
    range are treated as outliers and ignored.

    Args:
        list_pct: Numeric values.  They do not need to be pre-sorted and the
            argument is not modified.

    Returns:
        A tuple ``(min_val, max_val, Q1, Q2, Q3)``.

    Raises:
        ValueError: If ``list_pct`` is empty.
    """
    # Work on a sorted copy so the caller's list is never reordered or popped.
    values = sorted(list_pct)
    if not values:
        raise ValueError("finding_points() requires at least one value")

    half = len(values) // 2
    lower_half = values[:half]
    # Skips the middle element when the length is odd (len - half == half + 1).
    upper_half = values[len(values) - half:]

    Q2 = _median_of_sorted(values)
    # The halves are empty only for a single value; fall back to the median.
    Q1 = _median_of_sorted(lower_half) if lower_half else Q2
    Q3 = _median_of_sorted(upper_half) if upper_half else Q2
    IQR = Q3 - Q1

    outlier_lowerbound = Q1 - 1.5 * IQR
    outlier_upperbound = Q3 + 1.5 * IQR

    # Whisker ends: first value not below the lower fence, last value not
    # above the upper fence.  Written as "not <" / "not >" so that a NaN fence
    # leaves the plain extremes in place instead of finding no match.
    min_val = next(
        (v for v in values if not v < outlier_lowerbound), values[0]
    )
    max_val = next(
        (v for v in reversed(values) if not v > outlier_upperbound), values[-1]
    )

    return min_val, max_val, Q1, Q2, Q3

# Function to compute and store box-and-whisker stats for each race and district
def finding_boxandwhiskerpoints(data, region=None):
    """Compute per-district box-and-whisker statistics for every race column.

    Every row of ``data`` contributes one sample per race column to the
    district named in its ``"district"`` column.  Districts with at least
    five samples get their statistics from ``finding_points``; districts
    with fewer get all-zero statistics.

    Args:
        data: DataFrame with an index level named ``"plan_num"``, a
            ``"district"`` column, the eight ``*_pct`` race columns listed
            below and, when a region filter is used, a ``"category"`` column.
        region: Optional filter; only ``"Suburban"``, ``"Urban"`` and
            ``"Rural"`` have an effect.  A row whose ``"category"`` does not
            contain the region contributes ``0`` instead of its real value.
            NOTE(review): those zeros still count as samples and pull the
            statistics toward zero -- confirm this is intended rather than
            skipping the row.

    Returns:
        DataFrame indexed by ``(race, district_id)`` with the columns
        ``min``, ``Q1``, ``Q2``, ``Q3`` and ``max``.  The frame is also shown
        with IPython's ``display`` when available, otherwise printed.
    """
    race_columns = ['black_pct', 'white_pct', 'asian_pct', 'hispanic_pct',
                    'aindalk_pct', 'hipi_pct', 'other_pct', 'twoormore_pct']
    stat_columns = ['district_id', 'min', 'Q1', 'Q2', 'Q3', 'max']
    # Any other region value (including None) disables filtering.
    filter_by_region = region in ("Suburban", "Urban", "Rural")

    district_ids = data["district"].unique()
    samples = {
        race: {district_id: [] for district_id in district_ids}
        for race in race_columns
    }

    # Single pass over the rows, collecting the sample for every race at once
    # instead of re-iterating the whole frame once per race.
    for _plan_num, plan_data in data.groupby(level="plan_num"):
        for _row_index, district_data in plan_data.iterrows():
            district_id = district_data["district"]
            outside_region = (
                filter_by_region and region not in district_data['category']
            )
            for race in race_columns:
                value = 0 if outside_region else district_data[race]
                samples[race][district_id].append(value)

    race_results = []
    for race in race_columns:
        rows = []
        for district_id, pct_list in samples[race].items():
            if len(pct_list) >= 5:
                min_val, max_val, Q1, Q2, Q3 = finding_points(sorted(pct_list))
            else:
                # Too few samples for meaningful quartiles.
                min_val = max_val = Q1 = Q2 = Q3 = 0
            rows.append({
                'district_id': district_id,
                'min': min_val,
                'Q1': Q1,
                'Q2': Q2,
                'Q3': Q3,
                'max': max_val,
            })
        # Explicit columns keep 'district_id' present even when there are no
        # districts, so set_index below does not fail on empty input.
        race_df = pd.DataFrame(rows, columns=stat_columns)
        race_df['race'] = race
        race_results.append(race_df)

    final_result_df = pd.concat(race_results, ignore_index=True)
    final_result_df = final_result_df.set_index(['race', 'district_id'])

    # `display` is only injected by IPython/Jupyter; fall back to print so the
    # function also works when run as a plain script.
    try:
        display(final_result_df)
    except NameError:
        print(final_result_df)

    return final_result_df

if __name__ == "__main__":
    # Entry point: compute the statistics for every district with no region filter.
    # NOTE(review): all_results_gdf is not defined in this section of the file --
    # presumably created by an earlier cell/module; confirm it exists before running.
    result_df = finding_boxandwhiskerpoints(all_results_gdf)