In [5]:
import pandas as pd

In [6]:
female = pd.read_csv('./Data/female.csv')
male = pd.read_csv('./Data/male.csv')


In [None]:
print(f'For women we have (rows, columns) {female.shape}')

print(f'For men we have (rows, columns) {male.shape}')

For women we have (rows, columns) (1986, 108)
For men we have (rows, columns) (4082, 108)


Notes:
When categorical labels must be created from continuous data without predefined standards, percentile-based segmentation provides a data-driven and interpretable solution.

In [8]:
def compute_percentile_ranges(column):
    #Define percentile ranges
    ranges = [(0, 5), (5, 25), (25, 50), (50, 75), (75, 90), (90, 97), (97, 100)]

    percentiles = {(low, high): (column.quantile(low/100), column.quantile(high/100)) for low, high in ranges}

    counts = {}

    for r, (low, high) in percentiles.items():
        counts[r] = ((column >= low) & (column < high)).sum()
    
    return counts


print(compute_percentile_ranges(female['chestcircumference']))
print(compute_percentile_ranges(female['biacromialbreadth']))

print(compute_percentile_ranges(male['chestcircumference']))
print(compute_percentile_ranges(male['biacromialbreadth']))


{(0, 5): np.int64(100), (5, 25): np.int64(396), (25, 50): np.int64(492), (50, 75): np.int64(499), (75, 90): np.int64(299), (90, 97): np.int64(140), (97, 100): np.int64(59)}
{(0, 5): np.int64(93), (5, 25): np.int64(377), (25, 50): np.int64(477), (50, 75): np.int64(541), (75, 90): np.int64(297), (90, 97): np.int64(139), (97, 100): np.int64(61)}
{(0, 5): np.int64(199), (5, 25): np.int64(810), (25, 50): np.int64(1025), (50, 75): np.int64(1012), (75, 90): np.int64(616), (90, 97): np.int64(295), (97, 100): np.int64(124)}
{(0, 5): np.int64(191), (5, 25): np.int64(787), (25, 50): np.int64(989), (50, 75): np.int64(1079), (75, 90): np.int64(610), (90, 97): np.int64(303), (97, 100): np.int64(122)}


In [9]:
def comput_size_percentile_measurements(data, chest_column, shoulder_column):
    sizes = ['XS', 'S', 'M', 'L', 'XL', '2XL', '3XL']
    ranges = [0, 5, 25 , 50, 75, 90, 97]

    # Compute the values for each percentile for chest and shoulder
    chest_percentiles = {p: data[chest_column].quantile(p/100) for p in ranges}
    shoulder_percentiles = {p: data[shoulder_column].quantile(p/100) for p in ranges}

    # Map the t-shirt sizes to the corresponding chest and shoulder measurements
    size_mappings = {}
    for i, size in enumerate(sizes):
        size_mappings[size] = {
            'Chest': int(chest_percentiles[ranges[i]]),
            'Shoulder': int(shoulder_percentiles[ranges[i]])
        }
    
    return size_mappings



print(comput_size_percentile_measurements(female, 'chestcircumference', 'biacromialbreadth'))
print(comput_size_percentile_measurements(male, 'chestcircumference', 'biacromialbreadth'))

{'XS': {'Chest': 695, 'Shoulder': 283}, 'S': {'Chest': 824, 'Shoulder': 335}, 'M': {'Chest': 889, 'Shoulder': 353}, 'L': {'Chest': 940, 'Shoulder': 365}, 'XL': {'Chest': 999, 'Shoulder': 378}, '2XL': {'Chest': 1057, 'Shoulder': 389}, '3XL': {'Chest': 1117, 'Shoulder': 400}}
{'XS': {'Chest': 774, 'Shoulder': 337}, 'S': {'Chest': 922, 'Shoulder': 384}, 'M': {'Chest': 996, 'Shoulder': 403}, 'L': {'Chest': 1056, 'Shoulder': 415}, 'XL': {'Chest': 1117, 'Shoulder': 428}, '2XL': {'Chest': 1172, 'Shoulder': 441}, '3XL': {'Chest': 1233, 'Shoulder': 452}}


In [10]:
female_size = {
    'XS': {'Chest': 695, 'Shoulder': 283}, 
    'S': {'Chest': 824, 'Shoulder': 335}, 
    'M': {'Chest': 889, 'Shoulder': 353}, 
    'L': {'Chest': 940, 'Shoulder': 365}, 
    'XL': {'Chest': 999, 'Shoulder': 378}, 
    '2XL': {'Chest': 1057, 'Shoulder': 389}, 
    '3XL': {'Chest': 1117, 'Shoulder': 400}
    }

male_sizes = {
    'XS': {'Chest': 774, 'Shoulder': 337}, 
    'S': {'Chest': 922, 'Shoulder': 384}, 
    'M': {'Chest': 996, 'Shoulder': 403}, 
    'L': {'Chest': 1056, 'Shoulder': 415}, 
    'XL': {'Chest': 1117, 'Shoulder': 428}, 
    '2XL': {'Chest': 1172, 'Shoulder': 441}, 
    '3XL': {'Chest': 1233, 'Shoulder': 452}
    }

In [11]:
def assign_size(value, size_chart, measurement_type):
    sizes = list(size_chart.keys())
    
    for i in range(len(sizes) - 1):
        lower = size_chart[sizes[i]][measurement_type]
        upper = size_chart[sizes[i + 1]][measurement_type]
        
        if lower <= value < upper:
            return sizes[i]
    
    return sizes[-1]

In [12]:
def count_size_matches_and_conflicts(data, size_chart):
    matches = 0
    conflicts = 0
    
    for _, row in data.iterrows():
        chest_size = assign_size(
            row['chestcircumference'],
            size_chart,
            'Chest'
        )
        
        shoulder_size = assign_size(
            row['biacromialbreadth'],
            size_chart,
            'Shoulder'
        )
        
        if chest_size == shoulder_size:
            matches += 1
        else:
            conflicts += 1
    
    return matches, conflicts


In [13]:
female_matches, female_conflicts = count_size_matches_and_conflicts(
    female,
    female_size
)

male_matches, male_conflicts = count_size_matches_and_conflicts(
    male,
    male_sizes
)

print("Female - Matches:", female_matches, "Conflicts:", female_conflicts)
print("Male - Matches:", male_matches, "Conflicts:", male_conflicts)


Female - Matches: 474 Conflicts: 1512
Male - Matches: 1177 Conflicts: 2905
