## Inspected data


In [1]:
import pandas as pd

female = pd.read_csv('./data/female.csv')
male = pd.read_csv('./data/male.csv')

Printig Values


In [2]:
print(f'From women: (rows, columns) {female.shape}')
print(f'From men: (rows, columns) {male.shape}')

From women: (rows, columns) (1986, 108)
From men: (rows, columns) (4082, 108)


## Checking the percentiles

Determining the percentiles of the data.


In [3]:
def compute_percentile_range(column):
  #Define percentile range
  ranges = [(0, 5), (5, 25), (25, 50), (50, 75), (75, 90), (90, 97), (97, 100)]

  #Define percentile ranks for each range
  #beräkna percentiler för varje specificerat intervall
  percentiles = {(low, high): (column.quantile(low/100), column.quantile(high/100)) for low, high in ranges}

  # print(percentiles)
  #antalet poster som faller inom varje percetilintervall
  counts = {}

  for r, (low, high) in percentiles.items():
    counts[r] = ((column >= low) & (column <= high)).sum()
  return counts

print(compute_percentile_range(female['chestcircumference']))
print(compute_percentile_range(female['biacromialbreadth']))

print(compute_percentile_range(male['chestcircumference']))
print(compute_percentile_range(male['biacromialbreadth']))

{(0, 5): 100, (5, 25): 403, (25, 50): 505, (50, 75): 505, (75, 90): 306, (90, 97): 140, (97, 100): 60}
{(0, 5): 103, (5, 25): 412, (25, 50): 529, (50, 75): 586, (75, 90): 309, (90, 97): 147, (97, 100): 62}
{(0, 5): 213, (5, 25): 824, (25, 50): 1046, (50, 75): 1033, (75, 90): 630, (90, 97): 301, (97, 100): 125}
{(0, 5): 211, (5, 25): 861, (25, 50): 1090, (50, 75): 1148, (75, 90): 648, (90, 97): 303, (97, 100): 123}


## Generate T-shirt size Chart


In [4]:
def compute_percentile_measurments(data, chest_column, shoulder_column):
  sizes = ['XS', 'S', 'M', 'L', 'XL', 'XXL', '3XL']
  ranges = [0,5,25,50,75,90,97]

  #Compute the value for each percentile for chest 
  chest_percentiles = {p: data[chest_column].quantile(p/100) for p in ranges}
  #Compute the value for each percentile for shoulder
  shoulder_percentiles = {p: data[shoulder_column].quantile(p/100) for p in ranges}

  # print(f'Chest Value:{chest_percentiles}')
  # print(f'Shoulder Value:{shoulder_percentiles}')

  #map the t-shirt sizes to the corresponding chest and shoulder measurments 
  size_mappings = {}
  for i, size in enumerate(sizes):
    size_mappings[size] = {
      'Chest': int(chest_percentiles[ranges[i]]),
      'Shoulder': int(shoulder_percentiles[ranges[i]])
    }
  return size_mappings

female_sizes = compute_percentile_measurments(female, 'chestcircumference', 'biacromialbreadth')
male_sizes = compute_percentile_measurments(male, 'chestcircumference', 'biacromialbreadth')

print(female_sizes)
print(male_sizes)

{'XS': {'Chest': 695, 'Shoulder': 283}, 'S': {'Chest': 824, 'Shoulder': 335}, 'M': {'Chest': 889, 'Shoulder': 353}, 'L': {'Chest': 940, 'Shoulder': 365}, 'XL': {'Chest': 999, 'Shoulder': 378}, 'XXL': {'Chest': 1057, 'Shoulder': 389}, '3XL': {'Chest': 1117, 'Shoulder': 400}}
{'XS': {'Chest': 774, 'Shoulder': 337}, 'S': {'Chest': 922, 'Shoulder': 384}, 'M': {'Chest': 996, 'Shoulder': 403}, 'L': {'Chest': 1056, 'Shoulder': 415}, 'XL': {'Chest': 1117, 'Shoulder': 428}, 'XXL': {'Chest': 1172, 'Shoulder': 441}, '3XL': {'Chest': 1233, 'Shoulder': 452}}


In [5]:
female_sizes


{'XS': {'Chest': 695, 'Shoulder': 283},
 'S': {'Chest': 824, 'Shoulder': 335},
 'M': {'Chest': 889, 'Shoulder': 353},
 'L': {'Chest': 940, 'Shoulder': 365},
 'XL': {'Chest': 999, 'Shoulder': 378},
 'XXL': {'Chest': 1057, 'Shoulder': 389},
 '3XL': {'Chest': 1117, 'Shoulder': 400}}

In [6]:
male_sizes

{'XS': {'Chest': 774, 'Shoulder': 337},
 'S': {'Chest': 922, 'Shoulder': 384},
 'M': {'Chest': 996, 'Shoulder': 403},
 'L': {'Chest': 1056, 'Shoulder': 415},
 'XL': {'Chest': 1117, 'Shoulder': 428},
 'XXL': {'Chest': 1172, 'Shoulder': 441},
 '3XL': {'Chest': 1233, 'Shoulder': 452}}

In [7]:
def get_size(data, size_chart ):
  #Create a dictionary with size as key and number of matches as value
  matches = {size: 0 for size in size_chart.keys()}
  ties = 0
  
  #Iterate through each row in the dataframe
  for _, row in data.iterrows():
    possible_sizes = []
    #Loop through each size in the size chart
    for size, measurments in size_chart.items():
      #Check if the chest circumference and biacromial breadth are within the range of the size
      if (row['biacromialbreadth'] <= measurments['Shoulder'] and 
          row['chestcircumference'] <= measurments['Chest']):
          possible_sizes.append(size)
   #if  only one size is possible, add 1 to the number of matches 
  if len(possible_sizes) == 1:
          matches[possible_sizes[0]] += 1
  #if there are more possible sizes, increce for ties
  elif len(possible_sizes) > 1:
          ties += 1

  return matches, ties

In [8]:
#Get the number of matches and ties for each size
female_matches, female_ties = get_size(female, female_sizes)
#Get the number of matches and ties for each size
male_matches, male_ties = get_size(male, male_sizes)

print(f'Female matches: {female_matches}')
print(f'Female ties: {female_ties}')

print(f'Male matches: {male_matches}')
print(f'Male ties: {male_ties}')


Female matches: {'XS': 0, 'S': 0, 'M': 0, 'L': 0, 'XL': 0, 'XXL': 0, '3XL': 0}
Female ties: 1
Male matches: {'XS': 0, 'S': 0, 'M': 0, 'L': 0, 'XL': 0, 'XXL': 0, '3XL': 1}
Male ties: 0
