## Importing Data from google drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## importing important libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

In [None]:
df = pd.read_csv('drive/MyDrive/body_measurements_dataset.csv')

In [None]:
print(df.columns)

Index(['Gender', 'Height', 'Weight', 'Bust/Chest', 'Cup Size', 'Waist', 'Hips',
       'Body Shape Index'],
      dtype='object')


In [None]:
df['Size'] = 6

Splitting data into male and female parts to separately classify the parameters

In [None]:
df_male = df[df['Gender'] == 'Male']
df_female = df[df['Gender'] == 'Female']

## Dropping unecessary Parameters

Gender is indeterminate and Cup size is invalid for males

In [None]:
df_male = df_male.drop(['Gender', 'Cup Size'], axis=1)
df_male.head()

Unnamed: 0,Height,Weight,Bust/Chest,Waist,Hips,Body Shape Index,Size
1,"6'1""",85,40,30,31,3,6
2,"5'3""",77,44,36,42,4,6
3,"6'4""",74,38,48,49,4,6
4,"4'11""",47,35,39,41,0,6
7,"6'4""",88,37,48,49,3,6


In [None]:
df_female = df_female.drop(['Gender'], axis=1)
df_female.head()

Unnamed: 0,Height,Weight,Bust/Chest,Cup Size,Waist,Hips,Body Shape Index,Size
0,"5'1""",67,37,E,35,38,4,6
5,"6'4""",97,37,E,28,34,0,6
6,"5'9""",70,33,DD,23,23,0,6
8,"5'1""",59,41,D,25,26,0,6
12,"5'11""",92,42,DD,24,26,4,6


Making a dictionary to map body size to corresponding ratios

[![image.png](https://i.postimg.cc/BQTnh7mf/image.png)](https://postimg.cc/8jCGc48Z)

In [None]:
Body_Index = {0: ('Rectangle', 3.6 * 2.54, 3.6 * 2.54, 9 * 2.54, 10 * 2.54),
              1: ('Apple',   3.6 * 2.54, 9 * 2.54, None, None),
              2: ('Pear', 2 * 2.54, 7 * 2.54 , 9 * 2.54, None),
              3: ('Hourglass', 1 * 2.54, 3.6 * 2.54, 9 * 2.54, 10 * 2.54)
              #4: (3.6
              }

![image](https://images.prismic.io/universalstandard/64a5f0f8-f64f-4744-8ea0-a49f1ce3f7dd_size-chart-desktop.jpg?auto=compress,format)

# MALE ANALYSIS

In [None]:
df_male[['Weight', 'Bust/Chest', 'Waist', 'Hips']].describe()

Unnamed: 0,Weight,Bust/Chest,Waist,Hips
count,475.0,475.0,475.0,475.0
mean,72.812632,40.012632,36.326316,40.778947
std,16.245371,6.573493,6.208138,6.391217
min,42.0,30.0,26.0,26.0
25%,60.0,34.0,31.0,36.0
50%,71.0,40.0,36.0,42.0
75%,83.0,46.0,41.0,46.0
max,117.0,52.0,48.0,50.0


In [None]:
print(df_male.columns)
print(df_male.dtypes)

Index(['Height', 'Weight', 'Bust/Chest', 'Waist', 'Hips', 'Body Shape Index',
       'Size'],
      dtype='object')
Height              object
Weight               int64
Bust/Chest           int64
Waist                int64
Hips                 int64
Body Shape Index     int64
Size                 int64
dtype: object


Algorithmically Predicting the Size using standard size chart taking high bounds

In [None]:
def update(row):
  if row['Bust/Chest'] > 40 or row['Waist'] > 38:
    return 8
  elif row['Bust/Chest'] > 38 and row['Waist'] > 38:
    return 7
  elif row['Bust/Chest'] > 36 and row['Waist'] > 36:
    return 6
  elif row['Bust/Chest'] > 34 and row['Waist'] > 34:
    return 5
  elif row['Bust/Chest'] >= 32 and row['Waist'] >= 32:
    return 4
  elif row['Bust/Chest'] >= 30 and row['Waist'] >= 30:
    return 3
  elif row['Bust/Chest'] >= 28 and row['Waist'] >= 28:
    return 2
  else:
    return 1

df_male['Size'] = df_male.apply(update, axis=1)
df_female['Size'] = df_female.apply(update, axis=1)

In [None]:
print(df_male)

    Height  Weight  Bust/Chest  Waist  Hips  Body Shape Index  Size
1     6'1"      85          40     30    31                 3     3
2     5'3"      77          44     36    42                 4     8
3     6'4"      74          38     48    49                 4     8
4    4'11"      47          35     39    41                 0     8
7     6'4"      88          37     48    49                 3     8
..     ...     ...         ...    ...   ...               ...   ...
991   5'8"      58          39     37    37                 3     6
992   5'4"      63          46     28    31                 3     8
993   6'6"      83          45     35    43                 0     8
997   6'4"      73          47     31    38                 2     8
999   6'5"      89          32     35    39                 3     4

[475 rows x 7 columns]


In [None]:
print(df_female)

    Height  Weight  Bust/Chest Cup Size  Waist  Hips  Body Shape Index  Size
0     5'1"      67          37        E     35    38                 4     5
5     6'4"      97          37        E     28    34                 0     2
6     5'9"      70          33       DD     23    23                 0     1
8     5'1"      59          41        D     25    26                 0     8
12   5'11"      92          42       DD     24    26                 4     8
..     ...     ...         ...      ...    ...   ...               ...   ...
989   5'3"      54          45        A     29    37                 4     8
994   5'0"      59          30        A     28    31                 4     2
995   5'2"      70          40       DD     34    36                 3     4
996   5'6"      84          47        F     36    38                 0     8
998   5'1"      45          34        E     25    29                 1     1

[525 rows x 8 columns]


## importing SVM Model and Algorithmic Metrics

In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score
import sklearn.model_selection as model_sel

In [None]:
X = df_male[['Bust/Chest', 'Waist']]
Y = df_male['Size']

In [None]:
print(X)
print(Y)

     Bust/Chest  Waist
1            40     30
2            44     36
3            38     48
4            35     39
7            37     48
..          ...    ...
991          39     37
992          46     28
993          45     35
997          47     31
999          32     35

[475 rows x 2 columns]
1      3
2      8
3      8
4      8
7      8
      ..
991    6
992    8
993    8
997    8
999    4
Name: Size, Length: 475, dtype: int64


splittting test and training data

In [None]:
x_train, x_test, y_train, y_test = model_sel.train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=101)

In [None]:
print(x_train)

     Bust/Chest  Waist
203          50     26
711          45     40
291          52     30
909          30     40
375          48     41
..          ...    ...
131          51     31
693          50     35
712          30     40
21           48     35
743          32     28

[380 rows x 2 columns]


In [None]:
print(y_train)

203    8
711    8
291    8
909    8
375    8
      ..
131    8
693    8
712    8
21     8
743    2
Name: Size, Length: 380, dtype: int64


In [None]:
poly = svm.SVC(kernel='poly',
               degree=3,
               C=1
              ).fit(x_train, y_train)

In [None]:
poly_predict = poly.predict(x_test)

# ACCURACY and PRECISION

In [None]:
print(accuracy_score(y_test, poly_predict))
print(f1_score(y_test, poly_predict, average='weighted'))

0.9473684210526315
0.9446600877192982


In [None]:
ar = np.array([[26, 26],
            [27, 26],
            [28, 26],
            [29, 27],
            [30, 28],
            [31, 29],
            [32, 30],
            [32, 31],
            [33, 32],
            [34, 33],
            [34, 34],
            [35, 34],
            [36, 35],
            [37, 36],
            [38, 37],
            [39, 38],
            [40, 39]
            ])
ar = ar.transpose()

In [None]:
size_chart = pd.DataFrame(({'Bust/Chest':ar[0],
                            'Waist':ar[1]}))


In [None]:
print(size_chart)

    Bust/Chest  Waist
0           26     26
1           27     26
2           28     26
3           29     27
4           30     28
5           31     29
6           32     30
7           32     31
8           33     32
9           34     33
10          34     34
11          35     34
12          36     35
13          37     36
14          38     37
15          39     38
16          40     39


In [None]:
size_chart_pred = poly.predict(size_chart)

In [None]:
print(size_chart_pred)

[1 1 1 1 2 2 3 3 4 4 4 4 5 5 6 8 8]


In [None]:
size_chart['Size'] = size_chart_pred

In [None]:
print(size_chart)

    Bust/Chest  Waist  Size
0           26     26     1
1           27     26     1
2           28     26     1
3           29     27     1
4           30     28     2
5           31     29     2
6           32     30     3
7           32     31     3
8           33     32     4
9           34     33     4
10          34     34     4
11          35     34     4
12          36     35     5
13          37     36     5
14          38     37     6
15          39     38     8
16          40     39     8


In [None]:
xs=[26,26]
small=[28,28]
med=[30,30]
large=[32, 32]
xl=[34,34]
x2l=[36, 36]
x3l=[38, 38]
x4l=[40, 40]

In [None]:
for row, i in size_chart.iterrows():
  if i['Size'] == 1:
    if xs[0] < i['Bust/Chest']: xs[0] = i['Bust/Chest']
    if xs[0] < i['Waist']: xs[0] = i['Waist']
  elif i['Size'] == 2:
    if small[0] < i['Bust/Chest']: small[0] = i['Bust/Chest']
    if small[0] < i['Waist']: small[0] = i['Waist']
  elif i['Size'] == 3:
    if med[0] < i['Bust/Chest']: med[0] = i['Bust/Chest']
    if med[0] < i['Waist']: med[0] = i['Waist']
  elif i['Size'] == 4:
    if large[0] < i['Bust/Chest']: large[0] = i['Bust/Chest']
    if large[0] < i['Waist']: large[0] = i['Waist']
  elif i['Size'] == 5:
    if xl[0] < i['Bust/Chest']: xl[0] = i['Bust/Chest']
    if xl[0] < i['Waist']: xl[0] = i['Waist']
  elif i['Size'] == 6:
    if x2l[0] < i['Bust/Chest']: x2l[0] = i['Bust/Chest']
    if x2l[0] < i['Waist']: x2l[0] = i['Waist']
  elif i['Size'] == 7:
    if x3l[0] < i['Bust/Chest']: x3l[0] = i['Bust/Chest']
    if x3l[0] < i['Waist']: x3l[0] = i['Waist']
  elif i['Size'] == 8:
    if x4l[0] < i['Bust/Chest']: x4l[0] = i['Bust/Chest']
    if x4l[0] < i['Waist']: x4l[0] = i['Waist']


# PRINTING SIZE CHART

In [None]:
print("\tSize\t\tBust/Chest\tWaist")
print("\txs", "\t\t", xs[0],"\t\t", xs[1])
print("\ts", "\t\t", small[0],"\t\t", small[1])
print("\tm", "\t\t", med[0],"\t\t", med[1])
print("\tl", "\t\t", large[0],"\t\t", large[1])
print("\txl", "\t\t", xl[0],"\t\t", xl[1])
print("\t2xl", "\t\t", x2l[0],"\t\t", x2l[1])
print("\t3xl", "\t\t", x3l[0],"\t\t", x3l[1])
print("\t4xl", "\t\t", x4l[0],"\t\t", x4l[1])

	Size		Bust/Chest	Waist
	xs 		 29 		 26
	s 		 31 		 28
	m 		 32 		 30
	l 		 35 		 32
	xl 		 37 		 34
	2xl 		 38 		 36
	3xl 		 38 		 38
	4xl 		 40 		 40


# OUTPUTTING SIZE CHART

In [None]:
import pickle as pk
pk.dump(poly, open('SVM_model_male.bin', 'wb'))

# FEMALE PREDICTION

In [None]:
def update(row):
  if row['Bust/Chest'] > 40 or row['Waist'] > 38:
    return 8
  elif row['Bust/Chest'] > 38 and row['Waist'] > 36:
    return 7
  elif row['Bust/Chest'] > 36 and row['Waist'] > 34:
    return 6
  elif row['Bust/Chest'] > 34 and row['Waist'] > 32:
    return 5
  elif row['Bust/Chest'] >= 32 and row['Waist'] >= 30:
    return 4
  elif row['Bust/Chest'] >= 30 and row['Waist'] >= 28:
    return 3
  elif row['Bust/Chest'] >= 28 and row['Waist'] >= 26:
    return 2
  else:
    return 1

df_female['Size'] = df_female.apply(update, axis=1)

In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score
import sklearn.model_selection as model_sel

In [None]:
df_female.head()

Unnamed: 0,Height,Weight,Bust/Chest,Cup Size,Waist,Hips,Body Shape Index,Size
0,"5'1""",67,37,E,35,38,4,6
5,"6'4""",97,37,E,28,34,0,3
6,"5'9""",70,33,DD,23,23,0,1
8,"5'1""",59,41,D,25,26,0,8
12,"5'11""",92,42,DD,24,26,4,8


In [None]:
df_female['Cup Size'].unique()

array(['E', 'DD', 'D', 'AA', 'C', 'A', 'B', 'F'], dtype=object)

In [None]:
df_female['Cup Size'] = df_female['Cup Size'].astype('category').cat.codes

In [None]:
X = df_female[['Bust/Chest', 'Cup Size', 'Waist']]
# Y = df_female[['Body Shape Index', 'Size']]
Y = df_female[['Size']]

In [None]:
print(X)
print(Y)

     Bust/Chest  Cup Size  Waist
0            37         6     35
5            37         6     28
6            33         5     23
8            41         4     25
12           42         5     24
..          ...       ...    ...
989          45         0     29
994          30         0     28
995          40         5     34
996          47         7     36
998          34         6     25

[525 rows x 3 columns]
     Size
0       6
5       3
6       1
8       8
12      8
..    ...
989     8
994     3
995     5
996     8
998     1

[525 rows x 1 columns]


In [None]:
x_train, x_test, y_train, y_test = model_sel.train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=101)

In [None]:
print(x_train)

     Bust/Chest  Cup Size  Waist
257          34         0     26
544          31         3     39
236          28         6     31
95           40         5     34
423          44         4     38
..          ...       ...    ...
586          40         2     28
747          38         0     30
140          31         3     30
628          38         7     31
996          47         7     36

[420 rows x 3 columns]


In [None]:
print(y_train)

     Size
257     2
544     8
236     2
95      5
423     8
..    ...
586     3
747     4
140     3
628     4
996     8

[420 rows x 1 columns]


In [None]:
np.unique(y_train)

array([1, 2, 3, 4, 5, 6, 7, 8])

# USING HYPER PARAMETER TUNING TO IMPROVE ACCURACY METRICS

In [None]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'degree': [1,2,3,4],
              'kernel': ['poly']}

In [None]:
grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 3)

In [None]:
grid.fit(x_train, y_train)

In [None]:
poly_predict = grid.predict(x_test)

# ACCURACY

In [None]:
print(accuracy_score(y_test, poly_predict))
print(f1_score(y_test, poly_predict, average='weighted'))

0.9523809523809523
0.9645806040561532


In [None]:
import pickle as pk
pk.dump(poly, open('SVM_model_female.bin', 'wb'))