# IMPORT MODULES

In [1]:
import pandas as pd

# CATEGORIES CLASS

In [2]:
class Categories:
    """One category (level) of a feature.

    Stores the category's label and its position inside the feature, plus
    the probability and entropy statistics, which both start at 0.
    """

    def __init__(self, _name, _index):
        # Identity of the category: its label and its position in the feature.
        self.name, self.index = _name, _index
        # Statistics are zero until they are computed from a dataset.
        self.prob = self.entropy = 0

# FEATURES CLASS

In [3]:
class Features:
    """One feature (column) of the dataset together with its statistics.

    Attributes
    ----------
    name, feat_type : the values passed to the constructor.
    num_cat : number of categories, 0 until set.
    categories : dict, empty until populated.
    entropy, std_entropy, reliability, weight : numeric statistics, all 0
        until computed.
    distances : dict, empty until populated.
    category_order : dict, empty until populated.
    """

    def __init__(self, _name, _feat_type):
        self.name, self.feat_type = _name, _feat_type
        self.num_cat = 0
        # Every instance gets its own fresh containers.
        self.categories = {}
        self.entropy = self.std_entropy = self.reliability = self.weight = 0
        self.distances = {}
        self.category_order = {}

# ORDINAL DATA

In [4]:
def read_ord(data):
    """Build the ordinal feature definitions from a header-less CSV.

    Every CSV row describes one feature: the first column is the feature
    name (it becomes the row index) and the remaining non-null cells are the
    feature's categories, taken left to right.

    Parameters
    ----------
    data : str, path object or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    dict
        Maps each feature name to a ``Features`` instance of type
        ``'ordinal'`` whose ``num_cat``, ``categories`` (name -> ``Categories``)
        and ``category_order`` (position -> name) are filled in.

    Raises
    ------
    OSError, pandas.errors.EmptyDataError, pandas.errors.ParserError
        Re-raised unchanged after the error message has been printed.
    """
    try:
        ordinal_categories = pd.read_csv(data, header=None, index_col=0)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # A missing/unreadable file or malformed content ends up here.
        # Re-raise so we never carry on with an undefined frame.
        print('Error Importing Ordinal Data')
        raise

    features = dict()

    for feat in ordinal_categories.index:
        feat_instance = Features(feat, 'ordinal')

        # Rows are padded with nulls up to the widest row; keep the real cells.
        row = ordinal_categories.loc[feat, :]
        cat_names = row[row.notnull()].tolist()
        feat_instance.num_cat = len(cat_names)

        for _index, _name in enumerate(cat_names):
            feat_instance.categories[_name] = Categories(_name, _index)
            feat_instance.category_order[_index] = _name

        features[feat] = feat_instance

    return features

# NOMINAL DATA

In [5]:
def read_nom(data):
    """Build the nominal feature definitions from a header-less CSV.

    Every CSV row describes one feature: the first column is the feature
    name (it becomes the row index) and the remaining non-null cells are the
    feature's categories, taken left to right.

    Parameters
    ----------
    data : str, path object or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    dict
        Maps each feature name to a ``Features`` instance of type
        ``'nominal'`` whose ``num_cat``, ``categories`` (name -> ``Categories``)
        and ``category_order`` (position -> name) are filled in.

    Raises
    ------
    OSError, pandas.errors.EmptyDataError, pandas.errors.ParserError
        Re-raised unchanged after the error message has been printed.
    """
    try:
        nominal_categories = pd.read_csv(data, header=None, index_col=0)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # A missing/unreadable file or malformed content ends up here.
        # Re-raise so we never carry on with an undefined frame.
        print('Error Importing Nominal Data')
        raise

    features = dict()

    for feat in nominal_categories.index:
        feat_instance = Features(feat, 'nominal')

        # Rows are padded with nulls up to the widest row; keep the real cells.
        row = nominal_categories.loc[feat, :]
        cat_names = row[row.notnull()].tolist()
        feat_instance.num_cat = len(cat_names)

        for _index, _name in enumerate(cat_names):
            feat_instance.categories[_name] = Categories(_name, _index)
            feat_instance.category_order[_index] = _name

        features[feat] = feat_instance

    return features

# TEST READ ORD AND NOM

In [6]:
# Build the ordinal and nominal feature definitions from their CSV files, then
# merge the nominal ones into the same dict so `feat` covers both types.
# A nominal feature sharing a name with an ordinal one would replace it.
feat = read_ord('Preprocessed_Ordinal.csv')
feat_nom = read_nom('Preprocessed_Nominal.csv')
feat.update(feat_nom)

In [7]:
# Dump every feature's name, type and category count, followed by an indented
# listing of its categories (position and label).
for f, feature in feat.items():
    print('name:', f)
    print('type:', feature.feat_type)
    print('num_cat:', feature.num_cat)
    print()
    for cat, category in feature.categories.items():
        print('    index:', category.index)
        print('    name:', category.name)
        print()
    print()

name: achievingends_quality
type: ordinal
num_cat: 3

    index: 0
    name: poor

    index: 1
    name: medium

    index: 2
    name: good


name: anger_quality
type: ordinal
num_cat: 3

    index: 0
    name: poor

    index: 1
    name: medium

    index: 2
    name: good


name: anger_speed
type: ordinal
num_cat: 4

    index: 0
    name: slowly

    index: 1
    name: moderately

    index: 2
    name: quickly

    index: 3
    name: variably


name: appetite_amount
type: ordinal
num_cat: 4

    index: 0
    name: low

    index: 1
    name: medium

    index: 2
    name: high

    index: 3
    name: variable


name: bladder_amount
type: ordinal
num_cat: 4

    index: 0
    name: low

    index: 1
    name: medium

    index: 2
    name: high

    index: 3
    name: variable


name: body_odour
type: ordinal
num_cat: 3

    index: 0
    name: veryless

    index: 1
    name: mild

    index: 2
    name: strong


name: bodybuild_size
type: ordinal
num_cat: 3

    index: 0
    name

    index: 1
    name: medium

    index: 2
    name: quick

    index: 3
    name: variable


name: stool_consistency
type: ordinal
num_cat: 3

    index: 0
    name: loose/soft/semisolid

    index: 1
    name: medium

    index: 2
    name: hard


name: teeth_color
type: ordinal
num_cat: 3

    index: 0
    name: milkywhite

    index: 1
    name: yellowish

    index: 2
    name: dull/blackish


name: teeth_size
type: ordinal
num_cat: 4

    index: 0
    name: toosmall

    index: 1
    name: medium

    index: 2
    name: large

    index: 3
    name: toolarge


name: tongue_movements
type: ordinal
num_cat: 3

    index: 0
    name: less

    index: 1
    name: moderate

    index: 2
    name: high/excessive


name: walking_amount
type: ordinal
num_cat: 3

    index: 0
    name: less

    index: 1
    name: moderate

    index: 2
    name: high/excessive


name: walking_speed
type: ordinal
num_cat: 4

    index: 0
    name: slow

    index: 1
    name: medium

    index: 2
    nam

type: nominal
num_cat: 2

    index: 0
    name: yes

    index: 1
    name: oily


name: suit_pungent
type: nominal
num_cat: 2

    index: 0
    name: no

    index: 1
    name: yes


name: suit_sweet
type: nominal
num_cat: 2

    index: 0
    name: yes

    index: 1
    name: no


name: suit_warm
type: nominal
num_cat: 2

    index: 0
    name: yes

    index: 1
    name: no


name: teeth_brittle
type: nominal
num_cat: 2

    index: 0
    name: no

    index: 1
    name: yes


name: teeth_loose
type: nominal
num_cat: 2

    index: 0
    name: no

    index: 1
    name: yes


name: teeth_shape
type: nominal
num_cat: 2

    index: 0
    name: regular

    index: 1
    name: irregular


name: teeth_shape_even
type: nominal
num_cat: 2

    index: 0
    name: even

    index: 1
    name: uneven


name: voice_clear
type: nominal
num_cat: 2

    index: 0
    name: no

    index: 1
    name: yes


name: walking_style
type: nominal
num_cat: 3

    index: 0
    name: firm/steady

    index: 1


## Verdict: Working

Can also try running for nominal here if you want.


# READ POPULATION

In [8]:
 # Load the dataset; the first CSV column becomes the row index.
 df = pd.read_csv('Preprocessed_Dataset.csv', index_col=0)

In [9]:
# Remove the 'prakriti' column. Rebinding `df` with errors='ignore' makes this
# cell safe to re-run: a second run finds no such column and changes nothing.
df = df.drop(columns='prakriti', errors='ignore')

In [10]:
# Preview the first rows of the dataset.
df.head()

Unnamed: 0_level_0,achievingends_quality,anger_quality,anger_speed,appetite_amount,appetite_frequency,bladder_amount,bladder_frequency,body_odour,bodybuild_size,bodyframe_breadth,...,working_quality,working_speed,working_style,skin_clear,skin_cracked,skin_freckle,skin_mark,skin_mole,skin_pimple,skin_wrinkled
sampleid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
V091206280,good,medium,slowly,medium,regular,medium,regular,mild,moderatelydeveloped,medium,...,sharp/accurate/spontaneous,medium,sharp/accurate,yes,no,no,no,no,no,no
V091201522,poor,good,quickly,medium,regular,medium,regular,veryless,welldeveloped,broad,...,wellthoughtof,slow,firm/steady,no,yes,no,no,no,yes,no
V091206079,poor,good,slowly,medium,regular,high,regular,strong,weaklydeveloped,thin/narrow,...,wavering/easilydeviated,quick/fast/brisk,unsteady,no,no,no,yes,yes,no,no
V091202231,medium,medium,variably,high,regular,medium,regular,strong,moderatelydeveloped,medium,...,sharp/accurate/spontaneous,quick/fast/brisk,sharp/accurate,no,no,no,yes,no,no,no
V091209882,poor,poor,quickly,variable,irregular,medium,regular,mild,moderatelydeveloped,medium,...,sharp/accurate/spontaneous,medium,sharp/accurate,no,no,yes,yes,no,no,no


In [11]:
def probability(df, features):
    """Store each category's observed relative frequency on the features.

    For every feature in ``features`` the column of the same name in ``df``
    is counted with ``value_counts(normalize=True)`` and the resulting share
    is written to the matching category's ``prob``. Categories that never
    occur in the data are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain one column per key of ``features``.
    features : dict
        Feature name -> object exposing a ``categories`` dict whose values
        have a writable ``prob`` attribute.

    Returns
    -------
    dict
        The same ``features`` object, updated in place.

    Raises
    ------
    KeyError
        If ``df`` lacks a feature's column, or if the data contains a value
        that is not declared as a category of that feature (typically a
        spelling mismatch between the dataset and the category file).
    """
    for feat, feature in features.items():
        declared = feature.categories
        observed = df[feat].value_counts(normalize=True, sort=False)
        for cat, share in observed.items():
            if cat not in declared:
                # Name both the feature and the value so the mismatch can be
                # located in the input files.
                raise KeyError(
                    'category {!r} of feature {!r} occurs in the data but is '
                    'not declared for that feature'.format(cat, feat)
                )
            declared[cat].prob = share

    return features

In [12]:
# Fill in each category's observed relative frequency from the dataset.
feat = probability(df,feat)

In [13]:
# One compact section per feature: its name, then type and category count,
# then one line per category with position, label and probability.
for f, feature in feat.items():
    print(f)
    print(feature.feat_type, feature.num_cat)
    for cat, category in feature.categories.items():
        print(category.index, category.name, category.prob)
    print()

achievingends_quality
ordinal 3
0 poor 0.1984126984126984
1 medium 0.45634920634920634
2 good 0.34523809523809523

anger_quality
ordinal 3
0 poor 0.192
1 medium 0.572
2 good 0.236

anger_speed
ordinal 4
0 slowly 0.18326693227091634
1 moderately 0.3187250996015936
2 quickly 0.47808764940239046
3 variably 0.0199203187250996

appetite_amount
ordinal 4
0 low 0.17391304347826086
1 medium 0.6521739130434783
2 high 0.11462450592885376
3 variable 0.05928853754940711

bladder_amount
ordinal 4
0 low 0.012195121951219513
1 medium 0.8780487804878049
2 high 0.07317073170731707
3 variable 0.036585365853658534

body_odour
ordinal 3
0 veryless 0.44621513944223107
1 mild 0.44621513944223107
2 strong 0.10756972111553785

bodybuild_size
ordinal 3
0 weaklydeveloped 0.2964426877470356
1 moderatelydeveloped 0.44664031620553357
2 welldeveloped 0.25691699604743085

bodyframe_breadth
ordinal 3
0 thin/narrow 0.3531746031746032
1 medium 0.39285714285714285
2 broad 0.25396825396825395

bodyframe_length
ordinal 3


0 no 0.8695652173913043
1 yes 0.13043478260869565

skin_freckle
nominal 2
0 no 0.7154150197628458
1 yes 0.2845849802371542

skin_mark
nominal 2
0 no 0.6047430830039525
1 yes 0.3952569169960474

skin_mole
nominal 2
0 no 0.6877470355731226
1 yes 0.31225296442687744

skin_nature
nominal 7
0 dry 0.3492063492063492
1 oily 0.14682539682539683
2 normal 0.4166666666666667
3 seasonal/variable 0.0873015873015873
4 thick 0
5 thin 0
6 hard 0

skin_pimple
nominal 2
0 no 0.8063241106719368
1 yes 0.19367588932806323

skin_wrinkled
nominal 2
0 no 0.691699604743083
1 yes 0.308300395256917

sleep_quality
nominal 3
0 deep 0.18725099601593626
1 sound 0.41832669322709165
2 shallow 0.3944223107569721

soles_color
nominal 4
0 dark 0.2261904761904762
1 reddish 0.15476190476190477
2 paleyellow 0.5119047619047619
3 pink 0.10714285714285714

soles_firm
nominal 2
0 no 0.6679841897233202
1 yes 0.33201581027667987

soles_cracked
nominal 2
0 no 0.5849802371541502
1 yes 0.4150197628458498

soles_wrinkled
nominal 2
0 

In [14]:
def entropy(features):
    """Compute the entropy term ``-p * ln(p)`` of every category.

    Categories whose ``prob`` is falsy (zero) are skipped, so their
    ``entropy`` keeps whatever value it already had.

    Returns the same ``features`` dict, updated in place.
    """
    for feature in features.values():
        for category in feature.categories.values():
            p = category.prob
            if not p:
                # ln(0) is undefined; leave this category's entropy alone.
                continue
            category.entropy = -p * math.log(p)

    return features

In [15]:
# `entropy` looks up `math` when it is called, so this import has to run
# before the call below.
import math
# Compute the -p*ln(p) term of every category that has a non-zero probability.
feat = entropy(feat)

In [16]:
# One compact section per feature: its name, then type and category count,
# then one line per category with position, label, probability and entropy.
for f, feature in feat.items():
    print(f)
    print(feature.feat_type, feature.num_cat)
    for cat, category in feature.categories.items():
        print(category.index, category.name, category.prob, category.entropy)
    print()

achievingends_quality
ordinal 3
0 poor 0.1984126984126984 0.3209139051752534
1 medium 0.45634920634920634 0.35800456469063463
2 good 0.34523809523809523 0.3671679535339089

anger_quality
ordinal 3
0 poor 0.192 0.3168499021352363
1 medium 0.572 0.31952851650853803
2 good 0.236 0.3407659398537404

anger_speed
ordinal 4
0 slowly 0.18326693227091634 0.3109694460620067
1 moderately 0.3187250996015936 0.364438662775427
2 quickly 0.47808764940239046 0.3528101337130221
3 variably 0.0199203187250996 0.07800826746409728

appetite_amount
ordinal 4
0 low 0.17391304347826086 0.30420867040161026
1 medium 0.6521739130434783 0.2787678357566998
2 high 0.11462450592885376 0.24828741542881558
3 variable 0.05928853754940711 0.16751023444418836

bladder_amount
ordinal 4
0 low 0.012195121951219513 0.053740478625173824
1 medium 0.8780487804878049 0.11419299065695415
2 high 0.07317073170731707 0.19133852034411206
3 variable 0.036585365853658534 0.12102830336327353

body_odour
ordinal 3
0 veryless 0.4462151394

1 yes 0.12648221343873517 0.2615214021726854

nails_brittle
nominal 2
0 no 0.9486166007905138 0.05004006202579823
1 yes 0.05138339920948617 0.15252854429429955

nails_palate
nominal 2
0 flat 0.4248927038626609 0.36367356969819725
1 convex 0.575107296137339 0.31814858190721784

palate_color
nominal 4
0 dark 0.003968253968253968 0.021942178918696122
1 reddish 0.011904761904761904 0.05274781903384897
2 paleyellow 0.05952380952380952 0.16793921942911982
3 pink 0.9246031746031746 0.07248022900537059

palms_color
nominal 4
0 dark 0.20553359683794467 0.32518411086006654
1 reddish 0.22924901185770752 0.3376715246423077
2 paleyellow 0.3794466403162055 0.3676994645728445
3 pink 0.1857707509881423 0.31269710944593165

palms_firm
nominal 2
0 no 0.5691699604743083 0.3207706373036317
1 yes 0.4308300395256917 0.3627768186099725

palms_cracked
nominal 2
0 no 0.8656126482213439 0.12492327747633697
1 yes 0.13438735177865613 0.26971930742998496

palms_wrinkled
nominal 2
0 no 0.9169960474308301 0.07945964

In [17]:
def probability(df, features):
    """Store each category's observed relative frequency on the features.

    For every feature in ``features`` the column of the same name in ``df``
    is counted with ``value_counts(normalize=True)`` and the resulting share
    is written to the matching category's ``prob``. Categories that never
    occur in the data are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain one column per key of ``features``.
    features : dict
        Feature name -> object exposing a ``categories`` dict whose values
        have a writable ``prob`` attribute.

    Returns
    -------
    dict
        The same ``features`` object, updated in place.

    Raises
    ------
    KeyError
        If ``df`` lacks a feature's column, or if the data contains a value
        that is not declared as a category of that feature (typically a
        spelling mismatch between the dataset and the category file).
    """
    for feat, feature in features.items():
        declared = feature.categories
        observed = df[feat].value_counts(normalize=True, sort=False)
        for cat, share in observed.items():
            if cat not in declared:
                # Name both the feature and the value so the mismatch can be
                # located in the input files.
                raise KeyError(
                    'category {!r} of feature {!r} occurs in the data but is '
                    'not declared for that feature'.format(cat, feat)
                )
            declared[cat].prob = share

    return features

def entropy(features):
    """Compute the entropy term ``-p * ln(p)`` of every category.

    Categories whose ``prob`` is falsy (zero) are skipped, so their
    ``entropy`` keeps whatever value it already had.

    Returns the same ``features`` dict, updated in place.
    """
    for feature in features.values():
        for category in feature.categories.values():
            p = category.prob
            if not p:
                # ln(0) is undefined; leave this category's entropy alone.
                continue
            category.entropy = -p * math.log(p)

    return features


def std_entropy(features):
    """Set each feature's ``std_entropy`` to ``ln(num_cat)``.

    Features whose ``num_cat`` is falsy (zero) are skipped and keep their
    current ``std_entropy``.

    Returns the same ``features`` dict, updated in place.
    """
    for feature in features.values():
        count = feature.num_cat
        if not count:
            # ln(0) is undefined; nothing to compute for this feature.
            continue
        feature.std_entropy = math.log(count)

    return features

def feature_entropy(features):
    """Set each feature's ``entropy`` to the sum of its categories' entropies.

    A feature without categories ends up with an entropy of 0.

    Returns the same ``features`` dict, updated in place.
    """
    for feature in features.values():
        running = 0
        # Accumulate in category order.
        for category in feature.categories.values():
            running += category.entropy
        feature.entropy = running

    return features

def reliability(features):
    """Set each feature's ``reliability`` to ``entropy / std_entropy``.

    A feature whose ``std_entropy`` is zero cannot be normalised (this is
    the case for a single category, because ln(1) == 0, and for a feature
    whose ``std_entropy`` was never computed). Such a feature gets a
    reliability of 0 and a warning is printed; processing then continues
    with the remaining features.

    Returns the same ``features`` dict, updated in place.
    """
    for key, feature in features.items():
        if feature.std_entropy:
            feature.reliability = feature.entropy / feature.std_entropy
        else:
            # Test the divisor instead of catching ZeroDivisionError: the
            # failing feature is reported by name and the loop keeps going.
            print('Warning: feature {!r} has zero std_entropy; '
                  'reliability set to 0'.format(key))
            feature.reliability = 0

    return features

def total_reliability(features):
    """Return the sum of the ``reliability`` of all features (0 if none)."""
    running = 0
    # Accumulate in feature order.
    for feature in features.values():
        running += feature.reliability

    return running

def weights(features, total_reliability):
    """Set each feature's ``weight`` to its share of the total reliability.

    ``total_reliability`` is the divisor applied to every feature's
    ``reliability``. Returns the same ``features`` dict, updated in place.
    """
    for feature in features.values():
        feature.weight = feature.reliability / total_reliability

    return features

In [18]:
# Set each feature's std_entropy to ln(num_cat).
feat = std_entropy(feat)

In [19]:
# Sum the per-category entropies into each feature's entropy.
feat = feature_entropy(feat)

In [20]:
# reliability = entropy / std_entropy for each feature.
feat = reliability(feat)

In [21]:
# Sum of all feature reliabilities; used as the divisor for the weights.
total_rel = total_reliability(feat)

In [22]:
# weight = reliability / total reliability for each feature.
feat = weights(feat, total_rel)


In [23]:
# Full report: the feature-level statistics first, then an indented listing
# of every category with its position, label, probability and entropy.
for f, feature in feat.items():
    print('name:', f)
    print('type:', feature.feat_type)
    print('num_cat:', feature.num_cat)
    print('entropy:', feature.entropy)
    print('std_entropy:', feature.std_entropy)
    print('reliability:', feature.reliability)
    print()
    for cat, category in feature.categories.items():
        print('    index:', category.index)
        print('    name:', category.name)
        print('    prob:', category.prob)
        print('    entropy:', category.entropy)
        print()
    print()

name: achievingends_quality
type: ordinal
num_cat: 3
entropy: 1.046086423399797
std_entropy: 1.0986122886681098
reliability: 0.9521888970202655

    index: 0
    name: poor
    prob: 0.1984126984126984
    entropy: 0.3209139051752534

    index: 1
    name: medium
    prob: 0.45634920634920634
    entropy: 0.35800456469063463

    index: 2
    name: good
    prob: 0.34523809523809523
    entropy: 0.3671679535339089


name: anger_quality
type: ordinal
num_cat: 3
entropy: 0.9771443584975147
std_entropy: 1.0986122886681098
reliability: 0.8894351251815549

    index: 0
    name: poor
    prob: 0.192
    entropy: 0.3168499021352363

    index: 1
    name: medium
    prob: 0.572
    entropy: 0.31952851650853803

    index: 2
    name: good
    prob: 0.236
    entropy: 0.3407659398537404


name: anger_speed
type: ordinal
num_cat: 4
entropy: 1.1062265100145532
std_entropy: 1.3862943611198906
reliability: 0.7979737500489505

    index: 0
    name: slowly
    prob: 0.18326693227091634
    entrop

    name: tooshort/toolong
    prob: 0.02766798418972332
    entropy: 0.09925832165100967

    index: 1
    name: medium
    prob: 0.8695652173913043
    entropy: 0.12153212380448586

    index: 2
    name: long
    prob: 0.10276679841897234
    entropy: 0.2338245720093162


name: hands_movements
type: ordinal
num_cat: 3
entropy: 0.9063395711588029
std_entropy: 1.0986122886681098
reliability: 0.8249858303128882

    index: 0
    name: less
    prob: 0.32806324110671936
    entropy: 0.3656425182500654

    index: 1
    name: moderate
    prob: 0.5770750988142292
    entropy: 0.31726600231146573

    index: 2
    name: high/excessive
    prob: 0.09486166007905138
    entropy: 0.2234310505972719


name: head_movements
type: ordinal
num_cat: 3
entropy: 0.6892122534807857
std_entropy: 1.0986122886681098
reliability: 0.6273480285900902

    index: 0
    name: less
    prob: 0.746031746031746
    entropy: 0.218577696190941

    index: 1
    name: moderate
    prob: 0.20634920634920634
    ent

    name: medium
    prob: 0.8063241106719368
    entropy: 0.173576984016577

    index: 2
    name: large
    prob: 0.1383399209486166
    entropy: 0.27364209467720846

    index: 3
    name: toolarge
    prob: 0.003952569169960474
    entropy: 0.02187110469852775


name: tongue_movements
type: ordinal
num_cat: 3
entropy: 0.32089472577804246
std_entropy: 1.0986122886681098
reliability: 0.2920909670208364

    index: 0
    name: less
    prob: 0.9090909090909091
    entropy: 0.08664561800393172

    index: 1
    name: moderate
    prob: 0.08695652173913043
    entropy: 0.21237800307558297

    index: 2
    name: high/excessive
    prob: 0.003952569169960474
    entropy: 0.02187110469852775


name: walking_amount
type: ordinal
num_cat: 3
entropy: 0.7702937079452042
std_entropy: 1.0986122886681098
reliability: 0.7011515489955616

    index: 0
    name: less
    prob: 0.09090909090909091
    entropy: 0.21799047934530644

    index: 1
    name: moderate
    prob: 0.7193675889328063
    ent

std_entropy: 0.6931471805599453
reliability: 0.45249251741562274

    index: 0
    name: yes
    prob: 0.09486166007905138
    entropy: 0.2234310505972719

    index: 1
    name: no
    prob: 0.9051383399209486
    entropy: 0.09021286207383895


name: skin_color
type: nominal
num_cat: 8
entropy: 1.2406413819310835
std_entropy: 2.0794415416798357
reliability: 0.596622389744535

    index: 0
    name: fairpaleyellow
    prob: 0.09523809523809523
    entropy: 0.22394050068223595

    index: 1
    name: dark
    prob: 0.4087301587301587
    entropy: 0.3656909135953338

    index: 2
    name: fairreddish
    prob: 0.06349206349206349
    entropy: 0.17503748350931062

    index: 3
    name: whitish
    prob: 0.4007936507936508
    entropy: 0.3664490699908196

    index: 4
    name: fairpink
    prob: 0.031746031746031744
    entropy: 0.10952341415338372

    index: 5
    name: golden
    prob: 0
    entropy: 0

    index: 6
    name: dusky
    prob: 0
    entropy: 0

    index: 7
    name: f

In [24]:
# Spot check: position of 'medium' among the bodyframe_length categories.
feat['bodyframe_length'].categories['medium'].index

1

In [25]:
# # for each individual
# def distance_matrix(df, features):
#     dist_outer = list()
#     for index1 in df.index:
#         dist_inner = list()
#         for index2 in df.index:
#             if(index1 == index2):
#                 distance.append(0)
#             else:
#                 for feat in df.columns:
#                     dist_val = 0
#                     cat1 = features[feat].categories[df.loc[index1, feat]]
#                     cat2 = features[feat].categories[df.loc[index2, feat]]
#                     if(features[f].feat_type == 'nominal'):
#                         dist_val = features[feat].weights * (cat1.entropy + cat2.entropy)
#                     else:
#                         dist_val

# # for each category

# def distance(features):
#     for feat in features:
#         for cat1 in features[feat].categories.values():
#             dist_inner = dict()
#             for cat2 in features[feat].categories.values():
#                 if(cat1 == cat2):
#                     dist_inner[cat2] = 0
    

## Verdict: Working

Next, work on the distance computation in the file distance.py; you may start from scratch.

# Calculating Distances

In [26]:
# calculating distances between each category of an attribute

def distance_between_categories(features):
    """Fill every feature's ``distances`` table (category -> category -> value).

    For each feature the table is symmetric with zeros on the diagonal.
    With ``i < j`` denoting positions in ``category_order``:

    * ordinal:  d(i, j) = d(i, j-1) + weight * entropy(j), i.e. the weighted
      entropies of the categories after ``i`` up to and including ``j``;
    * any other type:  d(i, j) = weight * (entropy(j) + entropy(i)).

    The keys of ``category_order`` are expected to be 0 .. num_cat-1.
    NOTE(review): in the ordinal case the entropy of category ``i`` itself is
    not part of the sum, unlike the nominal case - confirm this is intended.

    Returns the same ``features`` dict, updated in place.
    """
    # Pass 1: give every category an empty row in its feature's table.
    for feature in features.values():
        for label in feature.category_order.values():
            feature.distances[label] = {}

    # Pass 2: fill the rows.
    for feature in features.values():
        order = feature.category_order
        table = feature.distances
        cats = feature.categories
        is_ordinal = feature.feat_type == 'ordinal'

        for i in order.keys():
            here = order[i]
            table[here][here] = 0

            for j in range(i + 1, feature.num_cat):
                there = order[j]
                if is_ordinal:
                    # Extend the distance already known for the previous category.
                    total = table[here][order[j - 1]] + feature.weight * cats[there].entropy
                else:
                    total = feature.weight * (cats[there].entropy + cats[here].entropy)
                # Store both directions so lookups work in either order.
                table[here][there] = total
                table[there][here] = total

    return features



In [27]:
def distance_between_individuals(data, features):
    """Compute the pairwise distance matrix between all rows of ``data``.

    The distance between two rows is the square root of the sum, over all
    columns, of the squared category-to-category distance taken from
    ``features[col].distances``. A column contributes only when both rows
    hold a string there, so missing answers are skipped.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per individual, one column per feature.
    features : dict
        Column name -> object exposing a nested ``distances`` dict
        (category -> category -> value).

    Returns
    -------
    pandas.DataFrame
        Square matrix with default integer labels 0..n-1 on both axes,
        ordered like the rows of ``data``.
    """
    columns = list(data.columns)
    # Pull every row out of the frame once. Plain tuples are far cheaper than
    # a label-based .loc lookup per cell inside the n*n*m loop below.
    rows = list(data.itertuples(index=False, name=None))

    dist_mat = []
    for row_a in rows:
        dist_row = []
        for row_b in rows:
            squared = 0
            for col, val_a, val_b in zip(columns, row_a, row_b):
                # Only strings are categories; anything else is a missing cell.
                if isinstance(val_a, str) and isinstance(val_b, str):
                    squared += features[col].distances[val_a][val_b] ** 2
            dist_row.append(math.sqrt(squared))
        dist_mat.append(dist_row)

    return pd.DataFrame(dist_mat)

In [28]:
# Populate every feature's category-to-category distance table, then
# spot-check one entry.
feat = distance_between_categories(feat)
feat["bodyframe_length"].distances["medium"]["long"]

0.0022525860286317662

In [29]:
# Pairwise distances between all individuals in the dataset. Every pair of
# rows is compared across every column, so this cell can take a while.
dist_mat = distance_between_individuals(df, feat)

In [31]:
# Summary statistics for each column of the distance matrix.
dist_mat.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,243,244,245,246,247,248,249,250,251,252
count,253.0,253.0,253.0,253.0,253.0,253.0,253.0,253.0,253.0,253.0,...,253.0,253.0,253.0,253.0,253.0,253.0,253.0,253.0,253.0,253.0
mean,0.03171,0.034298,0.03534,0.032701,0.032719,0.031943,0.033121,0.032112,0.032043,0.032681,...,0.033241,0.034756,0.032288,0.031875,0.032815,0.03381,0.033076,0.033831,0.033275,0.03236
std,0.003783,0.003404,0.003772,0.003549,0.00324,0.003292,0.003151,0.003619,0.003525,0.003389,...,0.004338,0.00344,0.003673,0.003661,0.004176,0.004555,0.003552,0.004128,0.003359,0.004215
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.02942,0.032861,0.033315,0.030439,0.030981,0.030612,0.031596,0.030217,0.029925,0.030608,...,0.030898,0.033206,0.03039,0.029855,0.030855,0.031435,0.031147,0.031557,0.031634,0.029627
50%,0.031764,0.034531,0.035625,0.033218,0.033019,0.032428,0.033294,0.032162,0.032275,0.032811,...,0.033501,0.035105,0.032221,0.032251,0.033498,0.034277,0.033291,0.033841,0.033207,0.033184
75%,0.034258,0.036145,0.037516,0.03492,0.034702,0.033917,0.034705,0.034215,0.034278,0.034999,...,0.036107,0.036505,0.0343,0.034093,0.035681,0.036896,0.035154,0.036587,0.03522,0.035055
max,0.038275,0.040357,0.042597,0.040183,0.038417,0.037156,0.040319,0.039672,0.039861,0.038153,...,0.041114,0.041126,0.040806,0.039994,0.040488,0.04185,0.039919,0.042089,0.040978,0.039571


## Verdict: Mostly working, please review!

 - Found a minor error in Preprocessed_Nominal.csv: it listed 'grey' as a category of eye_color, whereas the dataset uses 'grayish' (now edited).

 - Minor edit in weights fn