In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

In [2]:
train = pd.read_csv('Train.csv')
test = pd.read_csv('Test.csv')
ss = pd.read_csv('SampleSubmission.csv')
variables = pd.read_csv('VariableDefinitions.csv')

In [3]:
print('train data shape :', train.shape)
print('test data shape :', test.shape)

train data shape : (23524, 13)
test data shape : (10086, 12)


In [4]:
train.head()

Unnamed: 0,country,year,uniqueid,bank_account,location_type,cellphone_access,household_size,age_of_respondent,gender_of_respondent,relationship_with_head,marital_status,education_level,job_type
0,Kenya,2018,uniqueid_1,Yes,Rural,Yes,3,24,Female,Spouse,Married/Living together,Secondary education,Self employed
1,Kenya,2018,uniqueid_2,No,Rural,No,5,70,Female,Head of Household,Widowed,No formal education,Government Dependent
2,Kenya,2018,uniqueid_3,Yes,Urban,Yes,5,26,Male,Other relative,Single/Never Married,Vocational/Specialised training,Self employed
3,Kenya,2018,uniqueid_4,No,Rural,Yes,5,34,Female,Head of Household,Married/Living together,Primary education,Formally employed Private
4,Kenya,2018,uniqueid_5,No,Urban,No,8,26,Male,Child,Single/Never Married,Primary education,Informally employed


In [5]:
test.head()

Unnamed: 0,country,year,uniqueid,location_type,cellphone_access,household_size,age_of_respondent,gender_of_respondent,relationship_with_head,marital_status,education_level,job_type
0,Kenya,2018,uniqueid_6056,Urban,Yes,3,30,Male,Head of Household,Married/Living together,Secondary education,Formally employed Government
1,Kenya,2018,uniqueid_6060,Urban,Yes,7,51,Male,Head of Household,Married/Living together,Vocational/Specialised training,Formally employed Private
2,Kenya,2018,uniqueid_6065,Rural,No,3,77,Female,Parent,Married/Living together,No formal education,Remittance Dependent
3,Kenya,2018,uniqueid_6072,Rural,No,6,39,Female,Head of Household,Married/Living together,Primary education,Remittance Dependent
4,Kenya,2018,uniqueid_6073,Urban,No,3,16,Male,Child,Single/Never Married,Secondary education,Remittance Dependent


In [6]:
# Convert target label to numerical Data
le = LabelEncoder()
train['bank_account'] = le.fit_transform(train['bank_account'])

#Separate training features from target
X_train = train.drop(['bank_account'], axis=1)
y_train = train['bank_account']

print(y_train)

0        1
1        0
2        1
3        0
4        0
        ..
23519    0
23520    0
23521    0
23522    0
23523    0
Name: bank_account, Length: 23524, dtype: int32


In [7]:
train.head()

Unnamed: 0,country,year,uniqueid,bank_account,location_type,cellphone_access,household_size,age_of_respondent,gender_of_respondent,relationship_with_head,marital_status,education_level,job_type
0,Kenya,2018,uniqueid_1,1,Rural,Yes,3,24,Female,Spouse,Married/Living together,Secondary education,Self employed
1,Kenya,2018,uniqueid_2,0,Rural,No,5,70,Female,Head of Household,Widowed,No formal education,Government Dependent
2,Kenya,2018,uniqueid_3,1,Urban,Yes,5,26,Male,Other relative,Single/Never Married,Vocational/Specialised training,Self employed
3,Kenya,2018,uniqueid_4,0,Rural,Yes,5,34,Female,Head of Household,Married/Living together,Primary education,Formally employed Private
4,Kenya,2018,uniqueid_5,0,Urban,No,8,26,Male,Child,Single/Never Married,Primary education,Informally employed


In [8]:
 # Convert the following numerical labels from interger to float
float_array = train[["household_size", "age_of_respondent", "year"]].values.astype(float)
    
# categorical features to be onverted to One Hot Encoding
categ = ["relationship_with_head",
             "marital_status",
             "education_level",
             "job_type",
             "country"]
    
# One Hot Encoding conversion
train = pd.get_dummies(train, prefix_sep="_", columns=categ)
    
# Label Encoder conversion
train["location_type"] = le.fit_transform(train["location_type"])
train["cellphone_access"] = le.fit_transform(train["cellphone_access"])
train["gender_of_respondent"] = le.fit_transform(train["gender_of_respondent"])
    
# drop uniquid column
train = train.drop(["uniqueid"], axis=1)
    
# scale our data into range of 0 and 1
scaler = MinMaxScaler(feature_range=(0, 1))
train = scaler.fit_transform(train)

In [16]:
train

array([[1., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 1.],
       [1., 0., 1., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 1.]])

In [12]:
import sklearn.model_selection

In [13]:
# Split train_data
from sklearn.model_selection import train_test_split

X_Train, X_Val, y_Train, y_val = train_test_split(X_train, y_train, stratify = y_train, 
                                                  test_size = 0.3, random_state=42)

In [17]:
# calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)-1):
		distance += (row1[i] - row2[i])**2
	return sqrt(distance)


In [18]:
# Example of calculating Euclidean distance
from math import sqrt

# calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)-1):
		distance += (row1[i] - row2[i])**2
	return sqrt(distance)

In [19]:
# Test distance function
dataset = train
row0 = dataset[5]
for row in dataset:
	distance = euclidean_distance(row0, row)
	print(distance)

2.4577564756143024
2.8782592685916186
3.4655446902326914
2.2403281667614188
2.4499999999999997
0.0
1.7335230142159423
2.6695095390611217
3.025080347876914
3.2240205318860293
2.652881033476205
3.04992565241528
2.8648121275359575
1.4163485036883496
2.2410934831014973
2.005095549659743
2.855258307053847
2.8288940813250694
3.1872549481188965
1.4221960917384104
2.454078238361605
3.4778013227923656
2.455925547483999
1.7445753305318252
3.1637114746424935
3.002043899361924
2.236626924634504
2.468510296494675
2.454078238361605
2.853966488337466
2.6700541834209335
3.0050637536107794
2.28170407138782
2.2452784927941747
1.7489371570008803
2.253573629662095
2.008476933359389
2.0152325254221344
2.4604190967230104
2.7045103840128917
2.8706683823547516
3.1669298136277733
3.0746162859977257
2.879107043792313
2.844915303466911
2.67799041317575
2.0102923942063953
2.833627435716355
2.8325018788032668
3.322734851507721
2.835489375751565
3.186075500911061
2.650315516235927
3.173953306499126
3.33050560331252

1.0376282261007104
3.006818592779854
1.0052695625671935
2.655235917536854
3.011381471575287
2.8370883480752904
2.8589914654553423
3.0601268629312672
2.4577564756143024
2.450462726444976
3.021597428581141
1.001133144839459
2.450462726444976
2.243653179935958
1.7386512483144245
2.657963049723631
2.4541613971508283
2.2665991386499726
1.8240037817719479
2.8666271156898246
2.8729483461326772
3.32267087195768
3.180928321836588
2.851948651555829
2.880043146565186
2.650106958864875
3.028470348717521
2.8427025961022765
2.839479128881917
2.8782888119959003
2.2452784927941747
1.73558853813228
3.014986189579649
2.7613402542968153
2.647187571898963
2.832426825772558
2.8304557215820805
2.4571889991976152
2.851483482309975
3.1633754826837177
1.4300706418972198
2.26256256099221
2.587903571997841
1.7355885381322802
2.8508860062107293
2.8542405894394447
2.006888646840259
2.8506632875768267
2.846309820071725
2.4577564756143024
2.8373011433011084
2.6542611101427696
1.7437221376767675
3.477577185901159
3.1

3.16656730957627
3.1658976867967965
2.0101161392874882
2.001899608076371
2.83247385923777
2.2452784927941747
2.9061520873224316
2.000141718334987
1.0075227247145975
2.452341654793788
3.16656730957627
3.0407521223190415
2.2551980771867775
3.0033994345183768
2.4654805017092287
3.017305528623382
2.454418940523583
2.833627435716355
2.672295292724828
2.693293931145742
2.829770669297483
2.0066019154694166
2.2371337853816757
2.5571971706566767
3.1845191366231846
2.288255036290997
1.4191474876970098
3.1770336925975817
2.839853442291722
2.8536496502778004
1.422734105727738
2.450462726444976
2.24728230931892
2.0115370190822546
3.1882552682194065
2.856343142048013
2.8325018788032668
3.0308700845446603
2.517315319389599
2.256345298111171
2.2363531720647525
2.451850381810558
2.90590921916406
2.24449944927099
2.6489515745752024
2.504405415514982
3.0984196103227615
2.2457644689251413
2.2568879201015335
1.7390147600296286
1.7335230142159423
1.7334265411883323
3.167265428533671
1.7369353598923722
3.179

1.0328065367742538
2.454338100222559
1.4195428985473904
2.4605227768913065
2.839479128881917
3.017805249859092
2.849857775820335
1.0085378158884761
3.330356664113332
3.1623672926186224
3.0019277630856775
3.0754873514582353
2.0057584334670104
0.13095238095238093
3.166638918603813
2.8470715351972826
3.006871382384701
3.0141286726562946
2.0282877624157605
3.167265428533671
2.832802071041347
3.1765652666715405
3.168953703305895
2.840967107220425
1.0023809523809524
2.475719964455517
1.0061150896871316
2.8304557215820805
2.0078770616710027
2.001899608076371
2.271253861704673
2.037167401174524
3.4747632425586548
2.450115689804062
2.933086917602878
2.2386536790635083
2.020923712825703
3.1704068495757345
3.3301651610693423
2.8683006338382833
1.0069655365685237
2.648428276469066
3.0067535471707747
2.011243903860575
2.036571804909733
2.82949519948351
2.2520147190426134
2.854829420544801
3.037360897965501
2.6532998322843198
2.2431162018015427
2.846309820071725
2.8358892033533585
2.46325030007434
3

2.6639612126312255
3.3260336739125176
3.0061058423816194
2.2372085376236894
2.8446911209648307
1.7340543372237343
2.6489515745752024
3.219375159442211
3.0061058423816194
3.1778579545250794
2.9398977645852162
2.6865500219441594
2.6686865224492067
2.464480093553055
2.8318983961429653
1.4365846686513406
3.1661913356106806
3.0264152627165406
2.238809409864124
2.65924242873503
2.4525704966048023
2.012530193010226
3.4771492480801043
1.43145740236466
2.268522899404967
2.236626924634504
1.7349351572897471
3.206870635740771
3.010987061397014
2.005617112013158
2.653727109877002
2.002867841829993
1.426983244168733
1.0209150668376605
1.4217874636656262
2.658529250878722
2.009975124224178
2.881937061160392
1.4249239149039874
2.857679513092132
2.024494281914787
3.162837821308615
3.326374538998299
3.003176700559689
2.843045578823052
3.1660346666143884
3.1766768029190633
2.647187571898963
2.2390942552514903
3.326374538998299
0.16613861928018583
2.2414665591083236
2.8308973121320977
2.485178285578544
2

1.7426050429173292
2.4725717605584494
3.3183686194852706
2.8506632875768267
3.477251142814404
3.3305056033125275
2.4523809523809526
2.4501156898040626
2.661938836139354
2.692444497035085
2.68399556623078
2.282022067499192
2.0133708380522637
3.0984196103227615
0.25000000000000006
2.8637582144991067
2.8467758347051535
2.8363889085977485
2.320715397167116
3.1694008961973124
2.8848546967421416
1.7581701088117936
3.044134913175583
2.84579990468509
2.8330992300245206
3.0035777985323993
2.0391922392609607
2.456499085100998
2.4741933338082585
1.0242413264385404
3.176832947086791
1.4175487449976611
2.832802071041347
2.0490720204826176
2.655008530077555
3.004521347249358
2.8921231677149506
2.238809409864124
2.0000354305251715
3.0293294199696863
1.739185893944677
1.428335297945132
2.4681233058867265
3.1872709557119108
3.163479420008344
2.828827950529361
1.0037938238584287
2.8370883480752904
3.0225494187295157
1.8311381415458627
2.24121995650227
1.4209399335596775
2.002890484970304
2.8963823318486

3.189299712015355
2.839454172900137
3.4771492480801043
1.7390147600296286
3.175552338265144
2.0382190104828966
2.713738457779887
3.034664617779637
3.015174208931232
3.033333333333333
2.8388711387457564
3.164955678116906
2.647919861294283
2.505605959541483
3.007036343926715
2.650962474426284
3.330697086782319
3.0457965160584117
2.844392183476821
2.8444848574580934
2.00281689382831
2.4499525656012495
2.4577564756143024
2.6784010527825153
2.6461798075142484
3.2015621187164243
2.834427559814633
2.454078238361605
2.335222752998619
3.1685959035509716
2.834427559814633
2.0157050611198146
1.7702008674472824
2.6521650762795166
2.2773520148511266
2.8370883480752904
2.4751325578922683
3.3243765719058866
2.2501259727899763
2.001899608076371
3.1623672926186224
2.6583245366966817
2.451850381810558
3.005042059236073
2.6876892434557527
3.167265428533671
3.0007944904135213
3.0056579299442467
2.451416823888676
3.3301864397291645
2.852639307800438
3.0008502196571434
2.4632859715789754
3.321380781359551
1

3.3273969247423096
2.24728230931892
1.4195428985473904
2.8309313548019683
2.6493528067783987
3.233150987213936
2.83558933793736
3.185533662756256
3.1721667237640596
2.2545305860123754
2.4548000089417115
3.173953306499126
3.006040781349834
2.527653182745785
3.166392755786974
3.0123046977085064
2.6560097430359777
2.652672677810232
2.451790266357235
1.416016257257072
2.24121995650227
3.0077319107868528
3.0424528384802314
2.2501259727899763
3.0239266943927765
2.6637952227343096
2.8443413609793353
3.176832947086791
3.0306970677639966
3.002329027910954
3.164092222397561
2.8470715351972826
3.005042059236073
2.5971770825621308
3.005042059236073
2.007347614252311
2.4976111035326927
2.83798737410879
1.7437221376767675
2.470662327312437
1.4185441740376452
3.164092222397561
3.323182673868565
2.454078238361605
2.4610342017947713
3.166638918603813
2.660351791648766
2.4792517706767154
1.7616634326359337
2.650315516235927
2.4760771494213625
1.0204290814182442
1.7581701088117936
3.0598017290855943
2.45

3.0188758025316496
3.0422953874483976
3.01017642863658
3.0066828438257347
2.4513405097588095
3.3183549526590332
2.006182338097947
2.83377447546162
3.18591447160908
2.450028922963157
2.4577564756143024
3.6109252524994644
2.4513405097588095
2.4507229716759555
3.4721633555100686
3.033781831237455
2.452341654793788
2.4541937359189956
3.4991908685714583
3.1660346666143884
3.006600825862505
3.7446817920026274
2.8782592685916186
3.181460253154772
2.6629108329623787
3.318153360433482
3.007036343926715
3.4699520016023326
3.1949373572189623
3.233150987213936
2.4548000089417115
3.013169318575315
3.3921887395079366
3.168070759247796
3.0019277630856775
3.3176715395657306
3.322734851507721
2.862885102673719
2.648428276469066
3.4977162941520024
3.1660973351432053
2.8313728711706108
2.4629338366875038
3.322734851507721
2.6489002124657146
3.0020438993619236
3.0057258283319923
3.1775288106331354
2.843408457349875
3.165382840364985
2.010539125134387
3.479099402584126
2.841050913796615
2.258002900242971
3

2.006182338097947
2.4614591565041906
2.4626748818502504
3.743069180956189
3.6118003877313494
3.32267087195768
3.021597428581141
3.0478283038279095
3.202430518292285
3.3243765719058866
3.1721667237640596
2.650962474426284
3.321380781359551
2.4571889991976152
3.3265662602600035
3.3218048941956297
2.006182338097947
2.6535401843959483
2.454540195982976
3.1660973351432053
3.358282481342181
3.1883877315236635
2.6903645151707223
1.7389511917145728
3.1684035694870567
2.855015080715181
3.1773896501156624
3.0072484240415234
3.487953028142812
2.4554927091002416
2.83558933793736
2.8322216706742314
3.0064565819479787
3.006871382384701
3.75681542420163
2.248708372022399
3.6369146987628262
3.1682452205878535
3.6102603218932474
2.6500267401209388
3.47194293661374
3.039446818329159
2.266616646106739
3.394548450629843
2.650106958864875
3.3236943969691986
3.0038156007427133
3.466280721582764
3.001689811315621
2.2746059437947674
2.457900630661069
3.0025159518004774
3.1637706055345487
2.836002144439476
2.8

3.618159352143804
3.4690615092704884
2.4580251738137346
3.04992565241528
2.6686493479668
3.0410914095849284
1.7441008443040331
3.188468629224143
2.834499559905071
2.65024065137566
3.885478663491082
3.3303396420664475
2.653727109877002
2.8304717442123635
2.8318983961429653
3.4651987014912042
3.003771250171359
3.1640599725991416
2.8337974809900066
2.0006249023742555
3.1669298136277733
2.4627220712049502
2.829770669297483
2.6558731391990986
2.4534972031932494
3.465464535158951
3.0642921892853483
3.0017606322663433
2.8763993932324063
2.454107113260546
3.3456413923985657
3.1623000685191167
3.8744387348626486
3.612516820414512
3.01017642863658
3.3393228501457703
3.3301864397291645
3.006871382384701
3.32180489419563
2.647919861294283
3.1704068495757345
3.0022563321442473
2.0147429966136197
3.46989073651549
3.615795844877962
3.7425852621476032
2.455025157931604
3.356599077878458
3.636783763866738
2.4576699785276315
3.047950130825634
2.4847197426563996
3.0028227460914914
2.8350974899953223
3.00

3.020896609705571
3.0035777985323993
3.0159252292890373
3.470197051133573
2.45289060816028
3.0404500875189315
3.167265428533671
2.0084698770869163
3.165922755441774
3.6102603218932474
3.016384772787727
3.167081963458683
3.3405372834925267
3.1911770729796727
3.3210607408544104
3.003176700559689
3.0095096820042126
3.0072484240415234
2.8337974809900066
3.744886157895979
2.0057188644636383
3.3202669072833446
3.3243765719058866
3.018044748559635
2.662944894323591
2.451559039337215
3.163031389932156
3.0033994345183768
2.4615420659423286
3.0066828438257347
3.007036343926715
3.3655593585492394
3.1691415320062024
3.485344275183341
2.830594915154082
2.650315516235927
3.5104226089495314
3.1722337387752555
2.0024984394500787
3.1797919896599636
3.510633345318302
3.465464535158951
3.6068607464725395
3.1676457491071717
2.4576699785276315
2.4525704966048023
3.6181867710503948
3.318760662596944
3.003989222211946
2.7012731104487675
3.4829720217521687
3.465103814381965
2.483470297095032
2.454540195982976

3.4658718345744037
3.4738454255790265
2.8344995599050713
2.653727109877002
2.4518318849049856
3.0827366585750475
2.653326539149678
2.8523332534369352
2.242857142857143
2.458361870006169
3.60591759797087
3.007036343926715
3.318153360433482
3.611496665271243
2.649807463163453
3.166392755786974
2.83029448881632
2.65
3.4690615092704884
2.646223724479848
2.006182338097947
2.855258307053847
3.0235226710583185
3.0152334325987473
2.83377447546162
3.606074806518035
3.478330228592208
3.0452706725983743
1.749053842053934
2.8589914654553423
3.209680121953085
2.828452178021755
3.1860808387596053
2.8533645652814204
3.32208988125522
3.3921887395079366
3.330326875474188
3.011094376203381
2.650668422096628
3.48209706929603
3.3201004342375002
2.843408457349875
3.3183242020941957
3.001007011637965
3.319137287850701
3.318760662596944
3.320076529730197
2.8309313548019683
2.6558731391990986
2.831095554872058
2.451416823888676
3.2297248255818314
2.839678768840603
3.053369760350691
2.4577564756143024
3.009015

3.3318882922206043
2.6544832226138335
2.656297868716334
3.0135747299612836
3.4816135128789387
3.221305386023293
3.743069180956189
3.0028227460914914
2.452252654988924
3.7434667200203915
3.0322164722680793
3.007036343926715
2.8470695440508877
3.489440663648648
3.3359522025211437
3.002361126780206
2.8355143666283915
3.0020438993619236
2.8467758347051535
2.8318983961429653
2.457900630661069
3.3591694322222594
3.319178278496719
3.163031389932156
2.4548000089417115
2.008476933359389
3.0077036388478042
3.0085149378919946
3.0000236204663473
3.4978743140017605
3.0041250928841405
3.1660973351432053
3.60591759797087
3.0078138979073232
2.6517108241255456
3.015008752520924
2.65024065137566
2.875196927101605
3.7439209986916717
3.1686182672193266
3.010987061397014
3.1703245969279896
3.007142857142857
2.454107113260546
3.475094412981944
2.8340025220145058
2.8296544743916074
3.003176700559689
3.4719119135716716
2.723381792922666
3.4680179535401914
2.4576699785276315
2.748596838479384
3.175158681556124

2.646893100202618
3.6058657176462
3.186075500911061
2.4576699785276315
2.8752649488485575
2.6517706829780816
3.1660624199730383
3.32078334746733
2.6686493479668
2.4581024335239055
3.167081963458683
1.7437221376767675
2.6500267401209388
3.006871382384701
3.011551833265442
3.2506549139541883
3.323594617148476
3.3275366263949673
3.3274446284051127
2.8349025266490044
3.465464535158951
2.658496199135204
3.321739190117962
3.6099266328811423
3.003771250171359
3.0113814715752865
3.5267691882273042
3.007234285832519
1.7355885381322802
2.850438562747845
3.006600825862505
3.0027774369075217
3.1645177097941652
3.4674294359689624
3.6354967658758657
2.4541937359189956
3.465758155416609
3.469002679710227
2.451416823888676
3.020117731332855
3.0032729689585493
2.4564102359267137
2.6896132202189253
3.021597428581141
3.3875898963898283
2.6937927304038105
3.0078138979073232
2.2365749649175957
3.1703245969279896
2.2408658129920975
2.5303710751148296
3.376799931559135
2.8444818680219544
1.7386968952132043
3

2.653326539149678
3.003176700559689
3.341788596007857
3.1963786907520637
3.464612977505601
2.8355143666283915
3.7645228984641896
3.612683156856789
3.4661188080959855
3.2204227127784937
2.8300961899632395
3.3488279331845487
3.3275366263949673
3.1660973351432053
3.0020438993619236
3.4858395111516063
3.4668530823766646
3.0066828438257347
2.4541937359189956
2.650106958864875
2.836532807380683
2.4847197426563996
3.3183686194852706
3.006871382384701
3.1691549479159624
2.648603790833262
3.0066828438257347
2.650106958864875
2.8589914654553423
3.319861381418919
3.164216739367919
3.0239266943927765
2.831095554872058
3.00191265559755
2.454107113260546
3.3179919069799704
2.8333333333333335
3.6647343393181693
3.6507858005436646
2.4626748818502504
3.1886597530009633
3.318048288443742
2.4588449254369293
1.7342439392473963
3.1848573479091042
2.6475355416772866
3.1704068495757345
2.0033829099551155
3.4646465202389813
3.001266739364618
3.0436180954763543
2.653326539149678
3.74246484070328
2.839678768840

3.470197051133573
3.325719194890829
3.1704068495757345
3.0033994345183768
3.6350531098061154
3.4693907729745446
3.162672920173694
3.8845849199110063
3.3183686194852706
2.002266289678918
3.607142857142857
3.5256599067093495
3.331286786713026
3.004337378507984
3.326923258409628
3.466055839697368
2.002533825770743
2.4952619500858995
3.1656720600587693
2.6489900955037746
3.0387529143345
2.0057584334670104
3.1676457491071717
2.0435743551739725
3.165291502339805
2.25055548698538
3.615795844877962
1.7327052760628812
2.4680107571697634
3.004337378507984
3.1669298136277733
3.6120554320835727
3.3281635079426173
3.0038156007427133
3.3354160160315836
3.1644872557551462
3.017805249859092
2.832626962768674
3.0007944904135213
3.4732946189754883
3.3768427404047165
3.885478663491082
3.0056579299442467
3.3670278313719892
2.8987545218210147
2.036571804909733
3.337093967080447
3.5031480821199623
3.0107752446332063
2.4541937359189956
2.647881324796125
3.1676457491071717
2.4627220712049502
3.622956848560387

3.504856526931797
2.65
2.8358892033533585
2.832426825772558
2.8292697951408146
2.8400780066576035
2.683488608343831
2.7068412949263143
3.4741105976126385
3.344597464500995
2.653406658132906
2.4518318849049856
2.24121995650227
3.1908759522118104
2.002890484970304
3.6074288744062892
3.0046260628866577
3.187348324611483
3.002361126780206
3.017805249859092
3.4665464722753168
3.607016342272937
2.4507229716759555
3.4661188080959855
3.1736237576470905
3.504031529745594
3.1708654583728464
3.6068607464725395
3.3176715395657306
3.374890608792408
3.0061058423816194
3.003989222211946
3.6071145684887918
3.403430475603633
3.1939178309892555
3.7536837537752175
3.325535948144844
3.004337378507984
2.2403281667614188
2.007877061671003
3.6069574052594535
3.4699520016023326
3.31873162395466
2.682102434544168
3.4699520016023326
3.61651618859969
3.165833223655346
3.1766455731645085
3.2014461372346266
2.4507229716759555
3.0069731882911825
3.3755456433031203
3.0046260628866577
2.649807463163453
2.881937061160

3.1686182672193266
2.6514916864409184
3.4763486591537394
2.6629236060238894
3.4658718345744037
3.3194985004944013
2.451992572112759
2.6544832226138335
3.4738707197847445
3.01017642863658
3.3322864002155073
2.8414190348344035
2.8350974899953223
3.4655446902326914
2.4576411454889016
3.190388236172154
2.6476404589747453
3.0460198557217657
2.8508860062107293
3.1694008961973124
3.1722953913352114
3.3173434512718374
3.5040064532602955
3.179401532412726
2.8562746696811443
3.7417331403273586
3.321739190117962
2.452341654793788
3.023027645444996
2.647187571898963
3.1637114746424935
3.180190420574075
3.6062123583749397
3.2015621187164243
3.4693907729745446
2.6544148822945783
2.4794404039124007
2.002639980981114
3.3183686194852706
2.8400780066576035
2.841948686306793
3.0242678692742673
3.4778013227923656
3.0041250928841405
3.189299712015355
3.003842022048436
3.3475623174763687
3.6086701151532266
3.1700268602623196
3.004337378507984
3.467200541047018
3.183677902781959
2.4598695188742474
3.31702302

3.086755776618843
3.6968646702337744
2.8832389587795246
3.3835921954521524
2.8939592256975564
2.528662225972703
2.894180571273728
2.894180571273728
3.373587719966444
3.2115067351951923
2.8851307755172444
3.363427080130623
3.2156090750207817
3.2085981426187242
2.711857727758023
3.3637430897284863
2.8851307755172444
2.6944650161685173
3.2115067351951923
3.3669646934836623
3.212720077253009
2.88336184170563
3.0516621247045044
3.0502741410569816
3.6860380379526143
2.5315201173027226
2.5181664225224023
3.246422241424321
2.8861130480417945
2.5316376799225493
2.7096618928744864
3.3624274516109547
3.3655121951347775
2.8827778553446164
3.5379582425570115
2.2959702941973648
3.2394963705325983
3.0572355719511752
3.364892271678248
3.0516621247045044
2.883755031881215
3.551392208883045
3.052776514002367
2.70439928823465
3.3655121951347775
3.053124677242084
3.426091199461783
3.2100112672077814
3.3835921954521524
2.5134840442732727
2.895446616864333
2.3063920838496745
2.705106659785585
2.883755031881

2.7002361952622
2.7045826986239754
2.8950119350891095
2.3004928979886343
3.372185145133823
3.2446974010668233
3.360080612627621
2.6995884460002006
2.892711146619686
3.555141433507941
3.215675184688907
2.3063920838496745
3.5395001376592425
2.331594347125264
3.257665824070233
3.051731785957612
2.3060307408570635
3.0620713870088534
3.2113302108870725
2.7054471801246547
2.711361206449029
2.700656046944932
2.714365078204805
2.716816906659039
2.8851307755172444
3.399347176222193
3.3970575502926055
2.6993836955129047
3.230531256549994
3.2332386549010708
2.3022332153319693
3.355613464533126
3.0820865285735426
2.7012857021138
3.0517958729060863
3.367532891862792
3.052010415070064
3.0561868034820416
3.0565123218623125
3.3634902844253967
2.307958477876261
3.0534672311192854
3.051847884512608
2.7100530908271963
3.367664195183254
3.3577034317522605
2.3117931903831797
3.4288401019068306
2.9051765902278923
2.7092434368288134
3.0415909505066723
3.3621678030105677
3.0479733796993713
3.096268143626059
3

2.311334587137082
3.516985831987901
3.2163802699333184
2.709294701168143
3.0563314825979324
3.7874928436653614
2.705106659785585
2.705106659785585
3.363095027391481
2.7041896611288214
2.7026232322691426
2.8886653934130275
3.366438498363709
3.056164544548609
3.3843510751023578
2.7009446571183804
3.0535192142570082
2.7045826986239754
3.0672960386408956
3.235359488589817
3.074708473912037
3.5641640402936634
2.3063920838496745
2.7041896611288214
2.7058400349585674
3.2250700918748527
2.8887890267058838
3.509098959876179
2.8972629589081524
3.652434500135188
3.367512690897543
3.517811819867572
3.0441209462833285
2.704268273197768
3.3637430897284863
3.0628571798744373
3.5004381002131773
2.7058400349585674
3.212720077253009
3.386377270832085
3.056975963401794
3.3610927455880244
2.883755031881215
3.211661186005794
2.9145045436049464
3.5234885632464876
3.212720077253009
2.3048861143232218
2.512581718755439
3.6524740784054903
3.645856219558877
2.7108635486018695
3.09871690950638
2.8847132084116405

3.3582141147961853
2.3013650686360743
3.2298125862662106
3.366438498363709
3.2156090750207817
2.3049168582306843
2.8847132084116405
3.212720077253009
2.7067826539271502
3.6649269218367273
3.211457309366983
3.049832715385671
3.648940609779247
3.0572355719511752
3.2366952505796274
2.7276456918757175
3.0535192142570082
2.3063920838496745
3.4076952066338317
3.2118597547060217
3.2113081446662823
2.512722728476837
2.8762939511503403
2.3127432151318317
3.363095027391481
3.3621678030105677
2.8812288336466327
2.8822124359729453
3.3681430722435235
3.2156302302624566
3.3556768159976365
3.3681430722435235
2.7000262449383725
3.3621678030105677
2.883755031881215
3.052219370212473
3.2118597547060217
2.5131739064179834
3.6491153837471657
3.361408974639144
3.363095027391481
3.2180436586010037
3.219903380192918
2.885075758367408
3.7975824748201616
3.3655121951347775
3.363595622277458
3.08282860366036
3.7847033690682728
3.215035185991832
3.2163802699333184
3.052010415070064
2.51292012879362
3.21558703816

2.7041896611288214
3.216627013255287
3.366438498363709
3.2088852339160474
3.2113302108870725
3.658889401898997
2.525129933119942
3.3655121951347775
3.5264436743923944
3.2130950175549953
3.6434554441760567
2.7062852014960903
3.79473991472514
3.3674916480965473
3.3634060117684275
2.711857727758023
3.363427080130623
2.305009087492723
2.887930352926817
2.886339905466198
2.7000262449383725
3.367664195183254
3.5096441473198285
3.360080612627621
2.8882983872335877
2.70439928823465
3.3655121951347775
3.5303839891874524
3.207456590162644
2.8824956511915714
2.6967573120323602
3.373587719966444
3.643163697029823
3.0673431670271896
3.0565123218623125
3.373587719966444
2.890873315466642
3.212720077253009
2.8855973966582567
3.643144246389374
2.7041896611288214
3.5733336188953078
3.527904634639535
3.366438498363709
2.7019026218162967
3.7874928436653614
2.926902350543064
3.2113964086397115
2.305162794729275
2.305009087492723
3.051731785957612
3.6488046686831677
3.2113964086397115
3.507003036038689
3.6

3.287223600892944
2.301970958718763
3.222960076595551
3.0562341031773133
3.428303560964584
3.0502741410569816
2.8851307755172444
2.8893904370136045
3.5270456504570737
3.5516723409168196
3.648940609779247
2.8879539085272428
2.5082415175186146
3.2135140167114424
2.9057385165445653
3.3637430897284863
3.652434500135188
2.896113197948036
3.364164389683763
3.3750795741018127
3.5071751849284314
3.4079871503914974
3.2750214207120005
3.3742858486888943
3.0645003069863237
2.8808107009632864
3.642128778724927
3.2113302108870725
3.083264386064941
3.2135140167114424
3.3669646934836623
3.0524747404059425
2.3063920838496745
3.367664195183254
3.2113302108870725
2.3481079854025264
2.883165226509922
3.3637430897284863
3.360818656341982
3.0539601070742406
2.0766559657295187
3.211661186005794
3.5411862419251547
3.403950119674723
2.8835338690045145
2.3323916953222423
3.0674401937778066
3.5113599254529713
3.6491153837471657
3.5096441473198285
2.708320512790168
2.726112496285439
3.0516389039334255
3.51291430

2.6937043423323628
3.0797929665081853
3.2192642220729994
3.5077303681441805
2.7073323634303006
2.884025319031774
2.5189485982878503
3.0538960655428866
3.386377270832085
3.8192769495329295
2.3063920838496745
3.360080612627621
3.409810609113456
2.7052575417780353
3.0609131549920896
2.7041896611288214
3.2169521567997124
3.2118597547060217
3.6556342994778994
3.3004543423918395
3.357475498548874
3.503536438185872
2.51292012879362
3.212102433113914
3.2413761389842457
3.0494377014106355
2.7045826986239754
3.211661186005794
2.883165226509922
3.2340669972039278
3.366438498363709
3.2235325547478615
3.3917299714963285
3.3577920682639277
3.2332386549010708
2.7138481267986974
2.5080154950390576
2.7187661433826245
2.7000262449383725
3.660148814242889
2.8855973966582567
2.8887890267058838
3.0600101521921905
3.513337883537566
3.3647541209997174
3.0565123218623125
3.049832715385671
3.052010415070064
3.2123892112327304
3.2219705312600846
2.7051391420364643
3.0549808777249825
3.2113302108870725
3.0580188

2.8801218795072416
3.3750795741018127
2.509004193064716
3.056164544548609
2.8855973966582567
3.3681430722435235
2.7058400349585674
3.1225725265673927
3.5403416889300976
2.70439928823465
3.219467604329759
3.5823650012620956
2.932924561808703
3.051847884512608
3.051847884512608
3.051731785957612
3.6493289850075628
3.056975963401794
3.6487075647993548
2.8872913339608686
3.3634060117684275
2.8792606209504563
3.652396473549935
3.6488046686831677
3.0638981143844846
3.5096441473198285
2.714365078204805
3.3695952777152494
2.704163456597992
2.883140648667699
3.215587038162705
2.704163456597992
3.0535192142570082
3.0516621247045044
3.206488779769014
3.268069426707116
2.711857727758023
3.0516621247045044
2.7041896611288214
3.7968166042792495
3.2366952505796274
3.3641643896837627
2.522687306656442
2.512722728476837
3.3613120008929456
2.7093480568993042
3.1150064696758575
2.8872913339608686
3.645497020445511
2.7
3.511294539397632
2.5234456161720185
2.890069203323685
3.0555607091277914
3.65241587491

3.209404583061752
3.452607545930054
3.2219705312600846
3.071664811881871
3.5823650012620956
3.2169521567997124
3.212720077253009
2.88336184170563
3.363427080130623
2.7109168734552855
3.0576249434462333
3.238074229623726
2.0838108296330895
3.0565123218623125
3.2113964086397115
2.5506001694679226
3.2715845252908338
2.3116104957151036
3.645622977410587
3.078365182941505
3.5360758192587562
2.7045826986239754
3.3802656914989266
3.5073465172173584
2.8832389587795246
3.512914302032436
3.0539601070742406
3.215587038162705
3.523854568566666
3.2113302108870725
3.086755776618843
2.911414123677098
3.205427830055786
3.2130950175549953
3.5144671846293765
3.3626103738133413
3.051296144799011
3.5064372289856163
3.5410813842122453
3.226327531792708
2.8939592256975564
3.2115067351951923
2.771331408867907
2.5280747875644964
3.211661186005794
3.5465810242332325
2.923123077476742
2.7051391420364643
3.051847884512608
2.7041896611288214
2.3109666577915045
2.5234456161720185
2.704818493707152
3.21990338019291

3.0004166377354995
2.2422314868934308
1.732935917287202
2.0007665764461473
3.323417223403695
1.7389006613961828
1.736404918449705
2.653406658132906
1.7515072873892867
3.621459879016858
2.451992572112759
2.6716046966811535
2.001191538483047
2.003491849705592
2.450462726444976
2.0084698770869163
1.7355885381322802
2.0128287537286265
2.6588426889239694
2.697568612145678
3.01514412662287
2.2861172759826838
3.002043899361924
2.8374569832015557
3.464789686791414
2.6630119513553114
2.001191538483047
2.653967423292951
2.6500267401209388
2.466779276708802
3.0387529143345
2.246540550367263
2.0164431764620163
1.7437221376767675
3.045991008602914
3.010422183574259
3.164955678116906
2.0057584334670104
2.6493528067783987
2.8625751928453997
3.0389347996548537
2.7028801720553197
3.323182673868565
2.011243903860575
2.6480686497325077
3.0007944904135213
3.189299712015355
2.452341654793788
2.853792678710425
2.0075551742798448
3.0105577639179457
3.3729524737272354
3.3202071486887808
3.0168658557908627
2.8

3.32078334746733
3.0239266943927765
2.6583245366966817
1.743925316693373
1.7694897864379975
2.0023597190426212
2.8304717442123635
2.4742047899079873
1.7385746240400521
2.652672677810232
2.8414748972737267
2.8355143666283915
3.010492799098519
2.6866449755205464
3.0183452690613954
2.646715330521272
2.4794404039124007
2.6769042430633583
2.650668422096628
2.2413780382175785
3.1896445255774952
2.833627435716355
3.3217434566559674
2.645778094125811
1.7350985255667866
2.6507454134951423
3.0052571095972103
2.4501156898040626
2.65728472910211
2.7028801720553197
2.016829700499357
3.003959971471671
2.464480093553055
2.83798737410879
2.847559324106943
2.659152892240006
2.0340775932419093
2.256345298111171
2.653967423292951
1.7461587255414612
3.0056579299442467
3.1637706055345487
2.013497538017096
2.663663274668239
3.6102603218932474
3.164216739367919
2.664315502620827
2.650106958864875
1.7666666666666668
3.3169666223367615
3.0019277630856775
2.6489002124657146
3.1632330113193095
2.8369734519989676

In [20]:
# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
	distances = list()
	for train_row in train:
		dist = euclidean_distance(test_row, train_row)
		distances.append((train_row, dist))
	distances.sort(key=lambda tup: tup[1])
	neighbors = list()
	for i in range(num_neighbors):
		neighbors.append(distances[i][0])
	return neighbors

In [21]:
# Example of getting neighbors for an instance
from math import sqrt

# calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)-1):
		distance += (row1[i] - row2[i])**2
	return sqrt(distance)

# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
	distances = list()
    
	for train_row in train:
		dist = euclidean_distance(test_row, train_row)
		distances.append((train_row, dist))
    
	distances.sort(key=lambda tup: tup[1])
	neighbors = list()
	for i in range(num_neighbors):
		neighbors.append(distances[i][0])
	return neighbors

In [22]:
# Test distance function
dataset = train
neighbors = get_neighbors(dataset, dataset[0], 3)
# print("These are my distance: " distances)
for neighbor in neighbors:
	print("These are my neighbors: {}".format(neighbors))

These are my neighbors: [array([1.       , 1.       , 0.       , 1.       , 0.1      , 0.0952381,
       0.       , 0.       , 0.       , 0.       , 0.       , 0.       ,
       1.       , 0.       , 0.       , 1.       , 0.       , 0.       ,
       0.       , 0.       , 0.       , 1.       , 0.       , 0.       ,
       0.       , 0.       , 0.       , 0.       , 0.       , 0.       ,
       0.       , 0.       , 0.       , 1.       , 1.       , 0.       ,
       0.       , 0.       ]), array([1.        , 1.        , 0.        , 1.        , 0.15      ,
       0.10714286, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 0.        ,
       1.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 1.        ,
       0.        , 0.        , 0.    

In [23]:
# Make a classification prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
	neighbors = get_neighbors(train, test_row, num_neighbors)
	output_values = [row[-1] for row in neighbors]
	prediction = max(set(output_values), key=output_values.count)
	return prediction

In [24]:
# Test distance function
dataset = train
prediction = predict_classification(dataset, dataset[0], 3)
print('Expected %d, Got %d.' % (dataset[0][-1], prediction))

Expected 0, Got 0.
