In [1]:
import pandas as pd
import numpy as np
import sklearn as skl
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

In [2]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [3]:
skl.__version__

'0.20.0'

### Getting the data and the features names

In [4]:
arrhythmia_raw = pd.read_csv('D:/Datasets/arrhythmia/arrhythmia_data.csv')

In [5]:
features_txt = open('D:/Datasets/arrhythmia/features_raw.txt', 'r').read()

In [6]:
features = features_txt.split(',')

In [7]:
arrhythmia_raw.describe()

Unnamed: 0,age,sex,height,weight,qrs_duration,pr_interval,qt_interval,t_interval,p_interval,vector_qrs,di_q_wave,di_r_wave,di_s_wave,di_r_sp_wave,di_s_sp_wave,di_intrisic_deflections,di_existence_ragged_r_wave,di_existence_diphasic_r_wave,di_existence_ragged_p_wave,di_existence_diphasic_p_wave,di_existence_ragged_t_wave,di_existence_diphasic_t_wave,dii_q_wave,dii_r_wave,qii_s_wave,dii_r_sp_wave,dii_s_sp_wave,dii_intrisic_deflections,dii_existence_ragged_r_wave,dii_existence_diphasic_r_wave,dii_existence_ragged_p_wave,dii_existence_diphasic_p_wave,dii_existence_ragged_t_wave,dii_existence_diphasic_t_wave,diii_q_wave,diii_r_wave,qii_s_wave.1,diii_r_sp_wave,diii_s_sp_wave,diii_intrisic_deflections,diii_existence_ragged_r_wave,diii_existence_diphasic_r_wave,diii_existence_ragged_p_wave,diii_existence_diphasic_p_wave,diii_existence_ragged_t_wave,diii_existence_diphasic_t_wave,avr_q_wave,avr_r_wave,avr_s_wave,avr_r_sp_wave,avr_s_sp_wave,avr_intrisic_deflections,avr_existence_ragged_r_wave,avr_existence_diphasic_r_wave,avr_existence_ragged_p_wave,avr_existence_diphasic_p_wave,avr_existence_ragged_t_wave,avr_existence_diphasic_t_wave,avl_q_wave,avl_r_wave,avl_s_wave,avl_r_sp_wave,avl_s_sp_wave,avl_intrisic_deflections,avl_existence_ragged_r_wave,avl_existence_diphasic_r_wave,avl_existence_ragged_p_wave,avl_existence_diphasic_p_wave,avl_existence_ragged_t_wave,avl_existence_diphasic_t_wave,avf_q_wave,avf_r_wave,avf_s_wave,avf_r_sp_wave,avf_s_sp_wave,avf_intrisic_deflections,avf_existence_ragged_r_wave,avf_existence_diphasic_r_wave,avf_existence_ragged_p_wave,avf_existence_diphasic_p_wave,avf_existence_ragged_t_wave,avf_existence_diphasic_t_wave,v1_q_wave,v1_r_wave,v1_s_wave,v1_r_sp_wave,v1_s_sp_wave,v1_intrisic_deflections,v1_existence_ragged_r_wave,v1_existence_diphasic_r_wave,v1_existence_ragged_p_wave,v1_existence_diphasic_p_wave,v1_existence_ragged_t_wave,v1_existence_diphasic_t_wave,v2_q_wave,v2_r_wave,v2_s_wave,v2_r_sp_wave,v2_s_sp_wave,v2_intrisic_deflections,v2_existence_ragged_r_wave,v2_existence_diphasic_r_wave,v2_existence_ragged_p_wave,v2_existence_diphasic_p_wave,v2_existence_ragged_t_wave,v2_existence_diphasic_t_wave,v3_q_wave,v3_r_wave,v3_s_wave,v3_r_sp_wave,v3_s_sp_wave,v3_intrisic_deflections,v3_existence_ragged_r_wave,v3_existence_diphasic_r_wave,v3_existence_ragged_p_wave,v3_existence_diphasic_p_wave,v3_existence_ragged_t_wave,v3_existence_diphasic_t_wave,v4_q_wave,v4_r_wave,v4_s_wave,v4_r_sp_wave,v4_s_sp_wave,v4_intrisic_deflections,v4_existence_ragged_r_wave,v4_existence_diphasic_r_wave,v4_existence_ragged_p_wave,v4_existence_diphasic_p_wave,v4_existence_ragged_t_wave,v4_existence_diphasic_t_wave,v5_q_wave,v5_r_wave,v5_s_wave,v5_r_sp_wave,v5_s_sp_wave,v5_intrisic_deflections,v5_existence_ragged_r_wave,v5_existence_diphasic_r_wave,v5_existence_ragged_p_wave,v5_existence_diphasic_p_wave,v5_existence_ragged_t_wave,v5_existence_diphasic_t_wave,v6_q_wave,v6_r_wave,v6_s_wave,v6_r_sp_wave,v6_s_sp_wave,v6_intrisic_deflections,v6_existence_ragged_r_wave,v6_existence_diphasic_r_wave,v6_existence_ragged_p_wave,v6_existence_diphasic_p_wave,v6_existence_ragged_t_wave,v6_existence_diphasic_t_wave,di_applitude_jj_wave,di_applitude_q_wave,di_applitude_r_wave,di_applitude_s_wave,di_applitude_r_sp_wave,di_applitude_s_sp_wave,di_applitude_p_wave,di_applitude_t_wave,di_qrsa,di_qrsta,dii_applitude_jj_wave,dii_applitude_q_wave,dii_applitude_r_wave,dii_applitude_s_wave,dii_applitude_r_sp_wave,dii_applitude_s_sp_wave,dii_applitude_p_wave,dii_applitude_t_wave,dii_qrsa,dii_qrsta,diii_applitude_jj_wave,diii_applitude_q_wave,diii_applitude_r_wave,diii_applitude_s_wave,diii_applitude_r_sp_wave,diii_applitude_s_sp_wave,diii_applitude_p_wave,diii_applitude_t_wave,diii_qrsa,diii_qrsta,avr_applitude_jj_wave,avr_applitude_q_wave,avr_applitude_r_wave,avr_applitude_s_wave,avr_applitude_r_sp_wave,avr_applitude_s_sp_wave,avr_applitude_p_wave,avr_applitude_t_wave,qi_qrsa,avr_qrsta,avl_applitude_jj_wave,avl_applitude_q_wave,avl_applitude_r_wave,avl_applitude_s_wave,avl_applitude_r_sp_wave,avl_applitude_s_sp_wave,avl_applitude_p_wave,avl_applitude_t_wave,avl_qrsa,avl_qrsta,avf_applitude_jj_wave,avf_applitude_q_wave,avf_applitude_r_wave,avf_applitude_s_wave,avf_applitude_r_sp_wave,avf_applitude_s_sp_wave,avf_applitude_p_wave,avf_applitude_t_wave,avf_qrsa,avf_qrsta,v1_applitude_jj_wave,v1_applitude_q_wave,v1_applitude_r_wave,v1_applitude_s_wave,v1_applitude_r_sp_wave,v1_applitude_s_sp_wave,v1_applitude_p_wave,v1_applitude_t_wave,v1_qrsa,v1_qrsta,v2_applitude_jj_wave,v2_applitude_q_wave,v2_applitude_r_wave,v2_applitude_s_wave,v2_applitude_r_sp_wave,v2_applitude_s_sp_wave,v2_applitude_p_wave,v2_applitude_t_wave,v2_qrsa,v2_qrsta,v3_applitude_jj_wave,v3_applitude_q_wave,v3_applitude_r_wave,v3_applitude_s_wave,v3_applitude_r_sp_wave,v3_applitude_s_sp_wave,v3_applitude_p_wave,v3_applitude_t_wave,v3_qrsa,v3_qrsta,v4_applitude_jj_wave,v4_applitude_q_wave,v4_applitude_r_wave,v4_applitude_s_wave,v4_applitude_r_sp_wave,v4_applitude_s_sp_wave,v4_applitude_p_wave,v4_applitude_t_wave,v4_qrsa,v4_qrsta,v5_applitude_jj_wave,v5_applitude_q_wave,v5_applitude_r_wave,v5_applitude_s_wave,v5_applitude_r_sp_wave,v5_applitude_s_sp_wave,v5_applitude_p_wave,v5_applitude_t_wave,v5_qrsa,v5_qrsta,v6_applitude_jj_wave,v6_applitude_q_wave,v6_applitude_r_wave,v6_applitude_s_wave,v6_applitude_r_sp_wave,v6_applitude_s_sp_wave,v6_applitude_p_wave,v6_applitude_t_wave,v6_qrsa,v6_qrsta,class
count,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0,452.0
mean,46.471239,0.550885,166.188053,68.170354,88.920354,155.152655,367.207965,169.949115,90.004425,33.676991,5.628319,51.628319,20.920354,0.141593,0.0,30.035398,0.002212,0.011062,0.011062,0.004425,0.004425,0.00885,5.619469,54.336283,20.59292,0.433628,0.150442,31.637168,0.017699,0.028761,0.002212,0.004425,0.004425,0.015487,16.026549,41.982301,20.327434,2.300885,0.318584,30.513274,0.002212,0.035398,0.002212,0.017699,0.011062,0.004425,45.362832,19.327434,7.79646,2.823009,0.070796,31.230088,0.011062,0.004425,0.004425,0.004425,0.004425,0.00885,10.274336,43.575221,19.840708,0.814159,0.0,27.300885,0.0,0.017699,0.002212,0.002212,0.006637,0.002212,7.477876,50.40708,19.79646,0.769912,0.221239,29.876106,0.004425,0.024336,0.0,0.002212,0.002212,0.002212,12.60177,23.840708,42.123894,3.99115,0.115044,18.725664,0.006637,0.015487,0.00885,0.00885,0.011062,0.022124,6.327434,33.610619,43.610619,2.035398,0.176991,22.628319,0.00885,0.00885,0.013274,0.00885,0.00885,0.017699,3.814159,42.460177,41.681416,0.539823,0.132743,27.734513,0.024336,0.004425,0.006637,0.00885,0.006637,0.011062,3.238938,46.079646,42.415929,0.522124,0.123894,31.044248,0.004425,0.013274,0.0,0.0,0.004425,0.017699,4.946903,46.911504,39.946903,0.283186,0.0,31.964602,0.0,0.006637,0.0,0.002212,0.0,0.006637,6.716814,50.238938,28.309735,0.150442,0.0,32.168142,0.002212,0.002212,0.004425,0.0,0.0,0.011062,-0.20708,-0.19292,6.013053,-1.025664,0.006858,0.0,0.647124,0.985398,13.844248,20.818363,-0.121239,-0.24823,7.169248,-1.334735,0.019469,-0.005973,0.98208,1.375,16.955531,26.958628,0.079425,-1.005973,3.492257,-1.739602,0.153982,-0.013274,0.427655,0.352876,2.670796,5.590044,0.147124,-5.234071,0.9,-1.146903,0.112832,-0.000885,-0.767257,-1.143805,-15.630752,-23.499115,-0.15531,-0.453982,3.449558,-1.240929,0.028097,0.0,0.102655,0.301549,5.450221,7.279425,-0.001106,-0.363717,4.862168,-1.31792,0.051327,-0.019248,0.68031,0.868142,9.776106,16.018363,0.659292,-1.420133,1.633628,-6.554646,0.317699,-0.00885,-0.330531,0.176106,-18.738496,-15.881195,0.963938,-0.914381,3.977876,-9.048894,0.181416,-0.015929,0.001549,2.61792,-17.982743,10.245796,0.768142,-0.65354,8.039602,-10.150664,0.032965,-0.013496,0.22677,3.89469,-8.269027,32.422788,0.001106,-0.297566,11.839381,-7.034513,0.025664,-0.002876,0.547788,2.535841,10.081195,33.32854,-0.285398,-0.277212,11.369912,-3.607522,0.016814,0.0,0.546681,1.722124,17.840044,32.87146,-0.302434,-0.278982,9.048009,-1.457301,0.003982,0.0,0.514823,1.222345,19.326106,29.47323,3.880531
std,16.466631,0.497955,37.17034,16.590803,15.364394,44.842283,33.385421,35.633072,25.826643,45.431434,10.650001,18.249901,20.541728,1.569483,0.0,10.046393,0.047036,0.104708,0.104708,0.066445,0.066445,0.093759,11.22068,17.248213,21.06105,3.093161,2.692591,9.624951,0.132002,0.167319,0.047036,0.066445,0.066445,0.123615,21.906457,23.106034,25.365424,9.212818,3.124229,18.35985,0.047036,0.184989,0.047036,0.132002,0.104708,0.066445,24.813651,17.38896,18.365908,10.319705,1.505153,27.9488,0.104708,0.066445,0.066445,0.066445,0.066445,0.093759,17.197818,22.63126,23.000074,5.251204,0.0,15.496733,0.0,0.132002,0.047036,0.047036,0.081288,0.047036,15.358883,20.207631,23.192498,4.675755,2.79129,13.028458,0.066445,0.154262,0.0,0.047036,0.047036,0.047036,26.828036,16.417268,23.865526,12.580205,1.537425,19.169566,0.081288,0.123615,0.093759,0.093759,0.104708,0.147249,20.984185,16.273403,17.523596,8.387436,2.253276,13.087253,0.093759,0.093759,0.114574,0.093759,0.093759,0.132002,16.324888,13.581019,16.424667,5.256246,2.033007,10.150109,0.154262,0.066445,0.081288,0.093759,0.081288,0.104708,11.530562,10.383034,16.951856,2.973937,2.024825,7.775554,0.066445,0.114574,0.0,0.0,0.066445,0.132002,10.770198,11.997825,17.309926,3.239088,0.0,10.463264,0.0,0.081288,0.0,0.047036,0.0,0.081288,11.092811,15.015601,22.300738,1.868024,0.0,10.272045,0.047036,0.047036,0.066445,0.0,0.0,0.104708,0.6243,0.396429,2.803274,1.482419,0.094982,0.0,0.347006,1.213289,13.192917,13.355496,0.523582,0.556216,3.490317,1.811055,0.176242,0.090256,0.553331,1.342093,14.304628,18.469142,0.547274,1.921482,3.449131,2.853022,0.878851,0.130157,0.54684,1.17557,16.697646,18.387507,0.522119,3.046912,1.176017,2.799226,0.438718,0.018814,0.393392,1.115694,10.889243,12.794286,0.539642,0.992033,2.86964,1.996923,0.193767,0.0,0.410712,0.962016,13.16999,12.694519,0.447485,0.922381,3.520306,2.052247,0.489375,0.265464,0.504777,1.05343,14.528384,16.775304,1.055564,3.313584,2.226582,4.882572,1.490322,0.141457,0.574897,1.783182,23.715007,21.014088,1.177715,3.763854,3.346176,5.890044,0.971688,0.218366,0.492221,2.409711,25.659801,26.656455,1.444173,3.414085,5.279719,7.066568,0.390403,0.264398,0.548988,2.990809,32.157008,37.362289,1.015566,1.758544,5.917391,5.061472,0.166763,0.046287,0.426941,2.429776,25.074695,34.361665,0.67506,0.992472,4.793656,2.850633,0.275907,0.0,0.370548,1.70819,16.445472,24.421643,0.603551,0.548876,3.472862,2.00243,0.050118,0.0,0.347531,1.426052,13.503922,18.493927,4.407097
min,0.0,0.0,105.0,6.0,55.0,0.0,232.0,108.0,0.0,-172.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.0,-2.7,0.0,-13.8,0.0,0.0,-1.5,-8.7,-33.3,-38.8,-3.9,-3.4,0.0,-16.5,0.0,-1.5,-1.5,-4.4,-43.0,-38.5,-1.7,-16.5,0.0,-16.3,0.0,-1.7,-2.8,-7.0,-74.0,-58.6,-1.9,-16.0,0.0,-12.8,0.0,-0.4,-2.3,-3.9,-96.5,-82.7,-7.1,-8.7,0.0,-13.3,0.0,0.0,-1.4,-6.7,-38.7,-42.1,-1.7,-6.8,0.0,-15.4,0.0,-4.5,-2.1,-5.5,-44.0,-49.4,-4.5,-19.1,0.0,-37.8,0.0,-2.9,-3.5,-4.9,-216.0,-107.2,-4.1,-30.3,0.0,-43.3,0.0,-4.0,-2.3,-8.6,-195.3,-95.1,-3.3,-32.9,0.0,-48.4,0.0,-5.6,-3.1,-11.8,-242.4,-146.2,-3.2,-20.4,0.0,-42.9,0.0,-0.9,-2.6,-8.2,-124.8,-161.4,-4.8,-14.2,0.0,-30.8,0.0,0.0,-0.9,-5.0,-56.8,-63.6,-5.6,-4.1,0.0,-28.6,0.0,0.0,-0.8,-6.0,-44.2,-38.6,1.0
25%,36.0,0.0,160.0,59.0,80.0,142.0,350.0,148.0,79.0,3.75,0.0,40.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,32.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,32.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,40.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,35.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,36.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,32.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.4,-0.4,4.0,-1.5,0.0,0.0,0.5,0.5,6.4,12.5,-0.4,0.0,4.6,-2.1,0.0,0.0,0.7,0.7,7.85,13.95,-0.2,-1.2,1.0,-2.45,0.0,0.0,0.2,-0.5,-7.7,-6.425,-0.1,-7.2,0.0,0.0,0.0,0.0,-1.0,-1.9,-20.85,-30.725,-0.4,-0.6,1.2,-1.9,0.0,0.0,-0.2,-0.3,-2.0,-1.025,-0.2,0.0,2.2,-2.0,0.0,0.0,0.4,0.4,1.0,4.3,0.2,0.0,0.575,-9.0,0.0,0.0,-0.7,-0.9,-25.3,-26.475,0.4,0.0,1.8,-11.925,0.0,0.0,-0.3,1.3,-25.225,-4.5,0.0,0.0,4.2,-13.0,0.0,0.0,0.0,2.0,-19.525,9.85,-0.5,0.0,7.875,-9.1,0.0,0.0,0.4,1.1,-0.925,11.275,-0.6,0.0,8.1,-4.725,0.0,0.0,0.4,0.7,8.675,15.375,-0.5,-0.425,6.6,-2.1,0.0,0.0,0.4,0.5,11.45,17.55,1.0
50%,47.0,1.0,164.0,68.0,86.0,157.0,367.0,162.0,91.0,40.0,0.0,48.0,20.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,48.0,20.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,48.0,20.0,0.0,0.0,0.0,44.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,48.0,4.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,48.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,48.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,44.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,48.0,42.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,40.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,48.0,36.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.1,0.0,5.7,-0.6,0.0,0.0,0.7,1.2,11.85,20.75,-0.1,0.0,6.8,-0.85,0.0,0.0,1.0,1.4,15.9,24.6,0.0,0.0,2.5,0.0,0.0,0.0,0.5,0.4,2.95,5.0,0.1,-5.6,0.7,0.0,0.0,0.0,-0.8,-1.2,-14.3,-22.3,-0.1,0.0,2.7,0.0,0.0,0.0,0.0,0.4,3.7,6.95,0.0,0.0,4.4,-0.2,0.0,0.0,0.7,0.8,9.25,14.6,0.6,0.0,1.2,-6.5,0.0,0.0,-0.4,-0.1,-17.95,-16.45,0.8,0.0,3.2,-8.3,0.0,0.0,0.1,2.4,-15.6,8.75,0.6,0.0,7.05,-8.8,0.0,0.0,0.3,3.8,-4.7,32.55,0.0,0.0,11.2,-6.0,0.0,0.0,0.6,2.4,11.4,32.75,-0.2,0.0,11.0,-3.0,0.0,0.0,0.6,1.75,18.35,30.35,-0.2,0.0,8.8,-1.1,0.0,0.0,0.5,1.35,18.1,27.9,1.0
75%,58.0,1.0,170.0,79.0,94.0,175.0,384.0,179.0,102.0,66.0,12.0,60.0,36.0,0.0,0.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0,36.0,0.0,0.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,56.0,40.0,0.0,0.0,44.0,0.0,0.0,0.0,0.0,0.0,0.0,56.0,32.0,0.0,0.0,0.0,56.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,56.0,40.0,0.0,0.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0,40.0,0.0,0.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,32.0,60.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,52.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,48.0,48.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52.0,52.0,0.0,0.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52.0,48.0,0.0,0.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,52.0,44.0,0.0,0.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,7.6,0.0,0.0,0.0,0.8,1.7,19.7,27.9,0.1,0.0,9.1,0.0,0.0,0.0,1.3,2.1,25.525,37.725,0.2,0.0,5.0,0.0,0.0,0.0,0.7,1.0,12.225,16.45,0.4,-3.8,1.3,0.0,0.0,0.0,-0.6,-0.6,-9.2,-15.425,0.1,0.0,5.1,0.0,0.0,0.0,0.4,0.9,12.325,14.9,0.2,0.0,6.7,0.0,0.0,0.0,1.0,1.4,18.4,25.2,0.9,0.0,2.0,-3.3,0.0,0.0,0.1,1.025,-10.9,-6.575,1.3,0.0,5.4,-5.6,0.0,0.0,0.3,3.7,-5.95,24.125,1.225,0.0,10.6,-5.8,0.0,0.0,0.6,5.525,7.625,56.025,0.225,0.0,15.1,-3.7,0.0,0.0,0.8,3.9,25.05,52.325,0.0,0.0,14.125,-1.9,0.0,0.0,0.7,2.8,27.9,48.1,0.0,0.0,11.2,0.0,0.0,0.0,0.7,2.1,25.825,41.125,6.0
max,83.0,1.0,780.0,176.0,188.0,524.0,509.0,381.0,205.0,169.0,88.0,156.0,88.0,24.0,0.0,100.0,1.0,1.0,1.0,1.0,1.0,1.0,76.0,132.0,92.0,36.0,56.0,76.0,1.0,1.0,1.0,1.0,1.0,1.0,92.0,116.0,132.0,64.0,44.0,92.0,1.0,1.0,1.0,1.0,1.0,1.0,136.0,80.0,80.0,84.0,32.0,92.0,1.0,1.0,1.0,1.0,1.0,1.0,88.0,148.0,92.0,44.0,0.0,96.0,0.0,1.0,1.0,1.0,1.0,1.0,88.0,128.0,120.0,44.0,44.0,80.0,1.0,1.0,0.0,1.0,1.0,1.0,140.0,216.0,116.0,80.0,28.0,116.0,1.0,1.0,1.0,1.0,1.0,1.0,132.0,216.0,108.0,72.0,32.0,96.0,1.0,1.0,1.0,1.0,1.0,1.0,96.0,132.0,112.0,76.0,36.0,88.0,1.0,1.0,1.0,1.0,1.0,1.0,88.0,92.0,124.0,24.0,40.0,60.0,1.0,1.0,0.0,0.0,1.0,1.0,88.0,136.0,120.0,60.0,0.0,132.0,0.0,1.0,0.0,1.0,0.0,1.0,88.0,148.0,108.0,28.0,0.0,104.0,1.0,1.0,1.0,0.0,0.0,1.0,1.4,0.0,19.9,0.0,1.9,0.0,1.7,3.7,155.2,74.3,1.9,0.0,19.2,0.0,3.2,0.0,3.4,7.2,64.6,87.1,5.1,0.0,21.7,0.0,14.9,0.0,2.2,7.1,66.7,95.8,6.4,0.0,11.8,0.0,3.5,0.0,1.3,6.4,26.3,10.7,2.6,0.0,15.6,0.0,2.3,0.0,2.0,5.1,115.4,46.1,2.5,0.0,19.9,0.0,9.1,0.0,2.7,7.0,64.2,69.1,16.6,0.0,26.0,0.0,19.2,0.0,1.8,16.7,268.9,247.1,10.0,0.0,28.5,0.0,14.9,0.0,3.3,13.2,133.9,123.6,15.1,0.0,28.4,0.0,7.0,0.0,2.5,18.8,165.4,137.8,9.7,0.0,36.4,0.0,2.4,0.0,2.8,15.6,103.4,182.3,3.4,0.0,29.5,0.0,5.8,0.0,2.8,8.3,82.1,127.9,2.7,0.0,23.6,0.0,0.8,0.0,2.4,6.0,88.8,115.9,16.0


In [8]:
arrhythmia_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 452 entries, 0 to 451
Columns: 280 entries, age to class
dtypes: float64(120), int64(155), object(5)
memory usage: 988.8+ KB


### Describes of the classes

In [9]:
classes = arrhythmia_raw['class']

In [10]:
classes_unique = classes.unique()

In [11]:
classes_unique = classes_unique.tolist()

In [12]:
classes_unique

[8, 6, 10, 1, 7, 14, 3, 16, 2, 4, 5, 9, 15]

In [13]:
dataframes = []

In [14]:
for _class in classes_unique:
    dataframes.append(arrhythmia_raw.loc[arrhythmia_raw['class'] == _class])

In [15]:
len(classes_unique)

13

In [16]:
len(dataframes)

13

In [17]:
describes = {}

In [18]:
new_dataframes = []

In [19]:
arrhythmia_data = pd.DataFrame()

In [20]:
#arrhythmia_data = arrhythmia_data.fillna(0)

In [21]:
arrhythmia_array = np.array

In [22]:
imp = SimpleImputer(missing_values=np.NaN, strategy='mean', copy=False)

In [27]:
for dataframe in dataframes:
    df_columns = dataframe.columns
    print(df_columns.size)
        
    df_array = dataframe.replace('?', np.NaN).values
    imp.fit(df_array)
    transformed_array = imp.transform(df_array)
    print(transformed_array.shape)
    #new_df = pd.DataFrame(data=transformed_array, columns=features)
    #arrhythmia_data = arrhythmia_data.append(pd.DataFrame(data=transformed_array, columns=features))
    #new_dataframes.append(new_df)

280
(2, 280)
280
(25, 280)
280
(50, 280)
280
(245, 280)
280
(3, 279)
280
(4, 280)
280
(15, 280)
280
(22, 280)
280
(44, 280)
280
(15, 280)
280
(13, 280)
280
(9, 280)
280
(5, 279)


In [None]:
test_raw = arrhythmia_raw.loc[arrhythmia_raw['class'] == 2]

In [None]:
missing_count = arrhythmia_raw[features] == '?'

In [None]:
columns = arrhythmia_data.columns

In [None]:
for column in columns:
    print(column)

In [None]:
vector_j = arrhythmia_data['vector_j']

In [None]:
vector_j

In [None]:
test.head(100)

In [None]:
arrhythmia_data.info()

In [None]:
y = arrhythmia_data['class']

In [None]:
y.unique()

In [None]:
y.describe()

In [None]:
X_raw = arrhythmia_data.loc[:, features].values

In [None]:
X.view()

In [None]:
X = X.astype(np.float64)

In [None]:
type(X)

In [None]:
X = StandardScaler().fit_transform(X)

In [None]:
X.view()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

### KNN  .95

In [None]:
pca = PCA(.98)

In [None]:
knn = KNeighborsClassifier(n_neighbors=1)

In [None]:
X_train = pca.fit_transform(X_train)

In [None]:
X_test = pca.transform(X_test)

In [None]:
pca.explained_variance_ratio_

In [None]:
knn.fit(X_train, y_train)

In [None]:
y_pred = knn.predict(X_test)

In [None]:
accuracy_score(y_test, y_pred)

### KNN original dataset

In [None]:
X_train_od, X_test_od, y_train_od, y_test_od = train_test_split(x, y, test_size=0.2, random_state=0)

In [None]:
X_train_od.view()

In [None]:
knn.fit(X_train_od, y_train_od)

In [None]:
y_pred_od = knn.predict(X_test_od)

In [None]:
accuracy_score(y_test_od, y_pred_od)