Import Library

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import seaborn as sns
from pathlib import Path  

columns settings

In [2]:
pd.options.display.max_columns = None
pd.options.display.max_rows = 200

In [3]:
data = pd.read_csv('dataset/train.csv')
test = pd.read_csv('dataset/test.csv')

Data Preprocessing functions

In [4]:
def remove_NaN_attribute(train_data):
    critical_percentage = 0.25
    # remove Id column first as it is not related to prediction model
    copy_data = train_data.drop('Id', axis=1)
    no_of_data = copy_data.shape[0]
    for attribute in copy_data:
        # count number of NaN item in a attribute
        no_of_NaN_of_attribute_item = copy_data[attribute].isnull().sum()
        if no_of_NaN_of_attribute_item / no_of_data >= critical_percentage:
            print(str(attribute) + ": " + str(no_of_NaN_of_attribute_item / copy_data.shape[0]))
            copy_data = copy_data.drop(attribute, axis=1)
    print(len(copy_data.columns))
    return copy_data

def dropObjectColumns(data):
    object_data = data.select_dtypes(include='object').columns
    dropped_data = data.drop(object_data, axis=1)
    return dropped_data

def sdScaler(data):
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(data)
    return data_scaled

def makeOneHot(data):
    object_data = data.select_dtypes(include='object').columns
    ohe = OneHotEncoder()
    feature_arr = ohe.fit_transform(data[object_data]).toarray()
    feature_labels = ohe.categories_
    feature_labels = np.hstack(feature_labels)
    features = pd.DataFrame(feature_arr, columns= feature_labels)
    return features

def mode_replace_NaN(data):
    for attribute in data:
        data[attribute] = data[attribute].fillna(data[attribute].mode()[0])
    return data

In [5]:
cleaned_data = remove_NaN_attribute(data)
cleaned_test_data = remove_NaN_attribute(test)

Alley: 0.9376712328767123
FireplaceQu: 0.4726027397260274
PoolQC: 0.9952054794520548
Fence: 0.8075342465753425
MiscFeature: 0.963013698630137
75
Alley: 0.9266620973269363
FireplaceQu: 0.5003427004797807
PoolQC: 0.997943797121316
Fence: 0.8012337217272104
MiscFeature: 0.9650445510623715
74


In [6]:
replaced_data = mode_replace_NaN(cleaned_data)
replaced_test_data = mode_replace_NaN(cleaned_test_data )

filepath = Path('folder/subfolder/out.csv')  
filepath.parent.mkdir(parents=True, exist_ok=True)  
replaced_data.to_csv(filepath)  

In [7]:
dropped_cleaned_scaled_data = dropObjectColumns(replaced_data)
dropped_cleaned_scaled_test_data = dropObjectColumns(replaced_test_data)

In [8]:
X = dropped_cleaned_scaled_data.iloc[:, 0:36]
y = dropped_cleaned_scaled_data.iloc[:, 36]

In [13]:
encoded_columns = makeOneHot(replaced_data)

In [14]:
processed_data = np.concatenate([X, encoded_columns], axis=1)

In [15]:
processed_data

array([[6.000e+01, 6.500e+01, 8.450e+03, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [2.000e+01, 8.000e+01, 9.600e+03, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [6.000e+01, 6.800e+01, 1.125e+04, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       ...,
       [7.000e+01, 6.600e+01, 9.042e+03, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [2.000e+01, 6.800e+01, 9.717e+03, ..., 0.000e+00, 1.000e+00,
        0.000e+00],
       [2.000e+01, 7.500e+01, 9.937e+03, ..., 0.000e+00, 1.000e+00,
        0.000e+00]])

In [16]:
X_train_scaled = sdScaler(processed_data)
# X_test_scaled = sdScaler(X_test)

In [18]:
pd.DataFrame(X_train_scaled)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269
0,0.073375,-0.146189,-0.207142,0.651479,-0.517200,1.050994,0.878668,0.514104,0.575425,-0.288653,-0.944591,-0.459303,-0.793434,1.161852,-0.120242,0.370333,1.107810,-0.241061,0.789741,1.227585,0.163779,-0.211454,0.912210,-0.951226,0.930592,0.311725,0.351000,-0.752176,0.216503,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-1.599111,0.138777,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,-0.704203,-0.169981,-0.083045,0.760512,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,-0.468739,-0.262324,-0.182380,-0.052414,0.622762,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,2.955221,-0.190252,-0.271163,-0.239176,-0.16125,-0.10854,-0.186352,-0.426833,-0.078757,-0.229416,-0.169981,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,-0.087129,-0.184376,-0.242360,0.39841,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,-0.994535,-0.074227,-0.087129,1.510265,-0.16125,-0.215859,-0.094785,0.528753,-0.087129,-0.493570,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,-0.208812,-0.423464,-0.02618,-0.421212,-0.282633,-0.037037,-0.131991,1.354604,-0.405308,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,-0.207020,-0.406452,-0.083045,-0.414427,-0.02618,-0.328236,-0.058621,-0.134652,1.377253,-0.39494,-0.163471,-0.101885,1.510265,-1.217782,-0.309994,-0.192177,-0.098397,1.411313,-1.278819,-0.045376,-0.139832,-0.333333,-0.02618,0.37262,-0.333333,-0.876102,1.120968,-0.129279,-0.064238,-0.045376,-0.300609,-0.156721,1.167244,-0.941438,-0.178331,-0.215859,-0.037037,0.288247,-0.422338,-0.317893,-0.291025,0.687938,-0.421212,-0.335864,1.578868,-0.231065,-0.316585,-0.685778,-0.114827,-0.152071,-0.098397,-0.180366,-0.195977,0.358168,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,0.985043,-0.186352,-0.444638,-0.02618,-0.643994,-0.263813,0.263813,-0.262324,-0.137265,-0.045376,-0.02618,0.305995,-0.271163,-0.165667,1.221257,-1.006873,-0.098397,-0.058621,-0.147287,-0.154412,-0.101885,-0.02618,0.271163,-0.064238,0.731591,-0.114827,-0.253259,-0.078757,-0.600559,-0.563640,1.568348,-0.941438,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,-0.272616,-0.052414,-0.091035,-0.117851,0.467651,-0.305995
1,-0.872563,0.524992,-0.091886,-0.071836,2.179628,0.156734,-0.429577,-0.570750,1.171992,-0.288653,-0.641228,0.466465,0.257140,-0.795163,-0.120242,-0.482512,-0.819964,3.948809,0.789741,-0.761621,0.163779,-0.211454,-0.318683,0.600495,-0.160705,0.311725,-0.060731,1.626195,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-0.489110,-0.614439,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,-0.704203,-0.169981,-0.083045,0.760512,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,-0.468739,-0.262324,5.483049,-0.052414,-1.605749,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,-0.338384,-0.190252,-0.271163,-0.239176,-0.16125,-0.10854,-0.186352,-0.426833,-0.078757,-0.229416,-0.169981,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,11.477250,-0.184376,4.126099,-2.50998,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,1.005495,-0.074227,-0.087129,-0.662136,-0.16125,-0.215859,-0.094785,0.528753,-0.087129,-0.493570,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,-0.208812,-0.423464,-0.02618,2.374103,-0.282633,-0.037037,-0.131991,-0.738223,-0.405308,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,-0.207020,-0.406452,-0.083045,2.412971,-0.02618,-0.328236,-0.058621,-0.134652,-0.726083,-0.39494,-0.163471,-0.101885,-0.662136,0.821165,-0.309994,-0.192177,-0.098397,-0.708560,0.781971,-0.045376,-0.139832,-0.333333,-0.02618,0.37262,-0.333333,1.141420,-0.892086,-0.129279,-0.064238,-0.045376,-0.300609,-0.156721,1.167244,-0.941438,-0.178331,-0.215859,-0.037037,0.288247,-0.422338,3.145715,-0.291025,-1.453618,2.374103,-0.335864,-0.633365,-0.231065,-0.316585,-0.685778,-0.114827,-0.152071,-0.098397,-0.180366,-0.195977,0.358168,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,0.985043,-0.186352,-0.444638,-0.02618,-0.643994,-0.263813,0.263813,-0.262324,-0.137265,-0.045376,-0.02618,0.305995,-0.271163,-0.165667,-0.818829,0.993174,-0.098397,-0.058621,-0.147287,-0.154412,-0.101885,-0.02618,0.271163,-0.064238,0.731591,-0.114827,-0.253259,-0.078757,-0.600559,-0.563640,1.568348,-0.941438,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,-0.272616,-0.052414,-0.091035,-0.117851,0.467651,-0.305995
2,0.073375,-0.011953,0.073480,0.651479,-0.517200,0.984752,0.830215,0.325915,0.092907,-0.288653,-0.301643,-0.313369,-0.627826,1.189351,-0.120242,0.515013,1.107810,-0.241061,0.789741,1.227585,0.163779,-0.211454,-0.318683,0.600495,0.849755,0.311725,0.631726,-0.752176,-0.070361,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,0.990891,0.138777,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,1.420045,-0.169981,-0.083045,-1.314904,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,-0.468739,-0.262324,-0.182380,-0.052414,0.622762,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,2.955221,-0.190252,-0.271163,-0.239176,-0.16125,-0.10854,-0.186352,-0.426833,-0.078757,-0.229416,-0.169981,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,-0.087129,-0.184376,-0.242360,0.39841,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,-0.994535,-0.074227,-0.087129,1.510265,-0.16125,-0.215859,-0.094785,0.528753,-0.087129,-0.493570,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,-0.208812,-0.423464,-0.02618,-0.421212,-0.282633,-0.037037,-0.131991,1.354604,-0.405308,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,-0.207020,-0.406452,-0.083045,-0.414427,-0.02618,-0.328236,-0.058621,-0.134652,1.377253,-0.39494,-0.163471,-0.101885,1.510265,-1.217782,-0.309994,-0.192177,-0.098397,1.411313,-1.278819,-0.045376,-0.139832,-0.333333,-0.02618,0.37262,-0.333333,-0.876102,1.120968,-0.129279,-0.064238,-0.045376,-0.300609,-0.156721,1.167244,-0.941438,-0.178331,-0.215859,-0.037037,0.288247,-0.422338,-0.317893,3.436134,-1.453618,-0.421212,-0.335864,1.578868,-0.231065,-0.316585,-0.685778,-0.114827,-0.152071,-0.098397,-0.180366,-0.195977,0.358168,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,0.985043,-0.186352,-0.444638,-0.02618,-0.643994,-0.263813,0.263813,-0.262324,-0.137265,-0.045376,-0.02618,0.305995,-0.271163,-0.165667,1.221257,-1.006873,-0.098397,-0.058621,-0.147287,-0.154412,-0.101885,-0.02618,0.271163,-0.064238,0.731591,-0.114827,-0.253259,-0.078757,-0.600559,-0.563640,1.568348,-0.941438,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,-0.272616,-0.052414,-0.091035,-0.117851,0.467651,-0.305995
3,0.309859,-0.369915,-0.096897,0.651479,-0.517200,-1.863632,-0.720298,-0.570750,-0.499274,-0.288653,-0.061670,-0.687324,-0.521734,0.937276,-0.120242,0.383659,1.107810,-0.241061,-1.026041,-0.761621,0.163779,-0.211454,0.296763,0.600495,0.728500,1.650307,0.790804,-0.752176,-0.176048,4.092524,-0.116339,-0.270208,-0.068692,-0.087688,-1.599111,-1.367655,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,1.420045,-0.169981,-0.083045,-1.314904,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,2.133385,-0.262324,-0.182380,-0.052414,-1.605749,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,-0.338384,5.256182,-0.271163,-0.239176,-0.16125,-0.10854,-0.186352,-0.426833,-0.078757,-0.229416,-0.169981,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,-0.087129,-0.184376,-0.242360,0.39841,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,-0.994535,-0.074227,-0.087129,1.510265,-0.16125,-0.215859,-0.094785,0.528753,-0.087129,-0.493570,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,-0.208812,-0.423464,-0.02618,-0.421212,-0.282633,-0.037037,-0.131991,-0.738223,2.467261,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,-0.207020,-0.406452,-0.083045,-0.414427,-0.02618,-0.328236,-0.058621,-0.134652,-0.726083,-0.39494,6.117275,-0.101885,-0.662136,0.821165,-0.309994,-0.192177,-0.098397,-0.708560,0.781971,-0.045376,-0.139832,-0.333333,-0.02618,0.37262,3.000000,-0.876102,-0.892086,-0.129279,-0.064238,-0.045376,-0.300609,-0.156721,-0.856719,1.062205,-0.178331,4.632660,-0.037037,-3.469253,-0.422338,-0.317893,-0.291025,0.687938,2.374103,-0.335864,-0.633365,-0.231065,-0.316585,-0.685778,-0.114827,-0.152071,-0.098397,-0.180366,-0.195977,0.358168,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,-1.015184,-0.186352,2.249020,-0.02618,-0.643994,-0.263813,0.263813,-0.262324,-0.137265,-0.045376,-0.02618,0.305995,-0.271163,-0.165667,1.221257,-1.006873,-0.098397,-0.058621,-0.147287,-0.154412,-0.101885,-0.02618,0.271163,-0.064238,-1.366883,-0.114827,-0.253259,-0.078757,1.665116,-0.563640,-0.637614,1.062205,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,3.668167,-0.052414,-0.091035,-0.117851,-2.138345,-0.305995
4,0.073375,0.703973,0.375148,1.374795,-0.517200,0.951632,0.733308,1.366489,0.463568,-0.288653,-0.174865,0.199680,-0.045611,1.617877,-0.120242,1.299326,1.107810,-0.241061,0.789741,1.227585,1.390023,-0.211454,1.527656,0.600495,0.809337,1.650307,1.698485,0.780197,0.563760,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,2.100892,0.138777,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,1.420045,-0.169981,-0.083045,-1.314904,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,-0.468739,-0.262324,5.483049,-0.052414,-1.605749,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,-0.338384,-0.190252,-0.271163,-0.239176,-0.16125,-0.10854,-0.186352,-0.426833,-0.078757,-0.229416,5.883006,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,-0.087129,-0.184376,-0.242360,0.39841,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,-0.994535,-0.074227,-0.087129,1.510265,-0.16125,-0.215859,-0.094785,0.528753,-0.087129,-0.493570,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,-0.208812,-0.423464,-0.02618,-0.421212,-0.282633,-0.037037,-0.131991,1.354604,-0.405308,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,-0.207020,-0.406452,-0.083045,-0.414427,-0.02618,-0.328236,-0.058621,-0.134652,1.377253,-0.39494,-0.163471,-0.101885,1.510265,-1.217782,-0.309994,-0.192177,-0.098397,1.411313,-1.278819,-0.045376,-0.139832,-0.333333,-0.02618,0.37262,-0.333333,-0.876102,1.120968,-0.129279,-0.064238,-0.045376,-0.300609,-0.156721,1.167244,-0.941438,-0.178331,-0.215859,-0.037037,0.288247,2.367770,-0.317893,-0.291025,-1.453618,-0.421212,-0.335864,1.578868,-0.231065,-0.316585,-0.685778,-0.114827,-0.152071,-0.098397,-0.180366,-0.195977,0.358168,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,0.985043,-0.186352,-0.444638,-0.02618,-0.643994,-0.263813,0.263813,-0.262324,-0.137265,-0.045376,-0.02618,0.305995,-0.271163,-0.165667,1.221257,-1.006873,-0.098397,-0.058621,-0.147287,-0.154412,-0.101885,-0.02618,0.271163,-0.064238,0.731591,-0.114827,-0.253259,-0.078757,-0.600559,-0.563640,1.568348,-0.941438,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,-0.272616,-0.052414,-0.091035,-0.117851,0.467651,-0.305995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,0.073375,-0.280425,-0.260560,-0.071836,-0.517200,0.918511,0.733308,-0.570750,-0.973018,-0.288653,0.873321,-0.238122,-0.542435,0.795198,-0.120242,0.250402,-0.819964,-0.241061,0.789741,1.227585,0.163779,-0.211454,0.296763,0.600495,0.768918,0.311725,-0.060731,-0.752176,-0.100558,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,0.620891,-0.614439,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,-0.704203,-0.169981,-0.083045,0.760512,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,-0.468739,-0.262324,-0.182380,-0.052414,0.622762,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,-0.338384,-0.190252,-0.271163,4.181030,-0.16125,-0.10854,-0.186352,-0.426833,-0.078757,-0.229416,-0.169981,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,-0.087129,-0.184376,-0.242360,0.39841,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,-0.994535,-0.074227,-0.087129,1.510265,-0.16125,-0.215859,-0.094785,0.528753,-0.087129,-0.493570,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,-0.208812,-0.423464,-0.02618,-0.421212,-0.282633,-0.037037,-0.131991,1.354604,-0.405308,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,-0.207020,-0.406452,-0.083045,-0.414427,-0.02618,-0.328236,-0.058621,-0.134652,1.377253,-0.39494,-0.163471,-0.101885,-0.662136,0.821165,-0.309994,-0.192177,-0.098397,-0.708560,0.781971,-0.045376,-0.139832,-0.333333,-0.02618,0.37262,-0.333333,-0.876102,1.120968,-0.129279,-0.064238,-0.045376,-0.300609,-0.156721,1.167244,-0.941438,-0.178331,-0.215859,-0.037037,0.288247,-0.422338,-0.317893,-0.291025,0.687938,-0.421212,-0.335864,-0.633365,-0.231065,-0.316585,1.458197,-0.114827,-0.152071,-0.098397,-0.180366,-0.195977,0.358168,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,0.985043,-0.186352,-0.444638,-0.02618,-0.643994,-0.263813,0.263813,-0.262324,-0.137265,-0.045376,-0.02618,0.305995,-0.271163,-0.165667,-0.818829,0.993174,-0.098397,-0.058621,-0.147287,-0.154412,-0.101885,-0.02618,0.271163,-0.064238,0.731591,-0.114827,-0.253259,-0.078757,-0.600559,-0.563640,1.568348,-0.941438,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,-0.272616,-0.052414,-0.091035,-0.117851,0.467651,-0.305995
1456,-0.872563,0.748718,0.266407,-0.071836,0.381743,0.222975,0.151865,0.087911,0.759659,0.722112,0.049262,1.104925,2.355701,-0.795163,-0.120242,1.061367,1.107810,-0.241061,0.789741,-0.761621,0.163779,-0.211454,0.296763,2.152216,-0.079868,0.311725,0.126420,2.033231,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-1.599111,1.645210,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,-0.704203,-0.169981,-0.083045,0.760512,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,-0.468739,-0.262324,-0.182380,-0.052414,0.622762,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,-0.338384,-0.190252,-0.271163,-0.239176,-0.16125,-0.10854,-0.186352,-0.426833,-0.078757,4.358899,-0.169981,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,-0.087129,-0.184376,-0.242360,0.39841,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,1.005495,-0.074227,-0.087129,-0.662136,-0.16125,-0.215859,-0.094785,0.528753,-0.087129,-0.493570,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,-0.208812,-0.423464,-0.02618,-0.421212,3.538152,-0.037037,-0.131991,-0.738223,-0.405308,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,-0.207020,-0.406452,-0.083045,-0.414427,-0.02618,3.046587,-0.058621,-0.134652,-0.726083,-0.39494,-0.163471,-0.101885,-0.662136,-1.217782,3.225872,-0.192177,-0.098397,-0.708560,0.781971,-0.045376,-0.139832,-0.333333,-0.02618,0.37262,-0.333333,1.141420,-0.892086,-0.129279,-0.064238,-0.045376,-0.300609,-0.156721,1.167244,-0.941438,-0.178331,-0.215859,-0.037037,0.288247,-0.422338,-0.317893,-0.291025,0.687938,2.374103,-0.335864,-0.633365,-0.231065,-0.316585,-0.685778,-0.114827,-0.152071,-0.098397,-0.180366,5.102650,-2.791985,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,-1.015184,-0.186352,-0.444638,-0.02618,1.552809,-0.263813,0.263813,-0.262324,-0.137265,-0.045376,-0.02618,0.305995,-0.271163,-0.165667,-0.818829,0.993174,-0.098397,-0.058621,6.789461,-0.154412,-0.101885,-0.02618,-3.687818,-0.064238,0.731591,-0.114827,-0.253259,-0.078757,-0.600559,-0.563640,-0.637614,1.062205,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,-0.272616,-0.052414,-0.091035,-0.117851,0.467651,-0.305995
1457,0.309859,-0.101443,-0.147810,0.651479,3.078570,-1.002492,1.024029,-0.570750,-0.369871,-0.288653,0.701265,0.215641,0.065656,1.844744,-0.120242,1.569647,-0.819964,-0.241061,0.789741,-0.761621,1.390023,-0.211454,1.527656,2.152216,-1.575348,-1.026858,-1.033914,-0.752176,0.201405,-0.359325,-0.116339,-0.270208,-0.068692,4.953112,-0.489110,1.645210,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,-0.704203,-0.169981,-0.083045,0.760512,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,-0.468739,-0.262324,-0.182380,-0.052414,0.622762,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,-0.338384,5.256182,-0.271163,-0.239176,-0.16125,-0.10854,-0.186352,-0.426833,-0.078757,-0.229416,-0.169981,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,-0.087129,-0.184376,-0.242360,0.39841,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,-0.994535,-0.074227,-0.087129,1.510265,-0.16125,-0.215859,-0.094785,0.528753,-0.087129,-0.493570,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,4.788990,-0.423464,-0.02618,-0.421212,-0.282633,-0.037037,-0.131991,-0.738223,-0.405308,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,4.830459,-0.406452,-0.083045,-0.414427,-0.02618,-0.328236,-0.058621,-0.134652,-0.726083,-0.39494,-0.163471,-0.101885,-0.662136,0.821165,-0.309994,5.203549,-0.098397,-0.708560,-1.278819,-0.045376,-0.139832,3.000000,-0.02618,-2.68370,-0.333333,-0.876102,-0.892086,-0.129279,15.567059,-0.045376,-0.300609,-0.156721,-0.856719,1.062205,-0.178331,4.632660,-0.037037,-3.469253,-0.422338,-0.317893,-0.291025,0.687938,-0.421212,-0.335864,1.578868,-0.231065,-0.316585,-0.685778,-0.114827,-0.152071,-0.098397,-0.180366,-0.195977,0.358168,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,0.985043,-0.186352,-0.444638,-0.02618,-0.643994,-0.263813,0.263813,-0.262324,-0.137265,-0.045376,-0.02618,0.305995,-0.271163,-0.165667,1.221257,-1.006873,-0.098397,-0.058621,-0.147287,-0.154412,-0.101885,-0.02618,0.271163,-0.064238,0.731591,-0.114827,-0.253259,-0.078757,-0.600559,-0.563640,1.568348,-0.941438,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,-0.272616,-0.052414,-0.091035,-0.117851,0.467651,-0.305995
1458,-0.872563,-0.011953,-0.080160,-0.795151,0.381743,-0.704406,0.539493,-0.570750,-0.865548,6.092188,-1.284176,0.046905,-0.218982,-0.795163,-0.120242,-0.832788,1.107810,-0.241061,-1.026041,-0.761621,-1.062465,-0.211454,-0.934130,-0.951226,-1.211583,-1.026858,-1.090059,2.168910,-0.704483,1.473789,-0.116339,-0.270208,-0.068692,-0.087688,-0.859110,1.645210,-0.083045,-0.215859,-0.105263,0.518133,-0.418955,-0.064238,0.064238,-0.704203,-0.169981,-0.083045,0.760512,-0.21236,-0.188311,-0.159,0.337126,0.02618,-0.02618,-0.468739,-0.262324,-0.182380,-0.052414,0.622762,0.237571,-0.215859,-0.094785,-0.10854,-0.037037,-0.105263,-0.203395,-0.139832,-0.338384,-0.190252,-0.271163,-0.239176,-0.16125,-0.10854,-0.186352,2.342838,-0.078757,-0.229416,-0.169981,-0.235958,-0.289638,-0.131991,-0.231065,-0.205214,-0.250182,-0.131991,-0.163471,-0.087129,-0.184376,-0.242360,0.39841,-0.074227,-0.114827,-0.087129,-0.134652,-0.037037,-0.058621,-0.037037,-0.064238,0.101885,-0.02618,-0.037037,-0.02618,-0.02618,-0.037037,0.443533,-0.147287,-0.192177,-0.1742,-0.291025,-0.343391,-0.098397,1.005495,-0.074227,-0.087129,-0.662136,-0.16125,-0.215859,-0.094785,-1.891244,-0.087129,2.026054,-0.069409,-0.037037,-0.02618,0.134652,-0.02618,-0.02618,-0.02618,-0.087129,-0.058621,-0.064238,-0.117851,-0.02618,-0.037037,-0.188311,-0.02618,-0.208812,-0.423464,-0.02618,2.374103,-0.282633,-0.037037,-0.131991,-0.738223,-0.405308,-0.134652,-0.117851,-0.045376,-0.069409,-0.131991,-0.02618,-0.207020,-0.406452,-0.083045,2.412971,-0.02618,-0.328236,-0.058621,-0.134652,-0.726083,-0.39494,-0.163471,-0.101885,-0.662136,0.821165,-0.309994,-0.192177,-0.098397,-0.708560,0.781971,-0.045376,-0.139832,-0.333333,-0.02618,0.37262,-0.333333,1.141420,-0.892086,-0.129279,-0.064238,-0.045376,-0.300609,-0.156721,-0.856719,1.062205,-0.178331,-0.215859,-0.037037,0.288247,-0.422338,-0.317893,3.436134,-1.453618,-0.421212,-0.335864,1.578868,-0.231065,-0.316585,-0.685778,-0.114827,-0.152071,-0.098397,-0.180366,5.102650,-2.791985,-0.02618,0.149696,-0.111726,-0.069409,-0.037037,-0.052414,-1.015184,-0.186352,2.249020,-0.02618,-0.643994,-0.263813,0.263813,3.812075,-0.137265,-0.045376,-0.02618,-3.268027,-0.271163,-0.165667,1.221257,-1.006873,-0.098397,-0.058621,-0.147287,-0.154412,-0.101885,-0.02618,0.271163,-0.064238,0.731591,-0.114827,-0.253259,-0.078757,-0.600559,-0.563640,-0.637614,1.062205,-0.045376,-0.184376,-0.098397,-0.045376,0.221022,-0.037037,-0.156721,-0.078757,-0.069409,0.194085,-0.256307,-0.144841,0.299253,-0.1742,-0.052414,-0.037037,-0.078757,-0.058621,-0.058621,-0.301962,-0.045376,0.390293,-0.272616,-0.052414,-0.091035,-0.117851,0.467651,-0.305995


In [20]:
# define the model: Init KNN, n_neighbors is by sqrt(Y_test) = 12, but 12 is even number so - 1 = 11
classifier = KNeighborsClassifier(n_neighbors=17, p=663, metric='euclidean')

# fit model
classifier.fit(X , y)

In [21]:
# Predict the test set result
y_pred = classifier.predict(X_test)
pd.DataFrame(y_pred)

Unnamed: 0,0
0,145000
1,200000
2,67000
3,79000
4,150900
...,...
287,79900
288,215000
289,118500
290,178000


In [None]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(f1_score(y_test, y_pred, average='macro'))

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
0.0005791505791505791


Machine Learning Models

In [None]:
models = pd.DataFrame(columns=["Model","MAE","MSE","RMSE","R2 Score","RMSE (Cross-Validation)"])

In [None]:
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
predictions = lin_reg.predict(X_test)

mae, mse, rmse, r_squared = evaluation(y_test, predictions)
print("MAE:", mae)
print("MSE:", mse)
print("RMSE:", rmse)
print("R2 Score:", r_squared)
print("-"*30)
rmse_cross_val = rmse_cv(lin_reg)
print("RMSE Cross-Validation:", rmse_cross_val)

new_row = {"Model": "LinearRegression","MAE": mae, "MSE": mse, "RMSE": rmse, "R2 Score": r_squared, "RMSE (Cross-Validation)": rmse_cross_val}
models = models.append(new_row, ignore_index=True)