# Dépendance

In [1]:
import seaborn as sns; sns.set()
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split as TTS
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.neighbors import RadiusNeighborsClassifier as RNC

# Variable

In [2]:
data = pd.read_csv('./data/prices.csv')

# Function

In [3]:
accuracy = lambda p,y: (p==y).sum()/p.shape[0]

In [4]:
def knn_predict(X,Y,size=0.2, seed=777, K=3):
    splits = TTS(X,
                 Y,
                 train_size=size,
                 stratify=Y,
                 random_state=seed
                )
    X_train, X_test, Y_train, Y_test = splits
    knn = KNN(n_neighbors=K)
    knn.fit(X_train, Y_train)
    prediction = knn.predict(X_test)
    print(accuracy(prediction, Y_test))
    print()
    print(knn.predict_proba(X_test))
    return True

In [5]:
def rfc_predict(X,Y,size=0.2, seed=777):
    splits = TTS(X,
                 Y,
                 train_size=size,
                 stratify=Y,
                 random_state=seed
                )
    X_train, X_test, Y_train, Y_test = splits
    rfc = RFC()
    rfc.fit(X_train, Y_train)
    prediction = rfc.predict(X_test)
    print(accuracy(prediction, Y_test))
    print()
    print(rfc.predict_proba(X_test))
    return True

In [6]:
def rnc_predict(X,Y,size=0.2, seed=777):
    splits = TTS(X,
                 Y,
                 train_size=size,
                 stratify=Y,
                 random_state=seed
                )
    X_train, X_test, Y_train, Y_test = splits
    rnc = RNC()
    rnc.fit(X_train, Y_train)
    prediction = rnc.predict(X_test)
    print(accuracy(prediction, Y_test))
    print()
    print(rnc.predict_proba(X_test))
    return True

# Notebook

In [7]:
data = data.drop(['Unnamed: 0','ID','day','local_currency','created'], axis='columns')
Y = data.available.astype('category').cat.codes
X = data.drop('available', axis='columns')
X.shape, Y.shape

((300000, 2), (300000,))

In [8]:
knn_predict(X,Y)

0.6521166666666667

[[1.         0.        ]
 [0.66666667 0.33333333]
 [1.         0.        ]
 ...
 [0.33333333 0.66666667]
 [1.         0.        ]
 [0.33333333 0.66666667]]


True

# Créer un model de Random Forest

In [9]:
rfc_predict(X,Y)

0.7034833333333333

[[0.99458333 0.00541667]
 [0.70698636 0.29301364]
 [0.97       0.03      ]
 ...
 [0.80341895 0.19658105]
 [0.44451491 0.55548509]
 [0.74378004 0.25621996]]


True

# Utilisation de RadiusNeighborsClassifier

In [10]:
rnc_predict(X,Y)

ValueError: No neighbors found for test samples array([   109,    744,    771,   1173,   1332,   1363,   1732,   2119,
         2404,   2507,   2859,   2866,   3001,   3262,   3286,   3322,
         3341,   3555,   3791,   3900,   4487,   4998,   5113,   5210,
         5288,   5560,   5618,   5631,   6057,   6074,   6138,   6457,
         6744,   6851,   7259,   7597,   7863,   8204,   8370,   8793,
         9098,   9117,   9220,  10054,  10101,  10188,  10195,  10221,
        10331,  10404,  10871,  10957,  11062,  11176,  11396,  11729,
        12049,  12056,  12141,  12837,  13096,  13454,  13589,  13688,
        13798,  13855,  14224,  14916,  14955,  15218,  15676,  15733,
        15750,  16829,  16941,  17052,  17128,  17381,  17629,  17927,
        18076,  18173,  18207,  18466,  18796,  19097,  19493,  19606,
        19855,  20345,  20488,  20718,  20869,  20997,  21089,  21523,
        22838,  23024,  23359,  23427,  23451,  23567,  23872,  24154,
        24297,  24375,  24440,  24551,  24581,  25174,  25385,  25749,
        25761,  26293,  26307,  26808,  26977,  27134,  27436,  27629,
        27724,  28027,  28406,  28733,  29002,  29726,  29958,  30009,
        30683,  30834,  30841,  30904,  31324,  31407,  31458,  31485,
        31698,  31912,  32011,  32504,  33536,  34016,  34046,  34062,
        34620,  34799,  34956,  35013,  35299,  35347,  35369,  35437,
        35447,  35628,  36431,  36476,  36774,  36880,  37175,  37534,
        37554,  37616,  37671,  37899,  38293,  38852,  39064,  39130,
        39654,  39720,  39738,  39760,  40714,  41224,  41663,  41848,
        41906,  41958,  42321,  43452,  43622,  43733,  44141,  44339,
        44507,  45079,  45436,  45670,  46030,  46106,  46287,  46305,
        46317,  46680,  46750,  48070,  48573,  48644,  49037,  49572,
        50652,  51284,  51425,  51838,  53822,  54072,  54103,  54475,
        54723,  54947,  55343,  55492,  56088,  56193,  56347,  56659,
        56738,  57507,  57649,  58758,  58903,  59061,  59082,  59133,
        59351,  59421,  59871,  59933,  60589,  60656,  60782,  60978,
        61527,  61600,  61768,  61885,  62001,  62154,  62230,  62807,
        63094,  63163,  63690,  64289,  64513,  64678,  64819,  64986,
        65253,  65792,  66224,  66290,  66454,  66547,  66764,  66779,
        66975,  67034,  67231,  67252,  67546,  68152,  68868,  68880,
        69307,  70268,  70317,  70364,  70687,  70730,  70737,  71028,
        71246,  71340,  71594,  71753,  72156,  72413,  72529,  72807,
        72985,  73443,  73691,  74041,  74052,  74383,  74474,  74533,
        74567,  74616,  74726,  74817,  74899,  74918,  75225,  75458,
        75750,  75962,  76181,  76322,  76587,  76804,  76911,  77024,
        77044,  77379,  77483,  77819,  77936,  77994,  78087,  78091,
        78116,  78203,  78235,  78757,  79023,  79074,  80202,  80369,
        80948,  81067,  81489,  81665,  81900,  82132,  82139,  82195,
        82302,  82357,  82405,  82473,  82734,  82991,  83333,  83792,
        83806,  83859,  84135,  84491,  84943,  85214,  85359,  85475,
        85849,  85896,  86663,  86749,  86845,  87002,  87250,  87381,
        87391,  87673,  87982,  88210,  88297,  88523,  88538,  89206,
        89306,  89309,  89339,  90051,  90123,  90377,  90502,  90511,
        90795,  91184,  91985,  92091,  92140,  92165,  92212,  92278,
        92485,  92708,  92721,  93442,  94002,  94057,  94118,  94301,
        94903,  95019,  95038,  95514,  95848,  95995,  96309,  96381,
        96515,  96593,  97291,  98657,  99526,  99543,  99868,  99929,
       100180, 100585, 100609, 100717, 100777, 101011, 101616, 101643,
       102001, 102291, 102551, 102598, 102710, 102973, 104115, 104535,
       105103, 105149, 106367, 106426, 106611, 106615, 106667, 107199,
       107648, 107767, 108184, 108295, 108310, 108395, 109045, 109262,
       109303, 109654, 109684, 109818, 109854, 109865, 109959, 110047,
       110247, 110373, 110721, 110845, 110938, 111114, 111210, 111599,
       112375, 113189, 113396, 113610, 113970, 114041, 114351, 114737,
       114959, 115065, 115114, 115247, 115445, 115597, 116054, 116392,
       116497, 116726, 117229, 117458, 117462, 117517, 117730, 118111,
       118255, 118496, 119215, 119458, 119807, 120075, 120089, 120688,
       120809, 121393, 121417, 121463, 121536, 121658, 121762, 122068,
       122164, 122268, 122351, 122524, 122697, 122816, 123079, 123160,
       123266, 123350, 124092, 124198, 125124, 125279, 125346, 125640,
       125822, 126122, 126238, 126650, 126866, 128257, 128705, 128793,
       128794, 128822, 128857, 129497, 129543, 129576, 129642, 129756,
       129895, 130233, 130362, 131177, 132199, 132394, 132888, 133111,
       133125, 133188, 133636, 134125, 134206, 134212, 134711, 135166,
       136091, 136187, 136469, 136494, 136507, 136664, 136666, 137694,
       137747, 137972, 138551, 138585, 139133, 139151, 139251, 139344,
       139385, 139606, 140252, 140280, 140503, 140590, 141240, 141452,
       141539, 141819, 141991, 142450, 142638, 142763, 142820, 143099,
       143245, 143273, 143300, 143311, 143685, 144150, 144456, 144774,
       145393, 145406, 145428, 145533, 145631, 146045, 146691, 146886,
       147222, 147733, 148125, 148624, 148689, 148892, 149334, 149349,
       149432, 150146, 150149, 150201, 150227, 151074, 151146, 151204,
       151744, 151888, 152011, 152155, 152157, 152896, 152936, 153023,
       153111, 153448, 153455, 153640, 153647, 153777, 154144, 154376,
       154442, 154695, 155531, 156099, 156220, 156731, 157014, 157122,
       157209, 157918, 158197, 158239, 158465, 158607, 159120, 159345,
       159347, 159370, 159498, 159517, 160257, 160567, 160589, 160701,
       160844, 161103, 161500, 161544, 161564, 161746, 161861, 162022,
       162171, 162235, 162289, 162919, 162927, 163021, 163164, 163315,
       163383, 163450, 163546, 163962, 164079, 165478, 165695, 165779,
       166209, 166295, 166784, 167178, 167181, 167941, 168351, 168535,
       169090, 169475, 169613, 170556, 170701, 171083, 171283, 172627,
       172764, 172944, 173053, 173343, 173527, 173571, 173842, 173932,
       174171, 174230, 174397, 174711, 174920, 175348, 175443, 176197,
       176514, 176585, 176591, 176910, 177170, 177447, 177556, 177645,
       177679, 177896, 178152, 179107, 179156, 179179, 179456, 179834,
       180023, 180077, 180158, 180766, 180780, 180795, 181209, 182038,
       182453, 182891, 183184, 183323, 183361, 183474, 183534, 183784,
       183819, 184290, 184699, 184746, 184936, 185073, 185467, 185484,
       186197, 186582, 187489, 187620, 187710, 188127, 188227, 188245,
       188294, 188461, 188828, 189328, 189339, 189456, 189536, 189904,
       190079, 190128, 190295, 190892, 191026, 191146, 191600, 192048,
       192071, 192103, 192514, 192893, 193234, 193346, 193633, 193730,
       193764, 193814, 194707, 194781, 194969, 195125, 195129, 195239,
       195554, 195768, 196044, 196739, 197212, 197530, 197560, 197737,
       197838, 198133, 198742, 198828, 199657, 199762, 199823, 199902,
       199906, 199929, 199967, 200412, 200465, 200516, 200573, 201164,
       201590, 201595, 201807, 201902, 202065, 202506, 202617, 202645,
       203191, 204081, 204388, 204848, 204850, 204886, 205073, 205101,
       205822, 205930, 206127, 206175, 206277, 206646, 206721, 206998,
       207574, 207581, 207696, 207869, 208083, 208511, 208672, 208887,
       208893, 209468, 209507, 209621, 210762, 210787, 211457, 211751,
       212083, 212353, 212726, 212955, 213334, 213514, 213860, 214177,
       214179, 214382, 214738, 214756, 215220, 215262, 215589, 216248,
       216645, 217440, 217788, 218491, 218555, 218818, 219102, 219413,
       219464, 219736, 219824, 220314, 220346, 220637, 220747, 220822,
       221099, 221323, 221521, 221523, 221939, 222396, 223220, 223527,
       223649, 223650, 223702, 223999, 224017, 224365, 224559, 224656,
       225502, 225568, 225901, 226060, 226818, 226964, 227127, 227604,
       227649, 227734, 227947, 228203, 228352, 228577, 229126, 229831,
       230729, 230915, 230987, 231358, 231533, 231851, 231946, 232041,
       232208, 232456, 232506, 233051, 233243, 233336, 234128, 234174,
       234284, 234526, 234962, 235282, 235337, 235842, 235923, 235965,
       236164, 236295, 236723, 236731, 236837, 236862, 236983, 237432,
       237661, 237862, 238090, 238168, 238319, 238345, 238421, 238827,
       239789]), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.