# Naive Bayes

### 1. Pressupõe que os atributos são condicionalmente independentes entre si (dada a classe) e que contribuem igualmente para a previsão
### 2. Infelizmente, os problemas reais nem sempre atendem a essas condições

In [None]:
import pandas as pd

# Toy "play tennis" dataset: one record per day, written row-wise so each
# observation can be read at a glance.
_columns = ['Dia', 'Clima', 'Temperatura', 'Umidade', 'Vento', 'Jogar Tênis']
_rows = [
    ('D1', 'Sol', 'Quente', 'Alta', 'Fraco', 'Não'),
    ('D2', 'Sol', 'Quente', 'Alta', 'Forte', 'Não'),
    ('D3', 'Nublado', 'Quente', 'Alta', 'Fraco', 'Sim'),
    ('D4', 'Chuva', 'Suave', 'Alta', 'Fraco', 'Sim'),
    ('D5', 'Chuva', 'Legal', 'Normal', 'Fraco', 'Sim'),
    ('D6', 'Chuva', 'Legal', 'Normal', 'Forte', 'Não'),
    ('D7', 'Nublado', 'Legal', 'Normal', 'Forte', 'Sim'),
    ('D8', 'Sol', 'Suave', 'Alta', 'Fraco', 'Não'),
    ('D9', 'Sol', 'Legal', 'Normal', 'Fraco', 'Sim'),
    ('D10', 'Chuva', 'Suave', 'Normal', 'Fraco', 'Sim'),
    ('D11', 'Sol', 'Suave', 'Normal', 'Forte', 'Sim'),
    ('D12', 'Nublado', 'Suave', 'Alta', 'Forte', 'Sim'),
    ('D13', 'Nublado', 'Quente', 'Normal', 'Fraco', 'Sim'),
    ('D14', 'Chuva', 'Suave', 'Alta', 'Forte', 'Não'),
]

# Transpose the records into the column -> list-of-values mapping that the
# DataFrame is built from.
data = {name: list(values) for name, values in zip(_columns, zip(*_rows))}

df = pd.DataFrame(data)
df

Unnamed: 0,Dia,Clima,Temperatura,Umidade,Vento,Jogar Tênis
0,D1,Sol,Quente,Alta,Fraco,Não
1,D2,Sol,Quente,Alta,Forte,Não
2,D3,Nublado,Quente,Alta,Fraco,Sim
3,D4,Chuva,Suave,Alta,Fraco,Sim
4,D5,Chuva,Legal,Normal,Fraco,Sim
5,D6,Chuva,Legal,Normal,Forte,Não
6,D7,Nublado,Legal,Normal,Forte,Sim
7,D8,Sol,Suave,Alta,Fraco,Não
8,D9,Sol,Legal,Normal,Fraco,Sim
9,D10,Chuva,Suave,Normal,Fraco,Sim


In [None]:
df.groupby('Jogar Tênis').indices

{'Não': array([ 0,  1,  5,  7, 13]),
 'Sim': array([ 2,  3,  4,  6,  8,  9, 10, 11, 12])}

In [None]:
pd.DataFrame(df.groupby('Jogar Tênis').value_counts())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,0
Jogar Tênis,Dia,Clima,Temperatura,Umidade,Vento,Unnamed: 6_level_1
Não,D1,Sol,Quente,Alta,Fraco,1
Não,D14,Chuva,Suave,Alta,Forte,1
Não,D2,Sol,Quente,Alta,Forte,1
Não,D6,Chuva,Legal,Normal,Forte,1
Não,D8,Sol,Suave,Alta,Fraco,1
Sim,D10,Chuva,Suave,Normal,Fraco,1
Sim,D11,Sol,Suave,Normal,Forte,1
Sim,D12,Nublado,Suave,Alta,Forte,1
Sim,D13,Nublado,Quente,Normal,Fraco,1
Sim,D3,Nublado,Quente,Alta,Fraco,1


In [None]:
# Number of rows per class label ("Não" / "Sim"). Used below as the
# denominator for both the class prior and the per-attribute likelihoods.
x = df.groupby('Jogar Tênis').size()

In [None]:
x

Jogar Tênis
Não    5
Sim    9
dtype: int64

In [None]:
# Class prior P(label): the share of all rows that carry each label.
total_rows = len(df)
prior = x / total_rows
prior

Jogar Tênis
Não    0.357143
Sim    0.642857
dtype: float64

In [None]:
df.groupby(['Jogar Tênis', 'Clima']).size().div(x)

Jogar Tênis  Clima  
Não          Chuva      0.400000
             Sol        0.600000
Sim          Chuva      0.333333
             Nublado    0.444444
             Sol        0.222222
dtype: float64

In [None]:
df.groupby(['Jogar Tênis', 'Temperatura']).size().div(x)

Jogar Tênis  Temperatura
Não          Legal          0.200000
             Quente         0.400000
             Suave          0.400000
Sim          Legal          0.333333
             Quente         0.222222
             Suave          0.444444
dtype: float64

In [None]:
df.columns

Index(['Dia', 'Clima', 'Temperatura', 'Umidade', 'Vento', 'Jogar Tênis'], dtype='object')

In [None]:
df.columns[1:-1]

Index(['Clima', 'Temperatura', 'Umidade', 'Vento'], dtype='object')

In [None]:
# Conditional likelihoods P(attribute value | label) for every attribute
# column (everything between the 'Dia' identifier and the label column).
# Each entry is a Series indexed by (label, attribute value). Pairs that never
# occur in the data are simply absent from the Series (no smoothing applied).
likelihood = {
    attribute: df.groupby(['Jogar Tênis', attribute]).size().div(x)
    for attribute in df.columns[1:-1]
}
likelihood

{'Clima': Jogar Tênis  Clima  
 Não          Chuva      0.400000
              Sol        0.600000
 Sim          Chuva      0.333333
              Nublado    0.444444
              Sol        0.222222
 dtype: float64,
 'Temperatura': Jogar Tênis  Temperatura
 Não          Legal          0.200000
              Quente         0.400000
              Suave          0.400000
 Sim          Legal          0.333333
              Quente         0.222222
              Suave          0.444444
 dtype: float64,
 'Umidade': Jogar Tênis  Umidade
 Não          Alta       0.800000
              Normal     0.200000
 Sim          Alta       0.333333
              Normal     0.666667
 dtype: float64,
 'Vento': Jogar Tênis  Vento
 Não          Forte    0.600000
              Fraco    0.400000
 Sim          Forte    0.333333
              Fraco    0.666667
 dtype: float64}

In [None]:
likelihood['Vento']['Não']['Fraco']

0.4

# Naive Bayes Gaussiano

### 1. Para cada atributo e cada classe (label), estima a distribuição normal (média e variância) mais verossímil a partir dos dados
### 2. Pode ser usado com valores contínuos

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()

In [None]:
data

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [None]:
data['data']

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [None]:
import numpy as np
np.array(data['data'])[:, -2]

array([0.4601, 0.275 , 0.3613, 0.6638, 0.2364, 0.3985, 0.3063, 0.3196,
       0.4378, 0.4366, 0.2948, 0.3792, 0.3176, 0.2809, 0.3596, 0.4218,
       0.3029, 0.3706, 0.2768, 0.2977, 0.3184, 0.245 , 0.4667, 0.2822,
       0.3613, 0.4066, 0.4264, 0.2341, 0.4027, 0.2756, 0.3444, 0.4761,
       0.353 , 0.3672, 0.427 , 0.4863, 0.3591, 0.1987, 0.1565, 0.2807,
       0.2994, 0.2964, 0.467 , 0.3739, 0.3693, 0.3799, 0.3105, 0.39  ,
       0.2747, 0.2871, 0.2433, 0.2346, 0.2785, 0.3021, 0.2675, 0.3306,
       0.3537, 0.3698, 0.2439, 0.322 , 0.3557, 0.2972, 0.2844, 0.3282,
       0.3383, 0.3321, 0.2878, 0.24  , 0.4228, 0.2383, 0.2551, 0.2254,
       0.3313, 0.2589, 0.2618, 0.265 , 0.271 , 0.3751, 0.544 , 0.2779,
       0.2762, 0.3527, 0.2355, 0.2311, 0.3379, 0.3695, 0.302 , 0.3956,
       0.2972, 0.3151, 0.2522, 0.2556, 0.2027, 0.2678, 0.2834, 0.3689,
       0.2227, 0.1934, 0.2772, 0.2718, 0.2651, 0.2932, 0.2694, 0.2622,
       0.2826, 0.3147, 0.2806, 0.2983, 0.4055, 0.2829, 0.2533, 0.2226,
      

In [None]:
data['target']

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [None]:
data = load_breast_cancer(as_frame=True)

In [None]:
data['data']

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [None]:
data['target']

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: target, Length: 569, dtype: int64

In [None]:
data['target'].sum()

357

In [None]:
# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split the same from run to run.
train_X, test_X, train_Y, test_Y = train_test_split(
    data['data'], data['target'], test_size=0.25, random_state=42
)

In [None]:
# Standardize a single column (z-score): subtract the column mean and divide
# by the column standard deviation (pandas' `std()`, i.e. the sample standard
# deviation by default). The statistics are computed once and applied with
# vectorized Series arithmetic instead of being recomputed for every element.
mean_radius = train_X['mean radius']
(mean_radius - mean_radius.mean()) / mean_radius.std()

287   -0.348728
512   -0.204446
402   -0.328925
446    1.026196
210    1.826821
         ...   
71    -1.480919
106   -0.702361
270    0.047340
435   -0.040361
102   -0.549592
Name: mean radius, Length: 426, dtype: float64

In [None]:
# Standardize every training column (z-score). Each column's mean and
# standard deviation are computed once and applied with vectorized Series
# arithmetic, rather than being recomputed for every single element.
train_X.apply(lambda column: (column - column.mean()) / column.std())

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
287,-0.348728,-1.436824,-0.411242,-0.390021,-1.861474,-1.267117,-0.825200,-0.951747,-1.727337,-0.940435,...,-0.542048,-1.653710,-0.589171,-0.524943,-1.508895,-0.890453,-0.749336,-0.915634,-0.923999,-0.807462
512,-0.204446,0.312273,-0.133516,-0.275556,1.076806,0.862532,0.725461,0.897385,1.177385,1.472646,...,0.041438,0.689088,0.193900,-0.051873,1.128089,0.922857,1.220782,1.434873,1.148209,1.567269
402,-0.328925,-0.214820,-0.317021,-0.363930,-1.578024,-0.456914,-0.596608,-0.763691,0.275020,-0.500435,...,-0.435389,-0.148811,-0.319783,-0.445508,-1.632477,-0.106626,-0.539257,-0.722863,0.534342,-0.618621
446,1.026196,2.087370,1.045692,0.916506,0.315932,0.561377,1.047296,0.929345,-0.325314,-0.476913,...,1.112207,2.162463,1.164424,0.996524,0.383154,0.859937,1.870619,1.309152,0.152704,0.421141
210,1.826821,0.695183,1.761610,1.781726,-0.333282,0.627437,0.973516,1.264254,-0.131418,-1.711127,...,1.469828,0.387113,1.554448,1.383968,-0.577081,0.296331,0.595068,1.231547,0.050393,-1.404699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,-1.480919,-1.077550,-1.361685,-1.152840,0.162754,0.982994,-0.017941,-0.502472,0.379426,3.762582,...,-1.354954,-1.632140,-1.340194,-1.042595,-0.452207,-0.027641,-0.624696,-1.005656,-1.013319,1.425069
106,-0.702361,-0.205365,-0.687735,-0.680111,1.334488,-0.015674,-0.213332,-0.340564,0.002818,0.358810,...,-0.642432,0.622720,-0.646314,-0.622776,1.618971,0.122296,0.098518,0.141970,-0.116878,0.433655
270,0.047340,-0.562276,-0.065215,-0.062618,-2.237974,-1.471902,-1.020464,-1.095959,-1.089715,-1.224083,...,-0.272264,-0.805856,-0.376321,-0.334582,-1.960593,-1.321122,-1.151099,-1.231177,-0.682025,-1.259656
435,-0.040361,0.099545,-0.031477,-0.155480,0.747547,0.209706,0.319668,0.445998,-0.489381,0.392017,...,0.173193,0.878238,0.212040,-0.007821,1.296022,0.730080,0.699605,1.087201,0.488870,1.260118


In [None]:
# Explicit, step-by-step version of the same standardization: builds a plain
# list of rows, each row being the list of z-scores in column order.
# Column statistics are computed once up front rather than once per cell
# inside the nested loops.
column_means = {name: train_X[name].mean() for name in train_X.columns}
column_stds = {name: train_X[name].std() for name in train_X.columns}

dataset = []
for _, row in train_X.iterrows():
    dataset.append([
        (column_value - column_means[column_name]) / column_stds[column_name]
        for column_name, column_value in row.items()
    ])

[[-0.34872845789219237, -1.4368239636071252, -0.41124241533732414, -0.3900208525015171, -1.8614736060384163, -1.267117190330809, -0.8252002663002076, -0.9517468073852469, -1.7273370864022914, -0.940435156793855, -0.8686921638798919, -1.3570578709040586, -0.8338346518991997, -0.5716346180082692, -0.7449925176443783, -0.6532151556641307, -0.5252176963901221, -0.9456595824641146, -0.5371856655298594, -0.6337494332363395, -0.5420475299157882, -1.6537101186875827, -0.5891712740732437, -0.5249426309924555, -1.5088951243641453, -0.8904529644083746, -0.7493360937277982, -0.9156340093225773, -0.923999435190348, -0.8074617509768242], [-0.20444626301743624, 0.31227294473554695, -0.13351557350465745, -0.2755559561169054, 1.0768064888898066, 0.8625319062493231, 0.7254607636795649, 0.8973854948752126, 1.1773852812181127, 1.4726456537560564, -0.04017550927480558, -0.5090240306950309, 0.10934865367634201, -0.13457015617077828, -0.5242784369383212, -0.14916936335301562, 0.07451267394217784, 0.237193552

In [None]:
train_Y

287    1
512    0
402    1
446    0
210    0
      ..
71     1
106    1
270    1
435    0
102    1
Name: target, Length: 426, dtype: int64

In [None]:
# Standardized training features (z-score per column). Each column's mean and
# standard deviation are computed once and applied with vectorized Series
# arithmetic, rather than being recomputed for every single element.
X = train_X.apply(lambda column: (column - column.mean()) / column.std())

In [None]:
Y = train_Y

In [None]:
X.values

array([[-0.34872846, -1.43682396, -0.41124242, ..., -0.91563401,
        -0.92399944, -0.80746175],
       [-0.20444626,  0.31227294, -0.13351557, ...,  1.43487253,
         1.14820885,  1.56726867],
       [-0.32892502, -0.21481977, -0.31702101, ..., -0.72286267,
         0.53434151, -0.61862091],
       ...,
       [ 0.04734031, -0.56227551, -0.06521534, ..., -1.23117679,
        -0.68202527, -1.25965593],
       [-0.04036063,  0.09954494, -0.03147667, ...,  1.0872012 ,
         0.48886985,  1.26011791],
       [-0.54959191,  0.31227294, -0.60338825, ..., -0.5951245 ,
        -0.29876418, -0.82850727]])

In [None]:
Y.values

array([1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0,

In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian Naive Bayes classifier on the standardized training
# features; `.values` hands plain NumPy arrays to scikit-learn.
model = GaussianNB()
model.fit(X.values, Y.values)

In [None]:
# Standardize the test set with the TRAINING mean and standard deviation of
# each column, so it is scaled exactly like the data the model was fitted on.
# The training statistics are looked up once per column instead of being
# recomputed for every element.
test_X.apply(
    lambda column: (column - train_X[column.name].mean()) / train_X[column.name].std()
)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
204,-0.467549,-0.141547,-0.444158,-0.485408,0.293027,0.063986,-0.094392,-0.251818,0.465188,0.155413,...,-0.259716,-0.143833,-0.327644,-0.346436,0.490803,-0.066464,-0.003003,-0.173107,0.220912,0.236282
70,1.362854,0.499001,1.304904,1.332844,-0.391260,0.007641,0.261153,0.839014,-0.813785,-1.106473,...,1.808626,0.178053,1.784229,1.755489,-0.512491,-0.094578,0.005038,1.028221,-0.530994,-0.992890
131,0.378341,0.066454,0.403834,0.263663,0.976597,0.384570,0.752174,0.874935,0.487560,-0.642951,...,0.637472,0.081819,0.544619,0.499394,1.007521,-0.055755,0.559888,0.601393,-0.066534,-0.179509
431,-0.487353,-0.359002,-0.428523,-0.524966,0.704600,0.565263,-0.127975,-0.521753,0.040106,1.164092,...,-0.696807,-0.430875,-0.522353,-0.633214,0.594147,0.101546,-0.137695,-0.604592,-0.522874,0.582680
540,-0.730652,-1.124823,-0.709130,-0.707044,0.306626,0.184448,-0.255692,-0.575898,0.066208,0.721325,...,-0.826471,-0.966799,-0.849792,-0.735825,0.142019,-0.240499,-0.442259,-0.674747,-0.891520,-0.114097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,0.146358,-0.935731,0.159846,-0.008471,1.262909,0.609951,0.155062,0.604736,1.177385,0.118055,...,0.026799,-1.205725,0.075986,-0.124054,-0.150788,0.409454,-0.036676,0.419796,0.443398,0.078725
199,0.092605,0.241364,0.107181,-0.034282,0.226459,0.351540,0.388361,0.318426,0.558407,0.284093,...,0.442977,0.765412,0.332978,0.301249,1.033357,1.056730,1.150923,1.104274,3.045026,1.021223
411,-0.872105,-0.559912,-0.862600,-0.790367,0.869230,-0.475372,-0.725215,-0.606008,-0.321586,0.109753,...,-0.795100,0.154824,-0.815022,-0.711765,0.245362,-0.666216,-0.809143,-0.595125,0.194928,-0.258003
18,1.608983,0.697547,1.568230,1.697561,0.197112,0.003755,0.768711,1.247614,-0.813785,-1.197794,...,2.323097,0.891512,2.416128,2.696675,0.861118,0.450285,1.354469,1.957932,-0.178589,-0.409303


In [None]:
# Standardized test features. Each column is scaled with the TRAINING mean and
# standard deviation so the test set matches the scaling the model was fitted
# on. The training statistics are looked up once per column instead of being
# recomputed for every element.
tX = test_X.apply(
    lambda column: (column - train_X[column.name].mean()) / train_X[column.name].std()
)

In [None]:
tX.values

array([[-0.46754909, -0.14154679, -0.44415819, ..., -0.17310738,
         0.22091188,  0.23628204],
       [ 1.36285444,  0.49900086,  1.30490374, ...,  1.02822124,
        -0.53099442, -0.9928898 ],
       [ 0.37834064,  0.06645392,  0.40383443, ...,  0.6013926 ,
        -0.06653394, -0.17950908],
       ...,
       [-0.87210505, -0.55991186, -0.86259996, ..., -0.5951245 ,
         0.19492808, -0.25800317],
       [ 1.60898289,  0.69754699,  1.56822993, ...,  1.95793163,
        -0.17858909, -0.40930336],
       [-1.09277193, -1.64955197, -1.07531815, ..., -0.70920416,
         0.09586483, -0.348442  ]])

In [None]:
model.predict(tX.values)

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1])

In [None]:
test_Y.values

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1])

In [None]:
y_pred = model.predict(tX.values)

In [None]:
result = (y_pred == test_Y.values)

In [None]:
result

array([ True,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True, False,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,

In [None]:
sum(result)

136

In [None]:
len(result)

143

In [None]:
sum(result)/len(result)

0.951048951048951

In [None]:
10 * "oi"

'oioioioioioioioioioi'