## Métodos de validação-cruzada

### Import

In [1]:
import numpy as np

### Dados

In [2]:
data_x = np.array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
data_y = np.array([ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
groups = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3])

assert data_x.shape[0] == data_y.shape[0]

data = np.stack((data_x, data_y), axis=1)

print("Tamanho do dataset:", data.shape[0])

print(data)


Tamanho do dataset: 10
[[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]]


### Holdout

In [3]:
from sklearn.model_selection import train_test_split

train, valid = train_test_split(data, test_size=0.2, random_state=2 )

print('Treino \n', train)
print('\nValidação \n', valid)

Treino 
 [[5 1]
 [0 0]
 [7 1]
 [2 0]
 [3 0]
 [6 1]
 [9 1]
 [8 1]]

Validação 
 [[4 0]
 [1 0]]


### Stratified Holdout

In [4]:
train, valid = train_test_split(data, test_size=0.2, random_state=0, stratify=data_y)

print('Treino \n', train)
print('\nValidação \n', valid)

Treino 
 [[0 0]
 [6 1]
 [3 0]
 [9 1]
 [2 0]
 [5 1]
 [1 0]
 [7 1]]

Validação 
 [[4 0]
 [8 1]]


### K-fold

In [5]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5)

iteration = 0
for train, valid in kf.split(data):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1
   

it -  0 
Train
 [[2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [1 0]] 

it -  1 
Train
 [[0 0]
 [1 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[2 0]
 [3 0]] 

it -  2 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[4 0]
 [5 1]] 

it -  3 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [8 1]
 [9 1]] 
Valid
 [[6 1]
 [7 1]] 

it -  4 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]] 
Valid
 [[8 1]
 [9 1]] 



### Stratified K-fold

In [6]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits = 5, shuffle=True)

iteration = 0
for train, valid in skf.split(data_x, data_y):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [6 1]] 

it -  1 
Train
 [[0 0]
 [1 0]
 [2 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [9 1]] 
Valid
 [[3 0]
 [8 1]] 

it -  2 
Train
 [[0 0]
 [1 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [8 1]
 [9 1]] 
Valid
 [[2 0]
 [7 1]] 

it -  3 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[4 0]
 [5 1]] 

it -  4 
Train
 [[0 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]] 
Valid
 [[1 0]
 [9 1]] 



### Repeated K-fold

In [7]:
from sklearn.model_selection import RepeatedKFold

rkf = RepeatedKFold(n_splits=5, n_repeats=2, random_state=0)

iteration = 0
for train, valid in rkf.split(data_x, data_y):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[0 0]
 [1 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [9 1]] 
Valid
 [[2 0]
 [8 1]] 

it -  1 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]] 
Valid
 [[4 0]
 [9 1]] 

it -  2 
Train
 [[0 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[1 0]
 [6 1]] 

it -  3 
Train
 [[0 0]
 [1 0]
 [2 0]
 [4 0]
 [5 1]
 [6 1]
 [8 1]
 [9 1]] 
Valid
 [[3 0]
 [7 1]] 

it -  4 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [5 1]] 

it -  5 
Train
 [[0 0]
 [1 0]
 [2 0]
 [4 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[3 0]
 [5 1]] 

it -  6 
Train
 [[0 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[1 0]
 [2 0]] 

it -  7 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]] 
Valid
 [[8 1]
 [9 1]] 

it -  8 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [6 1]] 

it -  9 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [5 1]
 [6 1]
 [8 1]
 [9 1]] 
Valid
 [[4 0]
 [7 1]] 



### Group k-fold

In [8]:
from sklearn.model_selection import GroupKFold

gkf = GroupKFold(n_splits=3)

iteration = 0
for train, valid in gkf.split(data_x, data_y, groups=groups):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1
    

it -  0 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]] 
Valid
 [[6 1]
 [7 1]
 [8 1]
 [9 1]] 

it -  1 
Train
 [[0 0]
 [1 0]
 [2 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[3 0]
 [4 0]
 [5 1]] 

it -  2 
Train
 [[3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [1 0]
 [2 0]] 



### Nested k-fold

In [9]:
from sklearn.model_selection import StratifiedKFold

outer_skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
inner_skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)

iteration = 0
for train_outer, test in outer_skf.split(data_x, data_y):
    aux_data_inner_x = data_x[train_outer]
    aux_data_inner_y = data_y[train_outer]
    aux_data_inner = data[train_outer]
    test_ = data[test]
    #print("a\n",aux_data_inner_x, "b\n",  test_)
    for train_inner, valid in inner_skf.split(aux_data_inner_x, aux_data_inner_y):
        train_ = aux_data_inner[train_inner]
        valid_ = aux_data_inner[valid]
    
        print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\nTest\n", test_)
        iteration += 1


it -  0 
Train
 [[0 0]
 [2 0]
 [3 0]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[4 0]
 [6 1]] 
Test
 [[1 0]
 [5 1]]
it -  1 
Train
 [[0 0]
 [2 0]
 [4 0]
 [6 1]
 [7 1]
 [9 1]] 
Valid
 [[3 0]
 [8 1]] 
Test
 [[1 0]
 [5 1]]
it -  2 
Train
 [[2 0]
 [3 0]
 [4 0]
 [6 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [7 1]] 
Test
 [[1 0]
 [5 1]]
it -  3 
Train
 [[0 0]
 [3 0]
 [4 0]
 [6 1]
 [7 1]
 [8 1]] 
Valid
 [[2 0]
 [9 1]] 
Test
 [[1 0]
 [5 1]]
it -  4 
Train
 [[0 0]
 [1 0]
 [3 0]
 [6 1]
 [8 1]
 [9 1]] 
Valid
 [[4 0]
 [5 1]] 
Test
 [[2 0]
 [7 1]]
it -  5 
Train
 [[0 0]
 [1 0]
 [4 0]
 [5 1]
 [6 1]
 [9 1]] 
Valid
 [[3 0]
 [8 1]] 
Test
 [[2 0]
 [7 1]]
it -  6 
Train
 [[1 0]
 [3 0]
 [4 0]
 [5 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [6 1]] 
Test
 [[2 0]
 [7 1]]
it -  7 
Train
 [[0 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [8 1]] 
Valid
 [[1 0]
 [9 1]] 
Test
 [[2 0]
 [7 1]]
it -  8 
Train
 [[1 0]
 [2 0]
 [3 0]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[4 0]
 [5 1]] 
Test
 [[0 0]
 [6 1]]
it -  9 
Train
 [[1 0]
 [2 0]
 [4 0]
 [5 1]
 [7 1]
 [9 1]] 
Vali

### Monte Carlo

In [10]:
from sklearn.model_selection import ShuffleSplit

shuffle = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

iteration = 0
for train, valid in shuffle.split(data):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[4 0]
 [9 1]
 [1 0]
 [6 1]
 [7 1]
 [3 0]
 [0 0]
 [5 1]] 
Valid
 [[2 0]
 [8 1]] 

it -  1 
Train
 [[1 0]
 [2 0]
 [9 1]
 [8 1]
 [0 0]
 [6 1]
 [7 1]
 [4 0]] 
Valid
 [[3 0]
 [5 1]] 

it -  2 
Train
 [[8 1]
 [4 0]
 [5 1]
 [1 0]
 [0 0]
 [6 1]
 [9 1]
 [7 1]] 
Valid
 [[2 0]
 [3 0]] 

it -  3 
Train
 [[9 1]
 [2 0]
 [7 1]
 [5 1]
 [8 1]
 [0 0]
 [3 0]
 [4 0]] 
Valid
 [[6 1]
 [1 0]] 

it -  4 
Train
 [[7 1]
 [4 0]
 [1 0]
 [0 0]
 [6 1]
 [8 1]
 [9 1]
 [3 0]] 
Valid
 [[5 1]
 [2 0]] 



### Stratified Monte Carlo

In [11]:
from sklearn.model_selection import StratifiedShuffleSplit

sshuffle = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

iteration = 0
for train, valid in sshuffle.split(data_x, data_y):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[0 0]
 [6 1]
 [3 0]
 [9 1]
 [2 0]
 [5 1]
 [1 0]
 [7 1]] 
Valid
 [[4 0]
 [8 1]] 

it -  1 
Train
 [[9 1]
 [2 0]
 [7 1]
 [3 0]
 [6 1]
 [8 1]
 [1 0]
 [4 0]] 
Valid
 [[0 0]
 [5 1]] 

it -  2 
Train
 [[8 1]
 [1 0]
 [7 1]
 [4 0]
 [9 1]
 [2 0]
 [0 0]
 [6 1]] 
Valid
 [[5 1]
 [3 0]] 

it -  3 
Train
 [[2 0]
 [6 1]
 [0 0]
 [5 1]
 [8 1]
 [3 0]
 [7 1]
 [1 0]] 
Valid
 [[4 0]
 [9 1]] 

it -  4 
Train
 [[5 1]
 [2 0]
 [8 1]
 [4 0]
 [9 1]
 [3 0]
 [7 1]
 [1 0]] 
Valid
 [[0 0]
 [6 1]] 



### Time Series

In [12]:
from sklearn.model_selection import TimeSeriesSplit

tscv = TimeSeriesSplit(n_splits=3)

iteration = 0
for train, valid in tscv.split(data):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]] 
Valid
 [[4 0]
 [5 1]] 

it -  1 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]] 
Valid
 [[6 1]
 [7 1]] 

it -  2 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]] 
Valid
 [[8 1]
 [9 1]] 



### Leave-P-Out 

In [13]:
from sklearn.model_selection import LeavePOut

lpo = LeavePOut(p=2)

iteration = 0 
for train, valid in lpo.split(data):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [1 0]] 

it -  1 
Train
 [[1 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [2 0]] 

it -  2 
Train
 [[1 0]
 [2 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [3 0]] 

it -  3 
Train
 [[1 0]
 [2 0]
 [3 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [4 0]] 

it -  4 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [5 1]] 

it -  5 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [6 1]] 

it -  6 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [7 1]] 

it -  7 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [9 1]] 
Valid
 [[0 0]
 [8 1]] 

it -  8 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]] 
Valid
 [[0 0]
 [9 1]] 

it -  9 
Train
 [[0 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[1 0]
 [2 0]] 

it -  10 
Train
 [[0

###  Leave-P-Group-Out

In [14]:
from sklearn.model_selection import LeavePGroupsOut

lpgo = LeavePGroupsOut(n_groups=2)
    
iteration = 0
for train, valid in lpgo.split(data_x, data_y, groups=groups):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]] 

it -  1 
Train
 [[3 0]
 [4 0]
 [5 1]] 
Valid
 [[0 0]
 [1 0]
 [2 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 

it -  2 
Train
 [[0 0]
 [1 0]
 [2 0]] 
Valid
 [[3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 



### Leave-One-Out

In [15]:
from sklearn.model_selection import LeaveOneOut

loo = LeaveOneOut()

iteration = 0
for train, valid in loo.split(data):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]] 

it -  1 
Train
 [[0 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[1 0]] 

it -  2 
Train
 [[0 0]
 [1 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[2 0]] 

it -  3 
Train
 [[0 0]
 [1 0]
 [2 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[3 0]] 

it -  4 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[4 0]] 

it -  5 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[5 1]] 

it -  6 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[6 1]] 

it -  7 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [8 1]
 [9 1]] 
Valid
 [[7 1]] 

it -  8 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [9 1]] 
Valid
 [[8 1]] 

it -  9 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]] 
Valid
 [[9 1]] 



### Leave-One-Group-Out

In [16]:
from sklearn.model_selection import LeaveOneGroupOut

logo = LeaveOneGroupOut()

iteration = 0
for train, valid in logo.split(data_x, data_y, groups=groups):
    train_ = data[train]
    valid_ = data[valid]
    print("it - ",iteration,"\nTrain\n", train_, "\nValid\n", valid_, "\n")
    iteration += 1

it -  0 
Train
 [[3 0]
 [4 0]
 [5 1]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[0 0]
 [1 0]
 [2 0]] 

it -  1 
Train
 [[0 0]
 [1 0]
 [2 0]
 [6 1]
 [7 1]
 [8 1]
 [9 1]] 
Valid
 [[3 0]
 [4 0]
 [5 1]] 

it -  2 
Train
 [[0 0]
 [1 0]
 [2 0]
 [3 0]
 [4 0]
 [5 1]] 
Valid
 [[6 1]
 [7 1]
 [8 1]
 [9 1]] 

