## **``KFold``**
##### **It returns the indices of the data points for the training and testing datasets.**

In [1]:
from sklearn.model_selection import KFold

In [2]:
import numpy as np

In [21]:
X = np.random.randint(low=1,high=10,size=4)

In [204]:
X = ['a','b','c','d']

In [84]:
sorted(X)

['a', 'b', 'c', 'd']

In [206]:
kfold = KFold(n_splits=4,shuffle=True)

In [207]:
for train ,test in kfold.split(X):
    print(train)
    print(test)
    print("######")

[1 2 3]
[0]
######
[0 1 3]
[2]
######
[0 1 2]
[3]
######
[0 2 3]
[1]
######


In [87]:
kfold = KFold(n_splits=3,shuffle=True,random_state=33) ## We can add the random_state to get the same result every time

In [88]:
for train ,test in kfold.split(X):
    print(train)
    print(test)
    print("######")

[0 3]
[1 2]
######
[1 2 3]
[0]
######
[0 1 2]
[3]
######


##### **KFold using Numpy indexing**

In [55]:
X = [['a','b'],['c','d'],['c','a'],['e','f']]
y= [0,1,0,1]

In [56]:
X

[['a', 'b'], ['c', 'd'], ['c', 'a'], ['e', 'f']]

In [57]:
y

[0, 1, 0, 1]

In [60]:
X_train, X_test, y_train, y_test = X[0], X[1], y[0], y[1]

In [61]:
X_train

['a', 'b']

In [62]:
X_test

['c', 'd']

In [63]:
y_train

0

In [64]:
y_test

1

## **``Repeated K-Fold``**
##### **It is used when we have to perform the K-Fold multiple number of times. Each time getting a different result.**

In [66]:
from sklearn.model_selection import RepeatedKFold

In [209]:
X = ['Ram','Rahul','Vishu','Raman','Jia','Jai','Jeetu']

In [149]:
rkf = RepeatedKFold(n_splits=2,n_repeats=3)

In [150]:
for train , test in rkf.split(X):
    print(train)
    print(test)
    print('###')

[1 5 6]
[0 2 3 4]
###
[0 2 3 4]
[1 5 6]
###
[1 4 5]
[0 2 3 6]
###
[0 2 3 6]
[1 4 5]
###
[0 3 4]
[1 2 5 6]
###
[1 2 5 6]
[0 3 4]
###


In [101]:
rkf = RepeatedKFold(n_splits=3,n_repeats=3,random_state=2) ## We can random_state to get the same kind of result every time

In [116]:
for train , test in rkf.split(X):
    print(train)
    print(test)
    print('$$$')

[1 2 5 6]
[0 3 4]
$$$
[0 3 4 5 6]
[1 2]
$$$
[0 1 2 3 4]
[5 6]
$$$
[1 2 5 6]
[0 3 4]
$$$
[0 2 3 4 5]
[1 6]
$$$
[0 1 3 4 6]
[2 5]
$$$
[2 3 4 5]
[0 1 6]
$$$
[0 1 3 4 6]
[2 5]
$$$
[0 1 2 5 6]
[3 4]
$$$


## **``Leave One Out``**
##### **In this type of cross-validation technique n different training sets and n different test sets are formed. Test set will always be n -1. Therefore, every time we are training the model in an identical dataset. Because every time we are leaving only 1 sample for the testing purpose.**

In [117]:
from sklearn.model_selection import LeaveOneOut

In [122]:
loo = LeaveOneOut()

In [119]:
X

['Ram', 'Rahul', 'Vishu', 'Raman', 'Jia', 'Jai', 'Jeetu']

In [123]:
for train ,test in loo.split(X):
    print(train)
    print(test)
    print('###')

[1 2 3 4 5 6]
[0]
###
[0 2 3 4 5 6]
[1]
###
[0 1 3 4 5 6]
[2]
###
[0 1 2 4 5 6]
[3]
###
[0 1 2 3 5 6]
[4]
###
[0 1 2 3 4 6]
[5]
###
[0 1 2 3 4 5]
[6]
###


## **``Leave P Out``**
##### **It is similar to LeaveOneOut and it works on the principle of removing p samples fron the entire dataset. The number of samples which we mentioned as p will go into one set.**

In [124]:
from sklearn.model_selection import LeavePOut

In [212]:
lpo = LeavePOut(2)

In [213]:
X

['Ram', 'Rahul', 'Vishu', 'Raman', 'Jia', 'Jai', 'Jeetu']

In [215]:
for train ,test in lpo.split(X[0:4]):
    print(train)
    print(test)
    print('###')

[2 3]
[0 1]
###
[1 3]
[0 2]
###
[1 2]
[0 3]
###
[0 3]
[1 2]
###
[0 2]
[1 3]
###
[0 1]
[2 3]
###


In [129]:
lpo =LeavePOut(5)

In [130]:
X

['Ram', 'Rahul', 'Vishu', 'Raman', 'Jia', 'Jai', 'Jeetu']

In [132]:
for a , b in lpo.split(X):
    print(a)
    print(b)
    print('***')

[5 6]
[0 1 2 3 4]
***
[4 6]
[0 1 2 3 5]
***
[4 5]
[0 1 2 3 6]
***
[3 6]
[0 1 2 4 5]
***
[3 5]
[0 1 2 4 6]
***
[3 4]
[0 1 2 5 6]
***
[2 6]
[0 1 3 4 5]
***
[2 5]
[0 1 3 4 6]
***
[2 4]
[0 1 3 5 6]
***
[2 3]
[0 1 4 5 6]
***
[1 6]
[0 2 3 4 5]
***
[1 5]
[0 2 3 4 6]
***
[1 4]
[0 2 3 5 6]
***
[1 3]
[0 2 4 5 6]
***
[1 2]
[0 3 4 5 6]
***
[0 6]
[1 2 3 4 5]
***
[0 5]
[1 2 3 4 6]
***
[0 4]
[1 2 3 5 6]
***
[0 3]
[1 2 4 5 6]
***
[0 2]
[1 3 4 5 6]
***
[0 1]
[2 3 4 5 6]
***


In [133]:
lpo = LeavePOut(p=3)

In [134]:
X

['Ram', 'Rahul', 'Vishu', 'Raman', 'Jia', 'Jai', 'Jeetu']

In [156]:
rkf = RepeatedKFold(n_splits=2,n_repeats=2)

In [157]:
for a , b in lpo.split(X):
    for a1, b1 in rkf.split(a):
        print(a1)
        print(b1)
        print('%%%%')
    print(b)
    print('&&&&')

[0 2]
[1 3]
%%%%
[1 3]
[0 2]
%%%%
[0 2]
[1 3]
%%%%
[1 3]
[0 2]
%%%%
[0 1 2]
&&&&
[0 2]
[1 3]
%%%%
[1 3]
[0 2]
%%%%
[1 2]
[0 3]
%%%%
[0 3]
[1 2]
%%%%
[0 1 3]
&&&&
[0 1]
[2 3]
%%%%
[2 3]
[0 1]
%%%%
[2 3]
[0 1]
%%%%
[0 1]
[2 3]
%%%%
[0 1 4]
&&&&
[2 3]
[0 1]
%%%%
[0 1]
[2 3]
%%%%
[0 3]
[1 2]
%%%%
[1 2]
[0 3]
%%%%
[0 1 5]
&&&&
[1 2]
[0 3]
%%%%
[0 3]
[1 2]
%%%%
[2 3]
[0 1]
%%%%
[0 1]
[2 3]
%%%%
[0 1 6]
&&&&
[0 3]
[1 2]
%%%%
[1 2]
[0 3]
%%%%
[0 3]
[1 2]
%%%%
[1 2]
[0 3]
%%%%
[0 2 3]
&&&&
[2 3]
[0 1]
%%%%
[0 1]
[2 3]
%%%%
[2 3]
[0 1]
%%%%
[0 1]
[2 3]
%%%%
[0 2 4]
&&&&
[2 3]
[0 1]
%%%%
[0 1]
[2 3]
%%%%
[0 2]
[1 3]
%%%%
[1 3]
[0 2]
%%%%
[0 2 5]
&&&&
[0 2]
[1 3]
%%%%
[1 3]
[0 2]
%%%%
[0 2]
[1 3]
%%%%
[1 3]
[0 2]
%%%%
[0 2 6]
&&&&
[0 1]
[2 3]
%%%%
[2 3]
[0 1]
%%%%
[1 2]
[0 3]
%%%%
[0 3]
[1 2]
%%%%
[0 3 4]
&&&&
[0 3]
[1 2]
%%%%
[1 2]
[0 3]
%%%%
[0 2]
[1 3]
%%%%
[1 3]
[0 2]
%%%%
[0 3 5]
&&&&
[2 3]
[0 1]
%%%%
[0 1]
[2 3]
%%%%
[0 1]
[2 3]
%%%%
[2 3]
[0 1]
%%%%
[0 3 6]
&&&&
[2 3]
[0 1]
%%%%
[0 1]
[2 3]

In [145]:
for a , b in lpo.split(X):
    print(a)
    print(b)
    print('&&&&')

[3 4 5 6]
[0 1 2]
&&&&
[2 4 5 6]
[0 1 3]
&&&&
[2 3 5 6]
[0 1 4]
&&&&
[2 3 4 6]
[0 1 5]
&&&&
[2 3 4 5]
[0 1 6]
&&&&
[1 4 5 6]
[0 2 3]
&&&&
[1 3 5 6]
[0 2 4]
&&&&
[1 3 4 6]
[0 2 5]
&&&&
[1 3 4 5]
[0 2 6]
&&&&
[1 2 5 6]
[0 3 4]
&&&&
[1 2 4 6]
[0 3 5]
&&&&
[1 2 4 5]
[0 3 6]
&&&&
[1 2 3 6]
[0 4 5]
&&&&
[1 2 3 5]
[0 4 6]
&&&&
[1 2 3 4]
[0 5 6]
&&&&
[0 4 5 6]
[1 2 3]
&&&&
[0 3 5 6]
[1 2 4]
&&&&
[0 3 4 6]
[1 2 5]
&&&&
[0 3 4 5]
[1 2 6]
&&&&
[0 2 5 6]
[1 3 4]
&&&&
[0 2 4 6]
[1 3 5]
&&&&
[0 2 4 5]
[1 3 6]
&&&&
[0 2 3 6]
[1 4 5]
&&&&
[0 2 3 5]
[1 4 6]
&&&&
[0 2 3 4]
[1 5 6]
&&&&
[0 1 5 6]
[2 3 4]
&&&&
[0 1 4 6]
[2 3 5]
&&&&
[0 1 4 5]
[2 3 6]
&&&&
[0 1 3 6]
[2 4 5]
&&&&
[0 1 3 5]
[2 4 6]
&&&&
[0 1 3 4]
[2 5 6]
&&&&
[0 1 2 6]
[3 4 5]
&&&&
[0 1 2 5]
[3 4 6]
&&&&
[0 1 2 4]
[3 5 6]
&&&&
[0 1 2 3]
[4 5 6]
&&&&


## **``Shuffle Split``**
##### **It is a better replacement of Kfold as it shuffles the data before splitting into the pairs of train and test sets.**

In [158]:
from sklearn.model_selection import ShuffleSplit

In [160]:
X

['Ram', 'Rahul', 'Vishu', 'Raman', 'Jia', 'Jai', 'Jeetu']

In [159]:
ss = ShuffleSplit(n_splits=3)

In [162]:
for a , b in ss.split(X):
    print(a)
    print(b)
    print('***')

[1 5 6 0 2 3]
[4]
***
[2 3 4 5 1 0]
[6]
***
[5 0 4 6 1 3]
[2]
***


In [163]:
X1 = np.random.randint(low=21,high=50,size=6)

In [164]:
X1

array([33, 36, 32, 30, 47, 41])

In [175]:
ss1 = ShuffleSplit(n_splits=4, test_size=0.20,train_size=0.80)

In [176]:
for a,b in ss1.split(X1):
    print(a)
    print(b)
    print('@@@@')

[4 1 3 2]
[0 5]
@@@@
[5 4 2 3]
[0 1]
@@@@
[0 5 2 4]
[3 1]
@@@@
[1 4 5 3]
[0 2]
@@@@


In [177]:
ss2 = ShuffleSplit(n_splits=2,test_size=0.40,train_size=0.60,random_state=33)

In [183]:
for a,b in ss2.split(X1):
    print(a)
    print(b)
    print('@@@@')

[2 0 4]
[5 1 3]
@@@@
[2 3 1]
[0 4 5]
@@@@


In [186]:
import pandas as pd

In [193]:
df = pd.DataFrame({'col1':['Sachin','Rahul','Saurav','Virat','Dhoni'],
             'col2':['Steve','Ricky','Lara','Klusner','Warne'],
              'Label':['Opener','Opener','Opener','Non-Opener','Non-Opener']})

In [194]:
df

Unnamed: 0,col1,col2,Label
0,Sachin,Steve,Opener
1,Rahul,Ricky,Opener
2,Saurav,Lara,Opener
3,Virat,Klusner,Non-Opener
4,Dhoni,Warne,Non-Opener


In [195]:
ss3 = ShuffleSplit(n_splits=2,test_size=0.20,train_size=0.80)

In [202]:
df_train_indices = []
df_test_indices = []

for a , b in ss3.split(df[['col1','col2']]):
    df_train_indices.append(a)
    df_test_indices.append(b)
    print(a)
    print(b)

[3 1 4 0]
[2]
[2 3 4 1]
[0]


In [200]:
df_train_indices

[array([0, 2, 1, 3]), array([4, 3, 1, 0])]

In [201]:
df_test_indices

[array([4]), array([2])]