In [4]:
import warnings
# Install a blanket "ignore" filter: every warning category from every module
# is suppressed for the rest of the session.
# NOTE(review): this also hides any warnings scikit-learn may raise while
# building the folds below -- consider narrowing the filter to a category.
warnings.filterwarnings('ignore')

In [5]:
import numpy as np
import pandas as pd

from sklearn import datasets, model_selection

In [6]:
def create_folds(data, n_splits=5, target_col="target", random_state=None):
    """Assign stratified cross-validation folds to a regression dataset.

    The continuous target is discretised into equal-width bins (the bin count
    follows Sturges' rule, ``floor(1 + log2(n_rows))``) and the bin labels are
    used as the stratification classes for ``StratifiedKFold``, so each fold
    sees a similar target distribution.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame containing the column named by ``target_col``. The frame
        passed in is left untouched; a shuffled copy is returned instead.
    n_splits : int, default 5
        Number of folds handed to ``StratifiedKFold``.
    target_col : str, default "target"
        Name of the continuous target column to bin.
    random_state : int, numpy.random.RandomState or None, default None
        Forwarded to ``DataFrame.sample`` to make the row shuffle
        reproducible. ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        A row-shuffled copy of ``data`` with a fresh ``0..n-1`` index and an
        extra ``kfold`` column holding the validation-fold number of each row.

    Raises
    ------
    ValueError
        If ``data`` has no rows (Sturges' rule is undefined for ``n = 0``).
    """
    if len(data) == 0:
        raise ValueError("create_folds requires a DataFrame with at least one row")

    # Shuffle first: `sample` returns a new frame, so every assignment below
    # lands on the copy and the caller's DataFrame is never mutated.
    data = data.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Placeholder fold id; every row is overwritten in the loop below.
    data["kfold"] = -1

    # Number of bins by Sturges' rule.
    num_bins = int(np.floor(1 + np.log2(len(data))))

    # Keep the bin labels in a local Series rather than a temporary column so
    # that a genuine feature column named "bins" is neither overwritten nor
    # dropped from the result.
    bins = pd.cut(data[target_col], bins=num_bins, labels=False)

    # Rows are already shuffled above, so the splitter itself is left with
    # its default (non-shuffling) behaviour.
    kf = model_selection.StratifiedKFold(n_splits=n_splits)

    # After reset_index the positional indices yielded by split() coincide
    # with the index labels, which is what makes `.loc` correct here.
    for fold, (_, valid_idx) in enumerate(kf.split(X=data, y=bins.to_numpy())):
        data.loc[valid_idx, "kfold"] = fold

    return data

In [7]:
if __name__ == "__main__":
    # Fixed seed so that Restart Kernel -> Run All regenerates the same
    # dataset and the same fold assignment every time.
    SEED = 42

    # create_folds shuffles rows with DataFrame.sample, which draws from
    # NumPy's global RNG when no random_state is supplied, so seed it here.
    np.random.seed(SEED)

    # Sample regression dataset: 15000 samples, 100 features and 1 target.
    X, y = datasets.make_regression(
        n_samples=15000,
        n_features=100,
        n_targets=1,
        random_state=SEED,
    )

    # Name the feature columns f_0 ... f_{n-1} and append the target.
    df = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])
    df["target"] = y

    # Create folds
    df = create_folds(df)

In [8]:
# Preview the first rows of the frame returned by create_folds; the trailing
# columns should be `target` followed by the new `kfold` assignment.
df.head()

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_92,f_93,f_94,f_95,f_96,f_97,f_98,f_99,target,kfold
0,2.346663,1.564876,-0.171583,-1.354725,1.10181,-0.005706,-1.342539,1.968819,-0.462563,-0.724678,...,-0.909193,-0.150237,0.014994,0.5186,-0.551854,-1.055975,0.648005,1.066946,164.030699,0
1,-0.580335,-0.623197,1.753047,0.58403,0.237739,1.593367,-0.347985,-0.836488,0.169856,1.770541,...,-0.451706,0.262256,-0.81961,0.087452,-0.675803,0.516049,-0.204114,0.507375,90.401744,0
2,1.682355,0.749295,-0.11112,0.614155,-1.156698,0.2,0.799236,1.220454,1.18947,0.65547,...,0.411171,2.161125,0.662132,0.859978,-0.302763,-2.249242,-0.410581,0.236484,26.765317,0
3,0.318067,-0.53795,-0.242779,0.897248,1.42538,-1.259144,-0.704107,-1.143334,1.734103,-0.359828,...,-1.722307,-0.438386,-0.766818,-0.111498,-0.85539,-0.068397,1.220803,0.207716,-315.128405,0
4,0.50448,0.541658,-0.973608,-0.293459,1.403779,0.214683,-1.840469,-0.69729,1.357404,0.009545,...,1.142538,-1.189715,-1.357749,-1.311221,-1.027347,1.418731,-0.539109,0.531824,-150.124725,0
