In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd

from sklearn import datasets, model_selection

In [3]:
def create_folds(data, n_splits=5, target_col="target", random_state=None):
    """Assign stratified cross-validation folds to a regression dataset.

    The continuous target is discretised into equal-width bins (bin count
    chosen by Sturges' rule) and those bin labels are used as the
    stratification classes for ``StratifiedKFold``.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame containing the column named by ``target_col``. The frame
        passed in is left untouched; a shuffled copy is returned.
    n_splits : int, default 5
        Number of folds handed to ``StratifiedKFold``.
    target_col : str, default "target"
        Name of the continuous target column to stratify on.
    random_state : int or None, default None
        Seed forwarded to ``DataFrame.sample`` for the row shuffle. ``None``
        draws from NumPy's global random state, which is the original
        behaviour.

    Returns
    -------
    pandas.DataFrame
        Row-shuffled copy of ``data`` with a fresh ``0..n-1`` index and an
        extra ``kfold`` column holding the validation-fold number of each row.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``data``.
    ValueError
        If ``data`` has no rows.
    """
    # Validate before doing any work so a bad call never leaves side effects.
    if target_col not in data.columns:
        raise KeyError(f"column {target_col!r} not found in data")
    if len(data) == 0:
        raise ValueError("cannot create folds for an empty DataFrame")

    # Shuffle first: sample() returns a new frame, so every later assignment
    # lands on the copy and never on the caller's DataFrame.
    data = data.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Placeholder fold id; every row is overwritten in the loop below.
    data["kfold"] = -1

    # Number of bins by Sturges' rule: 1 + log2(n), rounded down.
    num_bins = int(np.floor(1 + np.log2(len(data))))

    # Keep the bin labels in a local Series instead of a temporary column,
    # so an existing column that happens to be called "bins" is preserved.
    bins = pd.cut(data[target_col], bins=num_bins, labels=False)

    # Stratify on the binned target.
    kf = model_selection.StratifiedKFold(n_splits=n_splits)

    # Only the validation indices matter: each row is in exactly one
    # validation split, which becomes its fold number.
    for fold, (_, valid_idx) in enumerate(kf.split(X=data, y=bins.values)):
        data.loc[valid_idx, "kfold"] = fold

    return data

In [4]:
if __name__ == "__main__":
    # Build a synthetic regression problem: 15000 samples, 100 features, 1 target.
    # A fixed random_state makes the generated data identical on every
    # Restart & Run All.
    X, y = datasets.make_regression(
        n_samples=15000, n_features=100, n_targets=1, random_state=42
    )

    # Wrap the feature matrix in a DataFrame with one f_<i> column per feature,
    # then attach the target as the last column.
    df = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])
    df["target"] = y

    # Shuffle the rows and add the stratified "kfold" column
    df = create_folds(df)

In [5]:
# Preview the first rows of the folded frame (features, target and kfold).
df.head()

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_92,f_93,f_94,f_95,f_96,f_97,f_98,f_99,target,kfold
0,1.587565,0.327385,-0.638824,-0.975825,-0.096222,-0.941838,-0.631028,-0.632221,-2.899928,0.271795,...,0.451587,-0.956746,0.042781,-0.185384,-1.813822,0.745272,-0.741588,2.407092,-15.707728,0
1,0.573021,0.570493,0.642328,0.510449,0.707373,-0.128932,0.458503,0.448267,-0.731004,-0.356891,...,1.016884,-1.649782,0.548072,-0.47034,-2.863078,1.642912,1.260446,0.164154,243.737331,0
2,1.198259,-0.560621,-0.384093,-1.431594,-1.239035,-0.271638,-0.517354,1.042655,0.040729,0.55479,...,0.121923,0.262836,0.53411,-0.242738,-2.258183,0.079556,-0.400833,-0.92532,-49.505239,0
3,-0.525476,0.527694,1.50025,-0.945797,-2.425933,-0.286057,-0.196163,-0.084249,-0.253263,-1.347066,...,-0.591152,-1.493825,0.519337,0.666604,-0.230961,0.190143,-0.023868,0.691379,-80.051604,0
4,-1.111608,0.601834,-1.697506,0.744915,0.474379,-1.0476,-1.674248,0.681145,-0.158304,0.340176,...,-0.124428,0.078212,1.387199,1.265827,0.247887,-0.604471,0.100135,0.190857,-3.701363,0
