In [1]:
!ls

cat_kfolds.ipynb  cat_train.csv        OHE_LogisticReg.ipynb
cat_test.csv	  cat_train_folds.csv  Untitled.ipynb


In [3]:
import pandas as pd

from sklearn import ensemble
from sklearn import metrics
from sklearn import preprocessing

def run(
    fold,
    data_path="/home/anand/catinthedatii/cat_train_folds.csv",
    random_state=42,
):
    """Train a random forest on label-encoded features and score one fold.

    Rows whose ``kfold`` value equals ``fold`` are held out for validation;
    all remaining rows are used for training. The ROC AUC on the held-out
    rows is printed and returned.

    Parameters
    ----------
    fold : int
        Value of the ``kfold`` column that selects the validation rows.
    data_path : str, optional
        CSV file to read. It must provide ``kfold`` and ``target`` columns;
        every column other than ``id``, ``kfold`` and ``target`` is used as
        a categorical feature. Defaults to the location this notebook
        originally hard-coded.
    random_state : int or None, optional
        Seed forwarded to ``RandomForestClassifier`` so repeated runs give
        the same score. Pass ``None`` for an unseeded forest.

    Returns
    -------
    float
        ROC AUC of the predicted class-1 probabilities on the validation rows.

    Raises
    ------
    ValueError
        If no row has ``kfold == fold``.
    """
    df = pd.read_csv(data_path)

    # Everything except the bookkeeping columns is treated as a feature.
    features = [f for f in df.columns if f not in ("id", "kfold", "target")]

    for col in features:
        # All features are categorical, so they are handled as strings.
        # Missing values must be replaced using the *original* null mask:
        # astype(str) turns NaN into the literal string "nan", so calling
        # fillna() after the cast would have nothing left to fill.
        as_text = df[col].astype(str).where(df[col].notna(), "NONE")

        # Fit the encoder on all rows (training and validation together) so
        # that a category seen only in the validation fold still has a code.
        encoder = preprocessing.LabelEncoder()

        # Assign with df[col] rather than df.loc[:, col]: the latter writes
        # into the existing object column in place on recent pandas, leaving
        # the encoded integers stored with object dtype.
        df[col] = encoder.fit_transform(as_text)

    # Split into training and validation rows using the fold column.
    is_valid = df["kfold"] == fold
    df_train = df[~is_valid].reset_index(drop=True)
    df_valid = df[is_valid].reset_index(drop=True)

    if df_valid.empty:
        # Fail early with a clear message instead of fitting a model first
        # and then hitting an opaque error at prediction time.
        raise ValueError(f"No rows found with kfold == {fold}")

    x_train = df_train[features].values
    x_valid = df_valid[features].values

    # Random forest classifier using all available cores.
    model = ensemble.RandomForestClassifier(n_jobs=-1, random_state=random_state)

    # Fit on the label-encoded training features.
    model.fit(x_train, df_train.target.values)

    # AUC needs scores rather than hard labels, so take the predicted
    # probability of the positive class (column 1).
    valid_preds = model.predict_proba(x_valid)[:, 1]

    auc = metrics.roc_auc_score(df_valid.target.values, valid_preds)

    print(f"Fold: {fold}, AUC = {auc}")

    return auc
    
if __name__ == "__main__":
    # Score folds 0 through 4 one after another; each call prints its AUC.
    for fold_ in range(5):
        run(fold_)

KeyboardInterrupt: 