In [370]:
#| default_exp core

# core

> Score every feature on its own with a depth-1 decision tree and rank the features by a user-supplied metric.

In [371]:
#| export
from nbdev.showdoc import *
from fastcore.basics import *
from fastcore.utils import *
from fastcore.test import *
from fastcore.parallel import *
from sklearn.tree import DecisionTreeClassifier
from functools import partial
import pandas as pd
import os

In [372]:
#| export
class RootOneR:
    """Holds a dataset and the settings used to score features one at a time.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the feature columns and the target column. It is
        copied on construction; the caller's frame is never modified.
    target : str
        Name of the target column in `data`.
    metric : callable
        Scoring function. Elsewhere in this module it is called as
        `metric(y_true, y_pred)` and its `__name__` is used as a column label.
    n_jobs : int, optional
        Worker count handed to the parallel feature evaluation. A falsy value
        (None or 0) selects half of the CPUs reported by `os.cpu_count()`.

    Attributes
    ----------
    cats : list
        Names of the `category`-dtype feature columns (target excluded).
    conts : list
        Names of all remaining feature columns (target excluded).
    lookup : dict
        Maps every `category`-dtype column (including the target, if it is
        categorical) to its original categories, so the integer codes stored
        in `self.data` can be translated back.
    X_train, y_train, X_test, y_test
        Initialised to None here; they are assigned outside this class.
    result_df
        Initialised to None here; it is assigned outside this class.
    """

    def __init__(self, data, target, metric, n_jobs=None):
        # Work on a private copy: the categorical columns are overwritten with
        # integer codes below, and that must not leak into the caller's frame
        # (it would also make a second RootOneR on the same frame see no
        # categorical columns at all).
        self.data = data.copy()
        self.target = target
        self.metric = metric
        # os.cpu_count() may return None when the count cannot be determined.
        self.n_jobs = n_jobs or (os.cpu_count() or 1)//2
        self.cats = list(self.data.select_dtypes(include='category').columns.values)
        self.conts = list(self.data.columns.difference([self.target]+self.cats).values)
        self.X_train = None
        self.y_train = None
        self.X_test = None
        self.y_test = None
        # Remember the categories before they are replaced by their codes.
        self.lookup = {c: self.data[c].cat.categories for c in self.cats}
        if self.cats:
            self.data[self.cats] = self.data[self.cats].apply(lambda x: x.cat.codes)
        # A categorical target is encoded like any other categorical column,
        # but it must not be listed among the features.
        if target in self.cats:
            self.cats.remove(target)
        self.result_df = None

    def __str__(self):
        return f"Categoricals {self.cats}\nContinuous {self.conts}\nTarget {self.target}"

In [374]:
#| export
def _xs_y(df_, targ): 
    if not isinstance(targ, list):
        xs = df_[df_.columns.difference([targ])].copy()
    else:
        xs = df_[df_.columns.difference(targ)].copy()
    y = df_[targ].copy()
    return xs, y

In [375]:
#| export
@patch
def train_test_split(self:RootOneR, fraction: float, fix_rng: int):
    """Split `self.data` into train and test parts and store them on the instance.

    `fraction` is passed to `DataFrame.sample` as `frac` and `fix_rng` as
    `random_state`. The sampled rows become `X_train` / `y_train`; all rows
    whose index labels were not sampled become `X_test` / `y_test`. Nothing is
    returned.
    """
    train_rows = self.data.sample(frac=fraction, random_state=fix_rng)
    self.X_train, self.y_train = _xs_y(df_=train_rows, targ=self.target)
    # NOTE: drop() works on index labels, so this relies on the labels of
    # self.data being unique.
    held_out_rows = self.data.drop(self.X_train.index)
    self.X_test, self.y_test = _xs_y(df_=held_out_rows, targ=self.target)

In [376]:
#| export
def evaluate_one_feature(feature, index, oner: RootOneR):
    """Fit a depth-1 decision tree on a single feature and score it on the test split.

    Parameters
    ----------
    feature : column label present in `oner.X_train` and `oner.X_test`.
    index : not used by this function.
    oner : RootOneR whose train/test attributes have already been assigned.

    Returns
    -------
    tuple of (feature, score rounded to 4 decimals, fitted tree, test predictions)
    """
    def _as_single_column(frame):
        # The estimator is given a 2-D input, so the column is reshaped to (n, 1).
        return frame[feature].array.reshape(-1, 1)

    stump = DecisionTreeClassifier(criterion='gini', max_depth=1)
    stump.fit(_as_single_column(oner.X_train), oner.y_train)
    test_preds = stump.predict(_as_single_column(oner.X_test))
    score = round(oner.metric(oner.y_test, test_preds), 4)
    return feature, score, stump, test_preds

In [377]:
#| export
def evaluate_all_features(oner: RootOneR):
    """Score every feature of `oner` individually and store the ranking.

    Each feature in `oner.conts + oner.cats` is passed to
    `evaluate_one_feature` through `parallel` with `oner.n_jobs` workers.
    The scores are written to `oner.result_df`: one row per feature, a single
    column named after the metric function, sorted from highest to lowest.
    Nothing is returned.
    """
    score_one = partial(evaluate_one_feature, index='', oner=oner)
    outcomes = parallel(f=score_one, items=oner.conts+oner.cats, n_workers=oner.n_jobs)
    # Each outcome is a 4-tuple; only the feature names and scores are kept.
    features, metrics, _stumps, _preds = zip(*outcomes)
    score_col = oner.metric.__name__
    scores = pd.DataFrame(index=features, data=metrics, columns=[score_col])
    oner.result_df = scores.sort_values(by=score_col, ascending=False)
