In [2]:
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.ensemble import BaseEnsemble
from scipy.stats import mode
import numpy as np

In [3]:
from rolling_lookahead_dt_pulp import rollo_oct_pulp
from sklearn.utils.validation import check_is_fitted
#from rolling_lookahead_dt_pulp.oct.optimal_tree_pulp import predict_model_pulp

from rolling_lookahead_dt_pulp.rolling_tree.rolling_optimize_pulp import rolling_optimize_pulp
from rolling_lookahead_dt_pulp.oct.tree import *
from rolling_lookahead_dt_pulp.oct.optimal_tree_pulp import *
from helpers.helpers import preprocess_dataframes

# Limitation of the library entry point imported above: training on the training data
# (which should simply yield the fitted model) also predicts the test data on that model
# in the same call. As a result, the finished model cannot be evaluated on any other test data.

class CustomTreeWrapper(BaseEstimator, ClassifierMixin):
    """Scikit-learn style wrapper around the rolling look-ahead PuLP decision tree.

    A depth-2 model is always generated and trained first. When ``depth`` is
    greater than 2, ``rolling_optimize_pulp`` is then asked to extend it up to
    ``depth``. Unlike the library entry point, the trained model dictionary is
    kept in ``result_dict`` so that ``predict`` can be called on new data.

    Parameters
    ----------
    train_data, test_data : pd.DataFrame
        Frames handed to ``preprocess_dataframes``. ``test_data`` is scored
        during ``fit``; ``train_data`` is used to generate the model and is
        replaced by the data passed to ``fit``.
    depth : int or None
        Target depth. ``None`` (the default) and values up to 2 mean "use the
        depth-2 base model only".
    criterion : str
        Forwarded to ``generate_model_pulp`` and ``rolling_optimize_pulp``.
    target_label, features
        Forwarded to ``preprocess_dataframes``.
    time_limit : int
        Forwarded to ``rolling_optimize_pulp``.
    big_m : int
        Forwarded to ``generate_model_pulp``.
    """

    # Depth of the base model that is always trained before any rolling step.
    _BASE_DEPTH = 2

    def __init__(self, train_data, test_data, depth=None, criterion='gini', target_label=None, features=None, time_limit = 1800, big_m = 99):
        self.depth = depth
        self.criterion = criterion
        self.test_data = test_data
        self.train_data = train_data
        self.target_label = target_label
        self.features = features
        self.time_limit = time_limit
        self.big_m = big_m
        # NOTE(review): building the model inside __init__ deviates from the
        # scikit-learn convention of a side-effect-free constructor; kept
        # because fit() relies on self.main_model and self.result_dict existing.
        self.construct()

    def construct(self):
        """Preprocess the stored frames and generate the untrained depth-2 model."""
        train, test = preprocess_dataframes( #./rollo_oct/utils/helpers.py
            train_df = self.train_data,
            test_df = self.test_data,
            target_label = self.target_label,
            features = self.features)

        df = pd.concat([train, test])
        # Feature identifiers: every column except 'y', converted to int.
        self.P = [int(i) for i in
            list(train.loc[:, train.columns != 'y'].columns)]
        # Assumes 'y' is the first column of the preprocessed frames - TODO confirm.
        train.columns = ["y", *self.P]
        test.columns = ["y", *self.P]
        # Sorted distinct labels found in train and test together.
        self.K = sorted(list(set(df.y)))

        self.result_dict = {} #adding dict to store solutions for every level
        self.result_dict['tree'] = {}
        self.result_dict['tree'][self._BASE_DEPTH] = {}

        # generate model
        self.main_model = generate_model_pulp(P=self.P, K=self.K, data=train, y_idx=0, big_m=self.big_m, criterion=self.criterion)

    def _effective_depth(self):
        """Return the tree level whose trained model ``predict`` should use."""
        if self.depth is None or self.depth <= self._BASE_DEPTH:
            return self._BASE_DEPTH
        return self.depth

    def fit(self, X, y):
        """Train the depth-2 model on ``(X, y)`` and optionally roll it deeper.

        Per-level train/test predictions and accuracies are stored in
        ``self.result_dict``; the stored ``test_data`` is scored as a side
        effect. Returns ``self``.
        """
        # The label column keeps the name of ``y``; presumably this must match
        # ``target_label`` for preprocess_dataframes - verify against caller.
        self.train_data = pd.concat([y, X], axis=1, ignore_index=False)

        train, test = preprocess_dataframes( #./rollo_oct/utils/helpers.py
            train_df = self.train_data,
            test_df = self.test_data,
            target_label = self.target_label,
            features = self.features)

        self.P = [int(i) for i in
            list(train.loc[:, train.columns != 'y'].columns)]

        self.main_model = train_model_pulp(model_dict=self.main_model, data=train, P=self.P)

        self.result_dict['tree'][self._BASE_DEPTH]['trained_dict'] = self.main_model

        # predict model
        result_train = predict_model_pulp(data=train, model_dict=self.main_model, P=self.P)

        misclassified_leafs = find_misclassification(df=result_train)

        result_test = predict_model_pulp(data=test, model_dict=self.main_model, P=self.P)

        train_acc = len(result_train.loc[result_train["prediction"] == result_train["y"]]) / \
                    len(result_train["y"])

        test_acc = len(result_test.loc[result_test["prediction"] == result_test["y"]]) / \
                len(result_test["y"])

        self.result_dict['tree'][self._BASE_DEPTH]['train'] = result_train[['y', 'prediction', 'leaf']]
        self.result_dict['tree'][self._BASE_DEPTH]['test'] = result_test[['y', 'prediction', 'leaf']]

        self.result_dict[self._BASE_DEPTH] = {
            "training_accuracy": train_acc,
            "test_accuracy": test_acc
        }

        # The prediction step left 'prediction' and 'leaf' columns on the input
        # frames; strip them before handing the frames to the rolling optimizer.
        train = train.drop(["prediction", "leaf"], axis=1)
        test = test.drop(["prediction", "leaf"], axis=1)

        # depth=None is the constructor default, so guard the comparison
        # (``None > 2`` raises TypeError).
        if self.depth is not None and self.depth > self._BASE_DEPTH:
            self.result_dict = rolling_optimize_pulp(predefined_model=self.main_model,
                                            train_data=train,
                                            test_data=test,
                                            main_depth=self._BASE_DEPTH,
                                            target_depth=self.depth,
                                            features=self.P,
                                            time_limit=self.time_limit,
                                            to_go_deep_nodes=misclassified_leafs,
                                            result_dict=self.result_dict,
                                            criterion=self.criterion)

        self.is_fitted_ = True
        return self

    def predict(self, X):
        """Predict labels for ``X`` with the model stored for the effective depth.

        Returns the ``'prediction'`` column produced by ``predict_model_pulp``.
        Raises ``KeyError`` if no trained model is stored for that depth.
        """
        check_is_fitted(self, 'is_fitted_')

        # Falls back to the depth-2 level when depth is None or not above 2,
        # because that is the only level stored in that case.
        model_dict = self.result_dict['tree'][self._effective_depth()]['trained_dict']

        # predict_model_pulp is given a frame with a leading 'y' column, as in
        # fit(); the true labels are unknown here, so use a placeholder column.
        dummy = pd.DataFrame({'y': [None]*len(X)}, index=X.index)
        test = pd.concat([dummy, X], axis=1)

        res = predict_model_pulp(data=test, model_dict=model_dict, P=self.P)

        return res['prediction']

Forest

In [4]:
def fit_single_tree(args):
    """Fit one bootstrap tree; module-level so ``multiprocessing.Pool.map`` can call it.

    ``args`` is the tuple ``(X, y, tree_kwargs, random_state)``. Returns
    ``(tree, (sample_indices, oob_indices))`` where ``sample_indices`` are the
    index labels drawn with replacement and ``oob_indices`` the labels that
    were never drawn (out-of-bag).
    """
    X, y, tree_kwargs, random_state = args

    all_labels = np.array(X.index)
    # Classic bootstrap: as many draws as rows, with replacement.
    drawn = np.random.RandomState(random_state).choice(
        all_labels, size=len(all_labels), replace=True
    )
    never_drawn = all_labels[~np.isin(all_labels, drawn)]

    # Target first, then features renamed to positional integers 0..n-1.
    frame_columns = ['y'] + list(range(X.shape[1]))

    X_boot, y_boot = X.loc[drawn], y.loc[drawn]
    train_data = pd.concat([y_boot, X_boot], axis=1)
    train_data.columns = list(frame_columns)

    if len(never_drawn) == 0:
        # No out-of-bag rows: fall back to evaluating on the bootstrap sample.
        test_data = train_data.copy()
    else:
        test_data = pd.concat([y.loc[never_drawn], X.loc[never_drawn]], axis=1)
        test_data.columns = list(frame_columns)

    tree = CustomTreeWrapper(train_data=train_data, test_data=test_data, **tree_kwargs)
    tree.fit(X_boot, y_boot)
    return tree, (drawn, never_drawn)

In [5]:
from sklearn.utils import resample
from scipy.stats import mode
import multiprocessing

class CustomEnsembleClassifier:
    """Bagging ensemble of ``CustomTreeWrapper`` trees combined by majority vote.

    Each tree is trained on a bootstrap sample of the rows; the rows that were
    not drawn for a tree are its out-of-bag (OOB) rows and are used both as
    that tree's ``test_data`` and by ``oob_score``.

    Attributes set by fitting
    -------------------------
    trees_ : list
        The fitted trees.
    bootstrap_indices_ : list of (sample_indices, oob_indices)
        Index labels used for / left out of each tree's bootstrap sample.
    classes_ : np.ndarray or None
        Sorted distinct labels of the ``y`` passed to ``fit``/``parallel_fit``.
    """

    def __init__(self, n_estimators=10, tree_kwargs=None, random_state=None, cores_to_use = 1):
        """
        n_estimators: number of trees in the ensemble
        tree_kwargs: dictionary of keyword args for CustomTreeWrapper (except train_data and test_data)
        random_state: seed for reproducible bootstrap sampling
        cores_to_use: number of worker processes used by parallel_fit
        """
        self.n_estimators = n_estimators
        self.tree_kwargs = tree_kwargs if tree_kwargs is not None else {}
        self.random_state = random_state
        self.trees_ = []
        self.bootstrap_indices_ = []
        self.classes_ = None
        self.cores_to_use = cores_to_use

    def fit(self, X, y):
        """
        Fit all trees sequentially in the current process.

        The caller may pass the full dataset split into X and y, or do extra
        preprocessing first (e.g. cross-validation splits) and pass one fold.

        All bootstrap samples are drawn from a single RandomState stream, so
        the resulting forest differs from the one built by parallel_fit, which
        seeds every tree separately.

        X: pd.DataFrame of features
        y: pd.Series of target labels
        Returns: self
        """
        rng = np.random.RandomState(self.random_state)
        self.trees_ = []
        self.bootstrap_indices_ = []
        self.classes_ = np.unique(y)

        indices = np.array(X.index)
        # Target first, then features renamed to integers 0, 1, 2, ... so the
        # underlying tree implementation gets standardized input.
        frame_columns = ['y'] + list(range(X.shape[1]))

        for _ in range(self.n_estimators):
            # Bootstrap: len(indices) draws with replacement, so some rows
            # repeat and the rows never drawn become out-of-bag (OOB).
            sample_indices = rng.choice(indices, size=len(indices), replace=True)
            oob_indices = indices[~np.isin(indices, sample_indices)]
            self.bootstrap_indices_.append((sample_indices, oob_indices))

            X_boot = X.loc[sample_indices]
            y_boot = y.loc[sample_indices]
            train_data = pd.concat([y_boot, X_boot], axis=1)
            train_data.columns = list(frame_columns)

            if len(oob_indices) > 0:
                test_data = pd.concat([y.loc[oob_indices], X.loc[oob_indices]], axis=1)
                test_data.columns = list(frame_columns)
            else:
                # Edge case: every row was drawn, so there is nothing out-of-bag.
                test_data = train_data.copy()

            tree = CustomTreeWrapper(train_data=train_data,
                                     test_data=test_data,
                                     **self.tree_kwargs)
            tree.fit(X_boot, y_boot)
            self.trees_.append(tree)

        return self

    def parallel_fit(self, X, y):
        """
        Fit the trees in ``cores_to_use`` worker processes via fit_single_tree.

        Tree ``i`` is seeded with ``random_state + i`` (or None when no
        random_state was given). Returns: self
        """
        if self.random_state is not None:
            random_states = [self.random_state + i for i in range(self.n_estimators)]
        else:
            random_states = [None] * self.n_estimators
        args = [(X, y, self.tree_kwargs, rs) for rs in random_states]

        # The context manager tears the pool down even when a worker raises;
        # map() has already collected every result when the block exits.
        with multiprocessing.Pool(processes=self.cores_to_use) as pool:
            results = pool.map(fit_single_tree, args)

        # Rebuild the ensemble state from the (tree, indices) pairs returned by the workers.
        self.trees_ = [res[0] for res in results]
        self.bootstrap_indices_ = [res[1] for res in results]
        self.classes_ = np.unique(y)
        return self

    def _tree_predictions(self, X):
        """Return a DataFrame with one column of predictions per fitted tree."""
        if not self.trees_:
            raise ValueError(
                "CustomEnsembleClassifier has no fitted trees; call fit or parallel_fit first."
            )
        columns = []
        for tree in self.trees_:
            pred = tree.predict(X)
            if not isinstance(pred, pd.Series):
                pred = pd.Series(pred, index=X.index)
            columns.append(pred)
        # Single concat instead of growing a frame inside the loop; the keys
        # give every tree a unique column label (the Series share one name).
        return pd.concat(columns, axis=1, keys=list(range(len(columns))))

    def predict(self, X):
        """
        Majority-vote ensemble prediction (ties resolve to the smallest label).
        Returns: pd.Series of int predictions, aligned to X.index
        Raises: ValueError if the ensemble holds no fitted trees
        """
        all_preds = self._tree_predictions(X)
        # mode(axis=1) lists each row's modes in sorted order; column 0 is the first.
        maj_vote = all_preds.mode(axis=1)[0]
        maj_vote.index = X.index  # ensure correct alignment
        return maj_vote.astype(int)

    def predict_proba(self, X):
        """
        Fraction of trees voting for each class (binary or multiclass).

        Columns follow the sorted labels seen during fitting (``classes_``),
        extended by any label that only occurs in the predictions, so the
        column count does not depend on which classes happen to be predicted
        for this particular X. Without ``classes_`` the predicted labels are
        used. Fractions are relative to the number of trees actually held.

        Returns: np.ndarray of shape (n_samples, n_classes)
        Raises: ValueError if the ensemble holds no fitted trees
        """
        preds_matrix = self._tree_predictions(X)
        predicted = np.unique(preds_matrix.values)
        known = getattr(self, "classes_", None)
        classes = predicted if known is None else np.union1d(known, predicted)

        n_trees = preds_matrix.shape[1]
        proba = np.zeros((X.shape[0], len(classes)))
        for i, c in enumerate(classes):
            proba[:, i] = (preds_matrix == c).sum(axis=1).to_numpy() / n_trees
        return proba

    def oob_score(self, X, y):
        """
        Out-of-bag accuracy: every row is predicted only by the trees that did
        not see it during training, and the majority of those votes (ties go
        to the smallest label) is compared with the true label.

        X, y must be the data the ensemble was fitted on.
        Raises: ValueError if no row received an OOB prediction
        """
        oob_votes = {idx: [] for idx in X.index}
        for (_, oob_ind), tree in zip(self.bootstrap_indices_, self.trees_):
            if len(oob_ind) == 0:
                continue
            preds = tree.predict(X.loc[oob_ind])
            for idx, pred in preds.items():
                oob_votes[idx].append(pred)

        # Only score samples with at least one OOB prediction
        final_oob_preds = []
        final_oob_true = []
        for idx, votes in oob_votes.items():
            if not votes:
                continue
            # np.unique returns sorted labels, so argmax picks the smallest
            # label among ties. This does not depend on the return shape of
            # scipy.stats.mode, which differs between SciPy versions.
            labels, counts = np.unique(votes, return_counts=True)
            final_oob_preds.append(labels[np.argmax(counts)])
            final_oob_true.append(y.loc[idx])
        if not final_oob_preds:
            raise ValueError("No OOB predictions collected.")
        accuracy = np.mean(np.array(final_oob_preds) == np.array(final_oob_true))
        return accuracy

Testing Tree

In [None]:
from sklearn.model_selection import train_test_split

# Alternative dataset:
#data_seismic = pd.read_csv("datasets/seismic/seismic_bin.csv")
data = data_test = pd.read_csv("datasets/example_datasets/stacked.csv")

# Target column 'y' versus every other column as features.
y = data['y']
X = data.drop(columns=['y'])

# Hold out exactly 100 rows, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=100, stratify=y, random_state=42
)

# Label-first frames in the layout passed to CustomTreeWrapper as train_data/test_data.
train_data = stacked_train = pd.concat([y_train, X_train], axis=1)
test_data = stacked_test = pd.concat([y_test, X_test], axis=1)

#feature_columns = train_data.columns[1:]
feature_columns = X_train.columns
#print(feature_columns)

In [7]:
# Disabled single-tree usage example. It is kept inside a bare string literal so
# the cell does not train anything; the notebook only echoes the string itself.
"""
# Usage outside:
wrapper = CustomTreeWrapper(
    depth=8, #breaks predict depth = 6 and upwards
    criterion='gini',
    test_data=test_data,
    train_data= train_data,
    target_label='y',
    features=feature_columns
)
wrapper.fit(X_train, y_train); #semicolon for suppressing when interactive environment (Jupyter) tries to display the returned object, suppresses repr(self)
"""

"\n# Usage outside:\nwrapper = CustomTreeWrapper(\n    depth=8, #breaks predict depth = 6 and upwards\n    criterion='gini',\n    test_data=test_data,\n    train_data= train_data,\n    target_label='y',\n    features=feature_columns\n)\nwrapper.fit(X_train, y_train); #semicolon for suppressing when interactive environment (Jupyter) tries to display the returned object, suppresses repr(self)\n"

In [8]:
#predictions = wrapper.predict(test_data)
#predictions = wrapper.predict(X_test)
#print(predictions)

Forest

In [9]:
# Assume you already have: X_train, X_test, y_train, y_test (pandas DataFrame/Series)
# Per-tree settings forwarded to CustomTreeWrapper; features are addressed by
# position (0 .. n_features - 1).
tree_kwargs = {
    'depth': 8,
    'criterion': 'gini',
    'target_label': 'y',
    'features': list(range(X_train.shape[1])),
}

# Two-tree forest with a fixed seed; parallel_fit may use up to 6 worker processes.
ensemble = CustomEnsembleClassifier(
    n_estimators=2,
    tree_kwargs=tree_kwargs,
    random_state=42,
    cores_to_use=6,
)

In [10]:
# Train through the multiprocessing path; the sequential variant is kept for reference.
# parallel_fit returns the ensemble itself, which the notebook echoes as the cell result.
#ensemble.fit(X_train, y_train)
ensemble.parallel_fit(X_train, y_train)

{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}



  x = np.array(i[P])
  x = np.array(i[P])


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/97a7e8817bcb491a96bd28d506061e11-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/97a7e8817bcb491a96bd28d506061e11-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13708 RHS
At line 13748 BOUNDS
At line 16487 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.109862 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 0.109862 to -1.79769e+308
Probing was tried 0 times and cr

  x = np.array(i[P])
  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/03b63351dd214609999684094997d043-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/03b63351dd214609999684094997d043-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13719 RHS
At line 13759 BOUNDS
At line 16498 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.115207 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 i

  x = np.array(i[P])
  x = np.array(i[P])
  x = np.array(i[P])
  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/58f730289d6842eb81df38fd57301c2f-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/58f730289d6842eb81df38fd57301c2f-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13704 RHS
At line 13744 BOUNDS
At line 16483 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous 

  x = np.array(i[P])


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/c802cf58e2b3433f8e3d3b7589bdc3fe-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/c802cf58e2b3433f8e3d3b7589bdc3fe-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13609 RHS
At line 13649 BOUNDS
At line 16388 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.069843 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 0.069843 to -1.79769e+308
Probing was tried 0 times and cr

  x = np.array(i[P])
  x = np.array(i[P])
  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/f7a713d81eb740129d35e3f2873b645f-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/f7a713d81eb740129d35e3f2873b645f-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13716 RHS
At line 13756 BOUNDS
At line 16495 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
secon

  x = np.array(i[P])
  x = np.array(i[P])
  x = np.array(i[P])
  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/03ce5a282d6f497ebdd8eaed802fe0bc-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/03ce5a282d6f497ebdd8eaed802fe0bc-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13708 RHS
At line 13748 BOUNDS
At line 16487 ENDATA
Prob

  x = np.array(i[P])
  x = np.array(i[P])


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/272039f01b734360a5f861bffc9a5d48-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/272039f01b734360a5f861bffc9a5d48-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13561 RHS
At line 13601 BOUNDS
At line 16340 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.0706848 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 0.0706848 to -1.79769e+308
Probing was tried 0 times and 

  x = np.array(i[P])
  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/11a0ebb170864e51aeea39cc577f620a-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/11a0ebb170864e51aeea39cc577f620a-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13723 RHS
At line 13763 BOUNDS
At line 16502 ENDATA
Prob

  x = np.array(i[P])


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/4d36a9462e494e0faceefc95984c8182-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/4d36a9462e494e0faceefc95984c8182-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13377 RHS
At line 13417 BOUNDS
At line 16156 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.00757576 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 0.00757576 to -1.79769e+308
Probing was tried 0 times an

  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/eb87de7a33274304ab2965ab0a77f364-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/eb87de7a33274304ab2965ab0a77f364-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13656 RHS
At line 13696 BOUNDS
At line 16435 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous 

  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/0775087144a048bd9d874397e79bc5e3-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/0775087144a048bd9d874397e79bc5e3-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13733 RHS
At line 13773 BOUNDS
At line 16512 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 99.125 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at

  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/4a5bc870d074481f9d396481e7ce53f1-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/4a5bc870d074481f9d396481e7ce53f1-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13680 RHS
At line 13720 BOUNDS
At line 16459 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.187654 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts 

  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/d080c99f66134b4387b0f5bec84be154-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/d080c99f66134b4387b0f5bec84be154-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13541 RHS
At line 13581 BOUNDS
At line 16320 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer 

  x = np.array(i[P])


{'leaf_nodes': [4, 5, 6, 7], 'leaf_nodes_path': {4: [1, 1], 5: [1, 0], 6: [0, 1], 7: [0, 0]}}
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/cd35f81e6af54da58952e4bfcb007f8d-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/cd35f81e6af54da58952e4bfcb007f8d-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13727 RHS
At line 13767 BOUNDS
At line 16506 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.190476 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts 

  x = np.array(i[P])


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/drood/Obsidian/Files/Bachelorarbeit/rlrf_my_try/.venv/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/8fb13238c7b14787a2a5d2a0c8e2bb3f-pulp.mps -sec 1800 -timeMode elapsed -branch -printingOptions all -solution /tmp/8fb13238c7b14787a2a5d2a0c8e2bb3f-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 44 COLUMNS
At line 13713 RHS
At line 13753 BOUNDS
At line 16492 ENDATA
Problem MODEL has 39 rows, 2738 columns and 5476 elements
Coin0008I MODEL read with 0 errors
seconds was changed from 1e+100 to 1800
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.25 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 0.25 to -1.79769e+308
Probing was tried 0 times and created 0 

  x = np.array(i[P])


<__main__.CustomEnsembleClassifier at 0x7495612bd250>

In [11]:
# Majority-vote predictions for the held-out rows, returned as a Series indexed like X_test.
y_pred = ensemble.predict(X_test)
print(y_pred)

1660    1
1216    1
2483    1
248     1
455     1
       ..
21      1
1073    1
665     1
1192    1
1250    1
Name: 0, Length: 100, dtype: int64


  x = np.array(i[P])
  x = np.array(i[P])


In [12]:
# Show the true hold-out labels first.
print(y_test)

# Side-by-side frame: true label and ensemble prediction, aligned on the index of y_test.
res = y_test.to_frame(name='y')
res['prediction'] = y_pred
print(res)

# Distinct labels the ensemble actually predicted on the hold-out set.
unique_values = res['prediction'].unique()
print(unique_values)

1660    1
1216    1
2483    1
248     1
455     1
       ..
21      1
1073    1
665     1
1192    2
1250    1
Name: y, Length: 100, dtype: int64
      y  prediction
1660  1           1
1216  1           1
2483  1           1
248   1           1
455   1           1
...  ..         ...
21    1           1
1073  1           1
665   1           1
1192  2           1
1250  1           1

[100 rows x 2 columns]
[1 2]
