In [54]:
!pip install proglearn
!pip install git+https://github.com/neurodata/treeple.git

Collecting git+https://github.com/neurodata/treeple.git
  Cloning https://github.com/neurodata/treeple.git to /tmp/pip-req-build-w7n461wi
  Running command git clone --filter=blob:none --quiet https://github.com/neurodata/treeple.git /tmp/pip-req-build-w7n461wi
  Resolved https://github.com/neurodata/treeple.git to commit 75c2cf919939574e4240fe261f053162039495cf
  Running command git submodule update --init --recursive -q
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
from treeple import ObliqueRandomForestClassifier
from proglearn.sims import generate_gaussian_parity
from sklearn.metrics import accuracy_score
from proglearn.sims import generate_spirals
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.datasets import mnist, fashion_mnist
from sklearn.model_selection import train_test_split

In [2]:
class MultiTaskForestClassifier:
    """One forest trained jointly on several tasks, with the task id as an extra feature.

    Tasks are registered with `add_task`. `fit` stacks the training data of the
    requested tasks, appends the task id as the last feature column and trains a
    single model on the result. `predict` / `score` append the given task id to
    the query points in the same way.

    Task ids should be numeric: they are stacked into the feature matrix.

    NOTE(review): when the model is fitted on a single task the task-id column is
    constant during training, and predicting for a different task feeds the model
    an id value it has never seen. Keep this in mind when reading forward/reverse
    transfer numbers.
    """

    def __init__(self, clf_type="SPORF", **kwargs):
        """Select the underlying forest and its hyper-parameters.

        Parameters
        ----------
        clf_type : str
            Which forest to use. Only "SPORF" is implemented so far.
        **kwargs
            Overrides for the default hyper-parameters of the chosen forest.

        Raises
        ------
        NotImplementedError
            For "MORF" and "HonestForest", which are planned but not wired up.
        ValueError
            For any other `clf_type`.
        """
        if clf_type == "SPORF":
            self.model_cls = ObliqueRandomForestClassifier
            self.default_params = {
                "n_estimators": 200,
                "feature_combinations": 2.0,
                "max_depth": 20,
                "min_samples_split": 5,
                "min_samples_leaf": 1,
                "max_features": 0.5,
                "bootstrap": True
            }
        elif clf_type in ("MORF", "HonestForest"):
            # TODO(Liora): wire up the MORF classifier and its default params.
            # TODO(Riya): wire up the HonestForest classifier and its default params.
            # Until then fail loudly instead of hitting an undefined class name.
            raise NotImplementedError(
                f"clf_type={clf_type!r} is planned but not implemented yet"
            )
        else:
            raise ValueError(f"Unsupported tree: {clf_type}")

        self.params = {**self.default_params, **kwargs}
        self.model = None
        self.task_data = {}   # task_id -> (X_train, y_train)
        self.test_data = {}   # task_id -> (X_test, y_test), only for tasks that supplied one

    def add_task(self, task_id, X, y, X_test=None, y_test=None):
        """Register (or replace) a task.

        Parameters
        ----------
        task_id : numeric
            Identifier of the task; also used as the value of the task feature.
        X, y : array-like
            Training samples and labels of the task.
        X_test, y_test : array-like, optional
            Held-out samples and labels. When given, `evaluate_transfer_general`
            scores this task on them instead of on the training data.

        Raises
        ------
        ValueError
            If only one of `X_test` / `y_test` is given.
        """
        if (X_test is None) != (y_test is None):
            raise ValueError("X_test and y_test must be provided together")

        self.task_data[task_id] = (X, y)
        if X_test is not None:
            self.test_data[task_id] = (X_test, y_test)
        else:
            # Re-registering a task without held-out data must not keep a stale split.
            self.test_data.pop(task_id, None)

    def fit(self, task_ids):
        """Train a fresh model on the stacked training data of `task_ids`.

        Any previously fitted model is replaced. `random_state` defaults to 42
        but can be overridden through the constructor kwargs.

        Raises
        ------
        ValueError
            If `task_ids` is empty.
        KeyError
            If a task id was never registered with `add_task`.
        """
        task_ids = list(task_ids)
        if not task_ids:
            raise ValueError("fit() needs at least one task id")

        X_all, y_all, task_labels = [], [], []
        for task_id in task_ids:
            X, y = self.task_data[task_id]
            X_all.append(X)
            y_all.append(y)
            task_labels.append(np.full(len(y), task_id))

        X_all = np.vstack(X_all)
        y_all = np.concatenate(y_all)
        task_labels = np.concatenate(task_labels)
        # The task id becomes the last feature column.
        X_all = np.column_stack((X_all, task_labels))

        # Merge instead of passing random_state as a second keyword, so a
        # user-supplied random_state does not raise "multiple values" TypeError.
        model_params = {"random_state": 42, **self.params}
        self.model = self.model_cls(**model_params)
        self.model.fit(X_all, y_all)

    def predict(self, X, task_id):
        """Predict labels for `X`, treating every row as belonging to `task_id`.

        Raises
        ------
        RuntimeError
            If called before `fit`.
        """
        if self.model is None:
            raise RuntimeError("Model is not fitted yet; call fit(task_ids) first")
        X_task = np.column_stack((X, np.full(len(X), task_id)))
        return self.model.predict(X_task)

    def score(self, X, y, task_id):
        """Return the accuracy of `predict(X, task_id)` against `y`."""
        return accuracy_score(y, self.predict(X, task_id))

    def _get_eval_data(self, task_id):
        """Return the held-out split of a task if one was registered, else its training data."""
        if task_id in self.test_data:
            return self.test_data[task_id]
        return self.task_data[task_id]

    def evaluate_transfer_general(self, forward_train_ids, forward_test_id, backward_train_ids, backward_test_ids, do_reverse=False):
        """
        Run up to three train/test scenarios and collect their accuracies.

        - Forward: train on `forward_train_ids`, test on `forward_test_id`
        - Backward: train on `backward_train_ids`, test individually on each in `backward_test_ids`
        - Reverse (optional, off by default): train on `forward_test_id`, test individually
          on each task in `forward_train_ids`

        Each task is scored on the held-out split passed to `add_task` when there
        is one; otherwise on the task's training data. In the latter case, scores
        for tasks that were also trained on are training accuracies and will be
        optimistic.

        The model is refitted for every scenario, so after this call `self.model`
        is whatever the last executed scenario trained.

        Returns
        -------
        dict
            "forward_transfer" -> {"train_on", "test_on", "accuracy"};
            "backward_transfer" -> {"task<id>": {"train_on", "test_on", "accuracy"}};
            "reverse_transfer" -> same layout as backward, only if `do_reverse`.
        """
        results = {}

        # Forward
        self.fit(forward_train_ids)
        X_test, y_test = self._get_eval_data(forward_test_id)
        forward_acc = self.score(X_test, y_test, task_id=forward_test_id)
        results["forward_transfer"] = {
            "train_on": forward_train_ids,
            "test_on": forward_test_id,
            "accuracy": forward_acc
        }

        # Backward
        self.fit(backward_train_ids)
        backward_accuracies = {}
        for tid in backward_test_ids:
            X, y = self._get_eval_data(tid)
            backward_accuracies[f"task{tid}"] = {
                "train_on": backward_train_ids,
                "test_on": tid,
                "accuracy": self.score(X, y, task_id=tid)
            }
        results["backward_transfer"] = backward_accuracies

        # Reverse
        if do_reverse:
            self.fit([forward_test_id])
            reverse_accuracies = {}
            for tid in forward_train_ids:
                X, y = self._get_eval_data(tid)
                reverse_accuracies[f"task{tid}"] = {
                    "train_on": [forward_test_id],
                    "test_on": tid,
                    "accuracy": self.score(X, y, task_id=tid)
                }
            results["reverse_transfer"] = reverse_accuracies

        return results


# Testing

## XOR / R-XOR


In [4]:
# Seed NumPy's global RNG before sampling so the synthetic data, and therefore
# every accuracy reported below, is reproducible on Restart & Run All.
# NOTE(review): assumes generate_gaussian_parity draws from NumPy's global RNG
# when no seed is passed to it; confirm against the installed proglearn version.
np.random.seed(42)

# Task 0: Gaussian XOR. Task 1: the same generator called with angle_params=pi/4 ("R-XOR").
X_xor, y_xor = generate_gaussian_parity(1000)
X_rxor, y_rxor = generate_gaussian_parity(1000, angle_params=np.pi / 4)

# 80/20 train/test split per task, with a fixed split seed.
X_train_xor, X_test_xor, y_train_xor, y_test_xor = train_test_split(X_xor, y_xor, test_size=0.2, random_state=42)
X_train_rxor, X_test_rxor, y_train_rxor, y_test_rxor = train_test_split(X_rxor, y_rxor, test_size=0.2, random_state=42)

### Baseline: train on both tasks jointly and test each task on its own held-out split

In [12]:
# Training split of every task, keyed by task id (0 = XOR, 1 = R-XOR).
train_splits = {
    0: (X_train_xor, y_train_xor),
    1: (X_train_rxor, y_train_rxor),
}

clf = MultiTaskForestClassifier(clf_type="SPORF")
for task_id, (X_tr, y_tr) in train_splits.items():
    clf.add_task(task_id, X_tr, y_tr)

# One model fitted on both tasks at once, then scored per task on held-out data.
clf.fit(list(train_splits))
acc_xor = clf.score(X_test_xor, y_test_xor, task_id=0)
acc_rxor = clf.score(X_test_rxor, y_test_rxor, task_id=1)

print(f"XOR Accuracy: {acc_xor:.3f}")
print(f"RXOR Accuracy: {acc_rxor:.3f}")

XOR Accuracy: 0.935
RXOR Accuracy: 0.970


In [6]:
# Print the docstring of the transfer-evaluation method as a reminder of its arguments.
help(clf.evaluate_transfer_general)

Help on method evaluate_transfer_general in module __main__:

evaluate_transfer_general(forward_train_ids, forward_test_id, backward_train_ids, backward_test_ids, do_reverse=False) method of __main__.MultiTaskForestClassifier instance
    - Forward: train on `forward_train_ids`, test on `forward_test_id`
    - Backward: train on `backward_train_ids`, test individually on each in `backward_test_ids`
    - Reverse (optional): train on `forward_test_id`, test on `forward_train_ids`



### Forward training, backward training, reverse training

In [11]:
# Forward: train on XOR (0), test on R-XOR (1).
# Backward: train on both tasks, test on each.
# Reverse: train on R-XOR (1), test on XOR (0).
# Scores are computed on the data registered through clf.add_task.
results = clf.evaluate_transfer_general(
    forward_train_ids=[0],
    forward_test_id=1,
    backward_train_ids=[0, 1],
    backward_test_ids=[0, 1],
    do_reverse=True,
)

print("Transfer Learning Results:")
fwd = results["forward_transfer"]
print(f"\n Forward Transfer:\n  Train on: {fwd['train_on']}\n  Test on: {fwd['test_on']}\n  Accuracy: {fwd['accuracy']:.3f}")

# Backward and reverse results share one layout (one entry per tested task),
# so a single loop prints both; the reverse section is skipped when absent.
per_task_sections = (
    ("\n Backward Transfer:", "backward_transfer"),
    ("\n Reverse Transfer:", "reverse_transfer"),
)
for header, key in per_task_sections:
    if key not in results:
        continue
    print(header)
    for task, stats in results[key].items():
        print(f"  {task}:")
        print(f"    Train on: {stats['train_on']}")
        print(f"    Test on : {stats['test_on']}")
        print(f"    Accuracy: {stats['accuracy']:.3f}")

Transfer Learning Results:

 Forward Transfer:
  Train on: [0]
  Test on: 1
  Accuracy: 0.546

 Backward Transfer:
  task0:
    Train on: [0, 1]
    Test on : 0
    Accuracy: 0.978
  task1:
    Train on: [0, 1]
    Test on : 1
    Accuracy: 0.991

 Reverse Transfer:
  task0:
    Train on: [1]
    Test on : 0
    Accuracy: 0.516
