In [1]:
for name in dir():
    if not name.startswith('_'):
        del globals()[name]
        
        
%load_ext autoreload
%autoreload 2

#magic commands 

In [2]:
import unittest
import warnings
import numpy as np
import pandas as pd
from QRF.utils.uncertainty_utils import Result
from QRF.wrapper.qrf_wrapper import QuantileRegressionForest
from sklearn.utils.validation import check_is_fitted

from QRF.utils.uncertainty_utils import entropy, entropy_based_uncertainty_decomposition, calculate_quantiles, Uncertainty, Result

In [3]:
# several test to assure computations and error free wrapper

In [4]:

class TestQuantileRegressionForest(unittest.TestCase):

    def setUp(self):
        # Suppress specific warnings
        warnings.filterwarnings("ignore", category=DeprecationWarning, message="np.find_common_type is deprecated")
        
        self.X_train = np.random.rand(1000, 10)
        self.y_train = np.random.rand(1000)
        self.sample_weight = np.random.rand(1000)
        self.X_test = np.random.rand(100, 10)
        self.regressor = QuantileRegressionForest(n_estimators=100)
        self.regressor.fit(self.X_train, self.y_train)

    def test_fit(self):
        self.assertIsNotNone(self.regressor.model)

    def test_predict_original(self):
        predictions = self.regressor.predict(self.X_test, prediction_type='RF')
        self.assertEqual(len(predictions), len(self.X_test))
        
    def test_predict_rf_vs_qrf(self):
        predictions_rf = self.regressor.predict(self.X_test, prediction_type='RF')        
        results_df = pd.DataFrame(index=range(len(self.X_test)))
        updated_df = self.regressor.calculate_metrics_and_uncertainties(self.X_test, results_df, results_df.index)
        predictions_avg = updated_df['AVG']
        self.assertTrue((predictions_rf == predictions_avg.values).all())

    def test_predict_quantiles(self):
        quantiles = [50, 25, 75]
        prediction = self.regressor.predict(self.X_test, quantile=quantiles)
        self.assertIsInstance(prediction, Result)
        self.assertTrue(hasattr(prediction, 'median'))
        self.assertTrue(hasattr(prediction, 'lower'))
        self.assertTrue(hasattr(prediction, 'upper'))

    def test_get_params(self):
        params = self.regressor.get_params()
        self.assertIn('n_estimators', params)
        self.assertEqual(params['class_min'], 0)
        self.assertEqual(params['class_max'], 100)

    def test_set_params(self):
        self.regressor.set_params(class_min=10, class_max=90, n_estimators=100)
        params = self.regressor.get_params()
        self.assertEqual(params['class_min'], 10)
        self.assertEqual(params['class_max'], 90)
        self.assertEqual(params['n_estimators'], 100)

    def test_retrieve_sample_values(self):
        values = self.regressor.retrieve_values(self.X_test)
        self.assertEqual(values.shape[0], len(self.X_test))

    def test_retrieve_sample_counts(self):
        counts = self.regressor.retrieve_counts(self.X_test)
        self.assertEqual(counts.shape[0], len(self.X_test))

    def test_calculate_metrics_and_uncertainties(self):
        results_df = pd.DataFrame(index=range(len(self.X_test)))
        sample_indices = np.arange(len(self.X_test))
        updated_df = self.regressor.calculate_metrics_and_uncertainties(self.X_test, results_df, sample_indices)
        self.assertIn('AVG', updated_df.columns)
        self.assertIn('UT', updated_df.columns)
        
    def test_fit_without_sample_weight(self):
        self.regressor.fit(self.X_train, self.y_train)
        check_is_fitted(self.regressor.model)

    def test_fit_with_sample_weight(self):
        self.regressor.fit(self.X_train, self.y_train, sample_weight=self.sample_weight)
        check_is_fitted(self.regressor.model)

    def test_prediction_with_sample_weight_vs_no_sampe_weight(self):
        self.regressor.fit(self.X_train, self.y_train)
        predictions_nw = self.regressor.predict(self.X_train)[0]
        self.regressor.fit(self.X_train, self.y_train, sample_weight=self.sample_weight)
        predictions_w = self.regressor.predict(self.X_train)[0]
        self.assertFalse(np.array_equal(predictions_nw, predictions_w))
        
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)


Aggregating values: 100%|██████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 5304.34it/s]
Aggregating counts: 100%|██████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 4144.53it/s]
Estimating for samples: 100%|██████████████████████████████████████████████████████| 100/100 [00:00<00:00, 4219.11it/s]
2024-08-14 23:10:25,447 - INFO - Calculating Quantiles has been terminated
2024-08-14 23:10:25,452 - INFO - Calculating uncertainties has been terminated - Elapsed time 0.00 s
Aggregating values: 100%|██████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 2581.27it/s]
Estimating for samples: 100%|██████████████████████████████████████████████████████| 100/100 [00:00<00:00, 4958.69it/s]
2024-08-14 23:10:35,914 - INFO - Calculating Quantiles has been terminated - Elapsed time 10.46 s
Aggregating values: 100%|██████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 4694.61it/s]
Aggreg

In [5]:
class TestUtilityFunctions(unittest.TestCase):

    def setUp(self):
        self.sample_counts_matrix = np.array([
            [10, 20, 30],
            [5, 15, 25],
            [20, 20, 20]
        ])
        self.aggregated_leaf_values = np.random.rand(100, 50)  # Increased size for more robust testing

    def test_entropy(self):
        probabilities = np.array([
            [0.1, 0.2, 0.7],
            [0.3, 0.3, 0.4]
        ])
        expected_entropies = -np.sum(probabilities * np.log(probabilities), axis=-1)
        calculated_entropies = entropy(probabilities)
        np.testing.assert_almost_equal(calculated_entropies, expected_entropies, decimal=6)

    def test_entropy_based_uncertainty_decomposition(self):
        uncertainty = entropy_based_uncertainty_decomposition(self.sample_counts_matrix)
        self.assertIsInstance(uncertainty, Uncertainty)
        self.assertEqual(uncertainty.total.shape[0], self.sample_counts_matrix.shape[0])

    def test_calculate_quantiles(self):
        quantiles = calculate_quantiles(self.aggregated_leaf_values, quantile=[50, 25, 75])
        self.assertIsInstance(quantiles, Result)
        self.assertEqual(quantiles.median.shape[0], self.aggregated_leaf_values.shape[0])
        self.assertEqual(quantiles.lower.shape[0], self.aggregated_leaf_values.shape[0])
        self.assertEqual(quantiles.upper.shape[0], self.aggregated_leaf_values.shape[0])

    def test_entropy_calculation_correctness(self):
        # Test case with known entropy values
        probabilities = np.array([
            [0.25, 0.25, 0.25, 0.25],  # Uniform distribution
            [0.1, 0.1, 0.1, 0.7]       # Non-uniform distribution
        ])
        expected_entropies = np.array([
            1.386294361,  # Entropy of uniform distribution
            0.940246      # Corrected entropy for the given non-uniform distribution
        ])
        calculated_entropies = entropy(probabilities)
        np.testing.assert_almost_equal(calculated_entropies, expected_entropies, decimal=3)



if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)


Aggregating values: 100%|██████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 4391.94it/s]
Aggregating counts: 100%|██████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 4963.09it/s]
Estimating for samples: 100%|██████████████████████████████████████████████████████| 100/100 [00:00<00:00, 7925.59it/s]
2024-08-14 23:10:46,775 - INFO - Calculating Quantiles has been terminated - Elapsed time 5.09 s
2024-08-14 23:10:46,780 - INFO - Calculating uncertainties has been terminated - Elapsed time 0.01 s
Aggregating values: 100%|██████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 4293.79it/s]
Estimating for samples: 100%|██████████████████████████████████████████████████████| 100/100 [00:00<00:00, 4818.77it/s]
2024-08-14 23:10:56,687 - INFO - Calculating Quantiles has been terminated - Elapsed time 9.91 s
Aggregating values: 100%|██████████████████████████████████████████████████████████| 100/100 [00:00<00:00