## Home assignment 05: Bagging and OOB score

Please, fill the lines in the code below.
This is a simplified version of `BaggingRegressor` from `sklearn`. Please, notice, that `sklearn` API is **not preserved**.

Your algorithm should be able to train different instances of the same model class on bootstrapped datasets and to provide [OOB score](https://en.wikipedia.org/wiki/Out-of-bag_error) for the training set.

The model should be passed as model class with no explicit parameters and no parentheses.

Example:
```
import numpy as np
from sklearn.linear_model import LinearRegression

bagging_regressor = SimplifiedBaggingRegressor(num_bags=10, oob=True)
bagging_regressor.fit(LinearRegression, X, y)

```

In [1]:
import numpy as np

In [8]:
a = np.array([[1,2,3], [1,2], [1,2,3,4]], dtype=object)
a.shape

(3,)

In [56]:
class SimplifiedBaggingRegressor:
    def __init__(self, num_bags, oob=False):
        self.num_bags = num_bags
        self.oob = oob
        
    def _generate_splits(self, data: np.ndarray):
        '''
        Generate indices for every bag and store in self.indices_list list
        '''
        self.indices_list = []
        data_length = len(data)
        for bag in range(self.num_bags):
            # Your Code Here
            self.indices_list.append(np.random.choice(data_length, size=data_length))
        
    def fit(self, model_constructor, data, target):
        '''
        Fit model on every bag.
        Model constructor with no parameters (and with no ()) is passed to this function.
        
        example:
        
        bagging_regressor = SimplifiedBaggingRegressor(num_bags=10, oob=True)
        bagging_regressor.fit(LinearRegression, X, y)
        '''
        self.data = None
        self.target = None
        self._generate_splits(data)
        assert len(set(list(map(len, self.indices_list)))) == 1, 'All bags should be of the same length!'
        assert list(map(len, self.indices_list))[0] == len(data), 'All bags should contain `len(data)` number of elements!'
        self.models_list = []
        for bag in range(self.num_bags):
            model = model_constructor()
            data_bag, target_bag = data[self.indices_list[bag]], target[self.indices_list[bag]]  # Your Code Here
            self.models_list.append(model.fit(data_bag, target_bag)) # store fitted models here
        if self.oob:
            self.data = data
            self.target = target
        
    def predict(self, data):
        '''
        Get average prediction for every object from passed dataset
        '''
        # Your code here
        preds_list = []
        for model in self.models_list:
            preds_list.append(model.predict(data))
        return sum(preds_list) / len(preds_list)
    
    def _get_oob_predictions_from_every_model(self):
        '''
        Generates list of lists, where list i contains predictions for self.data[i] object
        from all models, which have not seen this object during training phase
        '''
        list_of_predictions_lists = [[] for _ in range(len(self.data))]
        # Your Code Here
        for i in range(len(self.data)):
            for indices, model in zip(self.indices_list, self.models_list):
                if i not in indices:
                    list_of_predictions_lists[i].append(model.predict(self.data[i].reshape(1,-1)))
        self.list_of_predictions_lists = np.array(list_of_predictions_lists, dtype=object)
    
    def _get_averaged_oob_predictions(self):
        '''
        Compute average prediction for every object from training set.
        If object has been used in all bags on training phase, return None instead of prediction
        '''
        self._get_oob_predictions_from_every_model()
        self.oob_predictions = [np.mean(preds) for preds in self.list_of_predictions_lists]  # Your Code Here
        
    def OOB_score(self):
        '''
        Compute mean square error for all objects, which have at least one prediction
        '''
        self._get_averaged_oob_predictions()
        diff = self.target - self.oob_predictions
        diff = diff[~np.isnan(diff)]
        return np.mean(np.square(diff))  # Your Code Here

### Local tests:

In [33]:
from sklearn.linear_model import LinearRegression
from tqdm import tqdm

#### Simple tests:

In [57]:
for _ in tqdm(range(100)):
    X = np.random.randn(2000, 10)
    y = np.mean(X, axis=1)
    bagging_regressor = SimplifiedBaggingRegressor(num_bags=10, oob=True)
    bagging_regressor.fit(LinearRegression, X, y)
    predictions = bagging_regressor.predict(X)
    assert np.mean((predictions - y)**2) < 1e-6, 'Linear dependency should be fitted with almost zero error!'
    assert bagging_regressor.oob, 'OOB feature must be turned on'
    oob_score = bagging_regressor.OOB_score()
    print(oob_score)
    assert oob_score < 1e-6, 'OOB error for linear dependency should be also close to zero!'
    assert abs(
        np.mean(
            list(map(len, bagging_regressor.list_of_predictions_lists))
        ) / bagging_regressor.num_bags - 1/np.exp(1)) < 0.1, 'Probability of missing a bag should be close to theoretical value!'
    
print('Simple tests done!')

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  1%|          | 1/100 [00:00<01:08,  1.45it/s]

5.844748230903955e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  2%|▏         | 2/100 [00:01<01:06,  1.47it/s]

1.0307589170219728e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  3%|▎         | 3/100 [00:02<01:05,  1.47it/s]

3.056134383400811e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  4%|▍         | 4/100 [00:02<01:04,  1.48it/s]

1.7379787667383027e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  5%|▌         | 5/100 [00:03<01:03,  1.49it/s]

5.324095566532521e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  6%|▌         | 6/100 [00:04<01:02,  1.51it/s]

7.359691600665538e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  7%|▋         | 7/100 [00:04<01:01,  1.51it/s]

1.8108921745834769e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  8%|▊         | 8/100 [00:05<01:00,  1.52it/s]

6.97038161765862e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  9%|▉         | 9/100 [00:05<00:59,  1.52it/s]

6.49760801211635e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 10%|█         | 10/100 [00:06<00:59,  1.52it/s]

1.2091068996831164e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 11%|█         | 11/100 [00:07<00:58,  1.53it/s]

8.947511385666275e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 12%|█▏        | 12/100 [00:07<00:57,  1.53it/s]

2.3989585800095597e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 13%|█▎        | 13/100 [00:08<00:57,  1.52it/s]

1.2393088017311886e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 14%|█▍        | 14/100 [00:09<00:57,  1.49it/s]

1.5047004540529958e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 15%|█▌        | 15/100 [00:09<00:56,  1.49it/s]

5.571574785536839e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 16%|█▌        | 16/100 [00:10<00:57,  1.46it/s]

5.390655122653469e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 17%|█▋        | 17/100 [00:11<00:56,  1.47it/s]

1.091983267502134e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 18%|█▊        | 18/100 [00:12<00:55,  1.49it/s]

3.7219996948970705e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 19%|█▉        | 19/100 [00:12<00:54,  1.49it/s]

6.556960525474232e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 20%|██        | 20/100 [00:13<00:53,  1.50it/s]

4.040747252031724e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 21%|██        | 21/100 [00:14<00:53,  1.49it/s]

3.63842273023438e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 22%|██▏       | 22/100 [00:14<00:53,  1.45it/s]

7.150810434525528e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 23%|██▎       | 23/100 [00:15<00:52,  1.46it/s]

7.851847905509214e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 24%|██▍       | 24/100 [00:16<00:51,  1.47it/s]

9.829845222567404e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 25%|██▌       | 25/100 [00:16<00:50,  1.49it/s]

4.636728524637244e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 26%|██▌       | 26/100 [00:17<00:50,  1.47it/s]

6.474569174281196e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 27%|██▋       | 27/100 [00:18<00:49,  1.46it/s]

4.216043254135423e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 28%|██▊       | 28/100 [00:18<00:49,  1.47it/s]

6.121949337036714e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 29%|██▉       | 29/100 [00:19<00:48,  1.47it/s]

5.024019307358675e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 30%|███       | 30/100 [00:20<00:47,  1.48it/s]

2.673358296706369e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 31%|███       | 31/100 [00:20<00:46,  1.48it/s]

6.438789368932723e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 32%|███▏      | 32/100 [00:21<00:45,  1.50it/s]

1.0971073783452495e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 33%|███▎      | 33/100 [00:22<00:44,  1.49it/s]

1.1117589914833325e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 34%|███▍      | 34/100 [00:22<00:44,  1.50it/s]

8.78289061704495e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 35%|███▌      | 35/100 [00:23<00:43,  1.50it/s]

1.1426805156208434e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 36%|███▌      | 36/100 [00:24<00:42,  1.51it/s]

9.168705246019087e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 37%|███▋      | 37/100 [00:24<00:43,  1.45it/s]

4.541583913845464e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 38%|███▊      | 38/100 [00:25<00:43,  1.43it/s]

1.306170586742569e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 39%|███▉      | 39/100 [00:26<00:42,  1.43it/s]

3.7454766904204567e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 40%|████      | 40/100 [00:26<00:41,  1.46it/s]

4.060997022950753e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 41%|████      | 41/100 [00:27<00:39,  1.49it/s]

8.433715488546833e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 42%|████▏     | 42/100 [00:28<00:38,  1.50it/s]

1.4841121813450396e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 43%|████▎     | 43/100 [00:28<00:37,  1.51it/s]

2.2416255358655844e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 44%|████▍     | 44/100 [00:29<00:36,  1.52it/s]

4.3896879810597403e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 45%|████▌     | 45/100 [00:30<00:35,  1.53it/s]

1.6783506663271074e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 46%|████▌     | 46/100 [00:30<00:35,  1.52it/s]

5.891797515947737e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 47%|████▋     | 47/100 [00:31<00:35,  1.50it/s]

3.217038051405693e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 48%|████▊     | 48/100 [00:32<00:34,  1.50it/s]

5.967523517455475e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 49%|████▉     | 49/100 [00:32<00:33,  1.51it/s]

7.069420760350106e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 50%|█████     | 50/100 [00:33<00:32,  1.52it/s]

1.0922730673447666e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 51%|█████     | 51/100 [00:34<00:31,  1.53it/s]

6.759256116119573e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 52%|█████▏    | 52/100 [00:34<00:31,  1.53it/s]

1.773844137366504e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 53%|█████▎    | 53/100 [00:35<00:30,  1.53it/s]

5.077666283596917e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 54%|█████▍    | 54/100 [00:36<00:29,  1.53it/s]

4.71567029643058e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 55%|█████▌    | 55/100 [00:36<00:29,  1.52it/s]

5.425371366299545e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 56%|█████▌    | 56/100 [00:37<00:28,  1.52it/s]

1.2809544107562255e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 57%|█████▋    | 57/100 [00:38<00:28,  1.52it/s]

3.83015078923976e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 58%|█████▊    | 58/100 [00:38<00:27,  1.51it/s]

3.872626408973026e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 59%|█████▉    | 59/100 [00:39<00:27,  1.51it/s]

1.4259176368096631e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 60%|██████    | 60/100 [00:40<00:26,  1.50it/s]

3.448076377838284e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 61%|██████    | 61/100 [00:40<00:25,  1.51it/s]

8.725831067432837e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 62%|██████▏   | 62/100 [00:41<00:24,  1.53it/s]

4.95278722712243e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 63%|██████▎   | 63/100 [00:42<00:24,  1.52it/s]

1.3265351427562433e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 64%|██████▍   | 64/100 [00:42<00:23,  1.53it/s]

1.0522885161213495e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 65%|██████▌   | 65/100 [00:43<00:22,  1.53it/s]

5.312929100054783e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 66%|██████▌   | 66/100 [00:44<00:22,  1.51it/s]

6.334313464087076e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 67%|██████▋   | 67/100 [00:44<00:22,  1.49it/s]

5.384788759382419e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 68%|██████▊   | 68/100 [00:45<00:21,  1.47it/s]

5.552888540133227e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 69%|██████▉   | 69/100 [00:46<00:20,  1.49it/s]

9.177402052388996e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 70%|███████   | 70/100 [00:46<00:20,  1.49it/s]

5.014863472175339e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 71%|███████   | 71/100 [00:47<00:19,  1.50it/s]

1.2709826179642064e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 72%|███████▏  | 72/100 [00:48<00:18,  1.51it/s]

1.0292644132534558e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 73%|███████▎  | 73/100 [00:48<00:17,  1.52it/s]

2.781856263234612e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 74%|███████▍  | 74/100 [00:49<00:17,  1.53it/s]

6.689899340147804e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 75%|███████▌  | 75/100 [00:50<00:16,  1.52it/s]

6.168045275423414e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 76%|███████▌  | 76/100 [00:50<00:15,  1.50it/s]

1.363442139592408e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 77%|███████▋  | 77/100 [00:51<00:15,  1.50it/s]

1.4157395073188795e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 78%|███████▊  | 78/100 [00:52<00:14,  1.50it/s]

5.019151145276817e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 79%|███████▉  | 79/100 [00:52<00:13,  1.50it/s]

8.189663677767717e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 80%|████████  | 80/100 [00:53<00:13,  1.49it/s]

5.126847700804536e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 81%|████████  | 81/100 [00:54<00:12,  1.48it/s]

1.7629890479620037e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 82%|████████▏ | 82/100 [00:54<00:12,  1.44it/s]

1.1180982793603845e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 83%|████████▎ | 83/100 [00:55<00:11,  1.43it/s]

5.858021845241591e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 84%|████████▍ | 84/100 [00:56<00:11,  1.43it/s]

2.791580143061041e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 85%|████████▌ | 85/100 [00:56<00:10,  1.43it/s]

3.739590354470222e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 86%|████████▌ | 86/100 [00:57<00:09,  1.44it/s]

3.148141263140613e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 87%|████████▋ | 87/100 [00:58<00:08,  1.45it/s]

7.189645639491486e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 88%|████████▊ | 88/100 [00:58<00:08,  1.46it/s]

9.766122669528067e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 89%|████████▉ | 89/100 [00:59<00:07,  1.47it/s]

5.089965162097375e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 90%|█████████ | 90/100 [01:00<00:06,  1.48it/s]

2.107567565356267e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 91%|█████████ | 91/100 [01:00<00:06,  1.49it/s]

7.949546471799247e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 92%|█████████▏| 92/100 [01:01<00:05,  1.50it/s]

7.052391934421971e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 93%|█████████▎| 93/100 [01:02<00:04,  1.52it/s]

6.928356967742788e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 94%|█████████▍| 94/100 [01:02<00:03,  1.52it/s]

4.6676515261049723e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 95%|█████████▌| 95/100 [01:03<00:03,  1.51it/s]

6.81713975166424e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 96%|█████████▌| 96/100 [01:04<00:02,  1.51it/s]

6.07944449568701e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 97%|█████████▋| 97/100 [01:04<00:02,  1.49it/s]

5.000147021474016e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 98%|█████████▊| 98/100 [01:05<00:01,  1.49it/s]

4.785132475975383e-32


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
 99%|█████████▉| 99/100 [01:06<00:00,  1.51it/s]

1.7767438983281645e-31


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 100/100 [01:06<00:00,  1.49it/s]

7.06760776127293e-32
Simple tests done!





#### Medium tests

In [58]:
for _ in tqdm(range(10)):
    X = np.random.randn(200, 150)
    y = np.random.randn(len(X))
    bagging_regressor = SimplifiedBaggingRegressor(num_bags=20, oob=True)
    bagging_regressor.fit(LinearRegression, X, y)
    predictions = bagging_regressor.predict(X)
    average_train_error = np.mean((predictions - y)**2)
    assert bagging_regressor.oob, 'OOB feature must be turned on'
    oob_score = bagging_regressor.OOB_score()
    assert oob_score > average_train_error, 'OOB error must be higher than train error due to overfitting!'
    assert abs(
        np.mean(
            list(map(len, bagging_regressor.list_of_predictions_lists))
        ) / bagging_regressor.num_bags - 1/np.exp(1)) < 0.1, 'Probability of missing a bag should be close to theoretical value!'
    
print('Medium tests done!')

100%|██████████| 10/10 [00:05<00:00,  1.94it/s]

Medium tests done!





#### Complex tests:

In [59]:
for _ in tqdm(range(10)):
    X = np.random.randn(2000, 15)
    y = np.random.randn(len(X))
    bagging_regressor = SimplifiedBaggingRegressor(num_bags=100, oob=True)
    bagging_regressor.fit(LinearRegression, X, y)
    predictions = bagging_regressor.predict(X)
    oob_score = bagging_regressor.OOB_score()
    assert abs(
        np.mean(
            list(map(len, bagging_regressor.list_of_predictions_lists))
        ) / bagging_regressor.num_bags - 1/np.exp(1)) < 1e-2, 'Probability of missing a bag should be close to theoretical value!'
    
print('Complex tests done!')

100%|██████████| 10/10 [01:04<00:00,  6.49s/it]

Complex tests done!





In [60]:
np.mean(
            list(map(len, bagging_regressor.list_of_predictions_lists))
        ) / bagging_regressor.num_bags - 1/np.exp(1)

0.0004505588285576567

Great job! Please, save `SimplifiedBaggingRegressor` to  `bagging.py` and submit your solution to the grading system!