In [1]:
#https://www.thekerneltrip.com/machine/learning/computational-complexity-learning-algorithms/

In [2]:
import numpy as np
import pandas as pd
import time
from sklearn.linear_model import LinearRegression
import math


class ComplexityEvaluator:

    def __init__(self, nrow_samples, ncol_samples):
        self._nrow_samples = nrow_samples
        self._ncol_samples = ncol_samples

    def _time_samples(self, model, random_data_generator):
        rows_list = []
        for nrow in self._nrow_samples:
            for ncol in self._ncol_samples:
                train, labels = random_data_generator(nrow, ncol)

                start_time = time.time()
                model.fit(train, labels)
                elapsed_time = time.time() - start_time

                result = {"N": nrow, "P": ncol, "Time": elapsed_time}
                rows_list.append(result)

        return rows_list

    def Run(self, model, random_data_generator):
        data = pd.DataFrame(self._time_samples(model, random_data_generator))
        print(data)
        data = data.applymap(math.log)
        linear_model = LinearRegression(fit_intercept=True)
        linear_model.fit(data[["N", "P"]], data[["Time"]])
        return linear_model.coef_

In [3]:
if __name__ == "__main__":
    class TestModel:

        def __init__(self):
            pass

        def fit(self, x, y):
            time.sleep(x.shape[0] / 1000.)

    def random_data_generator(n, p):
        return np.random.rand(n, p), np.random.rand(n, 1)

    model = TestModel()

    complexity_evaluator = ComplexityEvaluator(
            [200, 500, 1000, 2000, 3000], [1,5,10])

    res = complexity_evaluator.Run(model, random_data_generator)

    print(res)

       N   P      Time
0    200   1  0.212075
1    200   5  0.209584
2    200  10  0.201247
3    500   1  0.505614
4    500   5  0.508529
5    500  10  0.502159
6   1000   1  1.004710
7   1000   5  1.012068
8   1000  10  1.018260
9   2000   1  2.003834
10  2000   5  2.010090
11  2000  10  2.020752
12  3000   1  3.000833
13  3000   5  3.008579
14  3000  10  3.010802
[[ 0.9885217  -0.00229185]]


In [4]:
pip install ComplexityEvaluator

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement ComplexityEvaluator (from versions: none)
ERROR: No matching distribution found for ComplexityEvaluator


In [5]:
import numpy as np
import ComplexityEvaluator
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.svm import SVR, SVC
from sklearn.linear_model import LogisticRegression


def random_data_regression(n, p):
    return np.random.rand(n, p), np.random.rand(n)


def random_data_classification(n, p):
    return np.random.rand(n, p), np.random.binomial(1, 0.5, n)


regression_models = [RandomForestRegressor(),
                     ExtraTreesRegressor(),
                     AdaBoostRegressor(),
                     LinearRegression(),
                     SVR()]

classification_models = [RandomForestClassifier(),
                         ExtraTreesClassifier(),
                         AdaBoostClassifier(),
                         SVC(),
                         LogisticRegression(),
                         LogisticRegression(solver='sag')]

names = ["RandomForestRegressor",
         "ExtraTreesRegressor",
         "AdaBoostRegressor",
         "LinearRegression",
         "SVR",
         "RandomForestClassifier",
         "ExtraTreesClassifier",
         "AdaBoostClassifier",
         "SVC",
         "LogisticRegression(solver=liblinear)",
         "LogisticRegression(solver=sag)"]

complexity_evaluator = ComplexityEvaluator.ComplexityEvaluator(
    [500, 1000, 2000, 5000, 10000, 15000, 20000],
    [5, 10, 20, 50, 100, 200])

i = 0
for model in regression_models:
    res = complexity_evaluator.Run(model, random_data_regression)[0]
    print(names[i] + ' | ' + str(round(res[0], 2)) +
          ' | ' + str(round(res[1], 2)))
    i = i + 1

for model in classification_models:
    res = complexity_evaluator.Run(model, random_data_classification)[0]
    print(names[i] + ' | ' + str(round(res[0], 2)) +
          ' | ' + str(round(res[1], 2)))
    i = i + 1


ModuleNotFoundError: No module named 'ComplexityEvaluator'