In [1]:
from core.datasets import Datasets
from core.analysis import Analyst
from core.model import Method

import pandas as pd
import numpy as np

In [2]:
# Build the first demo dataset (9 rows x 4 columns of floats in [0, 1)) as a
# DataFrame. A locally seeded generator is used so that Restart & Run All
# reproduces the same values instead of drawing from NumPy's unseeded global state.
rng = np.random.default_rng(0)
data_1 = pd.DataFrame(rng.random(36).reshape(9, 4), columns=['A', 'B', 'C', 'D'])
data_1

Unnamed: 0,A,B,C,D
0,0.989227,0.99468,0.111318,0.457317
1,0.958126,0.091807,0.185156,0.652838
2,0.4759,0.578298,0.03888,0.513681
3,0.278391,0.418653,0.243745,0.265278
4,0.233014,0.117707,0.338359,0.681781
5,0.214279,0.230793,0.936803,0.71517
6,0.970462,0.809302,0.441916,0.398462
7,0.469319,0.62425,0.493615,0.478274
8,0.786126,0.472068,0.659113,0.262878


In [3]:
# Build the integer multiplier dataset (9 rows x 4 columns, values 0..3).
# A locally seeded generator keeps the values identical on every fresh run.
rng = np.random.default_rng(1)
data_2 = pd.DataFrame(rng.integers(4, size=(9, 4)), columns=['A', 'B', 'C', 'D'])
data_2

Unnamed: 0,A,B,C,D
0,2,0,3,0
1,1,0,3,2
2,2,3,1,2
3,2,0,2,2
4,1,0,3,1
5,1,2,3,2
6,1,0,1,1
7,1,3,2,2
8,3,1,3,3


In [4]:
# Create the dataset collection; data will be saved under the 'test' folder.
test_data = Datasets(name='test')

# Register the float frame as the item named "basic" and request saving.
test_data.add_item_from_dataframe(
    data=data_1,
    name="basic",
    description="A random generated dataset for multiply.",
    save=True,
)

# Register the integer frame as the item named 'multiplier' in category 'source'.
test_data.add_item_from_dataframe(
    data=data_2,
    name='multiplier',
    category='source',
    description="A random generated multiplier.",
)

# Trailing semicolon suppresses the cell's return-value display.
test_data.report();

+------------+--------------------------------+---------------------+----------+
|    Name    |          Description           |         Path        | Category |
+------------+--------------------------------+---------------------+----------+
|   basic    | A random generated dataset for |    test/basic.csv   |  assets  |
|            |           multiply.            |                     |          |
| multiplier | A random generated multiplier. | test/multiplier.csv |  source  |
+------------+--------------------------------+---------------------+----------+


In [5]:
def multiply_one_by_one(dataset):
    """Multiply the 'basic' item element-wise by the 'multiplier' item.

    Parameters
    ----------
    dataset : object
        Anything exposing ``get_item(name)`` whose result has an ``obj``
        attribute holding the data (the notebook passes a ``Datasets``).

    Returns
    -------
    The element-wise product ``data * multiplier``.

    Raises
    ------
    ValueError
        If the two objects differ in shape, or if both carry column labels
        and the label sets differ.
    """
    # Retrieve data from dataset
    data = dataset.get_item('basic').obj
    multiplier = dataset.get_item('multiplier').obj

    if data.shape != multiplier.shape:
        raise ValueError("Not suitable multiplier")

    # Feature check: pandas aligns on column labels when multiplying, so two
    # frames with equal shape but different labels would silently yield NaN
    # columns. Only applied when both operands actually expose `columns`.
    data_columns = getattr(data, 'columns', None)
    multiplier_columns = getattr(multiplier, 'columns', None)
    if data_columns is not None and multiplier_columns is not None:
        if set(data_columns) != set(multiplier_columns):
            raise ValueError("Not suitable multiplier")

    return data * multiplier

def find_extreme_multiplier(dataset, how):
    """Scale the 'basic' item by a per-column statistic of the 'multiplier' item.

    Parameters
    ----------
    dataset : object
        Anything exposing ``get_item(name)`` whose result has an ``obj``
        attribute holding the data (the notebook passes a ``Datasets``).
    how : str
        Name of the NumPy reduction to apply along axis 0 of the multiplier:
        one of ``'max'``, ``'min'``, ``'mean'`` or ``'median'``.

    Returns
    -------
    ``data * statistic`` where ``statistic`` is the reduced multiplier.

    Raises
    ------
    ValueError
        If ``how`` is not one of the supported statistic names.
    """
    # Retrieve data from dataset
    data = dataset.get_item('basic').obj
    multiplier = dataset.get_item('multiplier').obj

    if how not in ('max', 'min', 'mean', 'median'):
        raise ValueError("Not correct statistic")

    found_multiplier = getattr(np, how)(multiplier, axis=0)

    # np.max/np.min/np.mean hand a DataFrame back as a label-indexed Series,
    # while np.median returns a bare ndarray. Re-attach the column labels so
    # every statistic is aligned with `data` by label rather than by position.
    columns = getattr(multiplier, 'columns', None)
    if columns is not None and not isinstance(found_multiplier, pd.Series):
        found_multiplier = pd.Series(found_multiplier, index=columns)

    return data * found_multiplier

# Method collection named 'multiply' with unit_base 'test'.
methods = Method(unit_base='test', name='multiply')

# No explicit name is passed here: the default name is just the function name.
methods.add_function_item(
    description='Multiply every element of the data and corresponding multiplier.',
    function=multiply_one_by_one,
)

# Registered under a custom name, with a description for its 'how' parameter.
methods.add_function_item(
    function=find_extreme_multiplier,
    name='statistical_multiply',
    parameters={'how': 'Choose a statistical way to sort multiplier data.'},
    description='Find a statistic of multiplier data and apply multiply.',
)

# Trailing semicolon suppresses the cell's return-value display.
methods.report();

+----------------------+--------------------------------+-------------------------+----------+
|         Name         |          Description           |           Func          |  Params  |
+----------------------+--------------------------------+-------------------------+----------+
| multiply_one_by_one  | Multiply every element of the  |   multiply_one_by_one   |          |
|                      |     data and corresponding     |                         |          |
|                      |          multiplier.           |                         |          |
| statistical_multiply | Find a statistic of multiplier | find_extreme_multiplier | (1) how. |
|                      |    data and apply multiply.    |                         |          |
+----------------------+--------------------------------+-------------------------+----------+


In [6]:
# Display the parameter descriptions registered for the 'statistical_multiply' item.
methods.statistical_multiply.parameters

{'how': 'Choose a statistical way to sort multiplier data.'}

In [7]:
# Two analysts with unit_base 'test'; only `analysis` receives items in this cell.
analysis = Analyst(unit_base='test', name='multiply')
compare = Analyst(unit_base='test', name='compare_multiply')

analysis.add_analyst_item(
    name='simple_multiply',
    description='Simplest approach: multiply one by one.',
    data_item=test_data,  # Using Datasets object as input here.
    method_item=methods.multiply_one_by_one  # Using MethodItem object as input.
)

# TODO: parameters are varied by iterating for now; expected to be driven by a function later.
for how in ('max', 'min', 'mean', 'median'):
    # Look the method item up once per iteration and pass that same object on
    # (previously this local was assigned but never used).
    method = methods.statistical_multiply
    analysis.add_analyst_item(
        name=f'{how}_multiply',
        description=f'Statistical multiply: {how}.',
        data_item=test_data,  # Using Datasets object as input here.
        method_item=method,  # Using MethodItem object as input.
        parameters={'how': how}
    )

analysis.dump_metadata()  # save metadata
analysis.report();

+-----------------+--------------------------------+------+----------------------+--------+
|       Name      |          Description           | Data |        Method        | Check? |
+-----------------+--------------------------------+------+----------------------+--------+
| simple_multiply |  Simplest approach: multiply   | test | multiply_one_by_one  |  None  |
|                 |          one by one.           |      |                      |        |
|   max_multiply  |   Statistical multiply: max.   | test | statistical_multiply |  None  |
|   min_multiply  |   Statistical multiply: min.   | test | statistical_multiply |  None  |
|  mean_multiply  |  Statistical multiply: mean.   | test | statistical_multiply |  None  |
| median_multiply | Statistical multiply: median.  | test | statistical_multiply |  None  |
+-----------------+--------------------------------+------+----------------------+--------+


In [8]:
# Presumably runs every registered analyst item (confirm against core.analysis);
# the displayed return value is the list of item names.
analysis.do_all()

['simple_multiply',
 'max_multiply',
 'min_multiply',
 'mean_multiply',
 'median_multiply']

In [9]:
# Display the `results` attribute of the 'max_multiply' analyst item.
analysis.max_multiply.results

Unnamed: 0,A,B,C,D
0,2.96768,2.984041,0.333954,1.371951
1,2.874378,0.27542,0.555469,1.958515
2,1.427701,1.734894,0.116641,1.541042
3,0.835174,1.255959,0.731234,0.795834
4,0.699042,0.353121,1.015076,2.045342
5,0.642836,0.692379,2.810409,2.14551
6,2.911387,2.427905,1.325747,1.195387
7,1.407957,1.872749,1.480846,1.434823
8,2.358378,1.416204,1.977338,0.788635
