In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

import kraft

## error_axes

In [None]:
# Run kraft.dataframe.error_axes on frames whose two axes share the same
# labels. The label sets include a negative number, None, NaN (once and
# repeated), an empty string, and a duplicated string. An AssertionError or
# TypeError coming out of the call is printed instead of propagating.
for labels in (
    (-1, 0),
    (1, None),
    (2, np.nan),
    (3, np.nan, np.nan),
    ("a", "b", ""),
    ("c", "c", "d"),
):
    print("=" * 80)

    dataframe = pd.DataFrame(0, index=labels, columns=labels).rename_axis(
        index="Axis 0", columns="Axis 1"
    )
    print(dataframe)

    try:
        kraft.dataframe.error_axes(dataframe)
    except (AssertionError, TypeError) as error:
        print(error)

## drop_axis_label

In [None]:
# Random 10x10 frame of the values 0..3 with missing entries punched in.
# The values are cast to float before any missing value is assigned: writing
# a missing value into an integer column relies on implicit upcasting, which
# pandas has deprecated for setitem operations. Every column receives a
# missing value below, so the resulting dtypes are float either way.
dataframe = pd.DataFrame(
    np.random.randint(0, high=4, size=(10, 10)).astype(float)
)
dataframe.index.name = "Axis 0"
dataframe.columns.name = "Axis 1"

# Row 0: entirely missing.
dataframe.iloc[0, :] = np.nan
# Row 1: first five columns missing.
dataframe.iloc[1, :5] = np.nan
# Last column: entirely missing.
dataframe.iloc[:, -1] = np.nan
# Second-to-last column: last five rows missing.
dataframe.iloc[-5:, -2] = np.nan

print(dataframe)

# Call drop_axis_label along each axis for every threshold combination in
# which at least one of the two thresholds is set (the None/None pair is
# excluded), printing the arguments followed by the returned value.
threshold_pairs = [
    (good_value, good_unique_value)
    for good_value in (None, 4, 5)
    for good_unique_value in (None, 4)
    if good_value is not None or good_unique_value is not None
]

for axis in (0, 1):
    for min_good_value, min_good_unique_value in threshold_pairs:
        print("=" * 80)
        print(axis, min_good_value, min_good_unique_value)
        print(
            kraft.dataframe.drop_axis_label(
                dataframe,
                axis,
                min_good_value=min_good_value,
                min_good_unique_value=min_good_unique_value,
            )
        )

In [None]:
# 8x8 frame filled with integer zeros, with both axes named.
dataframe = pd.DataFrame(np.zeros((8, 8), dtype=int)).rename_axis(
    index="Axis 0", columns="Axis 1"
)
print(dataframe)

# Call drop_axis_label along each axis with both thresholds set to 0 and
# print the axis followed by the returned value.
for axis in (0, 1):
    print("=" * 80, axis, sep="\n")
    print(
        kraft.dataframe.drop_axis_label(
            dataframe,
            axis,
            min_good_value=0,
            min_good_unique_value=0,
        )
    )

## drop_axes_label

In [None]:
# Random 10x10 frame of the values 0..3 whose strictly-lower triangle is
# missing: row i has its first i entries blanked.
# The values are cast to float up front: writing a missing value into an
# integer column relies on implicit upcasting, which pandas has deprecated
# for setitem operations.
dataframe = pd.DataFrame(
    np.random.randint(0, high=4, size=(10, 10)).astype(float)
)
dataframe.index.name = "Axis 0"
dataframe.columns.name = "Axis 1"

for i in range(dataframe.shape[0]):
    # For i == 0 the slice is empty, so row 0 stays fully populated.
    dataframe.iloc[i, :i] = np.nan

print(dataframe)

# Exercise drop_axes_label (the function this section is about) for every
# threshold combination in which at least one threshold is set, passing
# each axis value in turn. The arguments are printed before the result.
# NOTE(review): the positional `axis` argument mirrors the other
# drop_axes_label call in this notebook; confirm against the function's
# signature.
for min_good_value in (None, 4, 5):

    for min_good_unique_value in (None, 3):

        # With neither threshold set there is nothing to exercise.
        if min_good_value is None and min_good_unique_value is None:

            continue

        for axis in (0, 1):

            print("=" * 80)

            print(axis, min_good_value, min_good_unique_value)

            print(
                kraft.dataframe.drop_axes_label(
                    dataframe,
                    axis,
                    min_good_value=min_good_value,
                    min_good_unique_value=min_good_unique_value,
                )
            )

In [None]:
# All-zero 8x8 integer frame with named axes.
dataframe = pd.DataFrame(np.zeros(shape=(8, 8), dtype=int))
dataframe.index.name, dataframe.columns.name = "Axis 0", "Axis 1"
print(dataframe)

# Call drop_axes_label with each axis value and both thresholds set to 0,
# printing the axis followed by the returned value.
for axis in (0, 1):
    print("=" * 80, axis, sep="\n")
    print(
        kraft.dataframe.drop_axes_label(
            dataframe,
            axis,
            min_good_value=0,
            min_good_unique_value=0,
        )
    )

## sample

In [None]:
# 10x10 frame filled with integer zeros, with both axes named.
dataframe = pd.DataFrame(np.zeros((10, 10), dtype=int)).rename_axis(
    index="Axis 0", columns="Axis 1"
)
print(dataframe)

# Per-axis probability vectors proportional to each label's position, so the
# label at position 0 gets probability 0. They depend only on the frame's
# shape, so they are computed once instead of on every loop iteration.
axis_0_p = np.arange(dataframe.shape[0])
axis_0_p = axis_0_p / axis_0_p.sum()

axis_1_p = np.arange(dataframe.shape[1])
axis_1_p = axis_1_p / axis_1_p.sum()

# Sample with a count on one axis only, a count on both axes, and a
# fractional value on both axes.
# NOTE(review): the keyword dictionaries are presumably forwarded to a
# random-choice routine for each axis; confirm in kraft.dataframe.sample.
for axis_0_n, axis_1_n in ((1, None), (None, 1), (1, 1), (0.5, 0.5)):

    print("=" * 80)

    print(
        kraft.dataframe.sample(
            dataframe,
            axis_0_n,
            axis_1_n,
            axis_0_choice_keyword_arguments=dict(replace=False, p=axis_0_p),
            axis_1_choice_keyword_arguments=dict(replace=False, p=axis_1_p),
        )
    )

## sync_axis

In [None]:
# Three constant-valued 3x3 frames (filled with 0, 1 and 2) whose row and
# column labels overlap only partially; the labels 8 and "k" are present in
# all three.
dataframe_0, dataframe_1, dataframe_2 = (
    pd.DataFrame(fill, index=index, columns=columns)
    for fill, index, columns in (
        (0, (-1, 0, 8), ("a", "b", "k")),
        (1, (0, 1, 8), ("b", "c", "k")),
        (2, (1, 2, 8), ("c", "d", "k")),
    )
)

# Name both axes of each frame in place, then show it.
for dataframe in (dataframe_0, dataframe_1, dataframe_2):
    print("=" * 80)
    dataframe.index.name, dataframe.columns.name = "Axis 0", "Axis 1"
    print(dataframe)

# Run sync_axis over the three frames for each (axis, function) pair and
# print every frame it yields.
for axis, function in (
    (0, "union"),
    (0, "intersection"),
    (1, "union"),
    (1, "intersection"),
):
    print("=" * 80)
    print(axis, function)

    for dataframe in kraft.dataframe.sync_axis(
        (dataframe_0, dataframe_1, dataframe_2), axis, function
    ):
        print(dataframe)

## normalize

In [None]:
# 3x4 float frame: the first row and the first column are entirely NaN, and
# the remaining cells hold two short numeric rows.
dataframe = pd.DataFrame(
    np.array(
        [
            [np.nan, np.nan, np.nan, np.nan],
            [np.nan, 0, 1, 2],
            [np.nan, 0.1, 10, 20],
        ]
    )
).rename_axis(index="Axis 0", columns="Axis 1")
print(dataframe)

# Apply the "rank" normalization with rank_method="dense" for axis None, 0
# and 1, printing the axis followed by the returned value.
for axis in (None, 0, 1):
    print("=" * 80, axis, sep="\n")
    print(
        kraft.dataframe.normalize(
            dataframe, axis, "rank", rank_method="dense"
        )
    )

## summarize

In [None]:
# Summarize a small (3x3) and a large (1000x1000) frame of normally
# distributed values; every second row of every fourth column is NaN.
for size in ((3, 3), (1000, 1000)):
    matrix = np.random.normal(size=size)
    matrix[::2, ::4] = np.nan

    dataframe = pd.DataFrame(
        matrix,
        index=pd.Index(
            [f"Label {i}" for i in range(matrix.shape[0])], name="Axis 0"
        ),
        columns=pd.Index(
            [f"Label {i}" for i in range(matrix.shape[1])], name="Axis 1"
        ),
    )

    kraft.dataframe.summarize(dataframe)

## pivot

In [None]:
# Input records for pivot, one (axis-0 label, axis-1 label, value) triple per
# row. ("a0", "b0") occurs twice with different numbers, and ("a0", "b3") and
# ("a1", "b3") each occur a second time with a missing value (None / NaN).
array = np.array(
    [
        ["a0", "b0", 0],
        ["a0", "b0", -1],
        ["a0", "b1", 1],
        ["a0", "b2", 2],
        ["a0", "b3", 3],
        ["a0", "b3", None],
        ["a1", "b0", 4],
        ["a1", "b1", 5],
        ["a1", "b2", 6],
        ["a1", "b3", 7],
        ["a1", "b3", np.nan],
        ["a2", "b0", 8],
        ["a2", "b1", 9],
        ["a2", "b2", 10],
        ["a2", "b3", 11],
    ]
)


def numpy_max(number_0, number_1):
    """Return the larger of the two numbers using NumPy's max reduction."""

    pair = np.asarray((number_0, number_1))

    return pair.max()


# Pivot the three columns of `array` with no combining function, with the
# built-in max, and with the NumPy-based numpy_max, printing the function
# followed by the returned value.
for function in (None, max, numpy_max):
    print("=" * 80, function, sep="\n")
    print(
        kraft.dataframe.pivot(
            array[:, 0],
            array[:, 1],
            array[:, 2].astype(float),
            function=function,
        )
    )