In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

import kraft

## error_axes

In [None]:
# Exercise error_axes on square, all-zero frames whose two axes share one label set.
# The label sets cover plain integers, None, NaN (once and repeated), an empty
# string, and a duplicated string. AssertionError / TypeError messages are printed
# so that one failing label set does not stop the remaining ones from running.
label_sets = (
    (-1, 0),
    (1, None),
    (2, np.nan),
    (3, np.nan, np.nan),
    ("a", "b", ""),
    ("c", "c", "d"),
)

for axis_labels in label_sets:
    print("=" * 80)
    dataframe = pd.DataFrame(0, index=axis_labels, columns=axis_labels)
    print(dataframe)
    try:
        kraft.dataframe.error_axes(dataframe)
    except (AssertionError, TypeError) as error:
        print(error)

## sync_axis

In [None]:
# Three constant frames whose row and column labels only partly overlap:
# 8 is the only row label and "k" the only column label present in all three.
dataframe0 = pd.DataFrame(0, index=(-1, 0, 8), columns=("a", "b", "k"))
dataframe1 = pd.DataFrame(1, index=(0, 1, 8), columns=("b", "c", "k"))
dataframe2 = pd.DataFrame(2, index=(1, 2, 8), columns=("c", "d", "k"))

dataframes = (dataframe0, dataframe1, dataframe2)

# Print the sync_axis result for each axis and each named set operation.
for axis in (0, 1):
    for function in ("union", "intersection"):
        print("=" * 80)
        print(axis, function)
        synced = kraft.dataframe.sync_axis(dataframes, axis, function)
        print(synced)

## drop_axis_label

In [None]:
# Build a 10 x 10 frame of values drawn from {0, 1, 2}.
# A seeded local generator keeps the printed output identical across runs, and the
# frame is created as float so the missing values written below do not force an
# implicit integer-to-float upcast (deprecated by recent pandas versions).
rng = np.random.default_rng(0)

dataframe = pd.DataFrame(rng.integers(0, high=3, size=(10, 10)).astype(float))

# Knock out values: all of row 0, the first five values of row 1, the whole last
# column, and the last five values of the second-to-last column.
dataframe.iloc[0, :] = np.nan
dataframe.iloc[1, :5] = np.nan
dataframe.iloc[:, -1] = np.nan
dataframe.iloc[-5:, -2] = np.nan

print(dataframe)

# Try every threshold combination on each axis.
for axis in (0, 1):
    for min_good_value in (None, 4, 5):
        for min_good_unique_value in (None, 3):
            # The combination with no threshold at all is skipped.
            if min_good_value is None and min_good_unique_value is None:
                continue

            print("=" * 80)
            print(axis, min_good_value, min_good_unique_value)
            print(
                kraft.dataframe.drop_axis_label(
                    dataframe,
                    axis,
                    min_good_value=min_good_value,
                    min_good_unique_value=min_good_unique_value,
                )
            )

## drop_axes_label

In [None]:
# Build a 10 x 10 frame of values drawn from {0, 1, 2}, seeded so the output is
# reproducible and stored as float so NaN can be written without an implicit upcast.
rng = np.random.default_rng(0)

dataframe = pd.DataFrame(rng.integers(0, high=3, size=(10, 10)).astype(float))

# Row i loses its first i values, leaving an upper-triangular pattern of good values.
for i in range(dataframe.shape[0]):
    dataframe.iloc[i, :i] = np.nan

print(dataframe)

for min_good_value in (None, 4, 5):
    for min_good_unique_value in (None, 3):
        # The combination with no threshold at all is skipped.
        if min_good_value is None and min_good_unique_value is None:
            continue

        for axis in (0, 1):
            print("=" * 80)
            print(axis, min_good_value, min_good_unique_value)
            # This section is titled drop_axes_label, but the cell previously called
            # drop_axis_label (already covered by the section above), which looks
            # like a copy-paste slip.
            # NOTE(review): assumes drop_axes_label takes the same arguments as
            # drop_axis_label — confirm against kraft.dataframe.
            print(
                kraft.dataframe.drop_axes_label(
                    dataframe,
                    axis,
                    min_good_value=min_good_value,
                    min_good_unique_value=min_good_unique_value,
                )
            )

## pivot

In [None]:
# Three-column records; the ("a2", "b2") pair occurs twice, with values -10 and 10.
# NOTE(review): 0, 1, 2 presumably select the columns of the records and `min`
# presumably resolves the duplicated pair — confirm against kraft.dataframe.pivot.
records = (
    ("a0", "b0", 0),
    ("a0", "b1", 1),
    ("a0", "b2", 2),
    ("a1", "b0", 3),
    ("a1", "b1", 4),
    ("a1", "b2", 5),
    ("a2", "b2", -10),
    ("a2", "b2", 10),
)

dataframe = pd.DataFrame(records)

# Last expression of the cell, so the notebook displays the result.
kraft.dataframe.pivot(dataframe, 0, 1, 2, min)

## normalize

In [None]:
# Small 2 x 3 frame used as the input for every axis / method combination.
rows = ((0, 1, 2), (0.1, 10, 20))

dataframe = pd.DataFrame(rows)

# Print the normalize result for each axis (including None) and each method name.
for axis in (None, 0, 1):
    for method in ("-0-", "0-1", "sum", "rank"):
        print("=" * 80)
        print(axis, method)
        normalized = kraft.dataframe.normalize(dataframe, axis, method)
        print(normalized)

## sample

**TODO**: this section has no example cell yet.

## summarize

In [None]:
# Summarize normally distributed frames of increasing size.
# A seeded local generator keeps the summarized data identical across runs.
rng = np.random.default_rng(0)

for size in ((3, 3), (10, 10), (100, 100)):
    matrix = rng.normal(size=size)

    # Blank out every fourth column of every other row so missing values are present.
    matrix[::2, ::4] = np.nan

    # Labels are built as lists (not generators) so the axes are fully materialized
    # before being handed to pandas.
    index = pd.Index(
        ["Index{}".format(i) for i in range(matrix.shape[0])], name="Index"
    )
    columns = pd.Index(
        ["Column{}".format(i) for i in range(matrix.shape[1])], name="Column"
    )

    dataframe = pd.DataFrame(matrix, index=index, columns=columns)

    kraft.dataframe.summarize(dataframe)

## separate_type

In [None]:
# Build one frame that stacks continuous, categorical and binary rows, each with a
# single missing value, to serve as the input for separate_type.
n_row_per_type = 1

n_column = 8

# Seeded local generator: the frame is identical on every run of this cell.
seed = 0

rng = np.random.default_rng(seed)

shape = (n_row_per_type, n_column)

continuous = pd.DataFrame(
    data=rng.normal(size=shape),
    index=pd.Index(
        data=["Continuous {}".format(i) for i in range(n_row_per_type)],
        name="Continuous",
    ),
)

continuous.iloc[0, 0] = np.nan

# Integer draws are stored as float so NaN can be written without an implicit
# integer-to-float upcast; the concatenated frame is float either way because it
# also holds the continuous rows.
categorical = pd.DataFrame(
    data=rng.integers(3, size=shape).astype(float),
    index=pd.Index(
        data=["Categorical {}".format(i) for i in range(n_row_per_type)],
        name="Categorical",
    ),
)

categorical.iloc[0, 1] = np.nan

binary = pd.DataFrame(
    data=rng.integers(2, size=shape).astype(float),
    index=pd.Index(
        data=["Binary {}".format(i) for i in range(n_row_per_type)],
        name="Binary",
    ),
)

binary.iloc[0, 2] = np.nan

# Stack the three blocks row-wise and give the combined index a single name.
dataframe = pd.concat((continuous, categorical, binary))

dataframe.index.name = "Index"

dataframe

In [None]:
# Split the mixed-type frame into two results. They are bound to `continuous` and
# `binary`, which rebinds the names of the input frames built in the previous cell.
# NOTE(review): every other call in this notebook goes through `kraft.dataframe`;
# confirm that `separate_type` is really exposed at the top level of `kraft`.
continuous, binary = kraft.separate_type(dataframe)

In [None]:
# Display the first result returned by separate_type.
continuous

In [None]:
# Display the second result returned by separate_type.
binary