In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

import kraft

## binarize

In [None]:
# Mixed input: integers, both kinds of missing value (None and NaN), and a string.
raw_values = [0, 2, 1, None, np.nan, "a"]

series = pd.Series(raw_values, name="Name")

kraft.binarize(series)

## cast_builtin

In [None]:
# Print the value and the type that cast_builtin returns for a handful of
# string inputs and for None.
objects = ("False", "True", "0", "1.0", None)

for object_ in objects:

    output = kraft.cast_builtin(object_)

    print(output, type(output))

## clip_by_standard_deviation

In [None]:
# The integers -2..2, clipped with several standard-deviation settings
# (including the negative and zero edge cases).
array = np.arange(-2, 3)

standard_deviations = (-1, 0, 1, 2)

for standard_deviation in standard_deviations:

    clipped = kraft.clip_by_standard_deviation(array, standard_deviation)

    print(clipped)

## Simulate points for the clustering examples

In [None]:
# This is the first cell that draws random numbers. Seed NumPy's legacy global
# generator here so that, when the notebook is run top to bottom, this cell and
# every later np.random.* cell produce the same values on each run.
np.random.seed(0)

# 100 points x 100 dimensions, sampled uniformly from [0, 1).
point_x_dimension = np.random.random_sample(size=(100, 100))

# Rescale the first 20% of the points up and the last 20% down so that the
# data contains three blocks with different magnitudes.
block_size = int(point_x_dimension.shape[0] * 0.2)

point_x_dimension[:block_size] *= 2

point_x_dimension[-block_size:] *= 1 / 2

# Wrap the array in a labeled frame ("Index0".., "Column0"..).
point_x_dimension = pd.DataFrame(
    point_x_dimension,
    index=["Index{}".format(i) for i in range(point_x_dimension.shape[0])],
    columns=["Column{}".format(i) for i in range(point_x_dimension.shape[1])],
)

## cluster_hierarchical_clusterings

In [None]:
# Run on the simulated point-by-dimension frame.
# NOTE(review): the positional arguments 0 and 3 are presumably the axis and
# the number of clusters — confirm against kraft's signature.
kraft.cluster_hierarchical_clusterings(point_x_dimension, 0, 3)

## cluster

In [None]:
# Pass the frame's underlying NumPy array (not the DataFrame) with n_cluster=3.
kraft.cluster(point_x_dimension.values, n_cluster=3)

## compute_bandwidth

In [None]:
# Bandwidth of normal samples whose size doubles from 2 up to 512.
for power in range(1, 10):

    n_point = 2 ** power

    array = np.random.normal(size=n_point)

    bandwidth = kraft.compute_bandwidth(array)

    print(array.size, bandwidth)

## compute_coclustering_distance

In [None]:
# One row per clustering, one column per point; some entries are NaN.
# NOTE(review): NaN presumably marks a point that was left out of that
# clustering — confirm against kraft.
clustering_x_point = np.array(
    [
        [1, 1, np.nan, np.nan],
        [0, 0, 1, np.nan],
        [1, 0, np.nan, 1],
    ]
)

kraft.compute_coclustering_distance(clustering_x_point)

## compute_entropy

In [None]:
# For sizes 1, 2, 4, ..., 256: entropy of a uniform random sample, then of two
# constant vectors (all 1s and all 2s).
fill_values = (1, 2)

for power in range(9):

    n = 2 ** power

    print(n)

    random_sample = np.random.random_sample(size=n)

    print(kraft.compute_entropy(random_sample))

    for fill_value in fill_values:

        constant = np.full(n, fill_value)

        print(kraft.compute_entropy(constant))

## compute_ic, compute_icd

In [None]:
# Hand-picked vector pairs: constant, reversed, peak- and valley-shaped, and
# alternating in and out of phase.
zeros = np.asarray((0,) * 8)

ones = np.asarray((1,) * 8)

peak = np.asarray((0, 1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1, 0))

valley = np.asarray((8, 7, 6, 5, 4, 3, 2, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8))

vector_pairs = (
    # The same array object is passed as both vectors.
    (zeros, zeros),
    (ones, ones),
    # Ascending against its own reversal (two separately built arrays).
    (np.asarray(range(8)), np.asarray(range(8))[::-1]),
    (peak, peak),
    (valley, valley),
    # Alternating 0/1, first in phase and then out of phase.
    (np.asarray((0, 1) * 8), np.asarray((0, 1) * 8)),
    (np.asarray((0, 1) * 8), np.asarray((1, 0) * 8)),
)

for vector_0, vector_1 in vector_pairs:

    print(vector_0)

    print(vector_1)

    ic = kraft.compute_ic(vector_0, vector_1)

    print(ic)

    icd = kraft.compute_icd(vector_0, vector_1)

    print(icd)

In [None]:
# compute_ic between two independent normal samples of size 1, 2, 4, ..., 256.
for power in range(9):

    n = 2 ** power

    print(n)

    # Drawn in order: vector_0 first, then vector_1.
    vector_0, vector_1 = (np.random.normal(size=n) for _ in range(2))

    ic = kraft.compute_ic(vector_0, vector_1)

    print(ic)

In [None]:
# compute_ic between a step vector (0, 0, 1, 1, 2, 2) and scaled copies of it.
n = 2

vector_0 = np.repeat((0, 1, 2), n)

print(vector_0)

factors = (1, 2, 1 / 2, 10, 1 / 10)

for factor in factors:

    vector_1 = vector_0 * factor

    print(vector_1)

    ic = kraft.compute_ic(vector_0, vector_1)

    print(ic)

In [None]:
# Two independent uniform samples of 100 values; the timing cell that follows
# reuses them.
n = 100

# Drawn in order: vector_0 first, then vector_1.
vector_0, vector_1 = (np.random.random_sample(size=n) for _ in range(2))

kraft.compute_ic(vector_0, vector_1)

In [None]:
%%timeit

# Timed statement: one compute_ic call on the vector_0 / vector_1 left in the
# namespace by the preceding cell.
kraft.compute_ic(vector_0, vector_1)

## compute_kl

In [None]:
# compute_kl between two independent normal samples of size 1, 2, 4, ..., 256.
for power in range(9):

    n = 2 ** power

    print(n)

    # Drawn in order: vector_0 first, then vector_1.
    vector_0, vector_1 = (np.random.normal(size=n) for _ in range(2))

    kl = kraft.compute_kl(vector_0, vector_1)

    print(kl)

## compute_margin_of_error

In [None]:
# Margin of error for uniform and normal samples of size 1, 10, 100 and 1000.
samplers = (np.random.random_sample, np.random.normal)

for sampler in samplers:

    print(sampler.__name__)

    for power in range(4):

        array = sampler(size=10 ** power)

        margin_of_error = kraft.compute_margin_of_error(array)

        print(array.size, margin_of_error)

## Set up values for the p-value examples

In [None]:
# Reference values 0..9 to compare against.
random_values = np.arange(10)

# Values to test: the two lowest and the two highest of the reference values.
values = np.asarray((0, 1, 8, 9))

## compute_p_value

In [None]:
# p-value of every test value against the reference values, in both directions.
directions = ("<", ">")

for value in values:

    print(value)

    for direction in directions:

        print(direction)

        p_value = kraft.compute_p_value(value, random_values, direction)

        print(p_value)

## compute_p_values_and_q_values

In [None]:
# All test values at once, for each direction including the combined "<>".
directions = ("<", ">", "<>")

for direction in directions:

    p_values_and_q_values = kraft.compute_p_values_and_q_values(
        values, random_values, direction
    )

    print(p_values_and_q_values)

## compute_set_enrichment

In [None]:
# Ten elements scored 0..9, labeled "Element0".."Element9".
n_element = 10

element_names = pd.Index(
    ["Element{}".format(i) for i in range(n_element)], name="Element"
)

element_score = pd.Series(np.arange(n_element), index=element_names, name="Score")

# Use the two lowest-scoring and then the two highest-scoring elements as the set.
for set_elements in (element_score.index[:2], element_score.index[-2:]):

    kraft.compute_set_enrichment(element_score, set_elements, plot=True)

In [None]:
# Parameter sweep for compute_set_enrichment: build a sorted score vector that
# is symmetric around 0, pick a set from its low ("-") and/or high ("+") end,
# and run the enrichment with plots. Each loop tuple currently holds a single
# setting; add values to a tuple to widen the sweep.
for n_element in (100,):

    # n_element // 2 normal draws plus their negatives, sorted ascending.
    element_score = np.random.normal(size=n_element // 2)

    element_score = pd.Series(
        np.sort(np.concatenate((element_score, -element_score))),
        index=("Element{}".format(i) for i in range(n_element)),
    )

    # n_skip: how many elements to skip at each end before the set starts.
    # A value below 1 is treated as a fraction of n_element.
    for n_skip in (0,):

        if n_skip < 1:

            n_skip = int(n_skip * n_element)

        # n_top: width of the window the set is drawn from.
        # A value below 1 is treated as a fraction of n_element.
        for n_top in (0.1,):

            if n_top < 1:

                n_top = int(n_top * n_element)

            # rate_top_hit: slice step inside the window (1 keeps every element).
            # A value below 1 is treated as a fraction of n_top.
            for rate_top_hit in (1,):

                if rate_top_hit < 1:

                    rate_top_hit = int(rate_top_hit * n_top)

                for side in (
                    "-",
                    "+",
                ):

                    set_elements = []

                    if "-" in side:

                        # Window of n_top positions after skipping n_skip from
                        # the low-score end.
                        set_elements += element_score.index[
                            n_skip : n_skip + n_top : rate_top_hit
                        ].tolist()

                    if "+" in side:

                        # Mirror image from the high-score end; walked backwards
                        # and then reversed to restore ascending order.
                        set_elements += element_score.index[
                            -(n_skip + 1) : -(n_skip + 1 + n_top) : -rate_top_hit
                        ][::-1].tolist()

                    for method in ("score pdf k1",):

                        kraft.compute_set_enrichment(
                            element_score,
                            set_elements,
                            method=method,
                            plot_analysis=True,
                            plot=True,
                        )

In [None]:
# Read the gene scores as a Series indexed by gene.
# `read_csv(squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0;
# squeezing the single-column frame afterwards is the documented replacement
# and also works on older pandas versions.
element_score = pd.read_csv(
    "benchmark4.gene.list.GSEA.txt",
    sep="\t",
    usecols=("gene.list", "gene.scores"),
    index_col=0,
).squeeze("columns")

# Take one gene set from the GMT file as the set to test.
set_elements = kraft.read_gmt("c2.all.v5.1.symbols.gmt")["COLLER_MYC_TARGETS_UP"]

In [None]:
# Enrichment of the gene set in the benchmark scores, with both plots enabled.
kraft.compute_set_enrichment(
    element_score,
    set_elements,
    method="score pdf k1",
    plot_analysis=True,
    plot=True,
)

## Set up points for the density and PDF estimation examples

In [None]:
# Three point sets with 3 points each and 1, 2 and 3 dimensions; in every set
# each extra dimension doubles the previous one.
points_1d = np.asarray(
    [
        [0],
        [1],
        [2],
    ]
)

points_2d = np.asarray(
    [
        [0, 0],
        [1, 2],
        [2, 4],
    ]
)

points_3d = np.asarray(
    [
        [0, 0, 0],
        [1, 2, 4],
        [2, 4, 8],
    ]
)

point_x_dimensions = (points_1d, points_2d, points_3d)

## estimate_density

In [None]:
# Estimate the density of each small point set.
# The loop variable is deliberately not named `point_x_dimension`: an earlier
# cell binds that name to the 100 x 100 clustering DataFrame, and rebinding it
# here would make a later re-run of the clustering cells use a 3-point array.
for points in point_x_dimensions:

    kraft.estimate_density(points)

## estimate_pdf

In [None]:
# Estimate the PDF of each small point set.
# The loop variable is deliberately not named `point_x_dimension`: an earlier
# cell binds that name to the 100 x 100 clustering DataFrame, and rebinding it
# here would make a later re-run of the clustering cells use a 3-point array.
for points in point_x_dimensions:

    kraft.estimate_pdf(points)

## estimate_posterior_pdf

In [None]:
# Estimate the posterior PDF of each small point set.
# The loop variable is deliberately not named `point_x_dimension`: an earlier
# cell binds that name to the 100 x 100 clustering DataFrame, and rebinding it
# here would make a later re-run of the clustering cells use a 3-point array.
for points in point_x_dimensions:

    kraft.estimate_posterior_pdf(points)

## Set up a matrix for the factorization examples

In [None]:
# 50 x 50 matrix of uniform random values in [0, 1), shared by the
# factorization cells.
v = np.random.random_sample((50, 50))

# Second argument passed to the factorization functions.
# NOTE(review): presumably the factorization rank — confirm against kraft.
r = 5

## factorize_matrices

In [None]:
# Run factorize_matrices on the single matrix v in each mode, then plot the
# returned factors together with the errors.
modes = ("ws", "hs")

for mode in modes:

    factorization = kraft.factorize_matrices((v,), r, mode)

    ws, hs, errors = factorization

    kraft.plot_matrix_factorization(ws, hs, errors=errors)

In [None]:
# Same as the single-matrix case, but factorizing v together with a copy of v
# scaled by 10.
modes = ("ws", "hs")

for mode in modes:

    matrices = (v, v * 10)

    factorization = kraft.factorize_matrices(matrices, r, mode)

    ws, hs, errors = factorization

    kraft.plot_matrix_factorization(ws, hs, errors=errors)

## factorize_matrix_by_nmf

In [None]:
# Factorize v by NMF and plot the two factors; each is wrapped in a 1-tuple
# for plot_matrix_factorization. The returned error is not plotted here.
nmf_result = kraft.factorize_matrix_by_nmf(v, r)

w, h, error = nmf_result

kraft.plot_matrix_factorization((w,), (h,))

## Set up helpers for the function_heat_map examples

In [None]:
# Function handed to function_heat_map in the cells below.
function = kraft.compute_ic

In [None]:
def simulate_vector_and_matrix(shape, matrix_n_category=None):
    """Simulate a labeled vector and a matrix that share the same columns.

    Parameters
    ----------
    shape : tuple of int
        ``(n_row, n_column)`` of the matrix; the vector has ``n_column`` values.
    matrix_n_category : int or None
        If None, values are drawn from a standard normal distribution.
        Otherwise values are random integers in ``[0, matrix_n_category)``,
        for both the vector and the matrix.

    Returns
    -------
    (pandas.Series, pandas.DataFrame)
        The vector, named "Vector", and the matrix. The vector index and the
        matrix columns are "Column0".. (index name "Column"); the matrix rows
        are "Row0".. (index name "Row").
    """

    n_row, n_column = shape[0], shape[1]

    if matrix_n_category is None:

        def draw(size):

            return np.random.normal(size=size)

    else:

        def draw(size):

            return np.random.randint(0, high=matrix_n_category, size=size)

    # Draw the vector before the matrix to keep the random stream order fixed.
    vector_values = draw(n_column)

    matrix_values = draw(shape)

    column_labels = pd.Index(
        ["Column{}".format(i) for i in range(n_column)], name="Column"
    )

    row_labels = pd.Index(["Row{}".format(i) for i in range(n_row)], name="Row")

    vector = pd.Series(vector_values, index=column_labels, name="Vector")

    matrix = pd.DataFrame(matrix_values, index=row_labels, columns=column_labels)

    return vector, matrix

## function_heat_map

In [None]:
# Heat maps for a range of matrix shapes: from 1 to 64 rows and from 2 to 256
# columns.
shapes = (
    (1, 8),
    (2, 8),
    (8, 8),
    (64, 8),
    (8, 2),
    (8, 4),
    (8, 256),
)

for shape in shapes:

    simulated = simulate_vector_and_matrix(shape)

    vector, matrix = simulated

    kraft.function_heat_map(vector, matrix, function, n_extreme=None)

In [None]:
# Heat map for a matrix that contains missing values.
shape = (10, 20)

simulated = simulate_vector_and_matrix(shape)

vector, matrix = simulated

# Blank out the bottom-right 2 x 2 corner and the whole first column (the two
# regions do not overlap).
matrix.iloc[-2:, -2:] = np.nan

matrix.iloc[:, 0] = np.nan

# Optional extra case, disabled: blank out an entire row.
# matrix.iloc[0] = np.nan

kraft.function_heat_map(vector, matrix, function)

In [None]:
# Heat maps for two categorical vectors, each with every vector_ascending
# setting.
shape = (10, 8)

matrix = simulate_vector_and_matrix(shape)[1]

categorical_vectors = (
    (1, 2, 2, 2, 2, 0, 0, 0),
    (1, 1, 2, 2, 2, 0, 0, 0),
)

vector_ascendings = (None, True, False)

for vector in categorical_vectors:

    # Label the categories with the matrix columns.
    vector = pd.Series(vector, index=matrix.columns)

    for vector_ascending in vector_ascendings:

        kraft.function_heat_map(
            vector,
            matrix,
            function,
            vector_ascending=vector_ascending,
            vector_data_type="categorical",
        )

In [None]:
# Read the vector as a Series.
# `read_csv(squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0;
# squeezing the single-column frame afterwards is the documented replacement
# and also works on older pandas versions.
vector = pd.read_csv(
    "function_heat_map_vector.tsv", sep="\t", index_col=0
).squeeze("columns")

matrix = pd.read_csv("function_heat_map_matrix.tsv", sep="\t", index_col=0)

# Binary vector against the full matrix, with sampling and permutation disabled.
kraft.function_heat_map(
    vector, matrix, function, n_sampling=0, n_permutation=0, vector_data_type="binary"
)

# First matrix row used as the vector, against only that row.
kraft.function_heat_map(
    matrix.iloc[0], matrix.iloc[:1], function, n_sampling=0, n_permutation=0, plot_std=3
)

# First matrix row used as the vector, against the first two rows.
kraft.function_heat_map(
    matrix.iloc[0], matrix.iloc[:2], function, n_sampling=0, n_permutation=0, plot_std=3
)

## function_heat_map_summary

In [None]:
# One continuous, one 3-category and one 2-category matrix over the same ten
# columns as the vector.
vector, matrix = simulate_vector_and_matrix((4, 10))

matrix_3 = simulate_vector_and_matrix((3, 10), 3)[1]

matrix_2 = simulate_vector_and_matrix((2, 10), 2)[1]

# Remove a different column from each matrix so that their column sets only
# partly overlap.
matrix = matrix.drop(columns=matrix.columns[-3])

matrix_3 = matrix_3.drop(columns=matrix_3.columns[-2])

matrix_2 = matrix_2.drop(columns=matrix_2.columns[-1])

matrix_dicts = {
    "Matrix A": {"matrix": matrix, "data_type": "continuous"},
    "Matrix B": {"matrix": matrix_3, "data_type": "categorical"},
    "Matrix C": {"matrix": matrix_2, "data_type": "binary"},
}

# Compute the statistics of each matrix once, without plotting.
statistics_dicts = {}

for name, matrix_dict in matrix_dicts.items():

    statistics_dicts[name] = kraft.function_heat_map(
        vector, matrix_dict["matrix"], function, plot=False
    )

# Plot the summary with and without restricting it to shared columns.
for plot_only_shared in (False, True):

    kraft.function_heat_map_summary(
        vector,
        matrix_dicts,
        statistics_dicts,
        plot_only_shared=plot_only_shared,
    )

## get_clustering_index

In [None]:
# 4 x 5 matrix in which rows 0 and 3 are similar, rows 1 and 2 are similar,
# and the columns alternate between low and high values.
matrix = np.asarray(
    [
        [9, 19, 9, 19, 9],
        [0, 10, 0, 10, 0],
        [1, 11, 1, 11, 1],
        [8, 18, 8, 18, 8],
    ]
)

for axis in (0, 1):

    clustering_index = kraft.get_clustering_index(matrix, axis)

    print(clustering_index)

## get_colorscale_color

## infer_assuming_independence

## infer