In [1]:
from merger.metrics import pairwise_kolmogorov_smirnov_test, pairwise_mann_whitney_u_test, corpora, vectorizer, columns_tfidf_cosine_similarity, dataframes_tfidf_cosine_similarity, magnitude
from merger.dataframe_loader import dataframe_loader
import itertools
import os

In [2]:
# Load the input dataframes from the "./test" path. `dfs` feeds both the corpus
# construction and the pairwise comparison loop further down.
# NOTE(review): the container type returned by dataframe_loader is not visible
# here; the loop below iterates it directly, so it is presumably a sequence of
# dataframes — confirm.
dfs = dataframe_loader("./test")

In [3]:
# Base output directory: every dataframe pair gets its own numbered
# sub-directory under this path when the metrics are exported.
root = "./root"

In [4]:
# Build two corpora from the loaded dataframes. Each one is later handed to
# `vectorizer(...)` to back the column-level and dataframe-level TF-IDF metrics.
columns_corpus, data_corpus = corpora(dfs)

In [5]:
# Fit one vectorizer per corpus up front so the registry below stays readable.
columns_vectorizer = vectorizer(columns_corpus)
data_vectorizer = vectorizer(data_corpus)

# Metric registry: output file stem -> (metric function, extra positional args).
# Every metric is invoked as `function(df1, df2, *extra_args)`.
metrics = {
    "kolmogorov": (pairwise_kolmogorov_smirnov_test, []),
    "mann": (pairwise_mann_whitney_u_test, []),
    "columns_tfidf": (columns_tfidf_cosine_similarity, [columns_vectorizer]),
    "dataframes_tfidf": (dataframes_tfidf_cosine_similarity, [data_vectorizer]),
    "magnitude": (magnitude, []),
}

In [6]:
# Evaluate every registered metric on each unordered pair of dataframes and
# write the result to <root>/<pair index>/<metric name>.csv.
pairs = itertools.combinations(dfs, 2)
for i, (df1, df2) in enumerate(pairs):
    path = "{root}/{i}".format(root=root, i=i)
    # exist_ok keeps the cell re-runnable without failing on existing folders.
    os.makedirs(path, exist_ok=True)
    for metric, (callback, args) in metrics.items():
        result = callback(df1, df2, *args)
        target = "{path}/{metric}.csv".format(path=path, metric=metric)
        result.to_csv(target)

In [1]:
import numpy as np

In [2]:
def sin(x):
    """Return the sine of ``x`` (radians), delegating to ``np.sin``.

    Serves as the target function ``f`` given to the optimizer in this
    notebook. Accepts whatever ``np.sin`` accepts (scalars or array-likes).
    """
    value = np.sin(x)
    return value

In [3]:
from bayes_opt import BayesianOptimization

# Search space: the single parameter `x` is restricted to the interval (0, 10).
pbounds = {'x': (0, 10)}

# Maximise `sin` over that interval; random_state is pinned to a fixed seed.
optimizer = BayesianOptimization(f=sin, pbounds=pbounds, random_state=1)

In [4]:
# Run the optimisation with the library's default settings (no arguments are
# passed); the per-iteration progress table is printed as the cell output.
optimizer.maximize()

|   iter    |  target   |     x     |
-------------------------------------
| [0m 1       [0m | [0m-0.8566  [0m | [0m 4.17    [0m |
| [95m 2       [0m | [95m 0.7956  [0m | [95m 7.203   [0m |
| [0m 3       [0m | [0m 0.001144[0m | [0m 0.001144[0m |
| [0m 4       [0m | [0m 0.118   [0m | [0m 3.023   [0m |
| [95m 5       [0m | [95m 0.9947  [0m | [95m 1.468   [0m |
| [0m 6       [0m | [0m 0.2938  [0m | [0m 9.127   [0m |
| [0m 7       [0m | [0m-0.177   [0m | [0m 6.105   [0m |
| [0m 8       [0m | [0m 0.9814  [0m | [0m 8.047   [0m |
| [0m 9       [0m | [0m 0.9077  [0m | [0m 2.004   [0m |
| [0m 10      [0m | [0m 0.99    [0m | [0m 7.713   [0m |
| [95m 11      [0m | [95m 0.9956  [0m | [95m 1.664   [0m |
| [95m 12      [0m | [95m 0.9998  [0m | [95m 7.873   [0m |
| [95m 13      [0m | [95m 1.0     [0m | [95m 1.566   [0m |
| [95m 14      [0m | [95m 1.0     [0m | [95m 7.855   [0m |
| [0m 15      [0m | [0m 1.0     [0

In [6]:
# Display the best observation: a dict holding the highest target value and the
# parameter values ('params') that produced it.
optimizer.max

{'target': 0.9999999817158507, 'params': {'x': 1.570605098400789}}