# Threshold

In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from skimage.filters import threshold_otsu

# Notebook-wide matplotlib defaults: larger base font and a white axes
# background for every figure created below.
plt.rcParams.update({
    'font.size': 16,
    'axes.facecolor': 'white',
})

%matplotlib inline

In [None]:
# Load the two coefficient tables from CSV. Both files are comma-separated
# and their first column is used as the row index.
# NOTE(review): presumably these are the preprocessed "original" and
# "selected" Pearson correlation matrices -- confirm against the
# preprocessing notebook.
read_opts = {'sep': ',', 'index_col': 0}

orig_pcc = pd.read_csv('./../data/train/orig_pcc_prep.csv', **read_opts)
sel_pcc = pd.read_csv('./../data/train/sel_pcc_prep.csv', **read_opts)

In [None]:
# Preview the first five rows of the original coefficient table.
orig_pcc.head(n=5)

In [None]:
# Preview the first five rows of the selected coefficient table.
sel_pcc.head(n=5)

## OTSU

Apply Otsu's method to find the optimal threshold value for each coefficient matrix.

In [None]:
# Otsu threshold over all entries of the original coefficient matrix,
# computed from a 100-bin histogram of the values.
thresh_orig = threshold_otsu(image=orig_pcc.values, nbins=100)

In [None]:
# Otsu threshold over all entries of the selected coefficient matrix,
# computed from a 100-bin histogram of the values.
thresh_sel = threshold_otsu(image=sel_pcc.values, nbins=100)

In [None]:
# Kernel density estimate of every entry in the original coefficient
# matrix, with the Otsu threshold drawn as a vertical line.
plt.figure(figsize=(8, 6))
# The plot is a density estimate, not a histogram, so the title says so.
plt.title("Density of original Pearson's correlation coefficients")
# `fill=True` replaces the deprecated `shade=True` keyword of kdeplot.
sns.kdeplot(orig_pcc.values.ravel(), fill=True)
plt.axvline(x=thresh_orig)
plt.tight_layout()

In [None]:
# Kernel density estimate of every entry in the selected coefficient
# matrix, with the Otsu threshold drawn as a vertical line.
plt.figure(figsize=(8, 6))
# The plot is a density estimate, not a histogram, so the title says so.
plt.title("Density of selected Pearson's correlation coefficients")
# `fill=True` replaces the deprecated `shade=True` keyword of kdeplot.
sns.kdeplot(sel_pcc.values.ravel(), fill=True)
plt.axvline(x=thresh_sel)
plt.tight_layout()

## Bicluster evaluation

In [None]:
# Split the original coefficient matrix at the Otsu threshold into two
# complementary frames with the same index and columns:
#   orig_cl1 keeps entries >= thresh_orig and zeroes the rest;
#   orig_cl2 keeps entries <  thresh_orig and zeroes the rest.
# DataFrame.mask returns a new frame, so orig_pcc is left untouched and
# nothing is written through `.values`, which may be a temporary copy or
# a read-only array (e.g. under pandas Copy-on-Write).
# NaN entries compare False against the threshold in both directions and
# therefore stay NaN in both results.
below_thresh = orig_pcc < thresh_orig
at_or_above_thresh = orig_pcc >= thresh_orig

orig_cl1 = orig_pcc.mask(below_thresh, 0)
orig_cl2 = orig_pcc.mask(at_or_above_thresh, 0)