In [None]:
# want to see the images inline
%matplotlib inline

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from scipy.stats import gamma
from scipy.optimize import curve_fit
from scipy import special
from scipy.signal import medfilt

from configparser import ConfigParser, ExtendedInterpolation

import utils as utils

from tqdm import tqdm_notebook as tqdm
import json

In [None]:
# read config file
config = ConfigParser(interpolation=ExtendedInterpolation())
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail loudly here instead of running into a
# confusing NoSectionError in a later cell.
loaded_config_files = config.read("config.ini")
if not loaded_config_files:
    raise FileNotFoundError("config.ini was not found or could not be read")
# keep showing the parsed file list as the cell output
loaded_config_files

In [None]:
# every setting used by this notebook is read from the [main] section
# --- locations of the database files
FOLDER = config.get(section="main", option="ROOTFOLDER")
DB_FILENAME = config.get(section="main", option="DB_FILENAME")
DB_NEW_FILENAME = config.get(section="main", option="DB_NEW_FILENAME")
# --- plotting switches
COLOR = config.get(section="main", option="COLOR")
MAKE_IMAGES = config.getboolean(section="main", option="MAKE_IMAGES")
# --- TYPES is stored as a JSON string and decoded here
TYPES = json.loads(config.get(section="main", option="TYPES"))

In [None]:
# Number of histogram bins inspected at each end of the z axis
# (passed to check_boundaries as z_bound).
Z_BOUND = 1
# Minimum count in one of those boundary bins for a stack to be flagged
# as cut (passed to check_boundaries as z_count).
Z_COUNT = 10

In [None]:
# histogram layout shared by all graphs
num_bins = 120
# graph range [xmin, xmax]; with these values every bin is one unit wide
xmin, xmax = 0, num_bins
binwidth = (xmax - xmin) / num_bins

# bin edges from xmin up to and including xmax
bins = np.arange(start=xmin, stop=xmax + binwidth, step=binwidth)
print("bins: ", bins.shape)

In [None]:
# important indices
# database columns whose values are compared against TYPES
type_columns = ["c0_type", "c1_type", "c2_type"]
# filename prefixes, kept as a (1, 5) array so a prefix can be picked by the
# position of the matching type column
stain_prefix = np.array([["C0-", "C1-", "C2-", "C3-", "C4-"]])
# database column holding the image file name
filename_column = "cropped_image_file"
# database column holding the number of z planes
# NOTE(review): the variable name is misspelled ("colmumn"); it is left
# unchanged because other cells reference it.
num_z_planes_colmumn = "num_z_planes"
# database column that receives the z-crop flag
is_z_cropped = "is_z_cropped"

In [None]:
# display the resolved database path as a quick sanity check of the config values
os.path.join(FOLDER, "smFISH-database", DB_FILENAME)

In [None]:
# load the database; its rows determine which images are processed below
df_path = os.path.join(FOLDER, "smFISH-database", DB_FILENAME)
# comma is read_csv's default separator
df = pd.read_csv(df_path)

In [None]:
# TODO: maybe it is necessary to fill in other values here, too
# fix missing entries in the columns that we are planning to use
# Assign the filled column back instead of calling fillna(inplace=True) on
# df[col]: that is chained assignment, which raises a FutureWarning in
# pandas 2.x and leaves df unchanged once copy-on-write is active.
for text_column in ["cropped_image_file", "c0_type", "c1_type", "c2_type"]:
    df[text_column] = df[text_column].fillna("")

In [None]:
# work on a copy of the loaded table
# (previously filtered: df[df['cropped_image_file'].apply(lambda x: x.startswith(EXPERIMENT))])
dff = df.copy()

In [None]:
# find every (row, type column) cell whose value is one of the requested TYPES
row, col = np.where(dff[type_columns].applymap(lambda x: x in TYPES))
n_samples = dff.shape[0]
# stain_prefix has a single row, so selecting by column index gives the same
# prefixes as tiling it n_samples times and indexing with (row, col)
new_prefix = stain_prefix[0, col]
new_filename = dff[filename_column].values[row]
num_z_planes = dff[num_z_planes_colmumn].values[row]
# prefix + file name without its last four characters
# (presumably the ".tif" extension -- confirm against the database)
full_filenames = [
    f"{prefix}{name[:-4]}" for prefix, name in zip(new_prefix, new_filename)
]

In [None]:
# Build the csv path for every selected file. An entry of length 3 is a bare
# prefix ("C0-" etc.) left over from an empty file name and is skipped.
dataset = []
pbar = tqdm(total=len(full_filenames))
for ff in full_filenames:
    pbar.update(1)
    if len(ff) == 3:
        continue
    dataset.append(os.path.join(FOLDER, "csv", ff + ".csv"))
pbar.close()

In [None]:
def smooth_histogram(values, kernel_size=5, smooth=False):
    """Optionally median-filter a 1-D array of histogram counts.

    Parameters
    ----------
    values : array-like
        Counts per bin (the histogram values, not the bin edges).
    kernel_size : int, optional
        Width of the median window. Must be odd (requirement of ``medfilt``).
    smooth : bool, optional
        When False, ``values`` is returned unchanged.

    Returns
    -------
    array-like
        ``values`` itself when ``smooth`` is False, otherwise the filtered
        counts with the same length as ``values``.

    Raises
    ------
    ValueError
        Raised by ``medfilt`` when ``kernel_size`` is even.
    """
    # use initial values, not bins!
    if not smooth:
        return values
    # medfilt zero-pads the edges itself, which is exactly what the former
    # manual np.pad + trim was emulating. That version had two defects:
    #   * kernel_size was never forwarded, so a 3-wide window was always used;
    #   * the trim used ``-kernel_size // 2`` (-3 for a size of 5), which cut
    #     one element too many and dropped the last bin from the result.
    return medfilt(np.asarray(values), kernel_size)

In [None]:
def check_boundaries(values, z_bound=Z_BOUND, z_count=Z_COUNT):
    """Tell whether either end of a histogram reaches the cut threshold.

    Parameters
    ----------
    values : array-like
        1-D counts per bin.
    z_bound : int, optional
        Number of bins inspected at the start and at the end of ``values``.
    z_count : int, optional
        Count at or above which a boundary bin is treated as a cut.

    Returns
    -------
    bool
        True if any of the first or last ``z_bound`` entries is >= ``z_count``.
    """
    # With z_bound == 0 the slice values[-0:] would select the whole array
    # and turn the "end" check into a scan of every bin; there is nothing to
    # inspect in that case.
    if z_bound <= 0:
        return False
    counts = np.asarray(values)
    hits_start = np.any(counts[:z_bound] >= z_count)
    hits_end = np.any(counts[-z_bound:] >= z_count)
    return bool(hits_start or hits_end)

In [None]:
# Maps an image file name ("<name>.tif") to 1 if a z cut was detected in any
# of its processed channel files, else 0.
z_cut_defects = {}
print(f"Processing: {len(dataset)} files")

# `dataset` was derived from `full_filenames` by dropping the entries of
# length 3, while `num_z_planes` is still aligned with `full_filenames`.
# Apply the same filter here so each file is paired with its own number of
# planes; indexing num_z_planes by the position inside `dataset` picks the
# wrong stack as soon as one entry has been dropped.
dataset_num_z_planes = [
    n_planes
    for name, n_planes in zip(full_filenames, num_z_planes)
    if len(name) != 3
]

# the output folder does not depend on the file, so create it once
folder_path = os.path.join(FOLDER, "z-counts")
if MAKE_IMAGES:
    os.makedirs(folder_path, exist_ok=True)

# actual plotting
pbar = tqdm(total=len(dataset))
for d, n_planes in zip(dataset, dataset_num_z_planes):
    pbar.update(1)

    if not os.path.exists(d):
        continue

    fig = None
    try:
        # computation
        # load the data and scale it accordingly
        _, z = utils.load_i_z(d, skiprows_=0)

        # A file without detections is reported and skipped, not recorded
        # (the former z.min() call raised ValueError at this point).
        if np.size(z) == 0:
            raise ValueError("no detections in file")

        # histogram of the z positions over the fixed bins
        binned_values, _ = np.histogram(z, bins)

        smooth_binned_values = smooth_histogram(
            binned_values,
            kernel_size=5,
            smooth=True
        )

        # Strip the 3-character prefix and swap the "csv" suffix for "tif".
        # basename is used instead of split("/") so this also works with
        # non-POSIX path separators.
        image_name = os.path.basename(d)[3:-3] + "tif"

        # NOTE(review): check_boundaries looks at the first/last Z_BOUND bins
        # of the fixed 0..num_bins histogram, whereas the red guide lines
        # below sit at Z_BOUND and n_planes - Z_BOUND. Assuming z is in plane
        # units, stacks with fewer than num_bins planes never have their top
        # boundary inspected -- confirm whether the histogram should be
        # limited to n_planes first.
        is_cut = int(check_boundaries(smooth_binned_values, Z_BOUND, Z_COUNT))

        # Several channel files map to the same image name; keep the flag
        # once any of them is cut instead of letting a later file reset it
        # to 0.
        z_cut_defects[image_name] = max(z_cut_defects.get(image_name, 0), is_cut)

        # plotting
        if MAKE_IMAGES:
            # create the canvas
            fig = plt.figure(figsize=(8, 5))
            title = utils.create_title(d, name_id=-1)
            fig.suptitle(title)

            # NOTE(review): distplot is deprecated in recent seaborn
            # releases; kept so the figures do not change.
            sns.distplot(
                z,
                bins=bins,
                rug=False,
                kde=True,
                norm_hist=True,
                kde_kws={
                    "shade": True,
                    "linewidth": 3,
                },
            )

            plt.grid(True, lw=2, ls="--", c=".85")

            # reasonable adjustments to make the data look nicer
            plt.xlabel("z")
            plt.ylabel("# spots")

            plt.xticks(np.arange(xmin, xmax + binwidth, binwidth * 10))

            # boundary lines to check
            plt.axvline(x=0 + Z_BOUND, lw=2, ls="--", c="red")
            plt.axvline(x=n_planes - Z_BOUND, lw=2, ls="--", c="red")

            plt.xlim([0, n_planes])

            plt.savefig(os.path.join(folder_path, title + ".pdf"))

    except (RuntimeError, TypeError, ValueError) as err:
        # best effort: report which file failed and why, then carry on
        print(f"Skipping {d}: {type(err).__name__}: {err}")
    finally:
        # close the figure even when plotting failed, so figures do not pile up
        if fig is not None:
            plt.close(fig)
pbar.close()

In [None]:
# merge results in one array
# for r in zip(dataset, z_cut_defects):
#     print(r[0].split("/")[-1])

In [None]:
# build the output table on a copy of the working frame, so dff is not modified
df_out = dff.copy()

In [None]:
# rows whose image file has an entry in z_cut_defects receive that flag;
# all other rows are left without a value (NaN) by the index-aligned assignment
has_result = df_out["cropped_image_file"].isin(z_cut_defects)
df_out[is_z_cropped] = df_out.loc[has_result, "cropped_image_file"].apply(
    lambda image_name: z_cut_defects[image_name]
)

In [None]:
# Merge the flags already stored in the database with the newly computed ones
# by taking the element-wise maximum. np.fmax ignores a NaN whenever the other
# operand is a number, so a new flag is no longer lost when the stored value
# is missing (np.maximum propagates NaN), and rows without a new flag keep
# their stored value without the former -2 placeholder.
df_out[is_z_cropped] = np.fmax(
    df[is_z_cropped].values, df_out[is_z_cropped].values
)

In [None]:
# write the updated table to DB_NEW_FILENAME in the database folder;
# index=False keeps the DataFrame index out of the csv
df_out.to_csv(os.path.join(FOLDER, "smFISH-database", DB_NEW_FILENAME), index=False)