In [486]:
# want to see the images inline
# %matplotlib inline

In [487]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy.stats import gamma
from scipy.optimize import curve_fit
from scipy import special

from configparser import ConfigParser

import utils as utils

from tqdm import tqdm_notebook as tqdm

In [488]:
# Load the run settings from config.ini in the current working directory.
# ConfigParser.read() returns the list of files it managed to parse, so the
# cell output doubles as a check that the file was found (an empty list
# means config.ini was missing or unreadable).
config = ConfigParser()
config.read('config.ini')

['config.ini']

In [489]:
# Name of the config section to use for this run; it is looked up under
# [main] and then used as the section for every option read below.
PARAMS = config.get('main', 'PARAMS')

In [490]:
# Pull every per-run setting out of the section selected by PARAMS.
# The options are read in the same order as the names on the left-hand side.
FOLDER, EXPERIMENT, DB_FILENAME, TYPE, COLOR = (
    config.get(PARAMS, option_name)
    for option_name in ('FOLDER', 'EXPERIMENT', 'DB_FILENAME', 'TYPE', 'COLOR')
)

In [491]:
# Constant histogram parameters shared by all graphs.
# NOTE: num_bins is the number of bin *edges*; the histograms therefore have
# num_bins - 1 bins, each binwidth wide.
num_bins = 100
# graph [xmin, xmax]
xmin = -0.2
xmax = 3.2
binwidth = (xmax - xmin)/(num_bins - 1)

# np.linspace guarantees exactly num_bins edges with the last one equal to
# xmax; np.arange with a floating-point step can return one element more or
# less depending on rounding.
bins = np.linspace(xmin, xmax, num_bins)
print('bins: ', bins.shape)

bins:  (100,)


In [492]:
# Column names and filename prefixes used to locate the images of interest.
# type_columns[i] is the database column describing channel i, and
# stain_prefix[0, i] is the prefix prepended to the filename for that channel.
filename_column = 'cropped_image_file'
type_columns = ['c{}_type'.format(channel) for channel in range(3)]
stain_prefix = np.array([['C{}-'.format(channel) for channel in range(5)]])

In [493]:
# Load the image database (a comma-separated file) that lists the images
# available for processing.
df_path = os.path.join(FOLDER, 'smFISH-database', DB_FILENAME)
df = pd.read_csv(df_path, sep=',')

In [494]:
# TODO: maybe it is necessary to fill in other values here, too
# Replace missing entries with '' in the columns that are used below, so the
# string operations on them never see NaN.
# The result is assigned back to df explicitly: calling
# df[col].fillna(..., inplace=True) operates on a temporary Series, which
# warns in pandas 2.x and leaves df unchanged under Copy-on-Write.
text_columns = [filename_column] + type_columns
df[text_columns] = df[text_columns].fillna('')

In [495]:
# Keep only the rows whose image filename starts with the EXPERIMENT prefix.
# The vectorised string accessor replaces a row-wise lambda; na=False makes
# rows with a missing filename count as non-matching instead of raising.
dff = df[df[filename_column].str.startswith(EXPERIMENT, na=False)]

In [496]:
# Find every (row, channel) cell whose stain type equals TYPE. A direct
# comparison is used instead of DataFrame.applymap, which is deprecated in
# recent pandas releases.
row, col = np.where((dff[type_columns] == TYPE).values)
n_samples = dff.shape[0]
# stain_prefix has a single row, so the channel index selects the prefix
# directly; there is no need to tile it once per sample first.
new_prefix = stain_prefix[0, col]
new_filename = dff[filename_column].values[row]
# b_[:-4] drops the last four characters of the filename -- presumably a
# three-letter extension such as ".tif"; confirm against the database.
full_filenames = ["{}{}".format(a_, b_[:-4]) for a_, b_ in zip(new_prefix, new_filename)]

In [497]:
# Turn every selected file name into the path of its CSV under FOLDER/csv-2,
# ticking the progress bar once per name.
dataset = []
with tqdm(total=len(full_filenames)) as pbar:
    for base_name in full_filenames:
        dataset.append(os.path.join(FOLDER, "csv-2", base_name + ".csv"))
        pbar.update(1)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [498]:
# center_path = os.path.join(FOLDER, "centers", "all-centers.csv")

In [499]:
# have to perform this step multiple times and choose the best one
# perform n_fits with different initial parameters
# n_fits = 10

# For every CSV in `dataset`: histogram the values, fit a gamma distribution
# to them, draw histogram + fitted curve + peak marker, and save the figure as
# a PDF under FOLDER/histograms-2/TYPE. Files that do not exist, fits that
# produce NaNs and fits that raise are skipped.

# peak position of the fitted curve for every saved figure, keyed by its title
center_set = {}

print(f'Processing: {len(dataset)} files')

# all figures of this run go into the same per-TYPE folder
folder_path = os.path.join(FOLDER, "histograms-2", TYPE)

# actual plotting
pbar = tqdm(total=len(dataset))
for d in dataset:
    pbar.update(1)
    if not os.path.exists(d):
        continue

    fig = None
    try:
        # create the canvas
        fig = plt.figure(figsize=(8, 5))
        title = utils.create_title(d, name_id=-1)
        fig.suptitle(title + " / " + TYPE)

        # load the values to histogram (any scaling happens inside
        # utils.load_data -- not visible from this notebook)
        I = utils.load_data(d, skiprows_=0)

        # histogram on the shared bin edges; values outside [xmin, xmax]
        # are not counted
        binned_values, real_bins = np.histogram(I, bins)

        # gamma fit; loc and scale are only starting guesses for the optimiser
        use_median = np.median(I)
        fit_alpha, fit_loc, fit_beta = gamma.fit(I,
                                                 loc=use_median/2,
                                                 scale=1/np.max(binned_values),
                                                )
        # normalization factor: area of the histogram, so that the fitted
        # density is drawn on the same scale as the raw counts
        factor = np.sum(binned_values*np.diff(real_bins))
        yhat = factor*gamma.pdf(real_bins,
                                fit_alpha,
                                fit_loc,
                                fit_beta,
                               )
        if np.any(np.isnan(yhat)):
            # unusable fit: skip this file (the figure is closed in `finally`)
            continue

        # bin edge at which the fitted curve is highest
        peak = real_bins[np.argmax(yhat)]

        # smooth version of the fitted curve for display
        x = np.linspace(xmin, xmax, 1000)
        y = factor*gamma.pdf(x,
                             fit_alpha,
                             fit_loc,
                             fit_beta)
        plt.hist(I,
                 bins=bins,
                 color=COLOR,
                 label=TYPE,
                 density=False,
                )
        plt.plot(x,
                 y,
                 linewidth=5,
                 color='#66A5AD',
                )
        # vertical line for center
        plt.axvline(x=peak,
                    linestyle="--",
                    linewidth=5,
                    color='#66A5AD',
                   )

        # yhat is evaluated on the bin edges, so the last value is dropped to
        # match the number of bins; error[0] / error[1] are shown as L1 / L2
        error = utils.fitter_meter(binned_values, yhat[:-1])

        # reasonable adjustments to make the data look nicer
        plt.xlabel('intensity')
        plt.ylabel('# spots')

        info_text = "\n".join([
            "Total: " + str(I.shape[0]),
            "Peak: " + '%.2f' % peak,
            "L1: " + '%.2f' % error[0],
            "L2: " + '%.2f' % error[1],
        ])

        # place the info box near the top-right corner of the histogram
        ymax = np.max(binned_values)
        plt.text(xmax - (xmax - xmin)*0.15, ymax*0.8, info_text, color='black', bbox=dict(facecolor='white', alpha=1))
        plt.xlim([xmin, xmax])

        # save the peak values for further
        center_set[title] = peak

        os.makedirs(folder_path, exist_ok=True)

        plt.savefig(os.path.join(folder_path, title + ".pdf"))
        # plt.show()
        # break
    except (RuntimeError, TypeError, ValueError) as exc:
        # report which file failed and why instead of hiding the error
        print(f"Skipping {d}: {type(exc).__name__}: {exc}")
    finally:
        # always release the figure, including on failure, so that a long run
        # does not accumulate open figures
        if fig is not None:
            plt.close(fig)
pbar.close()
# df_center = pd.DataFrame(list(center_set.items()), columns=['filename', 'center'])
# if (os.path.exists(center_path)):
#     df_center.to_csv(center_path, index=False, header=False, encoding='utf-8', mode = 'a')
# else:
#     df_center.to_csv(center_path, index=False, header=True, encoding='utf-8', mode = 'w' )
#     print (df_center)
    

Processing: 0 files


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [500]:
# Show the TYPE value (read from the config) that this run matched against
# the type columns.
TYPE

'wdr-5.2_ex'

In [501]:
# Show which config section supplied the parameters for this run.
PARAMS

'cb428'