In [None]:
# This script fits parabolas for the mdh1 and dpy23 datasets
# and finds the mean value that can be applied to the intron channel

In [None]:
# imports
import matplotlib.pyplot as plt
import numpy as np
import csv as csv

# general 
import os
import glob
from functools import reduce
import re
import sys as sys

from numpy import polyval

import pandas as pd

%matplotlib inline

In [None]:
def load_data(file_path):
    """Load one tab-separated data set and return its ``(z, I)`` columns.

    Parameters
    ----------
    file_path : str or file-like
        Passed straight to ``np.loadtxt``.

    Returns
    -------
    tuple of np.ndarray
        ``(z, I)``: ``z`` is the second-to-last column and ``I`` the last
        column of the table. Both are ``np.array([0])`` when the table has
        no columns (e.g. there is no data in the file).
    """
    table = np.loadtxt(file_path, delimiter='\t', skiprows=0)

    # a single-row (or empty) file is returned 1-D; promote it to one row
    if table.ndim < 2:
        table = table[np.newaxis, :]

    # nothing to read: hand back placeholder values
    if table.shape[1] == 0:
        return (np.array([0]), np.array([0]))

    return (table[:, -2], table[:, -1])

In [None]:
def g_x(a, x):
    """Evaluate the polynomial with coefficients ``a`` at ``x``.

    ``a`` is ordered from the highest power down to the constant term,
    which is the order ``numpy.polyval`` expects.
    """
    values = polyval(a, x)
    return values

In [None]:
# general setup for pretty images

# value ranges (presumably intensity and z-slice limits -- confirm before use)
Ia_min = 0
Ia_max = 40000
za_min = 0
za_max = 55

experiment_name = 'SEA-12'
# TODO: make this one a global argument
folder_path = '/Volumes/Funky-space/Klim/2018-08-10-SEA-12/'

In [None]:
# possible labels used to pick rows from the database
stain = [
    'DPY-23_EX',
    'WDR-5.2',
    'MDH-1',
]
stage = 'E'   # only embryos
comment = ''  # only empty ones

In [None]:
# important indices: database column names, one entry per channel
stain_columns = ['C{}_stain'.format(channel) for channel in range(5)]
type_columns = ['C{}_type'.format(channel) for channel in range(5)]
# file-name prefixes are numbered from 1; kept as a single-row 2-D array
stain_prefix = np.array([['C{}-'.format(channel) for channel in range(1, 6)]])
ext = '.csv'
filename_column = 'new filename'

In [None]:
# read the database that lists the images we want to process
database_file = folder_path + "smFISH-database/" + experiment_name + "-Table 1.csv"
df = pd.read_csv(database_file, sep=',', na_values=[''])
df.head()

In [None]:
# this is general
# keep only *good* (no comment) and *embryo* (matching stage) rows
is_wanted_stage = (df['stage'] == stage).tolist()
has_no_comment = df['comment'].isnull().tolist()
good_indices = np.logical_and(is_wanted_stage, has_no_comment)
# number of rows that pass the filter
print(np.sum(good_indices))

In [None]:
# choose necessary stains: file names of the good images matching stain[0]
# boolean table (one column per channel) restricted to the rows kept by good_indices
# `object` replaces the `np.object` alias, which newer NumPy releases no longer provide
df_good = (df[type_columns].astype(object) == stain[0]).loc[good_indices, :]
# positions (row within the filtered table, channel column) of every match
row, col = np.where(df_good)
n_samples = df.shape[0]
# the channel column selects the 'Cn-' prefix for each match
new_prefix = np.repeat(stain_prefix, n_samples, axis=0)[row, col]
# `.values` replaces `Series.as_matrix()`, which newer pandas releases no longer provide
new_filename = df[filename_column].loc[good_indices].values[row]
dataset1 = ["{}{}".format(a_, b_) for a_, b_ in zip(new_prefix, new_filename)]

In [None]:
# choose necessary stains: file names of the good images matching stain[1]
# boolean table (one column per channel) restricted to the rows kept by good_indices
# `object` replaces the `np.object` alias, which newer NumPy releases no longer provide
df_good = (df[type_columns].astype(object) == stain[1]).loc[good_indices, :]
# positions (row within the filtered table, channel column) of every match
row, col = np.where(df_good)
n_samples = df.shape[0]
# the channel column selects the 'Cn-' prefix for each match
new_prefix = np.repeat(stain_prefix, n_samples, axis=0)[row, col]
# `.values` replaces `Series.as_matrix()`, which newer pandas releases no longer provide
new_filename = df[filename_column].loc[good_indices].values[row]
dataset2 = ["{}{}".format(a_, b_) for a_, b_ in zip(new_prefix, new_filename)]

In [None]:
# choose necessary stains: file names of the good images matching stain[2]
# boolean table (one column per channel) restricted to the rows kept by good_indices
# `object` replaces the `np.object` alias, which newer NumPy releases no longer provide
df_good = (df[type_columns].astype(object) == stain[2]).loc[good_indices, :]
# positions (row within the filtered table, channel column) of every match
row, col = np.where(df_good)
n_samples = df.shape[0]
# the channel column selects the 'Cn-' prefix for each match
new_prefix = np.repeat(stain_prefix, n_samples, axis=0)[row, col]
# `.values` replaces `Series.as_matrix()`, which newer pandas releases no longer provide
new_filename = df[filename_column].loc[good_indices].values[row]
dataset3 = ["{}{}".format(a_, b_) for a_, b_ in zip(new_prefix, new_filename)]

In [None]:
def create_title(path, name_id=-1):
    """Build a title from one component of a '/'-separated path.

    Parameters
    ----------
    path : str
        Path whose components are separated by "/".
    name_id : int, optional
        Index of the component to use; the default ``-1`` is the last one.

    Returns
    -------
    str
        The chosen component without its last four characters, e.g.
        ``"/Users/kkolyva/Desktop/n2/N2-results/all/C1-N2_9.csv"`` gives
        ``"C1-N2_9"``.
    """
    components = path.split("/")
    # drop the last four characters (a '.csv'-sized extension)
    return components[name_id][:-4]

In [None]:
# per-stain lists of image names
# dataset1 was selected with stain[0] (DPY-23_EX), dataset3 with stain[2] (MDH-1);
# the data lists and the parameter lists hold the same names in the same order
dataset_dpy23 = list(dataset1)
p_dataset_dpy23 = list(dataset1)

dataset_mdh1 = list(dataset3)
p_dataset_mdh1 = list(dataset3)

In [None]:
def load_params(file_path):
    """Load the fitter parameters stored in a tab-separated file.

    Parameters
    ----------
    file_path : str or file-like
        Passed straight to ``np.loadtxt``.

    Returns
    -------
    np.ndarray
        The array exactly as ``np.loadtxt`` returns it.
    """
    return np.loadtxt(file_path, delimiter='\t', skiprows=0)

In [None]:
# legend labels for the plotted curves
# (alternative kept for reference: ['DPY-23_EX', 'WDR-5.2', 'MDH-1'])
labels = [
    'DPY-23_EX',
    'MDH-1',
    'DPY-23_IN',
]

In [None]:
def norm_z(x, y):
    """Normalize a z-correction curve by the extent of ``x``.

    ``x`` is shifted to start at 0 and divided by ``max(x) - min(x)``;
    ``y`` is divided by that same extent (it is not shifted).
    There is no guard against a constant ``x`` (zero extent).

    Returns
    -------
    tuple
        ``(x_normed, y_normed)``
    """
    x_low = np.min(x)
    extent = np.max(x) - x_low
    return ((x - x_low) / extent, y / extent)

In [None]:
def shift_z(y, top = 0.5):
    """Shift ``y`` vertically so that its maximum equals ``top``.

    With the default, the largest intensity value ends up at 0.5.
    """
    peak = np.max(y)
    return y - peak + top

In [None]:
def find_min_max(x1, x2):
    """Return the bounds ``(lb, rb)`` of the range shared by ``x1`` and ``x2``.

    ``lb`` is the larger of the two minima and ``rb`` the smaller of the
    two maxima.
    """
    lower_bound = np.maximum(np.min(x1), np.min(x2))
    upper_bound = np.minimum(np.max(x1), np.max(x2))
    return lower_bound, upper_bound

def find_min_max_idx(x1, x2):
    """Return the indices of the shared-range bounds of ``x1`` and ``x2``.

    The left index is the argmin of whichever array has the larger minimum
    (``x2`` on ties); the right index is the argmax of whichever array has
    the smaller maximum (``x1`` on ties). Each index refers to the array
    it was taken from, and the return value does not say which one.

    Returns
    -------
    tuple
        ``(idx_lb, idx_rb)``
    """
    # left bound: the larger of the two minima
    idx_lb = np.argmin(x1) if np.min(x1) > np.min(x2) else np.argmin(x2)
    # right bound: the smaller of the two maxima
    idx_rb = np.argmax(x2) if np.max(x1) > np.max(x2) else np.argmax(x1)
    return idx_lb, idx_rb

In [None]:
def compute_average_curve(coeff1, coeff2):
    """Return the element-wise mean of two coefficient arrays.

    Averaging the coefficients gives the coefficients of the average curve.
    """
    total = coeff1 + coeff2
    return total / 2
# quick demonstration
compute_average_curve(np.array([1,2,3]), np.array([2,4,5]))

In [None]:
# sanity check: print the z-range shared by the first mdh1/dpy23 image pair
# the image names are chosen here so the cell also runs on a fresh kernel
if len(dataset_mdh1) > 0:
    img_name_mdh1 = dataset_mdh1[0]
    # the dpy23 name is the mdh1 name with its 3-character prefix swapped for 'C1-'
    img_name_dpy23 = "C1-" + img_name_mdh1[3:]

    s_dataset_mdh1 = folder_path + "csv-before/" + img_name_mdh1 + ".csv"
    s_dataset_dpy23 = folder_path + "csv-before/" + img_name_dpy23 + ".csv"

    # only load when both files are actually on disk
    if os.path.exists(s_dataset_mdh1) and os.path.exists(s_dataset_dpy23):
        z_dpy23, I_dpy23 = load_data(s_dataset_dpy23)
        z_mdh1, I_mdh1 = load_data(s_dataset_mdh1)

        print(find_min_max(z_dpy23, z_mdh1))

In [None]:
# find the overall z-range (z_min, z_max) covered by all overlapping image pairs
z_min = sys.float_info.max
z_max = -sys.float_info.max

for img_name_mdh1 in dataset_mdh1:
    # the dpy23 name is the mdh1 name with its 3-character prefix swapped for 'C1-'
    img_name_dpy23 = "C1-" + img_name_mdh1[3:]
    s_dataset_mdh1 = folder_path + "csv-before/" + img_name_mdh1 + ".csv"
    s_dataset_dpy23 = folder_path + "csv-before/" + img_name_dpy23 + ".csv"

    # a pair is only usable when both data files exist
    if not (os.path.exists(s_dataset_mdh1) and os.path.exists(s_dataset_dpy23)):
        continue

    try:
        z_dpy23, I_dpy23 = load_data(s_dataset_dpy23)
        z_mdh1, I_mdh1 = load_data(s_dataset_mdh1)

        # widen the running range with both z columns
        z_max = max(z_max, np.max(z_dpy23), np.max(z_mdh1))
        z_min = min(z_min, np.min(z_dpy23), np.min(z_mdh1))
    except (ValueError, StopIteration, RuntimeError) as err:
        # keep going, but say which pair was skipped and why
        print("Caught the error for you ({}): {}".format(img_name_mdh1, err))

In [None]:
# actual plotting: for every image pair draw the dpy23 and mdh1 curves together
# with their average (the intron curve) and save the figure as a PDF
for img_name_mdh1 in dataset_mdh1:
    # the dpy23 name is the mdh1 name with its 3-character prefix swapped for 'C1-'
    img_name_dpy23 = "C1-" + img_name_mdh1[3:]
    s_dataset_mdh1 = folder_path + "csv-before/" + img_name_mdh1 + ".csv"
    ps_dataset_mdh1 = folder_path + "csv-parameters/" + img_name_mdh1 + ".csv"
    s_dataset_dpy23 = folder_path + "csv-before/" + img_name_dpy23 + ".csv"
    ps_dataset_dpy23 = folder_path + "csv-parameters/" + img_name_dpy23 + ".csv"

    # skip the pair when any data or parameter file is missing
    required_files = [s_dataset_mdh1, s_dataset_dpy23, ps_dataset_mdh1, ps_dataset_dpy23]
    if not all(os.path.exists(path) for path in required_files):
        continue

    fig = None
    try:
        print(s_dataset_mdh1)
        print(s_dataset_dpy23)

        z_dpy23, I_dpy23 = load_data(s_dataset_dpy23)
        z_mdh1, I_mdh1 = load_data(s_dataset_mdh1)

        # load the parameters from file
        coeff_dpy23 = load_params(ps_dataset_dpy23)
        coeff_mdh1 = load_params(ps_dataset_mdh1)

        # coefficients are reversed because polyval wants the highest power first
        # (presumably the files store the lowest power first -- confirm)
        x_dpy23_fit = np.linspace(np.min(z_dpy23), np.max(z_dpy23))
        y_dpy23_fit = g_x(coeff_dpy23[::-1], x_dpy23_fit)

        x_mdh1_fit = np.linspace(np.min(z_mdh1), np.max(z_mdh1))
        y_mdh1_fit = g_x(coeff_mdh1[::-1], x_mdh1_fit)

        # construct the intron curve on the z-range shared by both images
        coeff_intron = compute_average_curve(coeff_dpy23, coeff_mdh1)
        z_intron_min, z_intron_max = find_min_max(z_dpy23, z_mdh1)
        x_intron_fit = np.linspace(z_intron_min, z_intron_max)
        y_intron_fit = g_x(coeff_intron[::-1], x_intron_fit)

        # feedback
        fig, ax = plt.subplots(figsize=(8, 5))
        # title = file name without extension and without the 3-character channel prefix;
        # the last path component is used so the result does not depend on folder depth
        title = create_title(s_dataset_dpy23)[3:]
        ax.set_title(title)

        ax.plot(x_dpy23_fit, y_dpy23_fit, linewidth=5, label=labels[0])
        ax.plot(x_mdh1_fit, y_mdh1_fit, linewidth=5, label=labels[1])
        ax.plot(x_intron_fit, y_intron_fit, linewidth=5, label=labels[2])

        # same axes for every figure so the plots are comparable
        x_limits = [z_min, z_max]
        y_limits = [-0.05, 1.05]
        ax.set_xlim(x_limits)
        ax.set_ylim(y_limits)

        ax.set_xlabel('z-slice')
        ax.set_ylabel('intensity')

        # coefficient summary box in the lower-left corner
        info_text_dpy23 = "Coeff (dpy23): " + str(['%.2e' % elem for elem in coeff_dpy23])
        info_text_mdh1 = "Coeff (mdh1): " + str(['%.2e' % elem for elem in coeff_mdh1])
        info_text = info_text_dpy23 + "\n" + info_text_mdh1
        text_x = x_limits[0] + (x_limits[1] - x_limits[0])*0.02
        text_y = y_limits[0] + (y_limits[1] - y_limits[0])*0.04
        ax.text(text_x, text_y, info_text, color='black', bbox=dict(facecolor='white', alpha=1))

        ax.legend(loc = 'upper right')
        full_path_to_use = folder_path + "img-overlapped/"
        if not os.path.exists(full_path_to_use):
            os.makedirs(full_path_to_use)
        fig.savefig(full_path_to_use + title + ".pdf")

        plt.show()
    except (RuntimeError, TypeError, ValueError, StopIteration) as err:
        # keep going, but say which pair failed and why
        print("There was an exception for {}: {}".format(img_name_mdh1, err))
    finally:
        # release the figure so a long run does not accumulate open figures
        if fig is not None:
            plt.close(fig)