In [169]:
import pandas as pd
import numpy as np
import re
import os
from glob import glob
# Root folder that the pipeline walks with os.walk() to find result folders.
PATH = '../Results_All'
# Glob pattern applied inside every folder found under PATH.
EXT = "*.csv"

# final pipeline for all features in all results

In [168]:
# Bare-name alias for NaN. No code visible in this notebook reads it —
# TODO confirm whether it is still needed.
nan = np.nan
def using_clump(a):
    """Split an array into its contiguous runs of non-NaN entries.

    An entry counts as missing when its string form equals ``str(np.nan)``.
    Returns a list with one slice of ``a`` per run, in their original order.
    """
    missing_marker = str(np.nan)
    is_missing = a.astype(str) == missing_marker
    masked = np.ma.masked_where(is_missing, a)
    runs = np.ma.clump_unmasked(masked)
    return [a[run] for run in runs]
def centroids_zero_center(tracks_arr):
    """Shift every track so that its first entry becomes the origin.

    Each element of ``tracks_arr`` has its own first entry subtracted from it
    in place; the same (now modified) container is returned.
    """
    for track_idx in range(len(tracks_arr)):
        track = tracks_arr[track_idx]
        track -= track[0]
    return tracks_arr
# Verbose-mode regex for one numeric literal: an optional sign, then either
# "digits.digits" / ".digits" or "digits" / "digits.", then an optional
# exponent. Written as a raw string so the backslashes reach `re` untouched
# instead of being read as (invalid) Python string escapes.
numeric_const_pattern = r'[-+]? (?: (?: \d* \. \d+ ) | (?: \d+ \.? ) )(?: [Ee] [+-]? \d+ ) ?'
rx = re.compile(numeric_const_pattern, re.VERBOSE)
def str_array_to_float(arr_of_arr_of_str):
    """Parse tracks of numeric strings into tracks of float vectors.

    Every string is scanned with the module-level ``rx`` pattern. A time step
    is kept only when at least two numbers were found in it, and a track is
    kept only when at least one of its time steps survived. The result is
    returned as an object-dtype array.
    """
    parsed_tracks = []
    for raw_track in arr_of_arr_of_str:
        steps = [[float(tok) for tok in rx.findall(cell)] for cell in raw_track]
        steps = [vec for vec in steps if len(vec) >= 2]
        if steps:
            parsed_tracks.append(steps)
    return np.array(parsed_tracks, dtype=object)
def get_lens(tracks):
    """Count how many tracks there are of each length.

    Returns the ``value_counts()`` Series of the track lengths.
    """
    lengths = list(map(len, tracks))
    return pd.Series(lengths).value_counts()
def get_feature_index(feature_type,features):
    """Return the position of the first entry of ``features`` found inside ``feature_type``.

    Falls back to 0 when no entry matches, so "no match" cannot be told apart
    from a match at position 0.
    """
    matches = (pos for pos, name in enumerate(features) if name in feature_type)
    return next(matches, 0)
def cut_feture_vecs_and_preprocess(tracks,feature_type,ts_len,cut_longer_ts=False):
    """Select tracks of a fixed length and zero-center them if they are centroids.

    Parameters
    ----------
    tracks : iterable of sequences
        One sequence of per-time-step feature vectors per track.
    feature_type : str
        Label of the feature being processed. When it contains ``'centroids'``
        the selected tracks are passed through ``centroids_zero_center``.
    ts_len : int
        Required number of time steps.
    cut_longer_ts : bool, default False
        ``False``: keep only tracks whose length is exactly ``ts_len``.
        ``True``: keep tracks with at least ``ts_len`` steps, truncated to
        their first ``ts_len`` steps.

    Returns
    -------
    numpy.ndarray
        The selected (and possibly zero-centered) tracks.
    """
    # A single result name is used for both branches; the exact-length branch
    # previously bound a different name and failed with UnboundLocalError.
    if cut_longer_ts:
        selected = [trk[:ts_len] for trk in tracks if len(trk) >= ts_len]
    else:
        selected = [trk for trk in tracks if len(trk) == ts_len]
    tracks_cut = np.array(selected)
    if 'centroids' in feature_type:
        tracks_cut = centroids_zero_center(tracks_cut)
    return tracks_cut
def save(tracks_final,well_name):
    """Write ``tracks_final`` to ``../npy_files/<well_name>.npy`` with ``np.save``."""
    target = '../npy_files/'+well_name+'.npy'
    np.save(target, tracks_final)

In [201]:
def from_results_folder_PATH_to_arrays(features=['centroids','morphologies','embeddings'],ts_len=10,cut_longer_ts=False,save=False,name_ext=""):
    """Build one feature array and one label array from every folder under PATH.

    Every folder found by walking the module-level ``PATH`` is searched for
    files matching ``EXT``. Each selected CSV is read, every row is split into
    NaN-free runs (tracks), the strings are parsed to floats, the tracks are
    cut to ``ts_len`` and the per-feature arrays of a folder are stacked along
    the last axis.

    Parameters
    ----------
    features : list of str
        Substrings looked for in the last ``'_'``-separated piece of each file
        path; files matching none of them are ignored. Within a folder the
        features are stacked in the order of this list (ties broken by file
        path), so the layout of the last axis does not depend on the arbitrary
        order in which ``glob`` returns files.
    ts_len : int
        Forwarded to ``cut_feture_vecs_and_preprocess``.
    cut_longer_ts : bool
        Forwarded to ``cut_feture_vecs_and_preprocess``.
    save : bool
        When true, the results are written to
        ``../npy_files/features<name_ext>.npy`` and
        ``../npy_files/labels<name_ext>.npy``.
    name_ext : str
        Suffix inserted into the two output file names.

    Returns
    -------
    tuple
        ``(results_tracks, results_labels)``: the vertically stacked per-folder
        arrays and one well label per stacked track.
    """
    all_tracks = []
    wells = []
    all_paths = [path for path, subdir, files in os.walk(PATH)]
    for path in all_paths:
        all_files = glob(os.path.join(path, EXT))
        if(len(all_files)<1):
            continue
        ranked_vecs = []
        for file in all_files:
            # NOTE(review): the whole path is split, so an underscore inside
            # PATH itself shifts which piece lands at index 1 — confirm the
            # well label is still the intended one for the current PATH.
            file_name = file.split('_')
            well_name = file_name[1]
            feature_type = file_name[-1]
            feature_rank = next((idx for idx,fet in enumerate(features) if fet in feature_type), None)
            if feature_rank is None:
                continue
            df_str = pd.read_csv(file,index_col=[0])
            splitted = []
            for cell_id, series in df_str.iterrows():
                # Every NaN-free run of a row becomes its own track.
                splitted.extend(using_clump(np.array(series)))
            tracks_str = np.array(splitted,dtype=object)
            tracks = str_array_to_float(tracks_str)
            tracks_cut = cut_feture_vecs_and_preprocess(tracks,feature_type,ts_len,cut_longer_ts)
            ranked_vecs.append((feature_rank, file, tracks_cut))
        if not ranked_vecs:
            # No file of this folder matched `features`; np.dstack would raise
            # on an empty list, so skip the folder instead.
            continue
        # Sort on (rank, path) only, so the arrays themselves are never compared.
        ranked_vecs.sort(key=lambda item: item[:2])
        feature_vecs_cut = np.dstack([vecs for _, _, vecs in ranked_vecs])
        if(len(feature_vecs_cut[0])>0):
            print(feature_vecs_cut.shape)
            all_tracks.append(feature_vecs_cut)
            wells.append(well_name)
    # One label per track, repeated from the folder's well name.
    labels = [np.repeat(well_name,len(tracks_vec)) for well_name,tracks_vec in zip(wells,all_tracks)]

    results_tracks = np.vstack(all_tracks)
    results_labels = np.concatenate(labels)
    if(save):
        np.save('../npy_files/features'+name_ext+'.npy',results_tracks)
        np.save('../npy_files/labels'+name_ext+'.npy',results_labels)
    return results_tracks,results_labels

In [206]:
# Run the pipeline on the centroid and morphology files only, with ts_len=12
# and cut_longer_ts=True, and save the results under the "_All12" suffix.
x,y = from_results_folder_PATH_to_arrays(features=['centroids','morphologies'],ts_len=12,cut_longer_ts=True,save=True,name_ext="_All12")

(47, 12, 5)
(47, 12, 5)
(31, 12, 5)
(43, 12, 5)
(16, 12, 5)
(67, 12, 5)
(21, 12, 5)
(34, 12, 5)
(50, 12, 5)
(93, 12, 5)
(161, 12, 5)
(47, 12, 5)
(211, 12, 5)
(44, 12, 5)
(51, 12, 5)
(22, 12, 5)
(206, 12, 5)
(22, 12, 5)
(103, 12, 5)
(162, 12, 5)
(16, 12, 5)
(91, 12, 5)
(14, 12, 5)
(99, 12, 5)
(44, 12, 5)
(180, 12, 5)
(31, 12, 5)
(113, 12, 5)
(20, 12, 5)
(135, 12, 5)
(27, 12, 5)
(61, 12, 5)
(295, 12, 5)
(63, 12, 5)
(2, 12, 5)
(107, 12, 5)
(21, 12, 5)
(53, 12, 5)
(93, 12, 5)
(17, 12, 5)
(293, 12, 5)
(55, 12, 5)
(5, 12, 5)
(25, 12, 5)
(29, 12, 5)
(10, 12, 5)
(26, 12, 5)
(38, 12, 5)
(78, 12, 5)
(9, 12, 5)
(79, 12, 5)
(183, 12, 5)
(34, 12, 5)
(39, 12, 5)
(94, 12, 5)
(14, 12, 5)
(186, 12, 5)
(9, 12, 5)
(52, 12, 5)
(105, 12, 5)
(69, 12, 5)
(43, 12, 5)
(58, 12, 5)
(68, 12, 5)
(33, 12, 5)
(80, 12, 5)
(58, 12, 5)
(5, 12, 5)
(115, 12, 5)
(17, 12, 5)
(48, 12, 5)
(178, 12, 5)
(12, 12, 5)
(20, 12, 5)
(56, 12, 5)
(6, 12, 5)
(15, 12, 5)
(56, 12, 5)
(4, 12, 5)
(4, 12, 5)
(18, 12, 5)
(37, 12, 5)
(75, 12,

In [207]:
# Shape of the stacked feature array returned by the pipeline call.
x.shape

(14330, 12, 5)

## Tests and single features

In [3]:
# Bare-name alias for NaN. No code visible in this notebook reads it —
# TODO confirm whether it is still needed.
nan = np.nan
def using_clump(a):
    """Return the contiguous non-NaN stretches of ``a`` as a list of slices of ``a``.

    Entries whose string form equals ``str(np.nan)`` are treated as gaps that
    separate one stretch from the next.
    """
    gap_mask = a.astype(str) == str(np.nan)
    with_gaps_masked = np.ma.masked_where(gap_mask, a)
    stretches = []
    for stretch in np.ma.clump_unmasked(with_gaps_masked):
        stretches.append(a[stretch])
    return stretches
def normalize_centroids_in_tracks(tracks_arr):
    """Subtract each track's first entry from the whole track, in place.

    The input container is modified and returned, so every track starts at
    zero afterwards.
    """
    for position in range(len(tracks_arr)):
        current_track = tracks_arr[position]
        current_track -= current_track[0]
    return tracks_arr
def str_array_to_float(arr_of_arr_of_str):
    """Parse tracks of numeric strings into tracks of float vectors.

    Only unsigned decimals with digits on both sides of the dot are picked up
    (pattern ``\\d+\\.\\d+``); signs, exponents and plain integers are ignored.
    A time step is kept only when at least two numbers were found in it, and a
    track only when at least one time step survived.

    Returns
    -------
    numpy.ndarray
        A regular numeric array when all tracks have the same shape, otherwise
        an object-dtype array with one list of float vectors per track.
    """
    # Raw string: the backslashes must reach `re` untouched.
    decimal_rx = re.compile(r"\d+\.\d+")
    final_mat = []
    for raw_track in arr_of_arr_of_str:
        float_ts = []
        for cell in raw_track:
            float_arr = [float(tok) for tok in decimal_rx.findall(cell)]
            if len(float_arr) >= 2:
                float_ts.append(float_arr)
        if float_ts:
            final_mat.append(float_ts)
    try:
        return np.array(final_mat)
    except ValueError:
        # Tracks of different lengths form a ragged nest, which recent NumPy
        # refuses to convert unless an object array is requested explicitly.
        return np.array(final_mat, dtype=object)
def get_lens(tracks):
    """Tabulate track lengths: the ``value_counts()`` of ``len(track)`` over all tracks."""
    track_lengths = []
    for track in tracks:
        track_lengths.append(len(track))
    length_series = pd.Series(track_lengths)
    return length_series.value_counts()
def cut_tracks_and_save(tracks,well_name,ts_len,cut_longer_ts=False,save=True):
    """Select fixed-length tracks, zero them on their first entry and optionally save.

    With ``cut_longer_ts`` false only tracks of exactly ``ts_len`` steps are
    kept; otherwise tracks with at least ``ts_len`` steps are kept and
    truncated to their first ``ts_len`` steps. The selection is passed through
    ``normalize_centroids_in_tracks`` and, when ``save`` is true, written to
    ``../npy_files/<well_name>.npy``. The normalized array is returned.
    """
    if cut_longer_ts:
        kept = [trk[:ts_len] for trk in tracks if len(trk) >= ts_len]
    else:
        kept = [trk for trk in tracks if len(trk) == ts_len]
    track_final_norm = normalize_centroids_in_tracks(np.array(kept))
    if save:
        np.save('../npy_files/'+well_name+'.npy',track_final_norm)
    return track_final_norm

In [15]:
def from_csv_path_to_npy(csv_path,well_name,ts_len,cut_longer_ts=False):
    """Convert one tracks CSV into a saved ``.npy`` file of fixed-length tracks.

    The CSV is read with its first column as index, every row is split into
    NaN-free runs (tracks), the strings are parsed to floats and the result is
    handed to ``cut_tracks_and_save``.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    well_name : str
        Forwarded to ``cut_tracks_and_save``, which uses it for the output
        file name.
    ts_len : int
        Track length to select; forwarded to ``cut_tracks_and_save``.
    cut_longer_ts : bool, default False
        Forwarded to ``cut_tracks_and_save``.
    """
    df_str = pd.read_csv(csv_path,index_col=[0])
    splitted = []
    for cell_id, series in df_str.iterrows():
        # Runs usually differ in length, so they are collected one by one
        # instead of being packed into an intermediate array first.
        splitted.extend(using_clump(np.array(series)))
    # dtype=object keeps runs of different lengths from raising in NumPy.
    tracks_str = np.array(splitted,dtype=object)
    print("tracks_str shape: ",tracks_str.shape)
    tracks = str_array_to_float(tracks_str)
    # Honour the caller's settings; they used to be replaced by a fixed
    # (10, False) regardless of the arguments.
    cut_tracks_and_save(tracks,well_name,ts_len,cut_longer_ts)

In [5]:
def from_results_folder_PATH_to_arrays():
    """Load the tracks of every CSV matching EXT under PATH, one entry per file.

    Reads the module-level ``PATH`` and ``EXT``. For each matching file the
    rows are split into NaN-free runs (tracks) and parsed to floats.

    NOTE(review): this reuses the name of the parameterised pipeline function
    defined earlier in the notebook, so running this cell replaces it.

    Returns
    -------
    tuple
        ``(all_tracks, wells)``: the parsed tracks of each file and the second
        ``'_'``-separated piece of each file path, in matching order.
    """
    all_tracks = []
    wells = []
    all_csv_files = [file
                 for path, subdir, files in os.walk(PATH)
                 for file in glob(os.path.join(path, EXT))]
    for file in all_csv_files:
        well_name = file.split('_')[1]
        df_str = pd.read_csv(file,index_col=[0])
        splitted = []
        for cell_id, series in df_str.iterrows():
            # Runs usually differ in length, so they are collected one by one
            # instead of being packed into an intermediate array first.
            splitted.extend(using_clump(np.array(series)))
        # dtype=object keeps runs of different lengths from raising in NumPy.
        tracks_str = np.array(splitted,dtype=object)
        print("tracks_str shape: ",tracks_str.shape)
        tracks = str_array_to_float(tracks_str)
        all_tracks.append(tracks)
        wells.append(well_name)
    return all_tracks,wells

### tracks


In [None]:
# Point the zero-argument loader, which reads the PATH and EXT globals, at the
# '../Results' tree and restrict it to files ending in 'centroids.csv'.
PATH = '../Results'
EXT = "*centroids.csv"
all_tracks,wells = from_results_folder_PATH_to_arrays()

In [107]:
# Cut every file's tracks to their first 12 steps (tracks shorter than 12 are
# dropped), then stack them into one array with one well label per track.
# The accumulator is deliberately not called `all`, which would hide the
# builtin for the rest of the notebook session.
cut_tracks_per_well = []
labels = []
for well_name,tracks in zip(wells,all_tracks):
    tracks_norm_cut = cut_tracks_and_save(tracks,well_name,12,True)
    if len(tracks_norm_cut) == 0:
        # An empty result has a different rank and would break np.vstack.
        continue
    cut_tracks_per_well.append(tracks_norm_cut)
    labels.append(np.repeat(well_name,len(tracks_norm_cut)))

results_tracks = np.vstack(cut_tracks_per_well)
results_labels = np.concatenate(labels)


In [108]:
# Number of tracks in the stacked array.
len(results_tracks)

3705

In [109]:
# Persist the stacked tracks and their per-track well labels.
np.save('../npy_files/tracks.npy',results_tracks)
np.save('../npy_files/labels.npy',results_labels)

### morphologies

In [None]:
# Point the zero-argument loader, which reads the PATH and EXT globals, at the
# '../Results' tree and restrict it to files ending in 'morphologies.csv'.
PATH = '../Results'
EXT = "*morphologies.csv"
all_tracks,wells = from_results_folder_PATH_to_arrays()

In [8]:
# Cut every file's tracks to their first 7 steps (tracks shorter than 7 are
# dropped), then stack them into one array with one well label per track.
# The accumulator is deliberately not called `all`, which would hide the
# builtin for the rest of the notebook session.
cut_tracks_per_well = []
labels = []
for well_name,tracks in zip(wells,all_tracks):
    tracks_norm_cut = cut_tracks_and_save(tracks,well_name,7,True)
    if len(tracks_norm_cut) == 0:
        # An empty result has a different rank and would break np.vstack.
        continue
    cut_tracks_per_well.append(tracks_norm_cut)
    labels.append(np.repeat(well_name,len(tracks_norm_cut)))

results_tracks = np.vstack(cut_tracks_per_well)
results_labels = np.concatenate(labels)

In [9]:
# Number of tracks in the stacked array.
len(results_tracks)

8931

In [10]:
# Persist the stacked morphology tracks and their per-track well labels.
np.save('../npy_files/morph.npy',results_tracks) 
np.save('../npy_files/labels_morph.npy',results_labels)

#### file tests

In [None]:
# Inspect the folder layout: list every CSV under PATH, then print the CSV
# files of each folder and record how many each folder holds.
PATH = '../Results_All'
EXT = "*.csv"
# Flat list of every CSV file in every folder under PATH.
all_csv_files = [file
                for path, subdir, files in os.walk(PATH)
                for file in glob(os.path.join(path, EXT))]
                
all_paths = [path for path, subdir, files in os.walk(PATH)]
# lens[i] is the number of CSV files found directly inside all_paths[i].
lens = []
for path in all_paths:
    all_files = [file for file in glob(os.path.join(path, EXT)) ]
    print(all_files) 
    lens.append(len(all_files))
   

In [174]:
# How many folders hold each number of matching CSV files.
pd.Series(lens).value_counts()

3    202
0      1
dtype: int64

In [None]:
# Print the last '_'-separated piece of every CSV path.
for file in all_csv_files:
    file_name = file.split('_')
    print(file_name[-1])

In [None]:
# Print the '_'-separated path pieces at positions 1, 2 and 4, which this
# cell labels as well, view and crop.
for file in all_csv_files:
    file_name = file.split('_')
    well_name = file_name[1]
    view_name = file_name[2]
    crop_name = file_name[4]
    print(f'well:{well_name} , view:{view_name}, crop:{crop_name}')

# old code

### path to a centroids.csv
Comment out the macOS or the Windows file path, depending on the operating system you are using.

In [16]:
# Load one centroids table, using its first column as the index.
# NOTE(review): this is an absolute, user-specific path, so the cell only runs
# on a machine with the same folder layout — consider a path relative to a
# configurable data folder.
df_t = pd.read_csv('/Users/amosavni/university/DeepLearningWorkshop/deepcell_mod/DL-WORKSHOP/results/Results_D2_4_crop_0_start_2_2022-08-25_11-10-58/centroids.csv',index_col=[0])

# strings df to array of array of strings

In [None]:
# Collect every NaN-free run of every row of df_t as its own track.
# Runs usually differ in length, so they are appended one by one instead of
# being packed into an intermediate array, which NumPy rejects when ragged.
splitted = []
id_well_index = []  # not read by any code visible in this notebook
for cell_id, series in df_t.iterrows():
    splitted.extend(using_clump(np.array(series)))

In [None]:
# dtype=object lets runs of different lengths be stored side by side; without
# it recent NumPy raises on such a ragged list.
tracks_str = np.array(splitted, dtype=object)
print(tracks_str.shape)

In [None]:
# Parse the string tracks into tracks of float vectors.
tracks = str_array_to_float(tracks_str)

In [None]:
# Show how many tracks there are of each length.
print(get_lens(tracks))

In [16]:
# Keep only the tracks that have exactly 10 time steps.
tracks_final = np.array([trk for trk in tracks if len(trk)==10])

In [18]:
# Subtract each track's first entry from the whole track. This modifies
# tracks_final in place and returns the same array.
track_final_norm=normalize_centroids_in_tracks(tracks_final)

In [21]:
# Write the normalized tracks to 'track_final_norm.npy' in the working directory.
np.save('track_final_norm.npy',track_final_norm)