In [10]:
import pandas as pd
import numpy as np
import re
import os
from glob import glob
PATH = '../Tracking_All'
EXT = "*.csv"

# final pipeline for all features in all results

In [11]:
nan = np.nan


def using_clump(a):
    """Split ``a`` into its contiguous runs of non-missing entries.

    An entry is treated as missing when its string form equals ``str(np.nan)``.
    Returns a list with one slice of ``a`` per run, in order of appearance.
    """
    nan_text = str(np.nan)
    is_missing = a.astype(str) == nan_text
    masked = np.ma.masked_where(is_missing, a)
    pieces = []
    for run in np.ma.clump_unmasked(masked):
        pieces.append(a[run])
    return pieces
def centroids_zero_center(tracks_arr):
    """Shift every track so that its first entry becomes zero.

    Each element of ``tracks_arr`` has its own first entry subtracted from all
    of its entries. The subtraction happens in place and the same
    ``tracks_arr`` object is returned.
    """
    for track in tracks_arr:
        start = track[0]
        track -= start
    return tracks_arr
# Matches one signed decimal number with an optional exponent ("12", "-3.5", ".5e-3").
# Written as a raw string so "\d" and "\." reach the regex engine unchanged instead of
# being parsed as (invalid) string escapes; re.VERBOSE makes the spaces insignificant.
numeric_const_pattern = r'[-+]? (?: (?: \d* \. \d+ ) | (?: \d+ \.? ) )(?: [Ee] [+-]? \d+ ) ?'
rx = re.compile(numeric_const_pattern, re.VERBOSE)
def str_array_to_float(arr_of_arr_of_str):
    """Parse tracks of strings into tracks of float vectors.

    Parameters
    ----------
    arr_of_arr_of_str : iterable of iterables of str
        One inner iterable per track; each string is one time step.

    Returns
    -------
    np.ndarray (dtype=object)
        One entry per retained track, each a list of per-time-step float lists.
        Time steps with fewer than two numbers are dropped, and so are tracks
        left without any time step.
    """
    final_mat = []
    for arr_of_arr in arr_of_arr_of_str:
        float_ts = []
        # `cell_text` rather than `str`, so the builtin is not shadowed.
        for cell_text in arr_of_arr:
            float_arr = [float(token) for token in rx.findall(cell_text)]
            if len(float_arr) >= 2:
                float_ts.append(float_arr)
        if float_ts:
            final_mat.append(float_ts)
    # dtype=object because tracks generally differ in length (ragged).
    return np.array(final_mat, dtype=object)
def get_lens(tracks):
    """Count how many tracks there are of each length.

    Returns the pandas ``value_counts`` Series of the track lengths.
    """
    lengths = []
    for trk in tracks:
        lengths.append(len(trk))
    return pd.Series(lengths).value_counts()
def get_feature_index(feature_type,features):
    """Return the position of the first entry of ``features`` contained in ``feature_type``.

    Falls back to 0 when no entry of ``features`` is contained in ``feature_type``.
    """
    hits = (pos for pos, name in enumerate(features) if name in feature_type)
    return next(hits, 0)
def cut_feture_vecs_and_preprocess(tracks,feature_type,ts_len,cut_longer_ts=False):
    """Select tracks of a fixed length and zero-center them when they are centroids.

    Parameters
    ----------
    tracks : iterable
        Per-track sequences; each must support ``len`` and slicing.
    feature_type : str
        Feature name; when it contains ``'centroids'`` the selected tracks are
        passed through ``centroids_zero_center``.
    ts_len : int
        Required number of time steps.
    cut_longer_ts : bool, default False
        False keeps only tracks whose length equals ``ts_len``.
        True keeps every track with at least ``ts_len`` steps, truncated to its
        first ``ts_len`` steps.

    Returns
    -------
    np.ndarray
        Array built from the selected tracks.
    """
    # Both branches bind the same name; previously the `else` branch assigned a
    # differently spelled variable, so the default call raised UnboundLocalError.
    if cut_longer_ts:
        tracks_cut = np.array([trk[:ts_len] for trk in tracks if len(trk)>=ts_len])
    else:
        tracks_cut = np.array([trk for trk in tracks if len(trk)==ts_len])
    if 'centroids' in feature_type:
        tracks_cut = centroids_zero_center(tracks_cut)
    return tracks_cut
def save(tracks_final,well_name):
    """Write ``tracks_final`` to ``../npy_files/<well_name>.npy`` using ``np.save``."""
    target = '../npy_files/' + well_name + '.npy'
    np.save(target, tracks_final)

In [12]:
def from_results_folder_PATH_to_arrays(features=['centroids','morphologies','embeddings'],ts_len=10,cut_longer_ts=False,save=False,name_ext=""):
    """Build one feature array and one label array from all result folders under PATH.

    Walks the module-level ``PATH``. In every folder, each file matching the
    module-level ``EXT`` whose last '_'-separated path piece contains one of
    ``features`` is read, split into NaN-free runs, parsed to floats, cut to
    ``ts_len`` and stacked with the folder's other features along the last axis.

    Parameters
    ----------
    features : list of str
        Feature names to load; matched as substrings of the file's last
        '_'-separated piece. The list is only read, never modified.
    ts_len : int, default 10
        Number of time steps per track (see ``cut_feture_vecs_and_preprocess``).
    cut_longer_ts : bool, default False
        Forwarded to ``cut_feture_vecs_and_preprocess``.
    save : bool, default False
        When true, writes ``../npy_files/features<name_ext>.npy`` and
        ``../npy_files/labels<name_ext>.npy``.
    name_ext : str, default ""
        Suffix inserted into the two saved file names.

    Returns
    -------
    (results_tracks, results_labels)
        The vertically stacked per-folder arrays and one well label per track.

    Raises
    ------
    ValueError
        From ``np.vstack`` / ``np.concatenate`` when no folder yields any track.
    """
    all_tracks = []
    wells = []
    for path, _subdirs, _files in os.walk(PATH):
        # glob returns matches in arbitrary order; sorting makes the features
        # stack in the same order for every folder.
        all_files = sorted(glob(os.path.join(path, EXT)))
        feature_vecs_cut = []
        for file in all_files:
            file_name = file.split('_')
            # NOTE(review): index 2 depends on how many '_' the path contains
            # (it was written for PATH='../Tracking_All') — confirm for other roots.
            well_name = file_name[2]
            feature_type = file_name[-1]
            if not any(fet in feature_type for fet in features):
                continue
            df_str = pd.read_csv(file,index_col=[0])
            splitted = []
            for cell_id, series in df_str.iterrows():
                # one entry per NaN-free run of this row
                splitted.extend(using_clump(np.array(series)))
            tracks_str = np.array(splitted,dtype=object)
            tracks = str_array_to_float(tracks_str)
            tracks_cut = cut_feture_vecs_and_preprocess(tracks,feature_type,ts_len,cut_longer_ts)
            feature_vecs_cut.append(tracks_cut)
        if not feature_vecs_cut:
            # No CSV here, or none of the requested features: np.dstack([])
            # would raise, so skip the folder.
            continue
        # NOTE(review): dstack requires every feature of a folder to contribute
        # the same number of tracks — presumably guaranteed upstream; confirm.
        feature_vecs_cut = np.dstack(feature_vecs_cut)
        # Empty selections stack to a block whose first item has length 0.
        if len(feature_vecs_cut[0]) > 0:
            print(feature_vecs_cut.shape)
            all_tracks.append(feature_vecs_cut)
            wells.append(well_name)

    labels = [np.repeat(well_name, len(tracks_vec))
              for well_name, tracks_vec in zip(wells, all_tracks)]

    results_tracks = np.vstack(all_tracks)
    results_labels = np.concatenate(labels)
    if save:
        np.save('../npy_files/features'+name_ext+'.npy',results_tracks)
        np.save('../npy_files/labels'+name_ext+'.npy',results_labels)
    return results_tracks,results_labels

In [15]:
# Build and save the centroid + morphology dataset, with tracks cut to 32 time steps.
x, y = from_results_folder_PATH_to_arrays(
    features=['centroids', 'morphologies'],
    ts_len=32,
    cut_longer_ts=True,
    save=True,
    name_ext="_All64",
)

(14, 32, 5)
(68, 32, 5)
(90, 32, 5)
(3, 32, 5)
(70, 32, 5)
(117, 32, 5)
(153, 32, 5)
(283, 32, 5)
(29, 32, 5)
(31, 32, 5)
(22, 32, 5)
(23, 32, 5)
(45, 32, 5)
(253, 32, 5)
(226, 32, 5)
(2, 32, 5)
(114, 32, 5)
(28, 32, 5)
(3, 32, 5)
(124, 32, 5)
(7, 32, 5)
(2, 32, 5)
(6, 32, 5)
(96, 32, 5)
(20, 32, 5)
(10, 32, 5)
(34, 32, 5)
(8, 32, 5)
(7, 32, 5)
(83, 32, 5)
(6, 32, 5)
(10, 32, 5)
(44, 32, 5)
(16, 32, 5)
(62, 32, 5)
(50, 32, 5)
(182, 32, 5)
(12, 32, 5)
(16, 32, 5)
(20, 32, 5)
(28, 32, 5)
(70, 32, 5)
(14, 32, 5)
(13, 32, 5)
(21, 32, 5)
(71, 32, 5)
(58, 32, 5)
(8, 32, 5)
(6, 32, 5)
(2, 32, 5)
(12, 32, 5)
(16, 32, 5)
(13, 32, 5)
(23, 32, 5)
(3, 32, 5)
(17, 32, 5)
(13, 32, 5)
(9, 32, 5)
(15, 32, 5)
(1, 32, 5)
(1, 32, 5)
(1, 32, 5)
(2, 32, 5)
(2, 32, 5)
(17, 32, 5)
(2, 32, 5)
(8, 32, 5)
(20, 32, 5)
(23, 32, 5)
(16, 32, 5)
(24, 32, 5)
(8, 32, 5)
(10, 32, 5)
(6, 32, 5)
(4, 32, 5)
(7, 32, 5)
(2, 32, 5)
(1, 32, 5)
(12, 32, 5)
(4, 32, 5)
(7, 32, 5)
(39, 32, 5)
(4, 32, 5)
(8, 32, 5)
(1, 32, 5)
(2, 

In [8]:
x.shape

(4668, 32, 5)

In [26]:
pd.Series(y).value_counts()

D2     1427
D4      687
D3      550
G4      519
F4      446
G2      178
F3      159
E6      148
D7      137
G3       85
G6       79
G5       74
E7       62
F2       49
E8       46
E9       15
E10       7
dtype: int64

In [39]:
# Labels of the tracks that belong to the selected wells.
selected_wells = ['D2','D3','D4','G2','G3','G4']
y_exp = y[np.isin(y, selected_wells)]

In [40]:
# Feature rows of the tracks that belong to the selected wells (same mask as for the labels).
in_selected_wells = np.isin(y, ['D2','D3','D4','G2','G3','G4'])
x_exp = x[in_selected_wells]

In [41]:
np.save('../npy_files/x_exp.npy',x_exp)
np.save('../npy_files/y_exp.npy',y_exp)

In [24]:
# Labels that are NOT in the listed wells.
# NOTE(review): `y_true` is not assigned in any cell visible in this notebook —
# presumably left over from earlier kernel state; confirm it exists after Restart & Run All.
y_true[~y_true.isin(['E3','E2','E8','E9','E10'])]

0       D2
1       D2
2       D2
3       D2
4       D2
        ..
4663    G6
4664    G6
4665    G6
4666    G6
4667    G6
Length: 4600, dtype: object

In [32]:
# Labels that are in the listed wells.
# NOTE(review): `y_true` is not assigned in any cell visible in this notebook —
# presumably left over from earlier kernel state; confirm it exists after Restart & Run All.
y_true[y_true.isin(['D2','D3','D4','G2','G3','G4'])]

0       D2
1       D2
2       D2
3       D2
4       D2
        ..
4510    G4
4511    G4
4512    G4
4513    G4
4514    G4
Length: 3446, dtype: object

In [None]:
x_true 

## tests and single features

In [None]:
nan = np.nan


def using_clump(a):
    """Return the contiguous non-missing stretches of ``a`` as a list of slices of ``a``.

    An entry is considered missing when ``str`` of it equals ``str(np.nan)``.
    """
    missing = (a.astype(str) == str(np.nan))
    unmasked_runs = np.ma.clump_unmasked(np.ma.masked_where(missing, a))
    return [a[run] for run in unmasked_runs]
def normalize_centroids_in_tracks(tracks_arr):
    """Subtract each track's first entry from the whole track, in place.

    Returns the same ``tracks_arr`` object that was passed in.
    """
    for track in tracks_arr:
        first_point = track[0]
        track -= first_point
    return tracks_arr
def str_array_to_float(arr_of_arr_of_str):
    """Parse tracks of strings into tracks of float vectors.

    Only unsigned decimals with a fractional part (digits, a dot, digits) are
    extracted from each string. Time steps with fewer than two such numbers are
    dropped, and so are tracks left without any time step.

    Parameters
    ----------
    arr_of_arr_of_str : iterable of iterables of str
        One inner iterable per track; each string is one time step.

    Returns
    -------
    np.ndarray (dtype=object)
        One entry per retained track.
    """
    final_mat = []
    for arr_of_arr in arr_of_arr_of_str:
        float_ts = []
        # `cell_text` rather than `str`, so the builtin is not shadowed.
        for cell_text in arr_of_arr:
            # raw string: "\d" and "\." are regex escapes, not string escapes
            float_arr = [float(token) for token in re.findall(r"\d+\.\d+", cell_text)]
            if len(float_arr) >= 2:
                float_ts.append(float_arr)
        if float_ts:
            final_mat.append(float_ts)
    # Tracks differ in length; without dtype=object NumPy >= 1.24 refuses to
    # build the ragged array. Matches the pipeline version of this helper.
    return np.array(final_mat, dtype=object)
def get_lens(tracks):
    """Return, as a pandas ``value_counts`` Series, how often each track length occurs."""
    track_lengths = pd.Series(list(map(len, tracks)))
    return track_lengths.value_counts()
def cut_tracks_and_save(tracks,well_name,ts_len,cut_longer_ts=False,save=True):
    """Select tracks of ``ts_len`` steps, zero them on their first entry, optionally save.

    With ``cut_longer_ts`` false only tracks of exactly ``ts_len`` steps are
    kept; otherwise every track with at least ``ts_len`` steps is kept and
    truncated to its first ``ts_len`` steps. The selection is passed through
    ``normalize_centroids_in_tracks`` and, when ``save`` is true, written to
    ``../npy_files/<well_name>.npy``. Returns the normalized array.
    """
    if cut_longer_ts:
        kept = [trk[:ts_len] for trk in tracks if len(trk) >= ts_len]
    else:
        kept = [trk for trk in tracks if len(trk) == ts_len]
    track_final_norm = normalize_centroids_in_tracks(np.array(kept))
    if save:
        np.save('../npy_files/'+well_name+'.npy', track_final_norm)
    return track_final_norm

In [None]:
def from_csv_path_to_npy(csv_path,well_name,ts_len,cut_longer_ts=False):
    """Convert one tracking CSV into a cut, normalized ``.npy`` file for a well.

    Parameters
    ----------
    csv_path : str
        CSV to read (first column is used as the index).
    well_name : str
        Used by ``cut_tracks_and_save`` for the output file name.
    ts_len : int
        Number of time steps per track.
    cut_longer_ts : bool, default False
        Forwarded to ``cut_tracks_and_save``.
    """
    df_str = pd.read_csv(csv_path,index_col=[0])
    splitted = []
    for cell_id, series in df_str.iterrows():
        # one entry per NaN-free run of this row
        splitted.extend(using_clump(np.array(series)))
    # Runs differ in length; dtype=object keeps NumPy >= 1.24 from rejecting
    # the ragged input.
    tracks_str = np.array(splitted, dtype=object)
    print("tracks_str shape: ",tracks_str.shape)
    tracks = str_array_to_float(tracks_str)
    # Honour the caller's arguments (previously hard-coded to 10 / False).
    cut_tracks_and_save(tracks,well_name,ts_len,cut_longer_ts)

In [None]:
def from_results_folder_PATH_to_arrays():
    """Load every CSV matching ``EXT`` under ``PATH`` into per-file float tracks.

    Reads the module-level ``PATH`` and ``EXT``. For each matching file the
    rows are split into NaN-free runs and parsed with ``str_array_to_float``.

    NOTE(review): this definition has the same name as the parameterised
    function defined earlier in the notebook and replaces it once this cell
    runs — confirm which one later cells are meant to call.

    Returns
    -------
    (all_tracks, wells)
        One tracks array and one well name per file, in matching order.
    """
    all_tracks = []
    wells = []
    all_csv_files = [file
                 for path, subdir, files in os.walk(PATH)
                 for file in glob(os.path.join(path, EXT))]
    for file in all_csv_files:
        file_name = file.split('_')
        # NOTE(review): index 1 depends on how many '_' the path contains — confirm for the current PATH.
        well_name = file_name[1]
        df_str = pd.read_csv(file,index_col=[0])
        splitted = []
        for cell_id, series in df_str.iterrows():
            # one entry per NaN-free run of this row
            splitted.extend(using_clump(np.array(series)))
        # Runs differ in length; dtype=object keeps NumPy >= 1.24 from
        # rejecting the ragged input.
        tracks_str = np.array(splitted, dtype=object)
        print("tracks_str shape: ",tracks_str.shape)
        tracks = str_array_to_float(tracks_str)
        all_tracks.append(tracks)
        wells.append(well_name)
    return all_tracks,wells

### tracks


In [None]:
# Rebinds the notebook-wide PATH/EXT, which the zero-argument
# from_results_folder_PATH_to_arrays reads, so that only centroid CSVs are loaded.
# NOTE(review): the new PATH/EXT stay in effect for every cell run afterwards.
PATH = '../Results'
EXT = "*centroids.csv"
all_tracks,wells = from_results_folder_PATH_to_arrays()

In [None]:
# Cut every well's tracks to their first 12 steps (cut_tracks_and_save also
# writes one .npy per well) and collect them with one label per track.
# The list is called `tracks_per_well` rather than `all`, so the builtin `all`
# stays usable in the rest of the notebook.
tracks_per_well = []
labels = []
for well_name,tracks in zip(wells,all_tracks):
    tracks_norm_cut = cut_tracks_and_save(tracks,well_name,12,True)
    tracks_per_well.append(tracks_norm_cut)
    labels.append(np.repeat(well_name,len(tracks_norm_cut)))

results_tracks = np.vstack(tracks_per_well)
results_labels = np.concatenate(labels)


In [None]:
len(results_tracks)

In [None]:
np.save('../npy_files/tracks.npy',results_tracks)
np.save('../npy_files/labels.npy',results_labels)

### morphologies

In [None]:
# Rebinds the notebook-wide PATH/EXT, which the zero-argument
# from_results_folder_PATH_to_arrays reads, so that only morphology CSVs are loaded.
# NOTE(review): the new PATH/EXT stay in effect for every cell run afterwards.
PATH = '../Results'
EXT = "*morphologies.csv"
all_tracks,wells = from_results_folder_PATH_to_arrays()

In [None]:
# Cut every well's tracks to their first 7 steps (cut_tracks_and_save also
# writes one .npy per well) and collect them with one label per track.
# The list is called `tracks_per_well` rather than `all`, so the builtin `all`
# stays usable in the rest of the notebook.
tracks_per_well = []
labels = []
for well_name,tracks in zip(wells,all_tracks):
    tracks_norm_cut = cut_tracks_and_save(tracks,well_name,7,True)
    tracks_per_well.append(tracks_norm_cut)
    labels.append(np.repeat(well_name,len(tracks_norm_cut)))

results_tracks = np.vstack(tracks_per_well)
results_labels = np.concatenate(labels)

In [None]:
len(results_tracks)

In [None]:
np.save('../npy_files/morph.npy',results_tracks) 
np.save('../npy_files/labels_morph.npy',results_labels)

#### file tests

In [None]:
PATH = '../Results_All'
EXT = "*.csv"

# Flat list of every CSV found below PATH.
all_csv_files = [
    file
    for walk_entry in os.walk(PATH)
    for file in glob(os.path.join(walk_entry[0], EXT))
]

# Same walk, folder by folder: record how many CSVs each folder holds and
# print the well / view / crop pieces of every file path.
all_paths = [walk_entry[0] for walk_entry in os.walk(PATH)]
lens = []
for path in all_paths:
    all_files = list(glob(os.path.join(path, EXT)))
    lens.append(len(all_files))
    for file in all_files:
        file_name = file.split('_')
        well_name = file_name[2]
        view_name = file_name[3]
        crop_name = file_name[5]
        print(f'well:{well_name} , view:{view_name}, crop:{crop_name}')
   

In [None]:
pd.Series(lens).value_counts()

In [None]:
for file in all_csv_files:
    file_name = file.split('_')
    print(file_name[-1])

In [None]:
# Print the well / view / crop pieces of every collected CSV path.
# NOTE(review): the split indices here (1, 2, 4) are each one lower than the
# ones the folder-by-folder loop in the "file tests" cell uses (2, 3, 5) —
# confirm which set matches the PATH that produced `all_csv_files`.
for file in all_csv_files:
    file_name = file.split('_')
    well_name = file_name[1]
    view_name = file_name[2]
    crop_name = file_name[4]
    print(f'well:{well_name} , view:{view_name}, crop:{crop_name}')

# old code

### path to a centroids.csv
Comment out the macOS or the Windows path, depending on the OS you are using.

In [None]:
# Load a single centroids.csv, using its first column as the index.
# NOTE(review): absolute, machine-specific path — this cell only runs where exactly this file exists.
df_t = pd.read_csv('/Users/amosavni/university/DeepLearningWorkshop/deepcell_mod/DL-WORKSHOP/results/Results_D2_4_crop_0_start_2_2022-08-25_11-10-58/centroids.csv',index_col=[0])

# strings df to array of array of strings

In [None]:
# Collect the NaN-free runs of every row of df_t into one flat list.
splitted = []
id_well_index = []
for cell_id, series in df_t.iterrows():
    # Extend with the runs directly: wrapping runs of different lengths in
    # np.array(...) without dtype=object is rejected by NumPy >= 1.24.
    splitted.extend(using_clump(np.array(series)))

In [None]:
# dtype=object: the runs differ in length, and NumPy >= 1.24 refuses to build
# a ragged array unless the object dtype is requested explicitly.
tracks_str = np.array(splitted, dtype=object)
print(tracks_str.shape)

In [None]:
tracks = str_array_to_float(tracks_str)

In [None]:
print(get_lens(tracks))

In [None]:
tracks_final = np.array([trk for trk in tracks if len(trk)==10])

In [None]:
track_final_norm=normalize_centroids_in_tracks(tracks_final)

In [None]:
np.save('track_final_norm.npy',track_final_norm)