In [35]:
import pandas as pd
from diff_classifier import features
from os import listdir, getcwd, chdir
from os.path import isfile, join
import numpy as np

In [36]:
# Directory that is scanned for MSD CSV files; the feature CSVs produced
# further down in this notebook are written back into the same directory.
msd_path = '/Users/nelsschimek/Documents/nancelab/Data/rotenone/'

In [37]:
# Keep the directory entries whose name mentions 'msd', then drop anything
# that is not a regular file (e.g. sub-directories).
msd_named = (entry for entry in listdir(msd_path) if 'msd' in entry)
filelist = [entry for entry in msd_named if isfile(join(msd_path, entry))]
print(len(filelist))

35


In [38]:
def calculate_features(dframe, framerate=1, frame=(10, 100), mean_values=True):
    """Calculate per-trajectory features from an MSD dataset.

    Parameters
    ----------
    dframe : pandas.core.frame.DataFrame
        Output from msd.all_msds2.  This function itself reads the columns
        Track_ID, Frame, MSDs, Mean_Intensity, Quality and SN_Ratio; the
        ``features`` helpers it calls presumably also need X and Y.
    framerate : int or float
        Framerate of the input videos from which trajectories were
        calculated.  Only forwarded to ``features.boundedness``.  Default is
        1, i.e. all calculations are performed without units.
    frame : sequence of two ints
        Lags ``(lag1, lag2)`` at which Deff1 and Deff2 are evaluated as
        ``MSDs[lag] / (4 * lag)``.  A lag that is not available for a track
        yields NaN for that track.
    mean_values : bool
        When True, ``Mean <feature>`` and ``Std <feature>`` columns are added
        holding the NaN-ignoring mean/standard deviation of every feature
        over the 128 x 128 spatial bin (16 x 16 grid on X/Y) a trajectory
        falls in.

    Returns
    -------
    datai : pandas.core.frame.DataFrame
        Contains a row for each trajectory in dframe.  Holds the following
        features of each trajectory: Track_ID, alpha, D_fit, kurtosis,
        asymmetry1, asymmetry2, asymmetry3, aspect ratio (AR), elongation,
        boundedness, fractal dimension (fractal_dim), trappedness,
        efficiency, straightness, MSD_ratio, frames, X, Y, Quality,
        Mean_Intensity, SN_Ratio, Deff1, Deff2 and length, followed by the
        Mean/Std columns when ``mean_values`` is True.

    Examples
    --------
    See example outputs from individual feature functions.

    """
    columns = ['Track_ID', 'alpha', 'D_fit', 'kurtosis', 'asymmetry1',
               'asymmetry2', 'asymmetry3', 'AR', 'elongation', 'boundedness',
               'fractal_dim', 'trappedness', 'efficiency', 'straightness',
               'MSD_ratio', 'frames', 'X', 'Y', 'Quality', 'Mean_Intensity',
               'SN_Ratio', 'Deff1', 'Deff2', 'length']

    trackids = dframe.Track_ID.unique()

    # One row per unique Track_ID.  Cells start as NaN so that a value that
    # is never computed cannot be mistaken for a real measurement.
    datai = pd.DataFrame(np.nan, index=range(len(trackids)), columns=columns)
    datai['Track_ID'] = trackids.astype(float)

    for particle, track_id in enumerate(trackids):
        single_track_masked = (
            dframe.loc[dframe['Track_ID'] == track_id]
            .sort_values(['Track_ID', 'Frame'], ascending=True)
            .reset_index(drop=True)
        )
        single_track = features.unmask_track(single_track_masked)
        n_points = len(single_track)
        print(n_points)  # progress output, kept from the original notebook

        row = {'length': n_points, 'frames': n_points}
        row['alpha'], row['D_fit'] = features.alpha_calc(single_track)
        row['kurtosis'] = features.kurtosis(single_track)
        (_, _, row['asymmetry1'], row['asymmetry2'],
         row['asymmetry3']) = features.asymmetry(single_track)
        (row['AR'], row['elongation'],
         (row['X'], row['Y'])) = features.aspectratio(single_track)
        (row['boundedness'], row['fractal_dim'],
         row['trappedness']) = features.boundedness(single_track, framerate)
        (row['efficiency'],
         row['straightness']) = features.efficiency(single_track)

        # The MSD ratio compares lag 2 with the second-to-last frame, so it
        # is only defined when that frame lies beyond lag 2.
        second_last = n_points - 2
        if second_last >= 0 and single_track['Frame'][second_last] > 2:
            row['MSD_ratio'] = features.msd_ratio(
                single_track, 2, single_track['Frame'][second_last])
        else:
            row['MSD_ratio'] = np.nan

        for column, position in (('Deff1', 0), ('Deff2', 1)):
            try:
                lag = frame[position]
                row[column] = single_track['MSDs'][lag] / (4 * lag)
            except (KeyError, IndexError, TypeError, ZeroDivisionError):
                # Lag not present in this track (or not usable): best effort.
                row[column] = np.nan

        for column in ('Mean_Intensity', 'Quality', 'SN_Ratio'):
            row[column] = _finite_mean(single_track[column])

        # Write through .at: chained indexing (datai[col][i] = v) is not
        # guaranteed to modify datai and is a no-op under Copy-on-Write.
        for column, value in row.items():
            datai.at[particle, column] = value

    if mean_values:
        # Capture the feature names before any Mean/Std column exists, so a
        # genuine feature such as 'Mean_Intensity' is not filtered out.
        feature_cols = [col for col in datai.columns if col != 'Track_ID']
        _add_bin_statistics(datai, feature_cols)

    return datai


def _finite_mean(values):
    """Return the mean of the finite entries of a Series, NaN if none."""
    finite = values.replace([np.inf, -np.inf], np.nan).dropna()
    return np.nanmean(finite.values) if len(finite) else np.nan


def _add_bin_statistics(datai, feature_cols, bin_size=128, bins_per_axis=16):
    """Add in-place 'Mean <col>'/'Std <col>' columns computed per X/Y bin."""
    for col in feature_cols:
        datai['Mean ' + col] = np.nan
        datai['Std ' + col] = np.nan

    for xrange in range(bins_per_axis):
        in_x = ((datai['X'] >= bin_size * xrange) &
                (datai['X'] < bin_size * (xrange + 1)))
        for yrange in range(bins_per_axis):
            in_bin = (in_x &
                      (datai['Y'] >= bin_size * yrange) &
                      (datai['Y'] < bin_size * (yrange + 1)))
            # replace() returns a new frame; keep it so infinities are
            # really excluded from the statistics below.
            bitesize = datai[in_bin].replace([np.inf, -np.inf], np.nan)
            print(bitesize.shape)  # progress output, kept from the original
            if bitesize.empty:
                # Nothing to average; also avoids empty-slice warnings.
                continue
            for col in feature_cols:
                datai.loc[bitesize.index, 'Mean ' + col] = np.nanmean(bitesize[col])
                datai.loc[bitesize.index, 'Std ' + col] = np.nanstd(bitesize[col])

In [39]:
# Echo the collected file names as the cell output for a visual check.
filelist

['msd_091823_P10F_6DIV stop_10uM_str_2.csv',
 'msd_091823_P10F_8DIV_50nM_ctx_1.csv',
 'msd_091823_P10F_6DIV stop_10uM_str_3.csv',
 'msd_091823_P10F_6DIV stop_10uM_str_1.csv',
 'msd_091823_P10F_8DIV_50nM_ctx_3.csv',
 'msd_091823_P10F_8DIV_50nM_ctx_2.csv',
 'msd_091823_P10F_6DIV stop_10uM_ctx_2.csv',
 'msd_091823_P10F_8DIV_50nM_str_1.csv',
 'msd_091823_P10F_6DIV stop_10uM_ctx_3.csv',
 'msd_091823_P10F_6DIV stop_10uM_ctx_1.csv',
 'msd_091823_P10F_8DIV_50nM_str_3.csv',
 'msd_091823_P10F_8DIV_50nM_str_2.csv',
 'msd_091623_P10F_6DIV_50nM_str_1.csv',
 'msd_091623_P10F_6DIV_50nM_str_2.csv',
 'msd_091623_P10F_6DIV_50nM_ctx_1.csv',
 'msd_091623_P10F_6DIV_50nM_ctx_2.csv',
 'msd_091623_P10F_6DIV_50nM_ctx_3.csv',
 'msd_091623_P10F_6DIV_10uM_ctx_2.csv',
 'msd_091623_P10F_6DIV_10uM_ctx_3.csv',
 'msd_091623_P10F_6DIV_10uM_ctx_1.csv',
 'msd_091623_P10F_6DIV_10uM_str_2.csv',
 'msd_091623_P10F_6DIV_10uM_str_3.csv',
 'msd_091623_P10F_6DIV_10uM_str_1.csv',
 'msd_091823_P10F_6DIV stop_50nM_ctx_1.csv',
 'msd

In [40]:
# Compute the feature table for every MSD file and save it next to the input.
for file in filelist:
    # join() keeps the paths valid whether or not msd_path ends in a separator.
    df = pd.read_csv(join(msd_path, file))
    feat_df = calculate_features(df)
    # Swap the leading 'msd' for 'features'.  Names that only contain 'msd'
    # somewhere else get a plain prefix instead of losing their first three
    # characters.
    if file.startswith('msd'):
        file_name = 'features' + file[3:]
    else:
        file_name = 'features_' + file
    print(file)
    print(file_name)
    print()
    feat_df.to_csv(join(msd_path, file_name))

503
421
15
53
651
651
19
651


  aspratio = width/height


174
103
12
98
265
53
118
138
55
92
127
651
86
651
304
651
29
17
66
49
563
56
65
20
38
43
634
100
14
15
47
13
22
15
20
58
38
11
28
544
29
42
187
Optimal parameters not found. Print NaN instead.
13
25
57
125
193
82
11
43
19
20
35
32
25
238
24
20
27
12
78
68
18
18
70
28
48
38
69
26
69
25
48
47
49
39
12
14
236
68
39
12
62
23
11
58
17
14
398
13
14
465
11
11
108
59
37
23
92
11
89
27
30
Optimal parameters not found. Print NaN instead.
42
56
40
102
17
30
18
56
14
125
15
18
12
19
359
52
18
12
15
49
23
51
31
14
190
39
351
37
77
36
20
20
40
18
30
76
19
15
14
29
41
27
44
12
13
15
16
11
259
18
32
15
47
47
75
54
259
14
17
23
30
53
51
67
197
171
89
42
25
28
23
85
32
24
14
24
12
11
12
12
39
12
13
44
257
11
38
23
17
36
19
27
22
223
Optimal parameters not found. Print NaN instead.
56
193
15
14
78
14
14
111
22
14
14
13
43
51
43
14
86
29
26
53
23
22
24
11
36
18
20
39
17
16
40
11
25
25
40
23
137
17
47
23
11
19
87
14
55
18
26
13
11
24
87
25
49
103
35
35
96
13
25
23
14
88
16
78
40
81
15
18
51
12
67
12
33
57


  ratio = (dframe['MSDs'][fram1]/dframe['MSDs'][fram2]) - (


644
270
193
38
651
384
61
24
38
23
17
88
12
12
261
12
19
15
29
14
12
12
12
83
17
19
36
39
202
70
32
25
16
25
17
11
12
101
20
44
36
63
18
87
12
60
14
20
151
56
139
52
186
153
153
222
13
12
58
27
11
87
33
43
41
11
32
17
35
18
126
127
31
42
22
43
49
26
13
14
40
70
26
38
11
20
19
25
38
22
15
98
205
23
13
97
105
73
17
28
141
19
29
33
36
18
48
28
11
32
24


  asym1 = (eig1**2 - eig2**2)**2/(eig1**2 + eig2**2)**2
  asym2 = eig2/eig1
  asym3 = -np.log(1-((eig1-eig2)**2)/(2*(eig1+eig2)**2))
  aspratio = width/height
  bound = dcoef*fram/(rad**2)
  fractd = np.log(N)/np.log(N*2*rad/netdisp)
  probf = 1 - np.exp(0.2048 - 0.25117*(dcoef*fram/(rad**2)))
  eff = num/den
  strait = num2/den2
  ratio = (dframe['MSDs'][fram1]/dframe['MSDs'][fram2]) - (


110
24
21
14
88
24
14
32
15
21
43
19
38
149
32
41
21
33
38
17
13
29
27
20
38
21
13
26
17
128
12
26
101
21
19
22
93
19
175
16
47
36
39
44
100
40
95
21
19
22
52
29
366
20
13
24
17
27
230
13
12
15
61
59
16
14
27
32
19
182
24
17
13
26
16
73
153
18
98
32
12
22
21
98
18
229
121
Optimal parameters not found. Print NaN instead.
19
23
54
38
36
43
49
215
12
332
13
13
18
70
19
324
Optimal parameters not found. Print NaN instead.
20
17
14
11
13
40
21
11
41
20
17
35
52
82
20
16
19
236
35
13
14
32
21
129
17
18
38
290
284
15
13
13
121
25
27
11
16
26
39
71
20
15
73
13
11
82
11
11
20
138
57
24
23
29
17
22
11
20
12
14
14
21
12
23
38
60
116
27
23
12
12
18
11
11
31
35
41
47
168
22
13
21
52
11
25
18
11
17
49
11
22
130
14
12
15
43
125
59
19
98
12
14
20
13
22
17
20
22
99
11
55
30
25
29
71
14
62
18
18
37
112
23
12
58
18
50
13
24
24
14
93
93
48
90
11
29
55
22
80
Optimal parameters not found. Print NaN instead.
11
22
22
19
45
20
41
14
11
16
29
15
47
39
50
46
23
14
30
14
40
20
22
33
15
11
29
24
23
18
17
12
11
11

  datai['Mean '+ col][bitesize.index] = np.nanmean(bitesize[col])
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  np.subtract(arr, avg, out=arr, casting='unsafe', where=where)


(45, 70)
(1, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(8, 70)
(11, 70)
(8, 70)
(11, 70)
(21, 70)
(20, 70)
(18, 70)
(4, 70)
(1, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(33, 70)
(6, 70)
(12, 70)
(16, 70)
(15, 70)
(12, 70)
(26, 70)
(8, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(36, 70)
(13, 70)
(2, 70)
(24, 70)
(10, 70)
(20, 70)
(7, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(16, 70)
(28, 70)
(29, 70)
(12, 70)
(17, 70)
(16, 70)
(20, 70)
(11, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(8, 70)
(15, 70)
(19, 70)
(8, 70)
(41, 70)
(11, 70)
(24, 70)
(19, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(35, 70)
(19, 70)
(8, 70)
(24, 70)
(21, 70)
(20, 70)
(32, 70)
(14, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(10, 70)
(9, 70)
(23, 70)
(20, 70)
(31, 70)
(84, 70)
(12, 70)
(36, 70)
(1, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0, 70)
(0,



463
31
44
30
14
12
135
11
17
15
384
39
53
35
11
174
13
17
14
396
49
23
136
11
374
371
14
18
17
61
47
18
21
28
14
16
24
14
20
16
13
11
49
23
15
11
25
64
12
21
39
24
14
11
33
35
23
11
13
12
81
11
92
227
19
22
26
224
60
21
16
23
16
127
32
105
12
27
16
89
191
45
47
11
13
23
16
18
14
58
29
23
13
30
32
25
30
39
119
26
48
24
14
119
14
90
23
52
22
43
42
22
31
38
65
65
15
17
12
20
29
17
17
15
13
28
29
13
25
23
22
15
11
12
14
651
14
651
17
651
651
41
651
588
84
102
651
651
651
651
188
350
651
12
578
19
591
18
651
96
120
651
651
651
651
27
277
651
651
218
651
651
651
134
65
80
126
63
18
20
651
121
156
51
650
651
Optimal parameters not found. Print NaN instead.
44
488
247
26
232
139
21
21
94
64
13
16
19
51
631
12
17
23
80
15
615
11
333
35
16
317
Optimal parameters not found. Print NaN instead.
11
12
23
16
604
30
271
26
31
11
129
31
17
15
21
580
Optimal parameters not found. Print NaN instead.
56
46
116
17
34
11
20
68
12
70
15
40
25
33
11
31
527
13
23
40
24
60
164
16
497
58
378
22
11
205
43
28
93
1