In [None]:
import os
from os import listdir, getcwd, chdir
from os.path import isfile, join

import numpy as np
import pandas as pd

from diff_classifier import features

In [None]:
# Directory holding the MSD CSV files to process.  Set the MSD_PATH environment
# variable to point the notebook at another dataset without editing this cell;
# the original location is kept as the default.  join(..., '') guarantees a
# trailing separator, because later cells build file paths as msd_path + name.
msd_path = join(
    os.environ.get('MSD_PATH',
                   '/Users/nelsschimek/Documents/nancelab/Data/rotenone/'),
    '')

In [None]:
# Names of the regular files (sub-directories are excluded) directly inside
# msd_path whose file name contains the substring 'msd'.
filelist = list(filter(
    lambda name: isfile(join(msd_path, name)) and 'msd' in name,
    listdir(msd_path),
))
print(len(filelist))

In [None]:
# Column order of the per-trajectory table built by calculate_features, before
# the optional per-tile statistics are appended.
_FEATURE_COLUMNS = ['Track_ID', 'alpha', 'D_fit', 'kurtosis', 'asymmetry1',
                    'asymmetry2', 'asymmetry3', 'AR', 'elongation',
                    'boundedness', 'fractal_dim', 'trappedness', 'efficiency',
                    'straightness', 'MSD_ratio', 'frames', 'X', 'Y', 'Quality',
                    'Mean_Intensity', 'SN_Ratio', 'Deff1', 'Deff2', 'length']

# Spatial binning used for the neighbourhood statistics: a 16 x 16 grid of
# 128-unit tiles, i.e. X/Y values in [0, 2048).
_TILE_SIZE = 128
_TILES_PER_AXIS = 16


def _finite_mean(single_track, column):
    """Mean of ``column`` ignoring NaN and +/-inf; NaN if nothing usable remains."""
    if column not in single_track.columns:
        return np.nan
    values = (single_track[column]
              .replace([np.inf, -np.inf], np.nan)
              .dropna()
              .to_numpy(dtype=float))
    return float(values.mean()) if values.size else np.nan


def _deff_at_lag(single_track, lag):
    """Return MSDs[lag] / (4 * lag), or NaN when the track has no row ``lag``."""
    try:
        return single_track['MSDs'].loc[lag] / (4 * lag)
    except (KeyError, IndexError, ZeroDivisionError):
        # Track is shorter than the requested lag (or the lag is unusable).
        return np.nan


def _single_track_features(single_track, framerate, lags):
    """Compute the feature record (a dict) of one unmasked trajectory."""
    n_rows = len(single_track)

    alpha, d_fit = features.alpha_calc(single_track)
    kurtosis = features.kurtosis(single_track)
    # The first two return values (eig1/eig2 in the original code) are not
    # stored in the output table.
    _, _, asym1, asym2, asym3 = features.asymmetry(single_track)
    aspect_ratio, elongation, (x_pos, y_pos) = features.aspectratio(single_track)
    boundedness, fractal_dim, trappedness = features.boundedness(single_track,
                                                                 framerate)
    efficiency, straightness = features.efficiency(single_track)

    # MSD ratio between frame 2 and the frame stored in the second-to-last
    # row; undefined for very short tracks or when that frame is <= 2.
    msd_ratio = np.nan
    if n_rows >= 2:
        penultimate_frame = single_track['Frame'].loc[n_rows - 2]
        if penultimate_frame > 2:
            msd_ratio = features.msd_ratio(single_track, 2, penultimate_frame)

    return {
        'alpha': alpha,
        'D_fit': d_fit,
        'kurtosis': kurtosis,
        'asymmetry1': asym1,
        'asymmetry2': asym2,
        'asymmetry3': asym3,
        'AR': aspect_ratio,
        'elongation': elongation,
        'boundedness': boundedness,
        'fractal_dim': fractal_dim,
        'trappedness': trappedness,
        'efficiency': efficiency,
        'straightness': straightness,
        'MSD_ratio': msd_ratio,
        'frames': n_rows,
        'X': x_pos,
        'Y': y_pos,
        'Quality': _finite_mean(single_track, 'Quality'),
        'Mean_Intensity': _finite_mean(single_track, 'Mean_Intensity'),
        'SN_Ratio': _finite_mean(single_track, 'SN_Ratio'),
        'Deff1': _deff_at_lag(single_track, lags[0]),
        'Deff2': _deff_at_lag(single_track, lags[1]),
        'length': n_rows,
    }


def _add_tile_statistics(datai):
    """Append 'Mean <col>' / 'Std <col>' columns with per-tile statistics (in place)."""
    # Fix the list of real features *before* adding columns, so that features
    # whose own name contains 'Mean' (e.g. Mean_Intensity) are not skipped.
    base_cols = [col for col in datai.columns if col != 'Track_ID']
    for col in base_cols:
        datai['Mean ' + col] = np.nan
        datai['Std ' + col] = np.nan

    # +/-inf would poison the averages, so treat them as missing.
    finite = datai[base_cols].replace([np.inf, -np.inf], np.nan)

    for x_tile in range(_TILES_PER_AXIS):
        in_column = ((datai['X'] >= _TILE_SIZE * x_tile) &
                     (datai['X'] < _TILE_SIZE * (x_tile + 1)))
        if not in_column.any():
            continue
        for y_tile in range(_TILES_PER_AXIS):
            in_tile = (in_column &
                       (datai['Y'] >= _TILE_SIZE * y_tile) &
                       (datai['Y'] < _TILE_SIZE * (y_tile + 1)))
            if not in_tile.any():
                continue  # empty tile: nothing to average, nothing to write
            tile = finite.loc[in_tile]
            tile_mean = tile.mean()
            tile_std = tile.std(ddof=0)  # population std, like np.nanstd
            for col in base_cols:
                datai.loc[in_tile, 'Mean ' + col] = tile_mean[col]
                datai.loc[in_tile, 'Std ' + col] = tile_std[col]


def calculate_features(dframe, framerate=1, frame=(10, 100), mean_values=True):
    """Calculate trajectory features from an MSD dataset.

    Each unique Track_ID in ``dframe`` is extracted, sorted by Frame, passed
    through ``features.unmask_track`` and then through the individual feature
    functions of ``diff_classifier.features``.  One output row is produced
    per trajectory.

    Parameters
    ----------
    dframe : pandas.core.frame.DataFrame
        Output from msd.all_msds2.  This function itself reads the columns
        Track_ID, Frame and MSDs; the feature functions it calls need at
        least X and Y as well.  Quality, Mean_Intensity and SN_Ratio are
        averaged per track when present (NaN otherwise).
    framerate : int or float
        Framerate of the input videos from which trajectories were
        calculated.  Only forwarded to ``features.boundedness``.  Default
        is 1; ignore if performing all calculations without units.
    frame : tuple of (int, int), or int
        The two lags at which the effective diffusion coefficients Deff1 and
        Deff2 are evaluated as ``MSDs[lag] / (4 * lag)``.  A single int is
        used for both.  Tracks without a row at a lag get NaN.
    mean_values : bool
        If True (default), append 'Mean <feature>' and 'Std <feature>'
        columns holding, for every trajectory, the NaN/inf-ignoring mean and
        population standard deviation of each feature over all trajectories
        whose (X, Y) falls in the same 128 x 128 tile of a 16 x 16 grid.
        Trajectories outside [0, 2048) in X or Y keep NaN there.

    Returns
    -------
    datai : pandas.core.frame.DataFrame
        One float-valued row per trajectory with the columns Track_ID, alpha,
        D_fit, kurtosis, asymmetry1, asymmetry2, asymmetry3, AR, elongation,
        boundedness, fractal_dim, trappedness, efficiency, straightness,
        MSD_ratio, frames, X, Y, Quality, Mean_Intensity, SN_Ratio, Deff1,
        Deff2 and length, followed by the Mean/Std columns when
        ``mean_values`` is True.  X and Y are the third return value of
        ``features.aspectratio``.

    Raises
    ------
    TypeError, ValueError
        If ``frame`` is neither a scalar nor a sequence of exactly two lags.

    Examples
    --------
    See example outputs from individual feature functions.

    """
    lag1, lag2 = (frame, frame) if np.isscalar(frame) else frame

    records = []
    for track_id in dframe['Track_ID'].unique():
        single_track_masked = (dframe.loc[dframe['Track_ID'] == track_id]
                               .sort_values(['Track_ID', 'Frame'])
                               .reset_index(drop=True))
        single_track = features.unmask_track(single_track_masked)
        record = _single_track_features(single_track, framerate, (lag1, lag2))
        record['Track_ID'] = track_id
        records.append(record)

    # Building the table once from records avoids chained-indexing writes,
    # which are silently lost under pandas Copy-on-Write.
    datai = pd.DataFrame(records, columns=_FEATURE_COLUMNS).astype(float)

    if mean_values:
        _add_tile_statistics(datai)

    return datai

In [None]:
# Display the MSD file names collected above; the next cell loops over them.
filelist

In [None]:
# For every MSD file: load it, compute the trajectory features and write them
# to a sibling CSV in the same directory.
for file in filelist:
    # join() works whether or not msd_path ends with a path separator.
    df = pd.read_csv(join(msd_path, file))
    feat_df = calculate_features(df)
    # Swap the first 'msd' in the name for 'features'.  For names starting
    # with 'msd' this matches the previous 'features' + file[3:], and it stays
    # sensible for names where 'msd' appears later (the filter allows those).
    file_name = file.replace('msd', 'features', 1)
    print(file)
    print(file_name)
    print()
    feat_df.to_csv(join(msd_path, file_name))