In [1]:
import numpy as np
import pandas as pd
import numpy.ma as ma
from scipy.optimize import curve_fit

In [2]:
import sys
sys.path.insert(0, '/home/ubuntu/source/diff-classifier/diff_classifier/')

from utils import csv_to_pd, partition_im, nth_diff

In [3]:
data = csv_to_pd('/home/ubuntu/test2.csv').sort_values(['Track_ID', 'Frame'], ascending=[1, 1]).reset_index(drop=True)

In [4]:
#test = data.query('Track_ID=0')
#data
single_track = data.loc[data['Track_ID'] == 4].sort_values(['Track_ID', 'Frame'], ascending=[1, 1]).reset_index(drop=True)

In [13]:
np.zeros?

In [16]:
d = {'col1': [1, 2, 3, 4, 5]}
df = pd.DataFrame(data=d)
nth_diff(df['col1'], 1)

0    1
1    1
2    1
3    1
Name: col1, dtype: int64

In [5]:
def nth_diff(dataframe, n = 1):
    """
    nth_diff(dataframe, n=int)
    
    Returns a new vector of size N - n containing the nth difference between vector elements.
    
    Parameters
    ----------
    dataframe : pandas column of floats or ints
        input data on which differences are to be calculated.
    n : int, default is 1
        Function calculated x(i) - x(i - n) for all values in pandas column
    
    Returns
    ----------
    diff : pandas column
        Pandas column of size N - n, where N is the original size of dataframe.
    
    Examples
    ----------
    >>> d = {'col1': [1, 2, 3, 4, 5]}
    >>> df = pd.DataFrame(data=d)
    >>> nth_diff(df['col1'], 1)
    0   1
    1   1
    2   1
    3   1
    Name: col1, dtype: int64
    
    >>> nth_diff(df['col1'], 2)
    0   2
    1   2
    2   2
    Name: col1, dtype: int64
    """
    test1 = dataframe[:-n].reset_index(drop=True)
    test2 = dataframe[n:].reset_index(drop=True)
    diff = test2 - test1
    return diff

In [6]:
def msd_calc(track):
    length = track.shape[0]
    msd = np.zeros(length)

    for frame in range(0,length-1):
        #creates array to ignore when particles skip frames.
        inc = ma.masked_where(nth_diff(track['Frame'], n=frame+1) != frame+1, nth_diff(track['Frame'], n=frame+1))

        x = ma.array(np.square(nth_diff(track['X'], n=frame+1)), mask=inc.mask)
        y = ma.array(np.square(nth_diff(track['Y'], n=frame+1)), mask=inc.mask)

        msd[frame+1] = ma.mean(x + y)

    return msd

In [7]:
def all_msds(data):
    trackids = data.Track_ID.unique()
    partcount = trackids.shape[0]
    msds = np.zeros(data.shape[0])

    for particle in range(0, partcount):
        single_track = data.loc[data['Track_ID'] == trackids[particle]].sort_values(['Track_ID', 'Frame'],
                                                                                    ascending=[1, 1]).reset_index(drop=True)
        if particle == 0:
            index1 = 0
            index2 = single_track.shape[0]
        else:
            index1 = index2
            index2 = index1 + single_track.shape[0]
        msds[index1:index2] = msd_calc(single_track)
    return msds

In [None]:
data["MSDs"] = all_msds(data)
#data

In [None]:
single_track = data.loc[data['Track_ID'] == 4].sort_values(['Track_ID', 'Frame'], ascending=[1, 1]).reset_index(drop=True)

In [None]:
d = {'Track_ID': data.Track_ID.unique()}
features = pd.DataFrame(data=d)
features

In [8]:
def alpha_calc(track):

    x = track['Frame']
    y = track['MSDs']

    def msd_alpha(x, a, D):
        return 4*D*(x**a)

    try:
        popt, pcov = curve_fit(msd_alpha, x, y)
        a = popt[0]
        D = popt[1]
    except RuntimeError:
        print('Optimal parameters not found. Print NaN instead.')
        a = np.nan
        D = np.nan
    return a, D

In [None]:
alpha_calc(single_track)

In [9]:
def calculate_features(data):
    trackids = data.Track_ID.unique()
    partcount = trackids.shape[0]
    alphas = np.zeros(trackids.shape[0])
    Ds = np.zeros(trackids.shape[0])
    
    data["MSDs"] = all_msds(data)

    for particle in range(0, partcount-1):
        single_track = data.loc[data['Track_ID'] == trackids[particle]].sort_values(['Track_ID', 'Frame'],
                                                                                    ascending=[1, 1]).reset_index(drop=True)
        alphas[particle], Ds[particle] = alpha_calc(single_track)
    return alphas, Ds

In [10]:
a, D = calculate_features(data)

Optimal parameters not found. Print NaN instead.


In [11]:
a

array([1.77478457, 1.26710039, 6.62459292,        nan, 0.        ])

In [12]:
D

array([4.67616702e+00, 5.69785643e+00, 1.37613068e-07,            nan,
       0.00000000e+00])

In [None]:
single_track = data.loc[data['Track_ID'] == 4].sort_values(['Track_ID', 'Frame'],
                                                           ascending=[1, 1]).reset_index(drop=True)

In [None]:
single_track

In [None]:
msd = msd_calc(single_track)

In [None]:
data.shape

In [None]:
trackids = data.Track_ID.unique()
partcount = trackids.shape[0]
msds = np.zeros(data.shape[0])

In [None]:
msds

In [None]:
total_spots = data.shape[0]
inc = np.diff(data['Frame'])
skip = np.zeros(inc.shape[0])
same_track = np.diff(data['Track_ID'])
counter = 0
for i in range(0, total_spots-1):
    if inc[i] != 1 and same_track[i] ==0:
        skip[i] = 1
        counter = counter + 1
        print(i)

In [None]:
print(same_track[70:80])
print(inc[70:80])

In [None]:
data.loc[80]

In [None]:
data.loc[77] = data.loc[83]

In [None]:
test = np.zeros(5)
test[3] = 1
test
test2 = test+1

In [None]:
test1 = ma.masked_where(test != 0, test)


In [None]:
test3 = ma.array(test2, mask=test1.mask)
test3

In [None]:
np.mean(np.square(np.diff(single_track['X'], n=2)) + np.square(np.diff(single_track['Y'], n=2)))

In [None]:
frame = 3

np.square(nth_diff(single_track['X'], n=frame+1))

In [None]:
frame=0
np.diff(single_track['Frame'], n=frame+1)

In [None]:
msd

In [None]:
single_track['Frame']

In [None]:
msd

In [None]:
data