In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from os import listdir, getcwd, chdir
from os.path import isfile, join

from diff_classifier.features import alpha_calc, unmask_track
from diff_predictor.utils import plot_msd_comparisons, plot_individual_msds, plot_particles_in_frame

import scipy.stats as stats
from scipy.optimize import curve_fit
import numpy.ma as ma
import lmfit

In [None]:
# Record the directory the kernel was started in and report it.
workbookDir = getcwd()
print('Current Notebook Dir: ' + workbookDir)

# Enter the notebook directory, then climb one level; data paths below are
# built relative to that parent directory.
# NOTE(review): re-running this cell climbs one more level each time.
for target_dir in (workbookDir, '..'):
    chdir(target_dir)

print(f'Using current directory for loading data: {getcwd()}')
workbookDir = getcwd()

# Age Data

In [None]:
# Folders holding the age-dataset MSD files and feature files.
# NOTE(review): the MSD path has no '/data/' component while the feature path
# does -- confirm both locations are intended.
age_msd_path = workbookDir + '/raw_data_age/'
age_feature_path = workbookDir + '/data/raw_data_age/'


def _age_files(folder, token):
    """Return the names of regular files in `folder` whose name contains `token`."""
    return [name for name in listdir(folder)
            if isfile(join(folder, name)) and token in name]


# MSD files, one list per age tag found in the file name.
age_msd_filelist_70 = _age_files(age_msd_path, 'P70')
age_msd_filelist_14 = _age_files(age_msd_path, 'P14')
age_msd_filelist_35 = _age_files(age_msd_path, 'P35')
age_msd_filelist = age_msd_filelist_70 + age_msd_filelist_14 + age_msd_filelist_35

# Feature files, selected the same way.
age_feature_filelist_70 = _age_files(age_feature_path, 'features_P70')
age_feature_filelist_14 = _age_files(age_feature_path, 'features_P14')
age_feature_filelist_35 = _age_files(age_feature_path, 'features_P35')

# Note the concatenation order here (14, 35, 70) differs from the MSD list (70, 14, 35).
age_feature_filelist = age_feature_filelist_14 + age_feature_filelist_35 + age_feature_filelist_70

print(len(age_msd_filelist))
print(len(age_feature_filelist))

## Scipy curvefit

In [None]:
def msd_alpha(xpos, alph, dcoef):
    """Power-law MSD model: 4 * dcoef * xpos**alph.

    `xpos` is the independent variable (the 'Frame' column below); `alph` and
    `dcoef` are the parameters estimated by the fit.
    """
    return 4*dcoef*(xpos**alph)


# Per-track fit results for the age dataset. Only successful fits are recorded,
# so the four lists always stay the same length.
perr_alph = []    # one-standard-deviation error of alph
mag_alph = []     # fitted alph
perr_dcoef = []   # one-standard-deviation error of dcoef
mag_coef = []     # fitted dcoef

# (file name, alph, dcoef, alph error) for tracks whose alph error is >= 10.
high_error_trajs = []

for msd_file in age_msd_filelist:
    msd_df = pd.read_csv(age_msd_path + msd_file)

    for track_id in msd_df['Track_ID'].unique():
        single_track_masked = (
            msd_df.loc[msd_df['Track_ID'] == track_id]
            .sort_values(['Track_ID', 'Frame'], ascending=[1, 1])
            .reset_index(drop=True)
        )
        single_track = unmask_track(single_track_masked)

        # MSD is the dependent variable and Frame the independent one, so the
        # fit matches the model MSD = 4 * dcoef * Frame**alph.
        xpos = single_track['Frame']
        ypos = single_track['MSDs']

        try:
            popt, pcov = curve_fit(msd_alpha, xpos, ypos)
        except RuntimeError:
            # curve_fit did not converge; nothing is recorded for this track.
            print('Optimal parameters not found. Print NaN instead.')
            alph = np.nan
            dcoef = np.nan
            continue

        # popt follows the parameter order of msd_alpha: (alph, dcoef).
        alph = popt[0]
        dcoef = popt[1]
        # Square roots of the covariance diagonal are the 1-sigma errors.
        perr = np.sqrt(np.diag(pcov))
        perr_alph.append(perr[0])
        perr_dcoef.append(perr[1])
        mag_alph.append(alph)
        mag_coef.append(dcoef)

        if perr[0] >= 10:
            high_error_trajs.append((msd_file, alph, dcoef, perr[0]))
        

In [None]:
# Display the (file name, alph, dcoef, alph error) tuples collected for tracks
# whose alph fit error was >= 10.
high_error_trajs

## LMFIT

In [None]:
def msd_alpha(xpos, alph, dcoef):
    """Power-law MSD model: 4 * dcoef * xpos**alph.

    `xpos` is the independent variable (the 'Frame' column below); `alph` and
    `dcoef` are the parameters estimated by the fit.
    """
    return 4*dcoef*(xpos**alph)


# Build the lmfit model once; only alph is bounded, dcoef is left free.
msd_model = lmfit.Model(msd_alpha)
msd_model.set_param_hint('alph', value=1.0, min=0.00001, max=13)

# Per-track lmfit results for the age dataset. All four lists are reset and
# filled together so the plotting cells that follow see aligned data.
perr_alph = []    # standard error of alph
mag_alph = []     # fitted alph
perr_dcoef = []   # standard error of dcoef
mag_coef = []     # fitted dcoef

for msd_file in age_msd_filelist:
    msd_df = pd.read_csv(age_msd_path + msd_file)

    for track_id in msd_df['Track_ID'].unique():
        single_track_masked = (
            msd_df.loc[msd_df['Track_ID'] == track_id]
            .sort_values(['Track_ID', 'Frame'], ascending=[1, 1])
            .reset_index(drop=True)
        )
        single_track = unmask_track(single_track_masked)

        # MSD is the dependent variable and Frame the independent one, so the
        # fit matches the model MSD = 4 * dcoef * Frame**alph.
        xpos = single_track['Frame']
        ypos = single_track['MSDs']

        try:
            model_result = msd_model.fit(ypos, xpos=xpos, alph=1, dcoef=1)
        except RuntimeError:
            # Fit failed; nothing is recorded for this track.
            print('Optimal parameters not found. Print NaN instead.')
            alph = np.nan
            dcoef = np.nan
            continue

        alph_par = model_result.params['alph']
        dcoef_par = model_result.params['dcoef']
        alph = alph_par.value
        dcoef = dcoef_par.value

        mag_alph.append(alph)
        mag_coef.append(dcoef)
        # lmfit leaves stderr as None when it cannot estimate uncertainties;
        # store inf in that case so the `!= np.inf` filters downstream drop it.
        perr_alph.append(np.inf if alph_par.stderr is None else alph_par.stderr)
        perr_dcoef.append(np.inf if dcoef_par.stderr is None else dcoef_par.stderr)

In [None]:
# Histogram of the alph fit errors, with +inf entries removed first.
perr_alph_arr = np.array(perr_alph)
not_inf_mask = perr_alph_arr != np.inf
perr_alph_arr = perr_alph_arr[not_inf_mask]

ax = plt.gca()
ax.hist(perr_alph_arr, bins=5000)
ax.set_xlabel('One Standard Deviation Error of Alpha')
ax.set_ylabel('Count')
ax.set_title('Distribution of error for alpha curve fitting')

In [None]:
# Select and display the alph errors that are at least 2.
large_error_mask = perr_alph_arr >= 2
big_err = perr_alph_arr[large_error_mask]
big_err

In [None]:
# Fitted alph against its one-standard-deviation error for the age dataset.
# Both lists must have the same length for scatter to accept them.
plt.scatter(mag_alph, perr_alph, alpha=0.5)
ax = plt.gca()
ax.set_xlabel('Magnitude of Alpha value')
ax.set_ylabel('One standard deviation error')
ax.set_title('Magnitude of Alpha versus Error, Age Dataset')

In [None]:
# Histogram of the dcoef fit errors, with +inf entries removed first.
perr_dcoef_arr = np.array(perr_dcoef)
perr_dcoef_arr = perr_dcoef_arr[perr_dcoef_arr != np.inf]
plt.hist(perr_dcoef_arr)
# Label the figure so it can be read on its own, like the alpha histogram.
plt.xlabel('One Standard Deviation Error of Dcoef')
plt.ylabel('Count')
plt.title('Distribution of error for dcoef curve fitting')

In [None]:
# Display the parameter vector left in `popt` by the most recent successful
# curve_fit call; which track it belongs to depends on which cell ran last.
popt

In [None]:
# Run diff_classifier's alpha_calc on whichever track the preceding loop left
# in `single_track`. Presumably returns (alpha, diffusion coefficient) --
# confirm against diff_classifier.
alpha, coef = alpha_calc(single_track)

# Region Data

In [None]:
# Region dataset: feature files are those with 'feat' in the name, MSD files
# those with 'msd' in the name. Only regular files are kept.
region_dataset_path = workbookDir + '/data/region_feature_folder/'
region_filelist = list(filter(
    lambda name: isfile(join(region_dataset_path, name)) and 'feat' in name,
    listdir(region_dataset_path),
))

region_msd_path = workbookDir + '/data/raw_data_region/'
region_msd_filelist = list(filter(
    lambda name: isfile(join(region_msd_path, name)) and 'msd' in name,
    listdir(region_msd_path),
))

print(len(region_filelist))
print(len(region_msd_filelist))

In [None]:
def msd_alpha(xpos, alph, dcoef):
    """Power-law MSD model: 4 * dcoef * xpos**alph.

    `xpos` is the independent variable (the 'Frame' column below); `alph` and
    `dcoef` are the parameters estimated by the fit.
    """
    return 4*dcoef*(xpos**alph)


# Per-track fit results for the region dataset. Only successful fits are
# recorded, so the four lists always stay the same length.
perr_alph = []    # one-standard-deviation error of alph
mag_alph = []     # fitted alph
perr_dcoef = []   # one-standard-deviation error of dcoef
mag_dcoef = []    # fitted dcoef

# (file name, alph, dcoef, alph error) for tracks whose alph error is >= 10.
reg_high_error_trajs = []

for msd_file in region_msd_filelist:
    msd_df = pd.read_csv(region_msd_path + msd_file)

    for track_id in msd_df['Track_ID'].unique():
        single_track_masked = (
            msd_df.loc[msd_df['Track_ID'] == track_id]
            .sort_values(['Track_ID', 'Frame'], ascending=[1, 1])
            .reset_index(drop=True)
        )
        single_track = unmask_track(single_track_masked)

        # MSD is the dependent variable and Frame the independent one, so the
        # fit matches the model MSD = 4 * dcoef * Frame**alph.
        xpos = single_track['Frame']
        ypos = single_track['MSDs']

        try:
            popt, pcov = curve_fit(msd_alpha, xpos, ypos)
        except RuntimeError:
            # curve_fit did not converge; nothing is recorded for this track.
            print('Optimal parameters not found. Print NaN instead.')
            alph = np.nan
            dcoef = np.nan
            continue

        # popt follows the parameter order of msd_alpha: (alph, dcoef).
        alph = popt[0]
        dcoef = popt[1]
        # Square roots of the covariance diagonal are the 1-sigma errors.
        perr = np.sqrt(np.diag(pcov))
        perr_alph.append(perr[0])
        perr_dcoef.append(perr[1])
        mag_alph.append(alph)
        mag_dcoef.append(dcoef)

        # Record against the region list and the region file name; using the
        # age-dataset list/file index here would mislabel or overrun.
        if perr[0] >= 10:
            reg_high_error_trajs.append((msd_file, alph, dcoef, perr[0]))
        

In [None]:
# Display the list of high-error region trajectories set up by the region fit cell.
reg_high_error_trajs

In [None]:
# Smallest fitted alph among the values currently held in mag_alph.
mag_alph_arr = np.array(mag_alph)
mag_alph_arr.min()

In [None]:
# Fitted alph against its one-standard-deviation error for the region dataset.
alpha_values = np.array(mag_alph)
alpha_errors = np.array(perr_alph)
plt.scatter(alpha_values, alpha_errors, alpha=0.5)
ax = plt.gca()
ax.set_xlabel('Magnitude of Alpha value')
ax.set_ylabel('One standard deviation error')
ax.set_title('Magnitude of Alpha versus Error, Region Dataset')

In [None]:
# Histogram of the alph fit errors for the region dataset, with +inf entries
# removed first.
perr_alph_arr = np.array(perr_alph)
perr_alph_arr = perr_alph_arr[perr_alph_arr != np.inf]
plt.hist((perr_alph_arr), bins=5000)
# Label the figure so it can be read on its own, like the age-dataset histogram.
plt.xlabel('One Standard Deviation Error of Alpha')
plt.ylabel('Count')
plt.title('Distribution of error for alpha curve fitting, Region Dataset')

In [None]:
# Count the alph errors that are at least 5.
large_error_mask = perr_alph_arr >= 5
big_err = perr_alph_arr[large_error_mask]
len(big_err)

# Treatment

In [None]:
# Folders holding the treatment-dataset MSD files and feature files.
treatment_msd_path = workbookDir + '/raw_data_pnn/'
treatment_feature_path = workbookDir + '/data/ecm_feature_folder/'


def _treatment_files(folder, token):
    """Return the names of regular files in `folder` whose name contains `token`."""
    return [name for name in listdir(folder)
            if isfile(join(folder, name)) and token in name]


# NOTE(review): the _70/_14/_35 suffixes look carried over from the age cell;
# these lists are actually split by the 'NT' and 'ChABC' tags in the file name.
treatment_msd_filelist_70 = _treatment_files(treatment_msd_path, 'NT')
treatment_msd_filelist_14 = _treatment_files(treatment_msd_path, 'ChABC')
treatment_msd_filelist = treatment_msd_filelist_70 + treatment_msd_filelist_14

treatment_feature_filelist_70 = _treatment_files(treatment_feature_path, 'NT')
treatment_feature_filelist_35 = _treatment_files(treatment_feature_path, 'ChABC')

treatment_feature_filelist = treatment_feature_filelist_70 + treatment_feature_filelist_35

print(len(treatment_msd_filelist))
print(len(treatment_feature_filelist))

In [None]:
def msd_alpha(xpos, alph, dcoef):
    """Power-law MSD model: 4 * dcoef * xpos**alph.

    `xpos` is the independent variable (the 'Frame' column below); `alph` and
    `dcoef` are the parameters estimated by the fit.
    """
    return 4*dcoef*(xpos**alph)


# Per-track fit results for the treatment dataset. Only successful fits are
# recorded, so the four lists always stay the same length.
perr_alph = []    # one-standard-deviation error of alph
mag_alph = []     # fitted alph
perr_dcoef = []   # one-standard-deviation error of dcoef
mag_dcoef = []    # fitted dcoef

for msd_file in treatment_msd_filelist:
    msd_df = pd.read_csv(treatment_msd_path + msd_file)

    for track_id in msd_df['Track_ID'].unique():
        single_track_masked = (
            msd_df.loc[msd_df['Track_ID'] == track_id]
            .sort_values(['Track_ID', 'Frame'], ascending=[1, 1])
            .reset_index(drop=True)
        )
        single_track = unmask_track(single_track_masked)

        # MSD is the dependent variable and Frame the independent one, so the
        # fit matches the model MSD = 4 * dcoef * Frame**alph.
        xpos = single_track['Frame']
        ypos = single_track['MSDs']

        try:
            popt, pcov = curve_fit(msd_alpha, xpos, ypos)
        except RuntimeError:
            # curve_fit did not converge; nothing is recorded for this track.
            print('Optimal parameters not found. Print NaN instead.')
            alph = np.nan
            dcoef = np.nan
            continue

        # popt follows the parameter order of msd_alpha: (alph, dcoef).
        alph = popt[0]
        dcoef = popt[1]
        # Square roots of the covariance diagonal are the 1-sigma errors.
        perr = np.sqrt(np.diag(pcov))
        perr_alph.append(perr[0])
        perr_dcoef.append(perr[1])
        mag_alph.append(alph)
        mag_dcoef.append(dcoef)
        

In [None]:
# Fitted alph against its one-standard-deviation error for the treatment dataset.
alpha_values = np.array(mag_alph)
alpha_errors = np.array(perr_alph)
plt.scatter(alpha_values, alpha_errors, alpha=0.5)
ax = plt.gca()
ax.set_xlabel('Magnitude of Alpha value')
ax.set_ylabel('One standard deviation error')
ax.set_title('Magnitude of Alpha versus Error, Treatment Dataset')