In [None]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statannot import add_stat_annotation

# Make the repository root importable before pulling in project-local modules.
sys.path.append("../")

from utils.io import read_parameters

In [None]:
# Use a larger default font for every figure drawn in this notebook.
plt.rcParams.update({'font.size': 18})

# Read the analysis configuration; every setting below is taken from it.
parameter_file = "../local/parameters.yml"
parameters = read_parameters(parameter_file)

# Input / output locations and the file lists keyed by condition.
base_folder = parameters["base_folder"]
output_folder = parameters["output_folder"]
tracking_data_files = parameters["tracking_data"]
data_exclude = parameters["data_exclude"]

# Analysis settings.
interval = parameters["time_lag"]
decimal_places = parameters["decimal_places"]
single_time_point_evaluation = parameters["single_time_point_evaluation"]

# Condition ordering; presumably passed as `hue_order` to the plots further
# down (not used in the cells shown here) -- TODO confirm.
# Alternative ordering kept for reference: ["siScr", "siCdc42", "siRac1"]
hue_order = ["siCtrl", "siSmad4", "siAlk1"]

In [None]:
# Echo the loaded configuration so the settings used for this run appear in the cell output.
parameters

# Load data

In [None]:
# Build `tracking_data_df`: read every non-excluded tracking file, tag each
# row with its condition and filename, record the start position of each
# track, drop tracks shorter than parameters["min_track_length"], and stack
# the per-file tables into one frame.

# Positions are stored as float32 and ids as int32. float16 carries only an
# 11-bit significand (coordinates above 1024 are rounded to whole units, above
# 2048 to multiples of 2), which quantises the step sizes computed later, and
# int16 overflows for ids above 32767.
column_dtypes = {'TRACK_ID': 'int32',
                 'FRAME': 'int32',
                 'POSITION_X': 'float32',
                 'POSITION_Y': 'float32',
                 'POSITION_T': 'float32'}

print(list(column_dtypes))

min_track_length = parameters["min_track_length"]

# Per-file tables are collected here and concatenated once at the end, so no
# file is lost regardless of how many rows the earlier files contributed.
per_file_frames = []

for condition in tracking_data_files:
    for filename in tracking_data_files[condition]:

        if filename in data_exclude:
            print("Excluding file: ", filename)
            print("##################")
            continue

        print("Processing file: ", filename)
        # NOTE(review): rows 0-2 are discarded right after reading; presumably
        # they are extra header rows of the tracking export -- confirm.
        data = pd.read_csv(base_folder + "/" + filename, low_memory=False).drop([0, 1, 2])

        # Work on an independent copy so the inserts below never write into a
        # view of `data`.
        data_ = data[list(column_dtypes)].copy()

        data_.insert(0, "filename", filename)
        data_.insert(0, "condition", condition)

        data_ = data_.astype(column_dtypes)
        data_ = data_.sort_values(by="FRAME")

        # Start position of a track = position in its first row after the
        # FRAME sort; broadcast it to every row of that track.
        track_starts = data_.drop_duplicates(subset="TRACK_ID", keep="first").set_index("TRACK_ID")
        data_["START_X"] = data_["TRACK_ID"].map(track_starts["POSITION_X"])
        data_["START_Y"] = data_["TRACK_ID"].map(track_starts["POSITION_Y"])

        # Keep only tracks with at least `min_track_length` rows.
        track_lengths = data_.groupby("TRACK_ID")["TRACK_ID"].transform("size")
        data_ = data_[track_lengths >= min_track_length]

        print("##################")
        per_file_frames.append(data_)

        del data_
        del data

if per_file_frames:
    tracking_data_df = pd.concat(per_file_frames, ignore_index=True)
else:
    # Nothing was loaded (no files configured, or all of them excluded).
    tracking_data_df = pd.DataFrame()

In [None]:
### for trajectory plots
# Express every position relative to the start of its track and add the
# straight-line distance from that start point.
tracking_data_df = tracking_data_df.assign(
    ORIGIN_X=lambda df: df["POSITION_X"] - df["START_X"],
    ORIGIN_Y=lambda df: df["POSITION_Y"] - df["START_Y"],
    ORIGIN_L=lambda df: np.sqrt(df["ORIGIN_X"]**2 + df["ORIGIN_Y"]**2),
)

In [None]:
# Render the combined tracking table, including the ORIGIN_* columns, for a visual sanity check.
display(tracking_data_df)

In [None]:
# Write the combined tracking table to the output folder. os.path.join places
# the file inside `output_folder` whether or not the configured path ends with
# a separator (plain string concatenation would otherwise produce a path such
# as "<folder>tracking_data.csv"). The index is written as the first column.
tracking_data_df.to_csv(os.path.join(output_folder, "tracking_data.csv"))

# Quality check: plot abundance and length of trajectories

In [None]:
# One figure per input file: a point for every (FRAME, TRACK_ID) pair, which
# shows how many tracks there are and which frames each one covers.
for filename, tracking_data_df_ in tracking_data_df.groupby("filename", sort=False):
    fig, ax = plt.subplots(figsize=(20, 10))
    sns.scatterplot(data=tracking_data_df_, x="FRAME", y="TRACK_ID", ax=ax)
    ax.set_title(filename)

In [None]:
# One figure per input file: number of tracked rows in each frame.
for filename, tracking_data_df_ in tracking_data_df.groupby("filename", sort=False):
    fig, ax = plt.subplots(figsize=(20, 10))
    rows_per_frame = tracking_data_df_[["FRAME", "TRACK_ID"]].groupby("FRAME").count()
    rows_per_frame.plot(ax=ax)
    ax.set_title(filename)

# Compute migration velocities

In [None]:
# Build `migration_speed_df`: for every track of every file, compute the
# displacement, velocity and direction over a lag of `interval` rows.
#
# Rows that have no predecessor `interval` rows earlier (the first `interval`
# rows of each track) get NaN for all lag-based quantities. Filling the
# differences with 0 instead would fabricate zero-length steps and a direction
# of 180 degrees (arctan2(0, -0.0)) for those rows.

interval = parameters["time_lag"]

# Per-track tables are collected here and concatenated once at the end; this
# keeps every track (including a leading single-row one) and avoids
# re-copying the growing result on each iteration.
per_track_frames = []

for filename in tracking_data_df["filename"].unique():
    tracks_df_ = tracking_data_df[tracking_data_df["filename"] == filename]
    tracks_df = tracks_df_[["TRACK_ID", "POSITION_X", "POSITION_Y", "POSITION_T", "FRAME", "ORIGIN_X", "ORIGIN_Y"]]
    # Every row of a file carries the same condition label.
    condition = tracks_df_["condition"].iloc[0]

    print(filename)
    status = 0
    tracks_num = tracks_df["TRACK_ID"].nunique()

    # sort=False keeps the tracks in order of first appearance.
    for track_id, track_rows in tracks_df.groupby("TRACK_ID", sort=False):

        single_track_df = track_rows.sort_values(by="FRAME")

        # Differences are taken in float64 so that squaring and rounding cannot
        # overflow or lose precision when the stored columns use a narrow
        # float dtype. The stored position columns themselves are untouched.
        dist = (single_track_df[["POSITION_X", "POSITION_Y", "POSITION_T"]]
                .astype("float64")
                .diff(interval))
        # POSITION_T is divided by 3600 to obtain hours (i.e. it is treated as seconds).
        dist["time_in_h"] = dist["POSITION_T"] / 3600.0
        step_length = np.sqrt(dist.POSITION_X**2 + dist.POSITION_Y**2)

        single_track_df["step_size"] = np.round(step_length, decimal_places)
        single_track_df["step_size_x"] = np.round(dist.POSITION_X, decimal_places)
        single_track_df["step_size_y"] = np.round(dist.POSITION_Y, decimal_places)
        single_track_df["vel_mu_per_h"] = np.round(step_length / dist.time_in_h, decimal_places)
        single_track_df["vel_x_mu_per_h"] = np.round(dist.POSITION_X / dist.time_in_h, decimal_places)
        single_track_df["vel_y_mu_per_h"] = np.round(dist.POSITION_Y / dist.time_in_h, decimal_places)

        # Direction of the step in degrees.
        # NOTE(review): the x component is negated before arctan2; presumably
        # this matches a reference direction of the experiment -- confirm.
        single_track_df["phi"] = np.round(np.arctan2(dist.POSITION_Y, -dist.POSITION_X) * 180.0 / np.pi, decimal_places)

        single_track_df["filename"] = filename
        single_track_df["condition"] = condition

        single_track_df["time_in_h"] = np.round(single_track_df["POSITION_T"] / 3600.0, decimal_places)

        per_track_frames.append(single_track_df)

        status += 1
        if status % 500 == 0:
            print("%s out of %s tracks analyzed." % (status, tracks_num))

if per_track_frames:
    migration_speed_df = pd.concat(per_track_frames, ignore_index=True)
else:
    # No tracks available.
    migration_speed_df = pd.DataFrame()

# Save velocity computations

In [None]:
# Write the per-row migration speeds to the output folder; the time lag used
# is encoded in the file name. os.path.join places the file inside
# `output_folder` whether or not the configured path ends with a separator.
migration_speed_df.to_csv(
    os.path.join(output_folder, "migration_speeds_time_lag_%s.csv" % interval),
    index=False,
)