In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append("../")

from utils.io import read_parameters

In [None]:
# Path of the parameter file (.yml), given relative to the working directory.
parameter_file  = "../local/parameters.yml"
parameters = read_parameters(parameter_file)   

# Entries used by the cells below: the folder that is prefixed to every file
# name, and the collection of tracking files (iterated below as
# condition -> file names).
base_folder = parameters["base_folder"]
tracking_data_files = parameters["tracking_data"]

In [None]:
# Display the loaded parameters for inspection.
parameters

# Load data

In [None]:
# Load every tracking file listed in the parameters into one long table.
#
# For each file: keep the columns named in `column_dtypes`, tag the rows with
# their condition and file name, record each track's starting position
# (START_X / START_Y = position in the track's first row after sorting by
# FRAME) and drop tracks with fewer than parameters["min_track_length"] rows.

# 32-bit types are used on purpose: float16 keeps only about three significant
# digits (and overflows above 65504), int16 overflows above 32767. Both are
# too narrow for positions, time stamps and track ids.
column_dtypes = {'TRACK_ID': 'int32',
                 'FRAME': 'int32',
                 'POSITION_X': 'float32',
                 'POSITION_Y': 'float32',
                 'POSITION_T': 'float32'}
print(list(column_dtypes))

min_track_length = parameters["min_track_length"]
loaded_files = []  # one cleaned frame per input file

for condition in tracking_data_files:
    for filename in tracking_data_files[condition]:
        print(filename)
        # Rows 0-2 are dropped right after reading - presumably additional
        # header rows of the export; TODO confirm.
        data = pd.read_csv(base_folder + "/" + filename, low_memory=False).drop([0, 1, 2])

        # Work on a copy so that adding columns never writes into a slice of `data`.
        data_ = data[list(column_dtypes)].copy()
        data_.insert(0, "filename", filename)
        data_.insert(0, "condition", condition)
        data_ = data_.astype(column_dtypes).sort_values(by="FRAME")

        # The first row of every track (in frame order) is its starting position.
        track_starts = data_.drop_duplicates(subset="TRACK_ID").set_index("TRACK_ID")
        data_["START_X"] = data_["TRACK_ID"].map(track_starts["POSITION_X"])
        data_["START_Y"] = data_["TRACK_ID"].map(track_starts["POSITION_Y"])

        # Keep only tracks with at least `min_track_length` rows.
        rows_per_track = data_["TRACK_ID"].map(data_["TRACK_ID"].value_counts())
        data_ = data_[rows_per_track >= min_track_length]

        print("##################")
        loaded_files.append(data_)

# Concatenate once at the end. The previous "append if more than 10 rows,
# otherwise overwrite" logic silently discarded earlier files whenever the
# accumulated table was short, and DataFrame.append no longer exists in
# pandas 2.x.
if loaded_files:
    tracking_data_df = pd.concat(loaded_files, ignore_index=True)
else:
    tracking_data_df = pd.DataFrame()

In [None]:
# Display the combined tracking table.
tracking_data_df

In [None]:
# Inspect one example: all rows of track 0 from a single input file, in frame order.
track_zero_mask = tracking_data_df["TRACK_ID"] == 0
test = tracking_data_df.loc[track_zero_mask].sort_values(by="FRAME")
test = test.loc[test["filename"] == "/siScr/SUM_230420_siScr_20dyn_TrackMate.csv"]
display(test)
len(test.index)

In [None]:
# One figure per input file: a dot for every (frame, track id) observation.
for filename, tracking_data_df_ in tracking_data_df.groupby("filename", sort=False):
    fig, ax = plt.subplots(figsize=(20, 10))
    sns.scatterplot(x="FRAME", y="TRACK_ID", data=tracking_data_df_)
    ax.set_title(filename)

In [None]:
# One figure per input file: number of track observations in every frame.
for filename, tracking_data_df_ in tracking_data_df.groupby("filename", sort=False):
    fig, ax = plt.subplots(figsize=(20, 10))
    observations_per_frame = tracking_data_df_[["FRAME", "TRACK_ID"]].groupby("FRAME").count()
    observations_per_frame.plot(ax=ax)
    ax.set_title(filename)

In [None]:
# Position relative to the track's starting point, so that every track begins at (0, 0).
tracking_data_df["ORIGIN_X"] = tracking_data_df["POSITION_X"].sub(tracking_data_df["START_X"])
tracking_data_df["ORIGIN_Y"] = tracking_data_df["POSITION_Y"].sub(tracking_data_df["START_Y"])

In [None]:
# Split the observations by frame number; both comparisons are strict.
frame_number = tracking_data_df["FRAME"]
phase_1_data_df = tracking_data_df.loc[frame_number < parameters["end_phase_1"]]
phase_2_data_df = tracking_data_df.loc[frame_number > parameters["start_phase_2"]]

In [None]:
# Per input file: overlay a subsample of the origin-shifted phase-1 tracks.
for filename, tracking_data_df_ in phase_1_data_df.groupby("filename", sort=False):
    fig, ax = plt.subplots(figsize=(10, 10))

    # Every 20th track among the first 1000 track ids of this file.
    for track_id in tracking_data_df_["TRACK_ID"].unique()[:1000:20]:
        single_track_df = tracking_data_df_[tracking_data_df_["TRACK_ID"] == track_id]
        ax.plot(single_track_df["ORIGIN_X"], single_track_df["ORIGIN_Y"])

    ax.set(xlim=(-100, 100), ylim=(-100, 100), title=filename)
    # The third "/"-separated piece of the file name is used in the output name.
    plt.savefig("collapsed_trajectories_%s.png" % str(filename).split("/")[2])

In [None]:
# Per condition: overlay a subsample of the origin-shifted phase-1 tracks,
# leaving out two input files.
excluded_files = ["/siScr/SUM_230420_siScr_20dyn_TrackMate.csv",
                  "/siRac1/SUM_230420_siRac1_TrackMate.csv"]

for condition in phase_1_data_df["condition"].unique():
    condition_data_df = phase_1_data_df[phase_1_data_df["condition"] == condition]
    condition_data_df = condition_data_df[~condition_data_df["filename"].isin(excluded_files)]

    fig, ax = plt.subplots(figsize=(10,10))
    counter = 0  # counts tracks across all files of this condition

    for filename in condition_data_df["filename"].unique():
        tracking_data_df_ = condition_data_df[condition_data_df["filename"] == filename]
        for track_id in tracking_data_df_["TRACK_ID"].unique():
            # Every 3rd track among the first 1500 of the condition ...
            if counter % 3 == 0 and counter < 1500:
                single_track_df = tracking_data_df_[tracking_data_df_["TRACK_ID"] == track_id]
                # ... and only if it has at least end_phase_1 rows in phase 1.
                if len(single_track_df.index) >= parameters["end_phase_1"]:
                    ax.plot(single_track_df["ORIGIN_X"], single_track_df["ORIGIN_Y"])
            counter += 1

    ax.set_xlim(-50,50)
    ax.set_ylim(-50,50)
    ax.set_title(condition)
    # Phase-specific file name: the phase-2 cell saves to
    # "collapsed_trajectories_<condition>.png", which used to overwrite this figure.
    plt.savefig("collapsed_trajectories_phase_1_%s.png" % condition)
            
        

In [None]:
# Per condition: overlay a subsample of the origin-shifted phase-2 tracks,
# leaving out two input files.
files_left_out = ["/siScr/SUM_230420_siScr_20dyn_TrackMate.csv",
                  "/siRac1/SUM_230420_siRac1_TrackMate.csv"]

for condition, condition_data_df in phase_2_data_df.groupby("condition", sort=False):
    condition_data_df = condition_data_df[~condition_data_df["filename"].isin(files_left_out)]
    fig, ax = plt.subplots(figsize=(10, 10))

    counter = 0  # counts tracks across all files of this condition
    for filename, tracking_data_df_ in condition_data_df.groupby("filename", sort=False):
        for track_id in tracking_data_df_["TRACK_ID"].unique():
            # Every 10th track among the first 1500 of the condition.
            plot_this_track = counter < 1500 and counter % 10 == 0
            if plot_this_track:
                single_track_df = tracking_data_df_[tracking_data_df_["TRACK_ID"] == track_id]
                ax.plot(single_track_df["ORIGIN_X"], single_track_df["ORIGIN_Y"])
            counter += 1

    ax.set(xlim=(-75, 75), ylim=(-75, 75), title=condition)
    plt.savefig("collapsed_trajectories_%s.png" % condition)

# Compute migration velocities

In [None]:
# Per-track displacements and velocities over a lag of `interval` rows.
#
# For every track of every file the rows are sorted by FRAME and differenced
# against the row `interval` positions earlier. Velocities divide by the
# actual frame difference, so a gap inside a track changes the effective lag
# but not the units. The result is one long table, `migration_speed_df`.
interval = parameters["time_lag"]
minutes_per_frame = 5.0  # frame spacing used to convert frame counts to hours
track_columns = ["TRACK_ID", "POSITION_X", "POSITION_Y", "POSITION_T", "FRAME"]
per_track_results = []  # collected here and concatenated once at the end

for filename in tracking_data_df["filename"].unique():
    tracks_df_ = tracking_data_df[tracking_data_df["filename"] == filename]
    tracks_df = tracks_df_[track_columns]
    file_condition = tracks_df_["condition"].iloc[0]

    print(filename)
    status = 0
    track_ids = tracks_df["TRACK_ID"].unique()
    tracks_num = len(track_ids)

    for track_id in track_ids:
        single_track_df = tracks_df[tracks_df["TRACK_ID"] == track_id].sort_values(by="FRAME")

        # The first `interval` rows have no partner row; their differences are
        # set to 0, so the velocities there evaluate to 0/0 = NaN.
        dist = single_track_df.diff(interval).fillna(0.)
        dist["time_in_h"] = dist["FRAME"] * minutes_per_frame / 60.0
        step_size = np.sqrt(dist.POSITION_X**2 + dist.POSITION_Y**2)

        # Displacement over the lag.
        single_track_df["step_size"] = step_size
        single_track_df["step_size_x"] = dist.POSITION_X
        single_track_df["step_size_y"] = dist.POSITION_Y
        # Velocity per frame.
        single_track_df["vel"] = step_size / dist.FRAME
        single_track_df["vel_x"] = dist.POSITION_X / dist.FRAME
        single_track_df["vel_y"] = dist.POSITION_Y / dist.FRAME
        # Velocity per hour.
        single_track_df["vel_mu_per_h"] = step_size / dist.time_in_h
        single_track_df["vel_x_mu_per_h"] = dist.POSITION_X / dist.time_in_h
        single_track_df["vel_y_mu_per_h"] = dist.POSITION_Y / dist.time_in_h

        # Direction of the step in degrees; note the sign flip on the x component.
        single_track_df["phi"] = np.arctan2(dist.POSITION_Y, -dist.POSITION_X) * 180.0 / np.pi

        single_track_df["filename"] = filename
        single_track_df["condition"] = file_condition
        single_track_df["time_in_h"] = single_track_df["FRAME"] * minutes_per_frame / 60.0

        per_track_results.append(single_track_df)

        status += 1
        if status % 500 == 0:
            print("%s out of %s tracks analyzed." % (status, tracks_num))

# One concatenation instead of growing the table track by track: the old
# "append if more than 1 row, otherwise overwrite" logic could drop earlier
# results, was quadratic in the number of tracks, and relied on
# DataFrame.append, which no longer exists in pandas 2.x.
if per_track_results:
    migration_speed_df = pd.concat(per_track_results, ignore_index=True)
else:
    migration_speed_df = pd.DataFrame()


In [None]:
# Write the velocity table to CSV without the index column.
migration_speeds_csv = "../data/migration_speeds.csv"
migration_speed_df.to_csv(migration_speeds_csv, index=False)

In [None]:
# x-velocity over time, one line per condition, two input files left out.
# Rows with missing values and rows up to frame `interval` are removed first.
plot_migration_speeds = migration_speed_df.dropna()
plot_migration_speeds = plot_migration_speeds[plot_migration_speeds["FRAME"] > interval]
plot_migration_speeds = plot_migration_speeds[plot_migration_speeds["filename"] != "/siScr/SUM_230420_siScr_20dyn_TrackMate.csv"]
plot_migration_speeds = plot_migration_speeds[plot_migration_speeds["filename"] != "/siRac1/SUM_230420_siRac1_TrackMate.csv"]

fig, ax = plt.subplots(1, figsize=(15,10))

# Drawn exactly once: the identical second call only repeated the (expensive)
# estimate and painted the same lines and bands over the first ones.
sns.lineplot(x = "time_in_h", y = "vel_x_mu_per_h", hue = "condition", data = plot_migration_speeds, ax=ax, ci = 90)

ax.set_ylabel("velocity in microns/h")
ax.set_ylim(-7.0,7.0)
# Reference line at zero velocity.
ax.axhline(y = 0.0, color = 'r', linestyle = 'dashed')
plt.savefig("velocity_parallel_to_flow.pdf")

In [None]:
# x-velocity over time, one line per condition, two input files left out.
# NOTE(review): this cell has the same content as the cell before it and
# writes the same output file - consider deleting one of the two.
plot_migration_speeds = migration_speed_df.dropna()
plot_migration_speeds = plot_migration_speeds[plot_migration_speeds["FRAME"] > interval]
plot_migration_speeds = plot_migration_speeds[plot_migration_speeds["filename"] != "/siScr/SUM_230420_siScr_20dyn_TrackMate.csv"]
plot_migration_speeds = plot_migration_speeds[plot_migration_speeds["filename"] != "/siRac1/SUM_230420_siRac1_TrackMate.csv"]

fig, ax = plt.subplots(1, figsize=(15,10))

# Drawn exactly once: the identical second call only repeated the (expensive)
# estimate and painted the same lines and bands over the first ones.
sns.lineplot(x = "time_in_h", y = "vel_x_mu_per_h", hue = "condition", data = plot_migration_speeds, ax=ax, ci = 90)

ax.set_ylabel("velocity in microns/h")
ax.set_ylim(-7.0,7.0)
# Reference line at zero velocity.
ax.axhline(y = 0.0, color = 'r', linestyle = 'dashed')
plt.savefig("velocity_parallel_to_flow.pdf")

In [None]:
# x-velocity over time, one line per input file, all files included.
# Rows with missing values and rows up to frame `interval` are removed first.
plot_migration_speeds = migration_speed_df.dropna()
plot_migration_speeds = plot_migration_speeds[plot_migration_speeds["FRAME"] > interval]

fig, ax = plt.subplots(1, figsize=(15,10))

sns.lineplot(x = "time_in_h", y = "vel_x_mu_per_h", hue = "filename", data = plot_migration_speeds, ax=ax, ci = 90)

ax.set_ylabel("velocity in microns/h")
ax.set_ylim(-7.0,7.0)
# Reference line at zero velocity.
ax.axhline(y = 0.0, color = 'r', linestyle = 'dashed')
# Own file name: the following cell (same plot with two files left out) saves
# to "velocity_parallel_to_flow_filename.pdf" and used to overwrite this figure.
plt.savefig("velocity_parallel_to_flow_filename_all_files.pdf")

In [None]:
# x-velocity over time, one line per input file, two input files left out.
files_left_out = ["/siScr/SUM_230420_siScr_20dyn_TrackMate.csv",
                  "/siRac1/SUM_230420_siRac1_TrackMate.csv"]

complete_rows = migration_speed_df.dropna()
after_lag = complete_rows["FRAME"] > interval
not_left_out = ~complete_rows["filename"].isin(files_left_out)
plot_migration_speeds = complete_rows[after_lag & not_left_out]

fig, ax = plt.subplots(1, figsize=(15, 10))
sns.lineplot(data=plot_migration_speeds, x="time_in_h", y="vel_x_mu_per_h",
             hue="filename", ci=90, ax=ax)

ax.set(ylabel="velocity in microns/h", ylim=(-7.0, 7.0))
ax.axhline(y=0.0, color='r', linestyle='dashed')  # zero-velocity reference
plt.savefig("velocity_parallel_to_flow_filename.pdf")

In [None]:
# Speed (magnitude of the velocity) over time, one line per condition, all
# files included. Rows with missing values and rows up to frame `interval`
# are removed first.
plot_migration_speeds = migration_speed_df.dropna()
plot_migration_speeds = plot_migration_speeds[plot_migration_speeds["FRAME"] > interval]

fig, ax = plt.subplots(1, figsize=(15,10))

sns.lineplot(x = "time_in_h", y = "vel_mu_per_h", hue = "condition", data = plot_migration_speeds, ax=ax, ci = 90)

ax.set_ylabel("velocity in microns/h")
ax.set_ylim(0,10.0)
ax.axhline(y = 0.0, color = 'r', linestyle = 'dashed')
# Own file name: the following cell (same plot with two files left out) saves
# to "velocity.pdf" and used to overwrite this figure.
plt.savefig("velocity_all_files.pdf")

In [None]:
# Speed (magnitude of the velocity) over time, one line per condition,
# two input files left out.
files_left_out = ["/siScr/SUM_230420_siScr_20dyn_TrackMate.csv",
                  "/siRac1/SUM_230420_siRac1_TrackMate.csv"]

complete_rows = migration_speed_df.dropna()
after_lag = complete_rows["FRAME"] > interval
not_left_out = ~complete_rows["filename"].isin(files_left_out)
plot_migration_speeds = complete_rows[after_lag & not_left_out]

fig, ax = plt.subplots(1, figsize=(15, 10))
sns.lineplot(data=plot_migration_speeds, x="time_in_h", y="vel_mu_per_h",
             hue="condition", ci=90, ax=ax)

ax.set(ylabel="velocity in microns/h", ylim=(0, 10.0))
ax.axhline(y=0.0, color='r', linestyle='dashed')
plt.savefig("velocity.pdf")

In [None]:
# y-velocity over time, one line per condition, two input files left out.
files_left_out = ["/siScr/SUM_230420_siScr_20dyn_TrackMate.csv",
                  "/siRac1/SUM_230420_siRac1_TrackMate.csv"]

complete_rows = migration_speed_df.dropna()
after_lag = complete_rows["FRAME"] > interval
not_left_out = ~complete_rows["filename"].isin(files_left_out)
plot_migration_speeds = complete_rows[after_lag & not_left_out]

fig, ax = plt.subplots(1, figsize=(15, 10))
sns.lineplot(data=plot_migration_speeds, x="time_in_h", y="vel_y_mu_per_h",
             hue="condition", ci=90, ax=ax)

ax.set(ylabel="velocity in microns/h", ylim=(-7.0, 7.0))
ax.axhline(y=0.0, color='r', linestyle='dashed')  # zero-velocity reference

plt.savefig("velocity_perpendicular_to_flow.pdf")

In [None]:
# NOTE(review): this cell looks unfinished - `start_tracks` is created but
# nothing in the code below ever writes to it.
start_tracks = dict()

# Visit every track of every input file.
for filename in tracking_data_df["filename"].unique():
    tracks_df_ = tracking_data_df[tracking_data_df["filename"] == filename]
    tracks_df = tracks_df_[["TRACK_ID", "POSITION_X", "POSITION_Y", "POSITION_T", "FRAME"]]
     
    for track_id in tracks_df["TRACK_ID"].unique():
        single_track_df = tracks_df[tracks_df ["TRACK_ID"]==track_id]
        # A bare expression inside a loop has no effect: nothing is displayed
        # or stored here.
        single_track_df
        
        