# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Imports-and-readins" data-toc-modified-id="Imports-and-readins-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports and readins</a></div><div class="lev1 toc-item"><a href="#Control-each-step" data-toc-modified-id="Control-each-step-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Control each step</a></div><div class="lev1 toc-item"><a href="#Plot-steps" data-toc-modified-id="Plot-steps-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Plot steps</a></div><div class="lev1 toc-item"><a href="#All-in-one-step" data-toc-modified-id="All-in-one-step-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>All in one step</a></div><div class="lev1 toc-item"><a href="#All-in-loop" data-toc-modified-id="All-in-loop-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>All in loop</a></div>

# Imports and readins

In [1]:
import numpy as np
import pandas as pd
import glob
%matplotlib notebook
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "/Users/jennavergeynst/OneDrive - UGent/Sharing_code/prepare_toa_for_yaps/")
sys.path.insert(0, "/Users/jennavergeynst/OneDrive - UGent/Ham/functions")
from paths import *

from Prepare_toa_data import *

In [2]:
# Water-temperature record (a pickle here; usually this would be a pd.read_csv call).
# The index is made timezone-naive and then moved into a regular column.
temperature = (
    pd.read_pickle(ABIOTICS_PATH+'all_watertemp_data.pkl')
    .tz_localize(None)
    .reset_index()
)

# Synchronised detections of all receivers (again, usually a pd.read_csv call).
# Alternative input: pd.read_pickle(INTERIM+'receiverlogs_sync_03.pkl')
# After reset_index the 'level_0' column is renamed to 'rec_name' and 'level_1' is dropped.
receiver_logs_path = project_folder+'all_fish/data/positions_synced_03/receiverlogs_official_sync_03.pkl'
recs_df = (
    pd.read_pickle(receiver_logs_path)
    .reset_index()
    .rename(columns={'level_0': 'rec_name'})
    .drop(columns='level_1')
)

# Names of all receivers that may appear as columns in the TOA tables
hydros = pd.read_csv('Receiver_location.csv')
rec_list = list(hydros['station_name'])


In [3]:
## FOR 3rd DEPLOYMENT ONLY
# Split the detections into the part before (A) and the part after (B) the moment below.
# NOTE(review): both comparisons are strict, so a detection stamped exactly at the
# split moment ends up in neither part — confirm that this is intended.
split_moment = '2017-04-05  08:47'
recs_dfA = recs_df[recs_df.synced_time < split_moment].sort_values(by='synced_time')
recs_dfB = recs_df[recs_df.synced_time > split_moment].sort_values(by='synced_time').copy()
# In part B, receiver 'ST08' is relabelled 'ST08-2'. The mask is built from recs_dfB
# itself so the assignment does not rely on index alignment with recs_df.
recs_dfB.loc[recs_dfB.rec_name == 'ST08', 'rec_name'] = 'ST08-2'

In [4]:
# Lower and upper bound of the burst interval used by the cleaning steps
min_burst, max_burst = 540, 660
# Set to True to write the gap-filled TOA table, False to write the cleaned one
fill_the_gaps = True
# Deployment label; it is embedded in both output folder names
dep = 'D3b'
TOA_data_path = 'TOA_data_cleaned_{}/'.format(dep)
TOA_fig_path = 'TOA_cleaning_figures_{}/'.format(dep)

In [5]:
# Optional single-tag selection for the step-by-step sections below.
# NOTE(review): the later cells use `tag_data` and `ID`, which this cell only
# defines when the lines below are uncommented; otherwise they must already
# exist in the kernel (e.g. left over from the loop at the end of the notebook).
# ID = '65040'
# self_receiver = 'ST07'
# # take part of the dataframe that only contains detections of one tag
# tag_data = recs_dfB[recs_dfB.ID == ID].copy()

# Control each step

In [6]:
# Column names of the detection table
time_col, rec_col = 'synced_time', 'rec_name'
# Temperature table and the names of its time and temperature columns
temp = temperature
temp_time_col, temp_temp_col = 'DATETIME', 'Temp'
# Settings handed to prepare_tag_data (pas_tol, max_time) and the minimum
# number of rows a track must exceed before its gaps are filled
pas_tol, max_time = 60, 1
min_track_length = 10

In [7]:
# Midpoint of the burst-interval bounds, rounded to one decimal and suffixed with 's'
burst_midpoint = np.mean((min_burst, max_burst)).round(1)
mean_burst = str(burst_midpoint)+'s'

# Order the detections in time, renumber the rows, then attach the sound speed
# derived from the temperature table.
tag_data = add_soundspeed(
    tag_data=tag_data.sort_values(by=time_col).reset_index(drop=True),
    time_col=time_col,
    temp=temp,
    temp_time_col=temp_time_col,
    temp_temp_col=temp_temp_col,
)

In [8]:
# pas_tol cuts a track when the fish is not heard for that long; don't make it too small.
# NOTE(review): the original note gave the example "pas_tol = 5 -> 5 minutes", while the
# value actually passed comes from the parameter cell — confirm the unit in prepare_tag_data.
toa_data = prepare_tag_data(
    tag_data,
    time_col=time_col,
    rec_col=rec_col,
    max_time=max_time,
    pas_tol=pas_tol,
).reset_index()
# Keep, in table order, only the columns whose name is a known receiver
rec_cols = [column for column in toa_data.columns if column in rec_list]

In [10]:
# Show which receiver columns are present in toa_data
rec_cols

['ST07',
 'ST08-2',
 'ST09-2',
 'ST10',
 'ST12',
 'ST13',
 'ST14',
 'ST15',
 'ST16',
 'ST17',
 'ST18',
 'ST18-2']

In [11]:
# Remove observations that follow each other faster than the minimum known
# burst interval; a 10% margin below min_burst is tolerated.
min_delay = 0.9*min_burst
cleaned_toa_data = clean_toa_data(toa_data, min_delay=min_delay, rec_cols=rec_cols)

In [12]:
# Fill the gaps left by pings that were not observed, one 'groups_pas' group at a time
toa_data_groups = cleaned_toa_data.groupby(by='groups_pas')
filled_toa = {}
for group_key in toa_data_groups.groups:
    track = toa_data_groups.get_group(group_key).copy()
    # Only tracks with more than min_track_length rows are kept
    # (too short tracks will bug YAPS).
    if len(track) <= min_track_length:
        continue
    filled_toa[group_key] = fill_gaps(track, rec_cols, time_col=time_col, mean_burst=mean_burst)


# Some burst intervals are still about double the maximum interval, probably
# because virtual pings are not correctly aligned with real pings.
# Drop the rows that create such intervals (10% above max_burst is tolerated).
interval_limit = 1.1*max_burst
for track in filled_toa.values():
    intervals = track.loc[:, rec_cols].mean(axis=1).diff()
    too_long = track[intervals > interval_limit]
    track.drop(index=too_long.index, inplace=True)

In [13]:
# Dropping rows opened new gaps, so the gap filling is repeated on every track
# that still has more than min_track_length rows (too short tracks will bug YAPS).
final_toa = {
    group_key: fill_gaps(track.copy(), rec_cols, time_col=time_col, mean_burst=mean_burst)
    for group_key, track in filled_toa.items()
    if len(track) > min_track_length
}
# One table for all groups, indexed by 'groups_pas' and without 'groups_obs'
final_toa_df = (
    pd.concat(final_toa)
    .set_index('groups_pas')
    .drop(columns='groups_obs')
)


In [14]:
# Summary statistics of the intervals between consecutive rows of the group
# with key 0: row-wise mean over the receiver columns, then successive differences.
# rec_cols is used instead of a hard-coded receiver list so that this check
# follows whatever receivers are actually present in the TOA table.
final_toa[0][rec_cols].mean(axis=1).diff().describe()

count    6526.000000
mean      603.913783
std        34.293539
min       543.682461
25%       574.928030
50%       603.968044
75%       633.298664
max       664.564649
dtype: float64

# Plot steps

In [None]:
# Switch matplotlib to interactive mode for the figures below
plt.ion()

In [None]:
# Intervals between consecutive rows of the raw TOA table
# (row-wise mean over the receiver columns, then successive differences).
fig, ax = plt.subplots()
raw_intervals = toa_data.loc[:, rec_cols].mean(axis=1).diff()
ax.plot(toa_data[time_col], raw_intervals, alpha=0.3, lw=0, marker='.')
fig.autofmt_xdate()
ax.set_ylim(-5, 5*max_burst)
# Red reference lines at the lower and upper burst bound
for burst_limit in (min_burst, max_burst):
    ax.plot(ax.get_xbound(), (burst_limit, burst_limit), c='red')
title = 'Burst intervals fish '+str(ID)+' without cleaning'
ax.set_title(title)
# The cleaning step should remove the intervals below the minimum interval
#fig.savefig(write_path+'1_uncleaned_'+str(ID)+'.png')

In [None]:
# Same interval plot, now for the table after removal of too-close rows
fig, ax = plt.subplots()
cleaned_intervals = cleaned_toa_data.loc[:, rec_cols].mean(axis=1).diff()
ax.plot(cleaned_toa_data[time_col], cleaned_intervals, alpha=0.3, lw=0, marker='.')
fig.autofmt_xdate()
ax.set_ylim(-5, 5*max_burst)
# Red reference lines at the lower and upper burst bound
for burst_limit in (min_burst, max_burst):
    ax.plot(ax.get_xbound(), (burst_limit, burst_limit), c='red')
title = 'Burst intervals fish '+str(ID)+' after first clean-up of too-close rows'
ax.set_title(title)
# Intervals below the minimum interval should have been removed by now
#fig.savefig(write_path+'2_cleaned_'+str(ID)+'.png')

In [None]:
# Interval plot after the first round of gap filling
fig, ax = plt.subplots()
# Store each group's row-to-row interval in a 'diff' column (written into the
# frames held by filled_toa), then stack all groups for plotting.
for group_frame in filled_toa.values():
    group_frame['diff'] = group_frame.loc[:, rec_cols].mean(axis=1).diff()
filled_toa_df = pd.concat(filled_toa).reset_index(drop=True)
ax.plot(filled_toa_df[time_col], filled_toa_df['diff'], alpha=0.3, lw=0, marker='.')
fig.autofmt_xdate()
ax.set_ylim(0, 5*max_burst)
# Red reference lines at the lower and upper burst bound
for burst_limit in (min_burst, max_burst):
    ax.plot(ax.get_xbound(), (burst_limit, burst_limit), c='red')
title = 'Burst intervals fish '+str(ID)+ ' after filling of gaps'
ax.set_title(title)
# Gap filling should remove the major part of the intervals above the maximum interval
#fig.savefig(write_path+'3_gap_filled_'+str(ID)+'.png')

In [None]:
# Interval plot after the second round of gap filling
fig, ax = plt.subplots()
# Store each group's row-to-row interval in a 'diff' column (written into the
# frames held by final_toa), then stack all groups for plotting.
# Note that this rebinds final_toa_df to the stacked table with a fresh row index.
for group_frame in final_toa.values():
    group_frame['diff'] = group_frame.loc[:, rec_cols].mean(axis=1).diff()
final_toa_df = pd.concat(final_toa).reset_index(drop=True)
ax.plot(final_toa_df[time_col], final_toa_df['diff'], alpha=0.3, lw=0, marker='.')
fig.autofmt_xdate()
#ax.set_ylim(0,5*max_burst)
# Red reference lines at the lower and upper burst bound
for burst_limit in (min_burst, max_burst):
    ax.plot(ax.get_xbound(), (burst_limit, burst_limit), c='red')
title = 'Burst intervals fish '+str(ID)+ ' after second round of gap filling'
ax.set_title(title)
# Now all intervals should lie between the minimum and maximum interval
#fig.savefig(write_path+'4_final_'+str(ID)+'.png')

# All in one step

In [None]:
# Run the complete cleaning pipeline for one tag.
# NOTE(review): `tag_data` and `ID` must already be defined (single-tag selection
# cell or a previous run of the loop) before this cell is executed.
# If the below gives an error and filled_toa is empty => decrease min_track_length
rec_cols, toa_data, cleaned_toa_data, filled_toa, final_toa = create_final_toa(
    tag_data=tag_data, max_time=1, min_burst=min_burst, max_burst=max_burst,
    time_col='synced_time', rec_col='rec_name', rec_list=rec_list,
    temp=temperature, temp_time_col='DATETIME', temp_temp_col='Temp',
    pas_tol=60, min_track_length=10)

# The resulting dataframe can be used as input for yaps.
# The switch is the `fill_the_gaps` flag from the configuration cell. `fill_gaps`
# is the gap-filling function, so testing `fill_gaps==True` was always False and
# the gap-filled table was never written.
if fill_the_gaps:
    final_toa_df = pd.concat(final_toa).set_index('groups_pas').drop(columns='groups_obs').reset_index()
    final_toa_df.to_csv(TOA_data_path+'TOA_data_'+str(ID)+'.csv', index=False)
else:
#     cleaned_toa_data = cleaned_toa_data.drop(columns='ST07')
#     cleaned_toa_data = cleaned_toa_data.dropna(how='all', subset=['ST08', 'ST09', 'ST10', 'ST12', 'ST13', 'ST14', 'ST15',
#        'ST16', 'ST17', 'ST18', 'ST18-2'])
    cleaned_toa_data.to_csv(TOA_data_path+'TOA_data_'+str(ID)+'.csv', index=False)

# Check visually the evolution in the different steps. In the final_toa,
# the time differences between 2 lines of the dataframe should be within min and max burst
create_plots(ID=ID, min_burst=min_burst, max_burst=max_burst, rec_cols=rec_cols, toa_data=toa_data,
             cleaned_toa_data=cleaned_toa_data, filled_toa=filled_toa, final_toa=final_toa,
             write_path=TOA_fig_path, time_col='synced_time', plot='off')


# All in loop

In [15]:
# Receiver/system overview; keep only the rows whose Device is 'sync' or 'ref'
receiver_table = pd.read_csv('Receiver_and_system_info.csv')
is_fixed = receiver_table.Device.isin(['sync', 'ref'])
fixed = receiver_table[is_fixed].copy()
# The ID is the third dash-separated field of 'Full-ID'
fixed['ID'] = fixed['Full-ID'].map(lambda full_id: full_id.split('-')[2])

In [None]:
# Disabled one-off export, kept for reference: it derives a station name and
# an ID for every 'sync' row of receiver_table and writes both to sync_self.csv.
# sync = receiver_table[receiver_table.Device=='sync'].copy()
# sync['station_name'] = sync.Name.apply(lambda name: 'ST'+ str(name[1:]))
# sync.station_name = sync.station_name.replace({'ST8':'ST08', 'ST7':'ST07', 'ST8-2':'ST08-2','ST9-2':'ST09-2'})
# sync['ID'] = sync['Full-ID'].apply(lambda s: s.split('-')[2])
# sync[['station_name','ID']].to_csv('sync_self.csv')

In [None]:
# Sorted list of the distinct IDs that the loop below iterates over
sorted(fixed.ID.unique())

In [16]:
# Run the complete pipeline for every sync/ref transmitter ID and write one
# TOA csv plus the control figures per ID.
for ID in sorted(fixed.ID.unique()):
    tag_data = recs_dfB[recs_dfB.ID == ID].copy()
    if tag_data.empty:
        # no detections of this ID in the selected part of the deployment
        continue

    # If the below gives an error and filled_toa is empty => decrease min_track_length
    rec_cols, toa_data, cleaned_toa_data, filled_toa, final_toa = create_final_toa(
        tag_data=tag_data, max_time=1, min_burst=min_burst, max_burst=max_burst,
        time_col='synced_time', rec_col='rec_name', rec_list=rec_list,
        temp=temperature, temp_time_col='DATETIME', temp_temp_col='Temp',
        pas_tol=60, min_track_length=10)

    # The resulting dataframe can be used as input for yaps
    if fill_the_gaps:
        if not final_toa:
            # pd.concat raises a ValueError on an empty dict, which would abort the
            # loop for all remaining IDs; report the ID and move on instead.
            print('final_toa is empty for ID '+str(ID)+': nothing written (try a smaller min_track_length)')
            continue
        final_toa_df = pd.concat(final_toa).set_index('groups_pas').drop(columns='groups_obs').reset_index()
        final_toa_df.to_csv(TOA_data_path+'TOA_data_'+str(ID)+'.csv', index=False)
    else:
    #     cleaned_toa_data = cleaned_toa_data.drop(columns='ST07')
    #     cleaned_toa_data = cleaned_toa_data.dropna(how='all', subset=['ST08', 'ST09', 'ST10', 'ST12', 'ST13', 'ST14', 'ST15',
    #        'ST16', 'ST17', 'ST18', 'ST18-2'])
        cleaned_toa_data.to_csv(TOA_data_path+'TOA_data_'+str(ID)+'.csv', index=False)

    # Check visually the evolution in the different steps. In the final_toa,
    # the time differences between 2 lines of the dataframe should be within min and max burst
    create_plots(ID=ID, min_burst=min_burst, max_burst=max_burst, rec_cols=rec_cols, toa_data=toa_data,
                 cleaned_toa_data=cleaned_toa_data, filled_toa=filled_toa, final_toa=final_toa,
                 write_path=TOA_fig_path, time_col='synced_time', plot='off')



To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()
  fig,ax = plt.subplots()
  fig,ax = plt.subplots()
  fig,ax = plt.subplots()
  fig,ax = plt.subplots()
