# Prepare data


## Information
Project: **The price of being late: short- and long-term consequences of a delayed migration timing**  
Author: Iris Bontekoe  
Program: Python 3.8.5  
Description: This script prepares the data for analyses and figures. 

### Preparations

In [None]:
# Import stuff that is necessary to execute the script
import os, glob
import pandas as pd
import datetime
import pickle
import time
import numpy as np
import geopy.distance
from collections import namedtuple
from astral import LocationInfo
from astral.sun import sun
from scipy.ndimage.filters import uniform_filter1d
from statistics import mean

# Set the path to the folder where the data is located
# NOTE: file names are appended to this string directly (e.g. data_folder+"TempWind/..."),
#       so the path has to end with a path separator
data_folder = "[...]"

## Prepare GPS data for calculations

### Combine data for each study

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Write a function to make sure all objects are removed afterwards
def function_Aff():
    """Combine all Affenberg csv files into one data frame and save it as a pkl file.

    Reads every csv file in data_folder+"TempWind/" that matches one of the two
    file-name patterns below and writes the concatenated result to
    data_folder+"DataAff_TempWind_AllData.pkl".

    Raises
    ------
    ValueError
        Raised by pd.concat when no file matches the patterns.
    """

    # Find all file names for Affenberg
    # NOTE(review): the second pattern is not specific to Affenberg; it matches every
    #    file in TempWind whose name contains ".csv-" -- confirm that this is intended
    patterns = ("TempWind/White Stork Affenberg*.csv", "TempWind/*.csv-*.csv")
    matches = []
    for pattern in patterns:
        matches.extend(glob.glob(data_folder+pattern))

    # A file name can match both patterns and would then be loaded twice.
    #    dict.fromkeys removes the repeats and keeps the order in which files were found
    all_files_Aff = list(dict.fromkeys(matches))

    # Load all data files for Affenberg
    data_Aff_all = (pd.read_csv(f,sep=',',low_memory=False) for f in all_files_Aff)

    # Merge the files together
    data_Aff = pd.concat(data_Aff_all)

    # Save the data into a new pkl file
    data_Aff.to_pickle(data_folder+"DataAff_TempWind_AllData.pkl")

# Combine the Affenberg files, then print the finish time and the elapsed run time
start = datetime.datetime.now()
function_Aff()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Wrapped in a function so that the intermediate objects are local and removed afterwards
def function_CC():
    """Combine all CareCenter csv files into one data frame and save it as a pkl file.

    Reads every file matching data_folder+"TempWind/*White Stork SW Germany Care Centre*.csv"
    and writes the concatenated result to data_folder+"DataCC_TempWind_AllData.pkl".
    """

    # Read each CareCenter file into its own data frame
    frames = []
    for csv_path in glob.glob(data_folder+"TempWind/*White Stork SW Germany Care Centre*.csv"):
        frames.append(pd.read_csv(csv_path,sep=',',low_memory=False))

    # Stack the data frames and store the result as a new pkl file
    pd.concat(frames).to_pickle(data_folder+"DataCC_TempWind_AllData.pkl")
    
# Combine the CareCenter files, then print the finish time and the elapsed run time
start = datetime.datetime.now()
function_CC()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Wrapped in a function so that the intermediate objects are local and removed afterwards
def function_CASCB():
    """Combine all CASCB csv files into one data frame and save it as a pkl file.

    Reads every file matching data_folder+"TempWind/*White Stork SW Germany CASCB*.csv"
    and writes the concatenated result to data_folder+"DataCASCB_TempWind_AllData.pkl".
    """

    # Read each CASCB file into its own data frame
    frames = []
    for csv_path in glob.glob(data_folder+"TempWind/*White Stork SW Germany CASCB*.csv"):
        frames.append(pd.read_csv(csv_path,sep=',',low_memory=False))

    # Stack the data frames and store the result as a new pkl file
    pd.concat(frames).to_pickle(data_folder+"DataCASCB_TempWind_AllData.pkl")

# Combine the CASCB files, then print the finish time and the elapsed run time
start = datetime.datetime.now()
function_CASCB()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Prepare data
- Remove unnecessary columns
- Remove duplicates
- Remove data before release (if applicable)
- Remove data after death (if applicable)
- Remove data from 1 November onwards (in the year in which an individual's data start)

In [None]:
# Columns that are carried through to the later steps (order is kept in the output)
columns_to_keep = [
    # Time and position of the fix
    "timestamp", "location-long", "location-lat",
    # Tag and individual identifiers
    "tag-local-identifier", "individual-local-identifier",
    # Movement, height and fix information
    "ground-speed", "heading", "height-above-ellipsoid", "sensor-type", "gps:satellite-count",
    # Temperature and wind columns (these names are assigned by the rename in function_prep)
    "Temp_SL", "U_Wind_PL", "V_Wind_PL",
]

# Read the table with the release and death information
ReleaseDeath = pd.read_csv(f"{data_folder}Release_Death.csv", sep=",", low_memory=False)

In [None]:
# Define function
def function_prep(Released):
    """Clean the combined data of one study and save it as a new pkl file.

    The function reads its settings from module-level names that have to be defined
    before it is called: data_folder, file_name_in, file_name_out, columns_to_keep,
    ReleaseDeath, aviary (list of values of ReleaseDeath["Aviary"] belonging to the
    study) and Aviary (study label written to the output).

    Steps: rename the temperature/wind columns, keep only the necessary columns, keep
    GPS fixes with a known location, parse the timestamps, remove data before release
    and after death, remove duplicated timestamps per tag, remove data from 1 November
    of the first data year onwards, and add the columns StartYear, Aviary, Individual
    and Day.

    Parameters
    ----------
    Released : bool
        Currently not used by the function; kept so that existing calls keep working.
        The release/death filter is always applied, and rows of tags without a known
        release or death time are kept.
    """

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)

    # Change some column names
    data.rename({
        "ECMWF ERA5 SL Temperature (2 m above Ground)":"Temp_SL",
        "ECMWF ERA5 PL U Wind":"U_Wind_PL",
        "ECMWF ERA5 PL V wind":"V_Wind_PL"
    }, axis=1, inplace=True)

    # Only keep columns that are necessary for further steps in the analyses
    data = data[columns_to_keep]

    # Only keep GPS data
    data = data[data["sensor-type"]=="gps"]

    # Only keep locations that are not NA
    #    .copy() makes the result an independent data frame, so the column assignments
    #    below do not operate on a slice of the loaded data
    data = data.dropna(subset=["location-long","location-lat"]).copy()

    # Convert the timestamps
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")


    # Remove data before release and after death

    # Select the release-death rows of this study once (tag -> row)
    study_info = ReleaseDeath[ReleaseDeath["Aviary"].isin(aviary)].set_index("tag.local.identifier")

    # Add the timing of release and death to the data
    for column in ("Release","Death"):
        data[column] = pd.to_datetime(data["tag-local-identifier"].map(study_info[column].to_dict()),format="%d-%m-%Y %H:%M")

    # Only keep data after release and before death (a missing time does not remove data)
    after_release = data["Release"].isna() | (data["timestamp"] >= data["Release"])
    before_death = data["Death"].isna() | (data["timestamp"] <= data["Death"])
    data = data[after_release & before_death]

    # Remove duplicates

    # Sort the data
    data = data.sort_values(["tag-local-identifier","timestamp","gps:satellite-count"], ascending=[True,True,True])

    # Remove duplicated timestamps
    # NOTE(review): because of the ascending sort, keep="first" keeps the duplicate with
    #    the LOWEST satellite count -- confirm that this is intended
    data = data.drop_duplicates(["timestamp","tag-local-identifier"],keep="first").copy()

    # Add the start-year of the data (year of the first remaining timestamp of each tag)
    Start_year = data.groupby("tag-local-identifier")["timestamp"].min().dt.year
    data["StartYear"] = data["tag-local-identifier"].map(Start_year.to_dict())

    # Remove data from 1 November in the year of tagging
    #    timestamp < 1 November of StartYear  <=>  year < StartYear, or same year and month < 11
    timestamp_year = data["timestamp"].dt.year
    before_november = (timestamp_year < data["StartYear"]) | ((timestamp_year == data["StartYear"]) & (data["timestamp"].dt.month < 11))
    data = data[before_november].copy()

    # Add the aviary
    data["Aviary"] = Aviary

    # Add an individual ID
    data["Individual"] = data["Aviary"].map(str)+"_"+data["tag-local-identifier"].map(str)

    # Add Day without time
    data["Day"] = data["timestamp"].dt.date

    # Save the data into a new pkl file
    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Settings read by function_prep: input and output file, the aviary values of this
# study in the release-death table, and the study label
file_name_in, file_name_out = "DataAff_TempWind_AllData.pkl", "DataAff_TempWind_P.pkl"
aviary, Aviary = ["Affenberg_2019","Affenberg_2020"], "Affenberg"

# Prepare the data, then print the finish time and the elapsed run time
start = datetime.datetime.now()
function_prep(Released=True)
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Settings read by function_prep: input and output file, the aviary values of this
# study in the release-death table, and the study label
file_name_in, file_name_out = "DataCC_TempWind_AllData.pkl", "DataCC_TempWind_P.pkl"
aviary, Aviary = ["CareCenter_2019","CareCenter_2020"], "CareCenter"

# Prepare the data, then print the finish time and the elapsed run time
start = datetime.datetime.now()
function_prep(Released=True)
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Settings read by function_prep: input and output file, the aviary values of this
# study in the release-death table, and the study label
file_name_in, file_name_out = "DataCASCB_TempWind_AllData.pkl", "DataCASCB_TempWind_P.pkl"
aviary, Aviary = ["CASCB_East","CASCB_West"], "CASCB"

# Prepare the data, then print the finish time and the elapsed run time
start = datetime.datetime.now()
function_prep(Released=False)
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Add daylength

- Day length per day in minutes
- Sunrise for the first location of the day
- Sunset for the last location of the day



In [None]:
# Define function
def DayLength():
    """Add the column DayLength (in minutes) to the data and save the result.

    For every tag and every day with data, DayLength is the time between sunrise at the
    first location of that day and sunset at the last location of that day. Rows whose
    tag identifier is missing keep NaN.

    The function reads data_folder, file_name_in and file_name_out from module level.
    """

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)

    # Order the data frame by individual and timestamp
    #    (the first/last row of each group below is then the first/last location of the day)
    data.sort_values(["tag-local-identifier","timestamp"], ascending=[True,True], inplace=True)

    # Go through the data once per tag-day combination that actually has data,
    #    instead of filtering the whole data frame for every tag x day pair
    MinutesPerTagDay = {}

    for (Tag, Day), DayData in data.groupby(["tag-local-identifier","Day"], sort=False):

        # Save the first and last location of the day
        FirstLoc = LocationInfo('name', 'region', 'timezone/name', DayData["location-lat"].iloc[0], DayData["location-long"].iloc[0])
        LastLoc = LocationInfo('name', 'region', 'timezone/name', DayData["location-lat"].iloc[-1], DayData["location-long"].iloc[-1])

        # Calculate the time of sunset and sunrise and store the difference in minutes
        Sunrise = sun(FirstLoc.observer, date=Day)["sunrise"]
        Sunset = sun(LastLoc.observer, date=Day)["sunset"]
        MinutesPerTagDay[(Tag, Day)] = (Sunset-Sunrise).total_seconds()/60

    # Add the day length to every row of the matching tag and day.
    #    The values are assigned by position because the index of the data is not
    #    guaranteed to be unique at this point
    data["DayLength"] = [MinutesPerTagDay.get(key, np.nan) for key in zip(data["tag-local-identifier"], data["Day"])]

    # Save the data
    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Settings read by DayLength (the P file is read and overwritten)
file_name_in, file_name_out = "DataAff_TempWind_P.pkl", "DataAff_TempWind_P.pkl"

# Add the day length, then print the finish time and the elapsed run time
start = datetime.datetime.now()
DayLength()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Settings read by DayLength (the P file is read and overwritten)
file_name_in, file_name_out = "DataCC_TempWind_P.pkl", "DataCC_TempWind_P.pkl"

# Add the day length, then print the finish time and the elapsed run time
start = datetime.datetime.now()
DayLength()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Settings read by DayLength (the P file is read and overwritten)
file_name_in, file_name_out = "DataCASCB_TempWind_P.pkl", "DataCASCB_TempWind_P.pkl"

# Add the day length, then print the finish time and the elapsed run time
start = datetime.datetime.now()
DayLength()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Split bursts and assign IDs

In [None]:
# Define the function nearest
def nearest(items, pivot):
    """Return the element of items with the smallest absolute difference to pivot.

    If several elements are equally close, the one that comes first in items is returned.
    """

    # Distance of one candidate to the pivot
    def gap(candidate):
        return abs(candidate - pivot)

    return min(items, key=gap)

In [None]:
# Define function SplitGPSBursts
def SplitGPSBursts(MaxTimeDiff=10,MinBurstLength=120):
    """Split the data into bursts, flag real bursts and match them to a scheduled start time.

    Adds three columns and saves the result:
    - BurstID: consecutive number (starting at 0) of every run of rows of one tag without
      a time gap larger than MaxTimeDiff
    - BelongsToBurst: True if the run contains at least MinBurstLength rows
    - Burst_A: for rows with BelongsToBurst == True, the scheduled start time that is
      closest to the first timestamp of the burst (missing for all other rows)

    The function reads data_folder, file_name_in and file_name_out from module level and
    uses the function nearest.

    Parameters
    ----------
    MaxTimeDiff : number
        Maximum time difference in seconds between two consecutive rows of the same burst.
    MinBurstLength : int
        Minimum number of rows of a real burst.
    """

    #---------------------------------#
    #- Preparation of the data frame -#
    #---------------------------------#

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)

    # Sort the data frame by individual and date
    data.sort_values(["tag-local-identifier","timestamp"], ascending=[True,True], inplace=True)

    # Reset indices (the old index is kept as the column "index")
    data.reset_index(inplace=True)

    #----------------------#
    #- Identifying bursts -#
    #----------------------#

    # A new burst starts at every row that differs more than MaxTimeDiff from the previous row
    #    Note: If the tag takes a burst, a position is taken every second, so the time difference between
    #        locations in the same burst is 1 second. A break of more than MaxTimeDiff seconds separates bursts
    LargeGap = data["timestamp"].diff().dt.total_seconds() > MaxTimeDiff

    # A new burst also starts at the first row of every tag. Without this, the last rows of
    #    one tag and the first rows of the next tag end up in the same burst whenever the
    #    next tag starts earlier than (or within MaxTimeDiff of) the end of the previous tag
    NewTag = data["tag-local-identifier"] != data["tag-local-identifier"].shift()

    #-------------------#
    #- Separate bursts -#
    #-------------------#

    # Number the bursts; the first row always starts a burst, so subtract 1 to start at 0
    data["BurstID"] = (LargeGap | NewTag).cumsum() - 1

    # Data belongs to an actual burst if the burst has at least MinBurstLength rows
    data["BelongsToBurst"] = data["BurstID"].map(data["BurstID"].value_counts()) >= MinBurstLength

    #--------------------------#
    #- Assign burst to a time -#
    #--------------------------#

    # Make lists with the time on which bursts should have started:
    #    every 15 minutes, plus 10:40 on every day
    StartTime1 = pd.date_range(data["Day"].min(),data["Day"].max()+datetime.timedelta(days=1), freq="15min")
    StartTime2 = pd.date_range(datetime.datetime.combine(data["Day"].min(),datetime.time(10, 40)),datetime.datetime.combine(data["Day"].max(),datetime.time(10, 40))+datetime.timedelta(days=1), freq="D")
    StartTime = StartTime1.union(StartTime2) # Is already sorted

    # Find the first timestamp of every actual burst in one pass
    First_timestamps = data[data["BelongsToBurst"]].groupby("BurstID")["timestamp"].min()

    # Find the nearest start time for every burst
    Nearest_start = {BurstID: nearest(items=StartTime,pivot=First_timestamp) for BurstID, First_timestamp in First_timestamps.items()}

    # Enter the start time in a new column; rows outside actual bursts stay missing.
    #    pd.to_datetime gives the column a datetime type even if there is no actual burst
    data["Burst_A"] = pd.to_datetime(data["BurstID"].map(Nearest_start))

    #-----------------#
    #- Save the data -#
    #-----------------#

    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Settings read by SplitGPSBursts (reads the P file, writes the Q file)
file_name_in, file_name_out = "DataAff_TempWind_P.pkl", "DataAff_TempWind_Q.pkl"

# Split the bursts, then print the finish time and the elapsed run time
start = datetime.datetime.now()
SplitGPSBursts()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Settings read by SplitGPSBursts (reads the P file, writes the Q file)
file_name_in, file_name_out = "DataCC_TempWind_P.pkl", "DataCC_TempWind_Q.pkl"

# Split the bursts, then print the finish time and the elapsed run time
start = datetime.datetime.now()
SplitGPSBursts()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Settings read by SplitGPSBursts (reads the P file, writes the Q file)
file_name_in, file_name_out = "DataCASCB_TempWind_P.pkl", "DataCASCB_TempWind_Q.pkl"

# Split the bursts, then print the finish time and the elapsed run time
start = datetime.datetime.now()
SplitGPSBursts()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Match bursts between individuals

In [None]:
# Define function FindOverlappingGPSBursts

def _overlap_seconds(Track1, Track2):
    """Return the number of seconds (both ends included) that the time ranges of two
    bursts have in common, or 0 if the ranges do not overlap."""
    latest_start = max(Track1["timestamp"].iloc[0], Track2["timestamp"].iloc[0])
    earliest_end = min(Track1["timestamp"].iloc[-1], Track2["timestamp"].iloc[-1])
    return max(0, (earliest_end - latest_start).total_seconds() + 1)


def _paired_distances_km(Track1, Track2):
    """Return the distances in km (geopy.distance.distance) between the locations of two
    bursts for all timestamps that occur in both bursts."""
    data_dist = pd.merge(Track1, Track2, on="timestamp")
    return [
        geopy.distance.distance((lat_x, long_x), (lat_y, long_y)).km
        for lat_x, long_x, lat_y, long_y in zip(
            data_dist["location-lat_x"], data_dist["location-long_x"],
            data_dist["location-lat_y"], data_dist["location-long_y"],
        )
    ]


def FindOverlappingGPSBursts(MinNumOverlap=100,MaxDist=1):
    """Give bursts of different tags that were recorded together the same number in the column Burst.

    Only bursts that were matched to the same start time (column Burst_A) are compared.
    Going through the bursts of one start time in order, the first burst without a number
    gets a new number; every other burst without a number gets the same number if
    - the time ranges of both bursts overlap for at least MinNumOverlap seconds,
    - both bursts have at least one identical timestamp, and
    - over the identical timestamps the minimum distance is lower than MaxDist km and
      the maximum distance is lower than 10 times MaxDist km.
    Bursts are only compared with the burst that received the new number, not with each
    other. Rows without a value for Burst_A keep NaN in Burst.

    The function reads data_folder, file_name_in and file_name_out from module level.

    Parameters
    ----------
    MinNumOverlap : number
        Minimum overlap of the time ranges in seconds.
    MaxDist : number
        Distance threshold in km.
    """

    #---------------------------------#
    #- Preparation of the data frame -#
    #---------------------------------#

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)

    # Sort the data frame by individual and date
    data.sort_values(["tag-local-identifier","timestamp","Burst_A"], ascending=[True,True,True], inplace=True)

    # Reset indices
    #    drop=True: the old index is not added as a column. Otherwise every run adds a
    #    column ("index", "level_0") and running the function again on its own output
    #    (file_name_in equals file_name_out in the calls in this script) raises an error
    data.reset_index(drop=True, inplace=True)

    # Columns that are needed to compare two bursts
    Columns = ["timestamp","location-lat","location-long"]

    # Dictionary in which the BurstNumber of every handled BurstID is entered
    BurstNumbers = {}

    # The BurstNumber of a newly handled burst is one higher than the previous one
    BurstNumber = 0

    #--------------#
    #- For-loop 1 -#
    #--------------#

    # Loop through all different values of Burst_A (in order of first appearance).
    #    Bursts with the same value for Burst_A were matched to the same time
    for Burst, BurstData in data.groupby("Burst_A", sort=False):

        # Split the data of this start time per BurstID once (in order of first appearance)
        Tracks = {BurstID: Track[Columns] for BurstID, Track in BurstData.groupby("BurstID", sort=False)}

        #--------------#
        #- For-loop 2 -#
        #--------------#

        for ID1, Track1 in Tracks.items(): # Start of for-loop 2

            # Go to the start of the loop if the BurstID is already part of a burst
            if ID1 in BurstNumbers:
                continue

            # Assign a new BurstNumber to the currently handled burst
            BurstNumbers[ID1] = BurstNumber

            #--------------#
            #- For-loop 3 -#
            #--------------#

            for ID2, Track2 in Tracks.items(): # Start of for-loop 3

                # Go to the start of the loop if the BurstID is already part of a burst
                #    (this also skips ID1 itself)
                if ID2 in BurstNumbers:
                    continue

                # Go to the start of the loop if the overlap is less than MinNumOverlap
                if _overlap_seconds(Track1, Track2) < MinNumOverlap:
                    continue

                #-------------------#
                #- Check locations -#
                #-------------------#

                # Calculate the distance between the locations for each shared timestamp
                Distances = _paired_distances_km(Track1, Track2)

                # Time ranges can overlap without a single identical timestamp;
                #    there is nothing to compare then (min() of an empty list raises an error)
                if not Distances:
                    continue

                # If the minimum distance is lower than MaxDist and the maximum distance is lower than 10 times MaxDist, give ID2 the same BurstNumber
                if min(Distances) < MaxDist and max(Distances) < 10*MaxDist:
                    BurstNumbers[ID2] = BurstNumber

            BurstNumber += 1

    # Enter the BurstNumbers in the new column Burst (NaN for BurstIDs without a number)
    data["Burst"] = data["BurstID"].map(BurstNumbers).astype(float)

    #-------------#
    #- Save data -#
    #-------------#

    # Save the data
    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Settings read by FindOverlappingGPSBursts (the Q file is read and overwritten)
file_name_in, file_name_out = "DataAff_TempWind_Q.pkl", "DataAff_TempWind_Q.pkl"

# Match the bursts, then print the finish time and the elapsed run time
start = datetime.datetime.now()
FindOverlappingGPSBursts()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Settings read by FindOverlappingGPSBursts (the Q file is read and overwritten)
file_name_in, file_name_out = "DataCC_TempWind_Q.pkl", "DataCC_TempWind_Q.pkl"

# Match the bursts, then print the finish time and the elapsed run time
start = datetime.datetime.now()
FindOverlappingGPSBursts()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Settings read by FindOverlappingGPSBursts (the Q file is read and overwritten)
file_name_in, file_name_out = "DataCASCB_TempWind_Q.pkl", "DataCASCB_TempWind_Q.pkl"

# Match the bursts, then print the finish time and the elapsed run time
start = datetime.datetime.now()
FindOverlappingGPSBursts()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Classify flight, climbing and gliding segments

In [None]:
# Define function FlightClassification that calculates climbing rates and classifies flight, climbing and gliding segments
def FlightClassification(MinGroundSpeed=2.5,RunningWindowLength=15,MinFlightTime=15,MinNonFlightTime=5,MinClimbingRate=0.2,MaxDecliningRate=0):

    #---------------------------------#
    #- Preparation of the data frame -#
    #---------------------------------#
    
    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)
    
    # Sort the data frame by individual and date
    data.sort_values(["tag-local-identifier","timestamp","BurstID"], ascending=[True,True,True], inplace=True)
    
    # Reset indices
    #data.reset_index(inplace=True)
    
    # Make a list with all BurstIDs
    BurstIDs = data["BurstID"].unique()
    
    #----------------------------#
    #- Calculate climbing rates -#
    #----------------------------#
    
    # Make new columns to enter the values into
    data["ClimbingRate"] = np.nan
    
    TimeDiff = data["timestamp"].diff().dt.total_seconds()
    HeightDiff = data["height-above-ellipsoid"].diff()
    ClimbingRate = (HeightDiff/TimeDiff).tolist()
    
    # Enter climbing rates into the data frame, only within bursts

    # Make a list that indicates if a data point belongs to a burst
    InBurst = [i for i, x in enumerate(data["BurstID"].diff()) if x == 0]
    
    # Enter ClimbingRate into the data, but only for the rows belonging to a burst
    data.loc[data.index.isin(InBurst),"ClimbingRate"] = [ClimbingRate[idx] for idx in InBurst]
    
    # Shift the data one up to have the climbing rate between the current location and the next
    data.ClimbingRate = data.ClimbingRate.shift(-1)
    
    # Calculate the running window/smoothed climbing rate
    for BurstID in BurstIDs:
        data.loc[data["BurstID"]==BurstID,"Smoothed_height-above-ellipsoid"] = uniform_filter1d(data.loc[data["BurstID"]==BurstID,"height-above-ellipsoid"], size=RunningWindowLength)
    
    data["SmoothedClimbingRate"] = np.nan
    
    HeightDiff = data["Smoothed_height-above-ellipsoid"].diff()
    ClimbingRate = (HeightDiff/TimeDiff).tolist()
    
    # Enter ClimbingRate into the data, but only for the rows belonging to a burst
    data.loc[data.index.isin(InBurst),"SmoothedClimbingRate"] = [ClimbingRate[idx] for idx in InBurst]
    
    # Shift the data one up to have the climbing rate between the current location and the next
    data.SmoothedClimbingRate = data.SmoothedClimbingRate.shift(-1)
    

    for BurstID in BurstIDs:
        
        #-------------------#
        #- Classify flight -#
        #-------------------#
            
        # Set Flying to T when the ground speed is higher than MinGroundSpeed and to F if not
        Flying = data[data["BurstID"]==BurstID]["ground-speed"] >= MinGroundSpeed
    
        # Give each flight segment an ID (non-flight will also get an ID first)
        FlyingID = (Flying == False).cumsum()

        # Replace the IDs with na when Flying is False
        FlyingID[(Flying == False)] = np.nan

        # Check if there is another segment less than MinNonFlightTime away
        if len(FlyingID.dropna().unique())>1:
            for i in FlyingID.dropna().unique():
                
                if i <= min(FlyingID.dropna().unique()):
                    idx = max(FlyingID[FlyingID==i].index)
                    indices = [*range(idx+1,idx+MinNonFlightTime+1)]
                    indices = [j for (j, v) in zip(indices, [item in FlyingID.index for item in indices]) if v]
                    if len(FlyingID[indices].dropna())>0:
                        idxs = FlyingID[indices].isnull()
                        idxs = idxs[idxs].index
                        FlyingID[idxs] = i

                elif i >= max(FlyingID.dropna().unique()):
                    idx = min(FlyingID[FlyingID==i].index)
                    indices = [*range(idx-MinNonFlightTime,idx)]
                    indices = [j for (j, v) in zip(indices, [item in FlyingID.index for item in indices]) if v]
                    if len(FlyingID[indices].dropna())>0:
                        idxs = FlyingID[indices].isnull()
                        idxs = idxs[idxs].index
                        FlyingID[idxs] = i

                else:
                    idx = max(FlyingID[FlyingID==i].index)
                    indices = [*range(idx+1,idx+MinNonFlightTime+1)]
                    indices = [j for (j, v) in zip(indices, [item in FlyingID.index for item in indices]) if v]
                    if len(FlyingID[indices].dropna())>0:
                        idxs = FlyingID[indices].isnull()
                        idxs = idxs[idxs].index
                        FlyingID[idxs] = i

                    idx = min(FlyingID[FlyingID==i].index)
                    indices = [*range(idx-MinNonFlightTime,idx)]
                    indices = [j for (j, v) in zip(indices, [item in FlyingID.index for item in indices]) if v]
                    if len(FlyingID[indices].dropna())>0:
                        idxs = FlyingID[indices].isnull()
                        idxs = idxs[idxs].index
                        FlyingID[idxs] = i

        # Give merged segments the same number
        FlyingID2 = FlyingID.isnull().cumsum()

        # Replace the values with nan where FlyingID is nan
        FlyingID2[FlyingID.isnull()] = np.nan
        FlyingID = FlyingID2

        # Replace the IDs with na if the segment is shorter than MinFlightTime
        for i in FlyingID.dropna().unique():
            if len(FlyingID[FlyingID==i])<MinFlightTime:
                FlyingID[FlyingID==i] = np.nan

        # Enter the FlyingIDs in the data
        data.loc[data["BurstID"]==BurstID,"FlyingID"] = FlyingID

        #---------------------#
        #- Classify climbing -#
        #---------------------#

        
        # Do this for every flying segment separately
        for F_ID in FlyingID.dropna().unique():
            
            # Set Climbing to T when the climbing rate is higher than MinClimbingRate and to F if not
            Climbing = data[(data["BurstID"]==BurstID)&(data["FlyingID"]==F_ID)]["SmoothedClimbingRate"] >= MinClimbingRate

            # Give each flight segment an ID (non-flight will also get an ID first)
            ClimbingID = (Climbing == False).cumsum()

            # Replace the IDs with na when Flying is False
            ClimbingID[(Climbing == False)] = np.nan

            # Check if there is another segment less than MinNonFlightTime away
            if len(ClimbingID.dropna().unique())>1:
                for i in ClimbingID.dropna().unique():

                    if i <= min(ClimbingID.dropna().unique()):
                        idx = max(ClimbingID[ClimbingID==i].index)
                        indices = [*range(idx+1,idx+MinNonFlightTime+1)]
                        indices = [j for (j, v) in zip(indices, [item in ClimbingID.index for item in indices]) if v]
                        if len(ClimbingID[indices].dropna())>0:
                            idxs = ClimbingID[indices].isnull()
                            idxs = idxs[idxs].index
                            ClimbingID[idxs] = i
                            
                    elif i >= max(ClimbingID.dropna().unique()):
                        idx = min(ClimbingID[ClimbingID==i].index)
                        indices = [*range(idx-MinNonFlightTime,idx)]
                        indices = [j for (j, v) in zip(indices, [item in ClimbingID.index for item in indices]) if v]
                        if len(ClimbingID[indices].dropna())>0:
                            idxs = ClimbingID[indices].isnull()
                            idxs = idxs[idxs].index
                            ClimbingID[idxs] = i
                            
                    else:
                        
                        idx = max(ClimbingID[ClimbingID==i].index)
                        indices = [*range(idx+1,idx+MinNonFlightTime+1)]
                        indices = [j for (j, v) in zip(indices, [item in ClimbingID.index for item in indices]) if v]
                        if len(ClimbingID[indices].dropna())>0:
                            idxs = ClimbingID[indices].isnull()
                            idxs = idxs[idxs].index
                            ClimbingID[idxs] = i
                        
                        idx = min(ClimbingID[ClimbingID==i].index)
                        indices = [*range(idx-MinNonFlightTime,idx)]
                        indices = [j for (j, v) in zip(indices, [item in ClimbingID.index for item in indices]) if v]
                        if len(ClimbingID[indices].dropna())>0:
                            idxs = ClimbingID[indices].isnull()
                            idxs = idxs[idxs].index
                            ClimbingID[idxs] = i

            # Give merged segments the same number
            ClimbingID2 = ClimbingID.isnull().cumsum()

            # Replace the values with nan where FlyingID is nan
            ClimbingID2[ClimbingID.isnull()] = np.nan
            ClimbingID = ClimbingID2

            # Replace the IDs with na if the segment is shorter than MinFlightTime
            for i in ClimbingID.dropna().unique():
                if len(ClimbingID[ClimbingID==i])<MinFlightTime:
                    ClimbingID[ClimbingID==i] = np.nan

            # Enter the ClimbingIDs in the data
            data.loc[(data["BurstID"]==BurstID)&(data["FlyingID"]==F_ID),"ClimbingID"] = ClimbingID          

        #--------------------#
        #- Classify gliding -#
        #--------------------#
        
        # Do this for every flying segment separately
        for F_ID in FlyingID.dropna().unique():
            
            # Set Gliding to T when the climbing rate is higher than MinClimbingRate and to F if not
            Gliding = data[(data["BurstID"]==BurstID)&(data["FlyingID"]==F_ID)]["SmoothedClimbingRate"] <= MaxDecliningRate

            # Give each flight segment an ID (non-flight will also get an ID first)
            GlidingID = (Gliding == False).cumsum()

            # Replace the IDs with na when Flying is False
            GlidingID[(Gliding == False)] = np.nan

            # Check if there is another segment less than MinNonFlightTime away
            if len(GlidingID.dropna().unique())>1:
                for i in GlidingID.dropna().unique():
                    
                    if i <= min(GlidingID.dropna().unique()):
                        idx = max(GlidingID[GlidingID==i].index)
                        indices = [*range(idx+1,idx+MinNonFlightTime+1)]
                        indices = [j for (j, v) in zip(indices, [item in GlidingID.index for item in indices]) if v]
                        if len(GlidingID[indices].dropna())>0:
                            idxs = GlidingID[indices].isnull()
                            idxs = idxs[idxs].index
                            GlidingID[idxs] = i
                            
                    elif i >= max(GlidingID.dropna().unique()):
                        idx = min(GlidingID[GlidingID==i].index)
                        indices = [*range(idx-MinNonFlightTime,idx)]
                        indices = [j for (j, v) in zip(indices, [item in GlidingID.index for item in indices]) if v]
                        if len(GlidingID[indices].dropna())>0:
                            idxs = GlidingID[indices].isnull()
                            idxs = idxs[idxs].index
                            GlidingID[idxs] = i
                            
                    else:
                        
                        idx = max(GlidingID[GlidingID==i].index)
                        indices = [*range(idx+1,idx+MinNonFlightTime+1)]
                        indices = [j for (j, v) in zip(indices, [item in GlidingID.index for item in indices]) if v]
                        if len(GlidingID[indices].dropna())>0:
                            idxs = GlidingID[indices].isnull()
                            idxs = idxs[idxs].index
                            GlidingID[idxs] = i
                        
                        idx = min(GlidingID[GlidingID==i].index)
                        indices = [*range(idx-MinNonFlightTime,idx)]
                        indices = [j for (j, v) in zip(indices, [item in GlidingID.index for item in indices]) if v]
                        if len(GlidingID[indices].dropna())>0:
                            idxs = GlidingID[indices].isnull()
                            idxs = idxs[idxs].index
                            GlidingID[idxs] = i

            # Give merged segments the same number
            GlidingID2 = GlidingID.isnull().cumsum()

            # Replace the values with nan where FlyingID is nan
            GlidingID2[GlidingID.isnull()] = np.nan
            GlidingID = GlidingID2

            # Replace the IDs with na if the segment is shorter than MinFlightTime
            for i in GlidingID.dropna().unique():
                if len(GlidingID[GlidingID==i])<MinFlightTime:
                    GlidingID[GlidingID==i] = np.nan

            # Enter the GlidingIDs in the data
            data.loc[(data["BurstID"]==BurstID)&(data["FlyingID"]==F_ID),"GlidingID"] = GlidingID
            
    # Save the data
    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataAff_TempWind_Q.pkl"

# Run the flight classification, then print the finishing time and the run time
start = datetime.datetime.now()
FlightClassification()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataCC_TempWind_Q.pkl"

# Run the flight classification, then print the finishing time and the run time
start = datetime.datetime.now()
FlightClassification()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataCASCB_TempWind_Q.pkl"

# Run the flight classification, then print the finishing time and the run time
start = datetime.datetime.now()
FlightClassification()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Add elevation and altitude
Elevation is separately downloaded from Movebank

In [None]:
# Add the ground elevation and the altitude above ground to the main data
def AddElevation():
    """Merge the separately downloaded elevation data into the main data.

    Uses the module-level names data_folder, Pattern, file_name_in and file_name_out.
    All csv files matching data_folder+"Elevation/"+Pattern+"*.csv" are combined,
    reduced to one GPS record per timestamp and tag, and left-joined onto the main data.
    "Altitude" is "height-above-msl" minus "Elevation". The result is written to
    data_folder+file_name_out.
    """

    # Columns that identify one record in both data frames
    key_columns = ["timestamp","tag-local-identifier"]

    #---------------------------#
    #- Load the elevation data -#
    #---------------------------#

    # Read every matching csv file and stack them into one data frame
    csv_paths = glob.glob(data_folder+"Elevation/"+Pattern+"*.csv")
    elevation = pd.concat(pd.read_csv(path,sep=',',low_memory=False) for path in csv_paths)

    # Keep GPS records that have both a longitude and a latitude
    elevation = elevation[elevation["sensor-type"]=="gps"]
    elevation = elevation.dropna(subset=["location-long","location-lat"])

    # Parse the timestamps
    elevation["timestamp"] = pd.to_datetime(elevation["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")

    # Per tag and timestamp, keep the first record after sorting by satellite count (ascending)
    elevation = elevation.sort_values(["tag-local-identifier","timestamp","gps:satellite-count"], ascending=True)
    elevation = elevation.drop_duplicates(key_columns,keep="first")

    # Shorten the ASTER column names and drop every column that is not needed
    elevation = elevation.rename(columns={
        "ASTER ASTGTM2 Quality Control":"QualityControlElevation",
        "ASTER ASTGTM2 Elevation":"Elevation"
    })
    elevation = elevation[key_columns+["height-above-msl","QualityControlElevation","Elevation"]]

    #----------------------#
    #- Load the main data -#
    #----------------------#

    data = pd.read_pickle(data_folder+file_name_in)

    #-----------------#
    #- Add elevation -#
    #-----------------#

    # Rows of the main data without a matching elevation record keep NaN in the new columns
    data = data.merge(elevation,on=key_columns,how="left")

    # Altitude = height above mean sea level minus the elevation
    data["Altitude"] = data["height-above-msl"] - data["Elevation"]

    #-----------------#
    #- Save the data -#
    #-----------------#

    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# File-name pattern of the elevation files, and the input and output file
Pattern = "White Stork Affenberg"
file_name_in, file_name_out = "DataAff_TempWind_Q.pkl", "DataAff_TempWind_R.pkl"

# Add the elevation, then print the finishing time and the run time
start = datetime.datetime.now()
AddElevation()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# File-name pattern of the elevation files, and the input and output file
Pattern = "*White Stork SW Germany Care Centre"
file_name_in, file_name_out = "DataCC_TempWind_Q.pkl", "DataCC_TempWind_R.pkl"

# Add the elevation, then print the finishing time and the run time
start = datetime.datetime.now()
AddElevation()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# File-name pattern of the elevation files, and the input and output file
Pattern = "*White Stork SW Germany CASCB"
file_name_in, file_name_out = "DataCASCB_TempWind_Q.pkl", "DataCASCB_TempWind_R.pkl"

# Add the elevation, then print the finishing time and the run time
start = datetime.datetime.now()
AddElevation()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Calculate wind components

In [None]:
# Define function
def CalculateWind():
    """Add wind speed, wind direction, wind support, crosswind and airspeed to the data.

    Uses the module-level names data_folder, file_name_in and file_name_out.
    The input needs the columns "U_Wind_PL", "V_Wind_PL", "heading" and "ground-speed".
    Angles are handled in degrees and converted to radians only inside sin/cos.
    The result is written to data_folder+file_name_out.
    """

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)
    
    #--------------------------------------#
    #- Calculate wind speed and direction -#
    #--------------------------------------#
    
    # The U component of the wind is the west-east component
    # The V component of the wind is the south-north component

    # Calculate horizontal windspeed
    data["WindSpeed_PL"] = (data["U_Wind_PL"]**2 + data["V_Wind_PL"]**2)**0.5
    
    # Calculate wind direction
    # Formula from https://www.eol.ucar.edu/content/wind-direction-quick-reference
    # arctan2 calculates the direction from which the wind comes. +180 -> wind direction
    data["WindDirection_PL"] = np.arctan2(-data["U_Wind_PL"],-data["V_Wind_PL"])*(180/np.pi)+180
    
    # Make all angles smaller than 360
    # Only the wind direction column is corrected; the other columns of these rows must stay untouched
    # (arctan2 returns values between -180 and 180 degrees, so this is not expected to trigger)
    too_large = data["WindDirection_PL"]>360
    if too_large.any():
        print("Some angles are larger than 360")
        data.loc[too_large,"WindDirection_PL"] -= 360
    
    #---------------------------------------#
    #- Calculate windsupport and crosswind -#
    #---------------------------------------#

    # Calculate the angle between the direction of the stork (heading) and the direction of the wind
    data["Angle_heading_WindPL"] = data["heading"] - data["WindDirection_PL"]

    # Determine the a-angle to do the calculations with
    data["Angle_a"] = abs(data["Angle_heading_WindPL"])

    # Calculate the w-angle
    data["Angle_w"] = 90-data["Angle_a"]

    # Calculate windsupport
    # Formula from https://doi.org/10.1186/2051-3933-1-4
    # Windsupport is in the direction of the heading
    data["WindSupport_PL"] = data["WindSpeed_PL"]*np.sin(data["Angle_w"]*np.pi/180)
    
    # Calculate crosswind
    # Formula from https://doi.org/10.1186/2051-3933-1-4
    data["CrossWind_PL"] = data["WindSpeed_PL"]*np.cos(data["Angle_w"]*np.pi/180)
    
    # Calculate Airspeed
    # Formula from https://doi.org/10.1186/2051-3933-1-4
    data["AirSpeed_PL"] = ((data["ground-speed"]-data["WindSupport_PL"])**2 + data["CrossWind_PL"]**2)**0.5
    
    #-----------------#
    #- Save the data -#
    #-----------------#
    
    # Save the data into a new pkl file
    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataAff_TempWind_R.pkl"

# Calculate the wind components, then print the finishing time and the run time
start = datetime.datetime.now()
CalculateWind()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataCC_TempWind_R.pkl"

# Calculate the wind components, then print the finishing time and the run time
start = datetime.datetime.now()
CalculateWind()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataCASCB_TempWind_R.pkl"

# Calculate the wind components, then print the finishing time and the run time
start = datetime.datetime.now()
CalculateWind()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Find the start and end of daily migration flights

- Start of flight is the start time of the first burst that ends at least 100 m from the first location of the day
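- Further refinements are applied on top of this: days with a net displacement below `MinMigDist` km, or without a net southward or westward movement, are skipped; bursts that cover no more than `MinDist` km are ignored; and groups of consecutive bursts that do not increase the distance from the first location of the day are dropped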
- End of flight is the end time (start + 10 minutes) of the last burst that increases the net displacement compared to the previous burst(s)

In [None]:
# Return the burst groups directly before and after every long gap between bursts
def _bursts_around_gaps(k, SplitHours):
    """Return the BurstNo values on both sides of each gap of at least SplitHours hours.

    k needs the columns "BurstNo" and "DiffTime" (hours until the next row of k).
    """
    burst_numbers = k["BurstNo"].unique()
    order = list(burst_numbers)
    before = [order.index(BN) for BN in k.loc[k["DiffTime"]>=SplitHours,"BurstNo"]]
    # A gap inside the last burst group has no following group; skip it instead of indexing out of range
    after = [position+1 for position in before if position+1 < len(order)]
    return burst_numbers[before+after]


# Define function
def FindMigFlight(MinDist=0.2,MinNoBursts=4,MinMigDist=50,SplitHours=1):
    """Find the start and end of the daily migration flight per individual and day.

    Uses the module-level names data_folder, file_name_in and file_name_out.
    Adds the columns "UniqueNumber", "StartFlight" and "EndFlight" to the data and
    writes the result to data_folder+file_name_out. "StartFlight" and "EndFlight"
    stay NaN for days on which no migration flight is found.

    Parameters
    ----------
    MinDist : distance in km that a burst (first to last location) has to exceed to be used.
    MinNoBursts : not used in the function body; kept so that existing calls keep working.
    MinMigDist : minimum distance in km between the first and the last location of a day.
        MinMigDist/4 is also the displacement below which a burst group next to a long gap is dropped.
    SplitHours : gap in hours between two remaining bursts at which the neighbouring burst groups are re-examined.
    """

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)
    
    # Order the data frame by individual and timestamp
    data.sort_values(["tag-local-identifier","timestamp"], ascending=[True,True], inplace=True)
    
    # Number the rows in sorted order
    data["UniqueNumber"] = range(len(data))
    
    # Add new columns for the start and end of flight
    data["StartFlight"] = np.nan
    data["EndFlight"] = np.nan

    for ind in data["tag-local-identifier"].unique():
        for Day in data[data["tag-local-identifier"]==ind]["Day"].unique():
            
            # Select the rows of this individual on this day once; the mask is reused for the final assignment
            day_rows = (data["tag-local-identifier"]==ind) & (data["Day"]==Day)
            data_sub = data[day_rows]
            
            # First and last location of the day
            first_lat = data_sub["location-lat"].iloc[0]
            first_long = data_sub["location-long"].iloc[0]
            last_lat = data_sub["location-lat"].iloc[-1]
            last_long = data_sub["location-long"].iloc[-1]
            
            # Check if the stork moved more than MinMigDist km on the given day
            Distance_day = geopy.distance.distance((first_lat, first_long),(last_lat, last_long)).km

            # The daily distance should be at least MinMigDist
            if Distance_day < MinMigDist:
                continue
                
            # The last location should be south or west of the first location
            # ("not" instead of "~": "~" only negates numpy booleans correctly, not Python booleans)
            if not ((first_lat > last_lat) or (first_long > last_long)):
                continue

            # Determine the first and last record of every burst for this specific day and individual
            burst_rows = data_sub[data_sub["BelongsToBurst"]==True]
            BurstStarts = burst_rows.groupby("BurstID").first()
            BurstEnds = burst_rows.groupby("BurstID").last()

            # Determine the first location of the day
            DayStarts = data_sub.iloc[0]
            day_origin = (DayStarts["location-lat"], DayStarts["location-long"])

            # Calculate, per burst, the distance between the first location of the day and the last location
            # of the burst (Dist) and the distance between the first and last location of the burst (BDist)
            k_Dist = []
            k_BDist = []
            for idx in range(len(BurstEnds)):
                burst_first = (BurstStarts["location-lat"].iloc[idx], BurstStarts["location-long"].iloc[idx])
                burst_last = (BurstEnds["location-lat"].iloc[idx], BurstEnds["location-long"].iloc[idx])
                k_Dist.append(geopy.distance.distance(day_origin, burst_last).km)
                k_BDist.append(geopy.distance.distance(burst_first, burst_last).km)

            # Combine k into a data frame
            # BurstEnds is grouped by BurstID, so its index already holds the BurstID of every burst
            k = pd.DataFrame({
                "Dist": k_Dist,
                "BurstID": list(BurstEnds.index),
                "StartTime": list(BurstEnds["Burst_A"]),
                "BDist": k_BDist,
                "LatF": list(BurstStarts["location-lat"]),
                "LatL": list(BurstEnds["location-lat"]),
                "LongF": list(BurstStarts["location-long"]),
                "LongL": list(BurstEnds["location-long"])
            })
            
            if not (len(k)>0):
                continue
                
            # Find the locations where bursts are not consecutive (more than 25 minutes between two start times)
            Breaks = list(k[k["StartTime"].diff().dt.total_seconds()/60>25].index)
            Breaks.insert(0,0)
            Breaks.insert(len(Breaks),max(k.index))

            # Give every run of consecutive bursts its own number
            BurstNo = [None] * len(k)
            for i in range(len(Breaks)-1):
                BurstNo[Breaks[i]:Breaks[i+1]] = [i]*len(BurstNo[Breaks[i]:Breaks[i+1]])
                # The code above doesn't apply to the last item in the list (has to do with the way indexing works in Python-lists)
                if Breaks[i+1] == (len(k)-1):
                    BurstNo[Breaks[i]:(Breaks[i+1]+1)] = [i]*len(BurstNo[Breaks[i]:(Breaks[i+1]+1)])

            k["BurstNo"] = BurstNo

            # Change in distance from the first location of the day compared to the previous burst
            k["DistDiff"] = k["Dist"].diff()
            
            # Only keep bursts that cover more than MinDist
            k = k[k["BDist"]>MinDist]
            
            # Summarise every group of consecutive bursts
            k2 = k.groupby("BurstNo",as_index=False).agg(
                    Number=("BurstNo",'count'),
                    FirstD=("Dist",'first'),
                    LastD=("Dist",'last'),
                    FirstT=("StartTime",'first'),
                    LastT=("StartTime",'last'),
                    BDistS=("BDist",'sum'),
                    DistDiffM=("DistDiff",'mean'),
                    FirstLat=("LatF",'first'),
                    LastLat=("LatL",'last'),
                    FirstLong=("LongF",'first'),
                    LastLong=("LongL",'last')
                )
                
            k2["CumDistDiff"] = k2["FirstD"].diff()#.shift(-1)
            k2["SCDist"] = k2["LastD"] - k2["FirstD"]
            k2["LatDiff"] = k2["FirstLat"] - k2["LastLat"]
            k2["LongDiff"] = k2["FirstLong"] - k2["LastLong"]
            
            # Only keep groups that end south or west of where they started
            k2 = k2[(k2["LatDiff"]>0)|(k2["LongDiff"]>0)]
                
            if len(k2)>1:
        
                # Only keep groups in which the distance from the first location of the day increases on average (or is undefined)
                k2 = k2[(k2["DistDiffM"]>0)|(k2["DistDiffM"].isna())]

                # Only keep groups that start further from the first location of the day than the previous group,
                # or that gain more than MinDist per burst within the group
                k2 = k2[(k2["CumDistDiff"]>0)|k2["CumDistDiff"].isna()|(k2["SCDist"]>(k2["Number"]*MinDist))]

                # Groups of two or more bursts have to increase the distance from the first location of the day
                k2 = k2[(k2["Number"]<2)|(k2["SCDist"]>0)]
                    
            # Copy, because a column is added to k below
            k = k[k.BurstNo.isin(k2.BurstNo)].copy()
            
            if len(k2)>1:
                # Hours between the start of a burst and the start of the next remaining burst
                k["DiffTime"] = (k["StartTime"].diff().dt.total_seconds()/3600).shift(-1)
                
                # Series.max() ignores the NaN of the last row and does not fail on an empty frame
                if k["DiffTime"].max()>=SplitHours:

                    BNos = _bursts_around_gaps(k, SplitHours)
                    k3 = k2[k2["BurstNo"].isin(BNos)]

                    if len(k3)>1:
                        # Never drop the group(s) with the most bursts in this first pass
                        k3 = k3[~(k3["Number"]==max(k3["Number"]))]
                        k3 = k3[k3["SCDist"]<(MinMigDist/4)]
                    k = k[~k["BurstNo"].isin(k3["BurstNo"])].copy()
                    k["DiffTime"] = (k["StartTime"].diff().dt.total_seconds()/3600).shift(-1)
                    
                if k["DiffTime"].max()>=SplitHours:

                    BNos = _bursts_around_gaps(k, SplitHours)
                    k3 = k2[k2["BurstNo"].isin(BNos)]
                    
                    if len(k3)>1:
                        k3 = k3[k3["SCDist"]<(MinMigDist/4)]
                    k = k[~k["BurstNo"].isin(k3["BurstNo"])]
                    
            if len(k)>0:
                # The start of the daily migration is the start of the first burst in k and the end is the start of the last burst + 10 minutes
                data.loc[day_rows,"StartFlight"] = k["StartTime"].iloc[0]
                data.loc[day_rows,"EndFlight"] = k["StartTime"].iloc[-1] + datetime.timedelta(minutes = 10)
                
    # Save the data
    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataAff_TempWind_R.pkl"

# Find the daily migration flights, then print the finishing time and the run time
start = datetime.datetime.now()
FindMigFlight()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataCC_TempWind_R.pkl"

# Find the daily migration flights, then print the finishing time and the run time
start = datetime.datetime.now()
FindMigFlight()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input and output file name are the same pickle
file_name_in = file_name_out = "DataCASCB_TempWind_R.pkl"

# Find the daily migration flights, then print the finishing time and the run time
start = datetime.datetime.now()
FindMigFlight()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Subset the data
- Keep data during flight
- Keep data during migration days
- Keep data within the segment
- Keep individuals that cover the entire segment

In [None]:
# Define function
def SubsetData():
    """Reduce the data to migration flights within the segment between 47.5 and 44.0 degrees latitude.

    Uses the module-level names data_folder, file_name_in and file_name_out.
    Keeps records inside the daily flight window, on days with at least 50 km between
    the first and last kept location, of individuals that reached south of 44.0 degrees,
    and only from the first burst ending below 47.5 degrees up to (not including) the
    first burst starting below 44.0 degrees. Adds the columns "MigDay" and "Individual"
    and writes the result to data_folder+file_name_out.
    """

    data = pd.read_pickle(data_folder+file_name_in)

    # Flight window: from StartFlight until 5 minutes after EndFlight, only where both are known
    data = data[data.StartFlight.notna()&data.EndFlight.notna()]
    in_window = (data.timestamp>=data.StartFlight)&(data.timestamp<(data.EndFlight+datetime.timedelta(minutes=5)))
    data = data[in_window]

    # Migration days: first and last location per day and individual have to be at least 50 km apart
    day_ends = data.groupby(["Day","tag-local-identifier"],as_index=False)[["location-long","location-lat"]].agg(['first','last'])
    day_ends.reset_index(level=0, inplace=True)
    day_ends.reset_index(level=0, inplace=True)

    day_ends["Dist"] = [
        geopy.distance.distance(
            (day_ends["location-lat"]["first"][i],day_ends["location-long"]["first"][i]),
            (day_ends["location-lat"]["last"][i],day_ends["location-long"]["last"][i])
        ).km
        for i in range(len(day_ends))
    ]
    day_ends = day_ends[day_ends["Dist"]>=50]

    # Flag the records on migration days, individual by individual, and keep only those
    data["MigDay"] = False
    for tag in data["tag-local-identifier"].unique():
        days_of_tag = day_ends[day_ends["tag-local-identifier"]==tag]["Day"]
        rows_of_tag = (data["tag-local-identifier"]==tag)&(data["Day"].isin(days_of_tag))
        data.loc[rows_of_tag,["MigDay"]] = True

    data = data[data["MigDay"]]

    # Southern edge of the segment: bursts whose first location is south of 44.0
    south = data.groupby(["Burst_A","tag-local-identifier"],as_index=False)["location-lat"].first()
    south = south[south["location-lat"]<44.0]

    # Individuals that never got south of 44.0 are removed completely
    data = data[data["tag-local-identifier"].isin(south["tag-local-identifier"])]

    # Per individual, drop everything from the first such burst onwards
    south = south.groupby(["tag-local-identifier"],as_index=False).first()
    first_south_burst = south.set_index("tag-local-identifier")["Burst_A"].to_dict()
    data = data[data["Burst_A"]<data["tag-local-identifier"].map(first_south_burst)]

    # Northern edge of the segment: first burst per individual whose last location is south of 47.5
    north = data.groupby(["Burst_A","tag-local-identifier"],as_index=False)["location-lat"].last()
    north = north[north["location-lat"]<47.5]
    north = north.groupby(["tag-local-identifier"],as_index=False).first()

    # Per individual, drop everything before that burst
    first_inside_burst = north.set_index("tag-local-identifier")["Burst_A"].to_dict()
    before_segment = data["Burst_A"]<data["tag-local-identifier"].map(first_inside_burst)
    data = data[~before_segment]

    # Individual ID: aviary and tag number joined by an underscore
    data["Individual"] = data["Aviary"].map(str)+"_"+data["tag-local-identifier"].map(str)

    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file
file_name_in, file_name_out = "DataAff_TempWind_R.pkl", "DataAff_TempWind_S.pkl"
# NOTE(review): SubsetData() builds the individual ID from the "Aviary" column of the data;
# this variable does not appear to be read by it
Aviary = "Affenberg"

# Subset the data, then print the finishing time and the run time
start = datetime.datetime.now()
SubsetData()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file
file_name_in, file_name_out = "DataCC_TempWind_R.pkl", "DataCC_TempWind_S.pkl"
# NOTE(review): SubsetData() builds the individual ID from the "Aviary" column of the data;
# this variable does not appear to be read by it
Aviary = "CareCenter"

# Subset the data, then print the finishing time and the run time
start = datetime.datetime.now()
SubsetData()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input and output file
file_name_in, file_name_out = "DataCASCB_TempWind_R.pkl", "DataCASCB_TempWind_S.pkl"
# NOTE(review): SubsetData() builds the individual ID from the "Aviary" column of the data;
# this variable does not appear to be read by it
Aviary = "CASCB"

# Subset the data, then print the finishing time and the run time
start = datetime.datetime.now()
SubsetData()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Subset data for improvement and pre-segment flight
- Keep data during flight

In [None]:
# Define function
def SubsetData2():
    """Copy the data to a new file with an added "Individual" column.

    Uses the module-level names data_folder, file_name_in and file_name_out.
    No rows are removed here; "Individual" is the "Aviary" column and the
    "tag-local-identifier" column joined by an underscore.
    """

    data = pd.read_pickle(data_folder+file_name_in)

    # Build the individual ID from its two parts
    aviary_part = data["Aviary"].map(str)
    tag_part = data["tag-local-identifier"].map(str)
    data["Individual"] = aviary_part+"_"+tag_part

    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file
file_name_in, file_name_out = "DataAff_TempWind_R.pkl", "DataAff_TempWind_S2.pkl"
# NOTE(review): SubsetData2() builds the individual ID from the "Aviary" column of the data;
# this variable does not appear to be read by it
Aviary = "Affenberg"

# Run the function, then print the finishing time and the run time
start = datetime.datetime.now()
SubsetData2()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file
file_name_in, file_name_out = "DataCC_TempWind_R.pkl", "DataCC_TempWind_S2.pkl"
# NOTE(review): SubsetData2() builds the individual ID from the "Aviary" column of the data;
# this variable does not appear to be read by it
Aviary = "CareCenter"

# Run the function, then print the finishing time and the run time
start = datetime.datetime.now()
SubsetData2()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input and output file
file_name_in, file_name_out = "DataCASCB_TempWind_R.pkl", "DataCASCB_TempWind_S2.pkl"
# NOTE(review): SubsetData2() builds the individual ID from the "Aviary" column of the data;
# this variable does not appear to be read by it
Aviary = "CASCB"

# Run the function, then print the finishing time and the run time
start = datetime.datetime.now()
SubsetData2()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Determine migration timing and minimum latitude for first and second year
- The moment a stork enters the segment is used for migration timing

In [None]:
#=================#
#=== Affenberg ===#
#=================#

def SaveAff():
    """Prepare the Affenberg tracking data and save it as a pkl file.

    Uses the module-level names data_folder and aviary (list of aviary labels used to
    select rows of Release_Death.csv). Adds the columns "Release", "Death" and
    "StartYear", keeps only records between release and death (a missing release or
    death does not exclude a record) and writes the result to
    data_folder+"Second_DataAff_All_Second.pkl".
    """

    # Read the tracking data and parse its timestamps
    data = pd.read_csv(data_folder+"SecondMigration/White Stork Affenberg releases MPIAB.csv",sep=',',low_memory=False)
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")

    # Read the release-death table and keep the rows of the requested aviaries, indexed by tag
    ReleaseDeath = pd.read_csv(data_folder + "Release_Death.csv",sep=",",low_memory=False)
    events = ReleaseDeath[ReleaseDeath["Aviary"].isin(aviary)].set_index("tag.local.identifier")

    # Look up the release and death time of every record's tag (day-month-year format)
    for event in ("Release","Death"):
        data[event] = pd.to_datetime(data["tag-local-identifier"].map(events[event].to_dict()),format="%d-%m-%Y %H:%M")

    # Keep records that are not before the release and not after the death
    after_release = data["Release"].isna() | (data["timestamp"] >= data["Release"])
    before_death = data["Death"].isna() | (data["timestamp"] <= data["Death"])
    data = data[after_release & before_death]

    # The start-year of an individual is the year of its earliest remaining record
    first_year = data.groupby("tag-local-identifier")["timestamp"].min().dt.year
    data["StartYear"] = data["tag-local-identifier"].map(first_year.to_dict())

    data.to_pickle(data_folder+"Second_DataAff_All_Second.pkl")
    
# Aviary labels that SaveAff() uses to select rows of Release_Death.csv
aviary = ["Affenberg_2019","Affenberg_2020"]

# Run the function, then print the finishing time and the run time
start = datetime.datetime.now()
SaveAff()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

def SaveCC():
    """Prepare the CareCenter tracking data and save it as a pkl file.

    Uses the module-level names data_folder and aviary (list of aviary labels used to
    select rows of Release_Death.csv). Adds the columns "Release", "Death" and
    "StartYear", keeps only records between release and death (a missing release or
    death does not exclude a record) and writes the result to
    data_folder+"Second_DataCC_All_Second.pkl".
    """

    # Read the tracking data and parse its timestamps
    data = pd.read_csv(data_folder+"SecondMigration/LifeTrack White Stork SW Germany Care Centre Releases.csv",sep=',',low_memory=False)
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")

    # Read the release-death table and keep the rows of the requested aviaries, indexed by tag
    ReleaseDeath = pd.read_csv(data_folder + "Release_Death.csv",sep=",",low_memory=False)
    events = ReleaseDeath[ReleaseDeath["Aviary"].isin(aviary)].set_index("tag.local.identifier")

    # Look up the release and death time of every record's tag (day-month-year format)
    for event in ("Release","Death"):
        data[event] = pd.to_datetime(data["tag-local-identifier"].map(events[event].to_dict()),format="%d-%m-%Y %H:%M")

    # Keep records that are not before the release and not after the death
    after_release = data["Release"].isna() | (data["timestamp"] >= data["Release"])
    before_death = data["Death"].isna() | (data["timestamp"] <= data["Death"])
    data = data[after_release & before_death]

    # The start-year of an individual is the year of its earliest remaining record
    first_year = data.groupby("tag-local-identifier")["timestamp"].min().dt.year
    data["StartYear"] = data["tag-local-identifier"].map(first_year.to_dict())

    data.to_pickle(data_folder+"Second_DataCC_All_Second.pkl")

# Aviary labels that SaveCC() uses to select rows of Release_Death.csv
aviary = ["CareCenter_2019","CareCenter_2020"]

# Run the function, then print the finishing time and the run time
start = datetime.datetime.now()
SaveCC()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

def SaveCASCB():
    """Prepare the CASCB tracking data and save it as a pkl file.

    Uses the module-level names data_folder and aviary (list of aviary labels used to
    select rows of Release_Death.csv). Adds the columns "Release", "Death" and
    "StartYear", keeps only records between release and death (a missing release or
    death does not exclude a record) and writes the result to
    data_folder+"Second_DataCASCB_All_Second.pkl".
    """

    # Read the tracking data and parse its timestamps
    data = pd.read_csv(data_folder+"SecondMigration/LifeTrack White Stork SW Germany CASCB.csv",sep=',',low_memory=False)
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")

    # Read the release-death table and keep the rows of the requested aviaries, indexed by tag
    ReleaseDeath = pd.read_csv(data_folder + "Release_Death.csv",sep=",",low_memory=False)
    events = ReleaseDeath[ReleaseDeath["Aviary"].isin(aviary)].set_index("tag.local.identifier")

    # Look up the release and death time of every record's tag (day-month-year format)
    for event in ("Release","Death"):
        data[event] = pd.to_datetime(data["tag-local-identifier"].map(events[event].to_dict()),format="%d-%m-%Y %H:%M")

    # Keep records that are not before the release and not after the death
    after_release = data["Release"].isna() | (data["timestamp"] >= data["Release"])
    before_death = data["Death"].isna() | (data["timestamp"] <= data["Death"])
    data = data[after_release & before_death]

    # The start-year of an individual is the year of its earliest remaining record
    first_year = data.groupby("tag-local-identifier")["timestamp"].min().dt.year
    data["StartYear"] = data["tag-local-identifier"].map(first_year.to_dict())

    data.to_pickle(data_folder+"Second_DataCASCB_All_Second.pkl")

# Aviary labels that SaveCASCB() uses to select rows of Release_Death.csv
aviary = ["CASCB_East","CASCB_West"]

# Run the function, then print the finishing time and the run time
start = datetime.datetime.now()
SaveCASCB()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Define function

# Values collected per tag and migration season, in the order in which they are
# written to the output table (suffixed with 1 or 2; "Death" becomes "Death"/"Death2")
_SEASON_FIELDS = (
    "FirstDate", "FirstLat", "FirstLong",
    "FirstSegm", "FirstSegmLat",
    "MinLat", "MinLong", "MinLatDate",
    "MaxLat", "MaxLatDate",
    "Death",
)


def _yearly_cutoffs(frame, offset, month, day):
    """Return one datetime per row of `frame`: `day`/`month` of the year StartYear+offset."""
    return [datetime.datetime(int(year)+offset, month, day) for year in frame["StartYear"]]


def _reset(summary, idx, fields):
    """Set the given fields of entry `idx` of `summary` back to None."""
    for field in fields:
        summary[field][idx] = None


def _distance_km(lat_a, long_a, lat_b, long_b):
    """Return the geodesic distance in km, or None when one of the latitudes is missing."""
    if pd.isnull(lat_a) or pd.isnull(lat_b):
        return None
    return geopy.distance.distance((lat_a, long_a), (lat_b, long_b)).km


def _summarise_season(data, tags, offset, use_release):
    """Summarise one migration season (1 July to 1 July) for every tag.

    Parameters
    ----------
    data : GPS records with the columns timestamp, Day, StartYear, Release,
        Death, tag-local-identifier, location-lat and location-long, sorted
        by tag and timestamp.
    tags : sequence of tag identifiers; the returned lists follow this order.
    offset : 0 for the season that starts in the tagging year (StartYear),
        1 for the season that starts one year later.
    use_release : when True, the minimum latitude is also discarded for birds
        that died less than 5 days after release.

    Returns
    -------
    dict that maps every name in `_SEASON_FIELDS` to a list with one entry per
    tag (None where no value could be determined).
    """
    min_fields = ("MinLatDate", "MinLat", "MinLong")
    segm_fields = ("FirstSegm", "FirstSegmLat")
    three_days = datetime.timedelta(days=3)
    five_days = datetime.timedelta(days=5)

    summary = {field: [None] * len(tags) for field in _SEASON_FIELDS}

    # Keep the records from 1 July of the season year up to (not including)
    # 1 July of the year after
    season = data[data["timestamp"] < _yearly_cutoffs(data, offset+1, 7, 1)]
    season = season[season["timestamp"] >= _yearly_cutoffs(season, offset, 7, 1)]

    for idx, tag in enumerate(tags):

        # Subset the data so that it only contains this tag
        sub = season[season["tag-local-identifier"] == tag]
        if len(sub) < 1:
            continue

        # Calendar year in which this season starts
        season_year = int(sub["StartYear"].iloc[0]) + offset

        death = sub["Death"].iloc[0]
        release = sub["Release"].iloc[0]
        summary["Death"][idx] = death

        # First day of the season and the first location on that day
        first_day = sub["Day"].min()
        first_rows = sub[sub["Day"] == first_day]
        summary["FirstDate"][idx] = first_day
        summary["FirstLat"][idx] = first_rows["location-lat"].iloc[0]
        summary["FirstLong"][idx] = first_rows["location-long"].iloc[0]

        # Most northern latitude up to 31 December 00:00 of the season year
        # (to exclude the northward migration in the next year)
        before_new_year = sub[sub["timestamp"] <= _yearly_cutoffs(sub, offset, 12, 31)]
        if len(before_new_year) > 0:
            max_lat = before_new_year["location-lat"].max()
            summary["MaxLat"][idx] = max_lat
            summary["MaxLatDate"][idx] = before_new_year[before_new_year["location-lat"] == max_lat]["Day"].iloc[0]

        # Last record at or north of 47.5 degrees latitude up to 1 November 00:00;
        # only used when there is more than one such record and its longitude is not >9
        before_november = sub[sub["timestamp"] <= _yearly_cutoffs(sub, offset, 11, 1)]
        north = before_november[before_november["location-lat"] >= 47.5]
        if len(north) > 1:
            if not north["location-long"].iloc[-1] > 9:
                summary["FirstSegm"][idx] = north["Day"].iloc[-1]
                summary["FirstSegmLat"][idx] = north["location-lat"].iloc[-1]

        # Most southern latitude of the season
        min_lat = sub["location-lat"].min()
        min_rows = sub[sub["location-lat"] == min_lat]
        summary["MinLat"][idx] = min_lat
        summary["MinLatDate"][idx] = min_rows["Day"].iloc[0]
        summary["MinLong"][idx] = min_rows["location-long"].iloc[0]

        # First day on which the bird was within 0.1 degrees of the minimum
        # latitude of THIS season
        near_min_days = sub[sub["location-lat"] < min_lat+0.1]["Day"]
        near_min_day = near_min_days.iloc[0] if len(near_min_days) > 0 else None

        # Discard values that were reached less than 3 days before death.
        # The days are converted to Timestamps (midnight) because pandas >= 2
        # refuses to order a Timestamp against a datetime.date.
        if not pd.isnull(death):
            segm_day = summary["FirstSegm"][idx]
            if segm_day is not None and death < pd.Timestamp(segm_day)+three_days:
                _reset(summary, idx, segm_fields)

            if near_min_day is not None and death < pd.Timestamp(near_min_day)+three_days:
                _reset(summary, idx, min_fields)

            # Birds that died less than 5 days after release
            if use_release and not pd.isnull(release) and death < release+five_days:
                _reset(summary, idx, min_fields)

        # Discard the minimum latitude when it lies less than 3 days from the
        # maximum latitude
        min_day = summary["MinLatDate"][idx]
        max_day = summary["MaxLatDate"][idx]
        if min_day is not None and max_day is not None:
            if min_day < max_day+three_days and max_day < min_day+three_days:
                _reset(summary, idx, min_fields)

        # Discard the minimum latitude when it was reached before 15 August of
        # the season year
        min_day = summary["MinLatDate"][idx]
        if min_day is not None and min_day < datetime.date(season_year, 8, 15):
            _reset(summary, idx, min_fields)

    return summary


def FindMigTL():
    """Determine timing and extent of the first two autumn migrations per tag.

    Uses the module-level names `data_folder`, `file_name_in` (pickle with the
    tracking data), `file_name_out` (csv that is written), `aviary` (aviary
    labels used to select rows of Release_Death.csv) and `Aviary` (study name
    stored in the output).

    For the season starting in the tagging year (suffix 1) and the season one
    year later (suffix 2) the output contains per tag: the first day and its
    location, the last day north of 47.5 degrees latitude before 1 November,
    the minimum and maximum latitude with their dates, the death date and the
    distance between the first location and the minimum-latitude location.

    Changes compared to the earlier version of this function:
    - the second-season threshold for "close to the minimum latitude" is based
      on the second-season minimum (it used the first-season value);
    - FirstLat2/FirstLong2 (and therefore Distance2) contain the second-season
      first location (they contained the first-season location);
    - the 15 August limit of the second season refers to the year after
      tagging (it referred to the tagging year and could never apply).
    """

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)

    # Order the data frame by individual and timestamp
    data = data.sort_values(["individual-local-identifier","timestamp"], ascending=[True,True])

    # Convert the timestamps
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")

    # Add the start-year of the data (year of the first record of the individual)
    first_year = data.groupby("individual-local-identifier")["timestamp"].min().dt.year
    data["StartYear"] = data["individual-local-identifier"].map(first_year.to_dict())

    # Only keep storks that started in 2019 or 2020
    data = data[(data["StartYear"]==2019)|(data["StartYear"]==2020)].copy()

    # Add the date (without time) to the data frame
    data["Day"] = data["timestamp"].dt.date

    # Load the release-death data and index the selected aviaries by tag
    release_death = pd.read_csv(data_folder + "Release_Death.csv",sep=",",low_memory=False)
    per_tag = release_death[release_death["Aviary"].isin(aviary)].set_index("tag.local.identifier")

    # Add the timing of release and death to the data
    for column in ("Release","Death"):
        data[column] = pd.to_datetime(data["tag-local-identifier"].map(per_tag[column].to_dict()),format="%d-%m-%Y %H:%M")

    # Only keep data after release and before death
    after_release = data["Release"].isna() | (data["timestamp"] >= data["Release"])
    before_death = data["Death"].isna() | (data["timestamp"] <= data["Death"])
    data = data[after_release & before_death]

    # Sort the data
    data = data.sort_values(["tag-local-identifier","timestamp","gps:satellite-count"], ascending=[True,True,True])

    # Remove duplicated timestamps
    # NOTE(review): with the ascending sort above the record with the LOWEST
    # satellite count is kept - confirm that this is intended
    data = data.drop_duplicates(["timestamp","tag-local-identifier"],keep="first")

    # Only keep GPS data
    data = data[data["sensor-type"]=="gps"]

    # Only keep locations that are not NA
    data = data.dropna(subset=["location-long","location-lat"])

    # Summarise both seasons; the release check is only applied in the first one
    tags = data["tag-local-identifier"].unique()
    first_season = _summarise_season(data, tags, offset=0, use_release=True)
    second_season = _summarise_season(data, tags, offset=1, use_release=False)

    # Bind the data together into a data frame
    columns = {"tag-local-identifier": list(tags)}
    for field in _SEASON_FIELDS[:-1]:
        columns[field+"1"] = first_season[field]
    columns["Death"] = first_season["Death"]
    columns["tag-local-identifier2"] = list(tags)
    for field in _SEASON_FIELDS[:-1]:
        columns[field+"2"] = second_season[field]
    columns["Death2"] = second_season["Death"]
    SegTim_MinLat = pd.DataFrame(columns)

    # Add aviary
    SegTim_MinLat["Aviary"] = Aviary

    # Add Individual
    SegTim_MinLat["Individual"] = SegTim_MinLat["Aviary"].map(str)+"_"+SegTim_MinLat["tag-local-identifier"].map(str)

    # Calculate the distance between the first location and the most southern latitude location
    for suffix in ("1","2"):
        SegTim_MinLat["Distance"+suffix] = [
            _distance_km(first_lat, first_long, min_lat, min_long)
            for first_lat, first_long, min_lat, min_long in zip(
                SegTim_MinLat["FirstLat"+suffix],
                SegTim_MinLat["FirstLong"+suffix],
                SegTim_MinLat["MinLat"+suffix],
                SegTim_MinLat["MinLong"+suffix],
            )
        ]

    # Save the data
    SegTim_MinLat.to_csv(data_folder+file_name_out)

In [None]:
# Define objects
# FindMigTL takes no arguments; it reads these module-level names:
#   aviary        - aviary labels used to select rows of Release_Death.csv
#   Aviary        - study name written to the "Aviary" column of the output
#   file_name_in  - pickle (inside data_folder) with the tracking data
#   file_name_out - csv (inside data_folder) that receives the result
aviary = ["Affenberg_2019","Affenberg_2020"]
Aviary = "Affenberg"
file_name_in = "Second_DataAff_All_Second.pkl"
file_name_out = "DataAff_SegTimMinLat.csv"

# Execute the function and print the run time
start = datetime.datetime.now()
FindMigTL()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Define objects
# Module-level names read by FindMigTL (aviary labels, study name, input
# pickle and output csv inside data_folder) - here for the CareCenter study
aviary = ["CareCenter_2019","CareCenter_2020"]
Aviary = "CareCenter"
file_name_in = "Second_DataCC_All_Second.pkl"
file_name_out = "DataCC_SegTimMinLat.csv"

# Execute the function and print the run time
start = datetime.datetime.now()
FindMigTL()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Define objects
# Module-level names read by FindMigTL (aviary labels, study name, input
# pickle and output csv inside data_folder) - here for the CASCB study
aviary = ["CASCB_East","CASCB_West"]
Aviary = "CASCB"
file_name_in = "Second_DataCASCB_All_Second.pkl"
file_name_out = "DataCASCB_SegTimMinLat.csv"

# Execute the function and print the run time
start = datetime.datetime.now()
FindMigTL()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack all csv files "Data*<pattern>.csv" of `data_folder` into one csv.

    pattern: last part of the file names (without ".csv") to combine.

    The combined table is written to "All_<pattern>.csv" in `data_folder`.
    pandas raises a ValueError when no file matches.
    """

    # Collect the paths of the files that belong to this pattern
    matches = glob.glob(data_folder+"Data"+"*"+pattern+".csv")

    # Read every file and put the tables below each other
    frames = [pd.read_csv(path,sep=',',low_memory=False) for path in matches]
    combined = pd.concat(frames)

    # Write the combined table to a new csv file
    combined.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# (combines the files "Data*SegTimMinLat.csv" into "All_SegTimMinLat.csv")
start = datetime.datetime.now()
CombineAll(pattern="SegTimMinLat")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

## Prepare ACC data for calculations

### Prepare data

In [None]:
# Define a list of columns to keep
# (module-level; function_prep_acc subsets the raw ACC file to these columns)
columns_to_keep = [
    "timestamp",
    "tag-local-identifier",
    "individual-local-identifier",
    "eobs:accelerations-raw",
    "sensor-type"
]

# Load the release-death data
# (module-level; function_prep_acc looks up the "Release" and "Death" time per
# "tag.local.identifier" for the rows whose "Aviary" is in `aviary`)
ReleaseDeath = pd.read_csv(data_folder + "Release_Death.csv",sep=",",low_memory=False)

In [None]:
# Define function
def function_prep_acc(Released):
    """Expand the raw acceleration bursts into one row per X/Y/Z sample.

    Uses the module-level names `data_folder`, `file_name_in` (csv with the
    raw data), `file_name_out` (pickle that is written), `columns_to_keep`,
    `ReleaseDeath`, `aviary` (aviary labels used to select rows of
    ReleaseDeath) and `Aviary` (study name stored in the output).

    Released: not used by this function; kept so that existing calls keep
        working.

    The raw string of a burst holds the samples as "X Y Z X Y Z ...". Only
    complete X/Y/Z triplets are used, so a burst with trailing values that do
    not form a full triplet no longer produces columns of unequal length.
    Records without a tag number or without a raw string are skipped.

    The output has the columns timestamp, tag-local-identifier, X_raw, Y_raw,
    Z_raw (raw values as text), ACC_num (sample number within the burst,
    starting at 1), X_mps, Y_mps, Z_mps, Aviary and Individual.
    """

    # Load the data
    data = pd.read_csv(data_folder+file_name_in,sep=",",low_memory=False)

    # Only keep columns that are necessary for further steps in the analyses
    data = data[columns_to_keep]

    # Only keep ACC data
    data = data[data["sensor-type"]=="acceleration"].copy()

    # Convert the timestamps
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")

    # Remove data before release and after death

    # Add the timing of release and death to the data
    per_tag = ReleaseDeath[ReleaseDeath["Aviary"].isin(aviary)].set_index("tag.local.identifier")
    for column in ("Release","Death"):
        data[column] = pd.to_datetime(data["tag-local-identifier"].map(per_tag[column].to_dict()),format="%d-%m-%Y %H:%M")

    # Only keep data after release and before death
    after_release = data["Release"].isna() | (data["timestamp"] >= data["Release"])
    before_death = data["Death"].isna() | (data["timestamp"] <= data["Death"])
    data = data[after_release & before_death]

    # Sort the data
    data = data.sort_values(["tag-local-identifier","timestamp"], ascending=[True,True])

    # Records without tag number or without raw values cannot be expanded
    data = data.dropna(subset=["tag-local-identifier","eobs:accelerations-raw"])

    #--- Split the ACC into colums X Y Z ---#

    # Make empty list to store the values in
    timestamp = []
    identifier = []
    X_raw = []
    Y_raw = []
    Z_raw = []
    ACC_num = []

    # The data are sorted by tag and timestamp, so one pass over the records
    # yields the samples grouped per tag in chronological order
    for burst_time, tag, raw in zip(data["timestamp"], data["tag-local-identifier"], data["eobs:accelerations-raw"]):

        # Split the ACC values and count the complete X/Y/Z triplets
        values = raw.split()
        n_samples = len(values)//3
        n_used = 3*n_samples

        timestamp.extend([burst_time]*n_samples)
        identifier.extend([tag]*n_samples)
        X_raw.extend(values[0:n_used:3])
        Y_raw.extend(values[1:n_used:3])
        Z_raw.extend(values[2:n_used:3])
        ACC_num.extend(range(1, n_samples+1))

    # Combine the list into a data frame
    data = pd.DataFrame({"timestamp":timestamp,"tag-local-identifier":identifier,"X_raw":X_raw,"Y_raw":Y_raw,"Z_raw":Z_raw,"ACC_num": ACC_num})

    #--- Transform the values to g or m/s2 ---#

    # Calibrate ACC data # values from Andrea
    cal_xzero = 2042
    cal_cx = 0.0020
    cal_yzero = 2042
    cal_cy = 0.0020
    cal_zzero = 2049
    cal_cz = 0.0023
    cal_g = 9.80665

    # Convert ACC raw values to meaningful unit (m/s2):
    # (raw value - zero offset) * slope * standard gravity
    data["X_mps"] = (data["X_raw"].astype(int)-cal_xzero)*cal_cx*cal_g
    data["Y_mps"] = (data["Y_raw"].astype(int)-cal_yzero)*cal_cy*cal_g
    data["Z_mps"] = (data["Z_raw"].astype(int)-cal_zzero)*cal_cz*cal_g

    # Add the aviary
    data["Aviary"] = Aviary

    # Add an individual ID
    data["Individual"] = data["Aviary"].map(str)+"_"+data["tag-local-identifier"].map(str)

    # Save the data into a new csv or pkl file
    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# function_prep_acc reads these module-level names: the raw ACC csv and the
# output pickle (both inside data_folder), the aviary labels used to select
# rows of ReleaseDeath, and the study name stored in the output
file_name_in = "White Stork Affenberg releases MPIAB_ACC.csv"
file_name_out = "DataAff_ACC.pkl"
aviary = ["Affenberg_2019","Affenberg_2020"]
Aviary = "Affenberg"

# Execute the function and print the run time
# (the argument Released is accepted by function_prep_acc but not used in its body)
start = datetime.datetime.now()
function_prep_acc(Released=True)
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# Module-level names read by function_prep_acc (raw ACC csv, output pickle,
# aviary labels and study name) - here for the CareCenter study
file_name_in = "LifeTrack White Stork SW Germany Care Centre Releases_ACC.csv"
file_name_out = "DataCC_ACC.pkl"
aviary = ["CareCenter_2019","CareCenter_2020"]
Aviary = "CareCenter"

# Execute the function and print the run time
# (the argument Released is accepted by function_prep_acc but not used in its body)
start = datetime.datetime.now()
function_prep_acc(Released=True)
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# Module-level names read by function_prep_acc (raw ACC csv, output pickle,
# aviary labels and study name) - here for the CASCB study
file_name_in = "LifeTrack White Stork SW Germany CASCB_ACC.csv"
file_name_out = "DataCASCB_ACC.pkl"
aviary = ["CASCB_East","CASCB_West"]
Aviary = "CASCB"

# Execute the function and print the run time
# (the argument Released is accepted by function_prep_acc but not used in its
# body, so passing False here does not change the processing)
start = datetime.datetime.now()
function_prep_acc(Released=False)
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Calculate ODBA

In [None]:
# Define the function
def CalculateODBA():
    """Calculate the ODBA per tag and minute from the expanded ACC samples.

    Uses the module-level names `data_folder`, `file_name_in` (pickle with the
    columns timestamp, tag-local-identifier, Individual, X_mps, Y_mps, Z_mps),
    `file_name_out` (pickle that is written) and `Aviary` (study name stored
    in the output).

    All samples of one tag whose timestamp rounds to the same minute are
    treated as one burst. For every burst the ODBA is the sum over the three
    axes of the mean absolute deviation from the axis mean. The output has one
    row per burst with the columns timestamp (first timestamp of the burst),
    tag-local-identifier, Individual, ODBA, Aviary and Day. Rows are ordered by
    minute (in order of first appearance in the input) and, within a minute,
    by tag (in order of first appearance).

    The bursts are formed with a single groupby instead of rescanning the
    whole table for every minute. The means are calculated by pandas, so
    values can differ from an exact-arithmetic mean in the last floating-point
    digits.
    """

    # Load data
    data = pd.read_pickle(data_folder+file_name_in)

    # Make sure every row has its own index label (needed to align the burst means)
    data = data.reset_index(drop=True)

    # Round the timestamp to minutes
    data["time"] = data["timestamp"].round('min')

    axes = ["X_mps","Y_mps","Z_mps"]
    keys = ["time","tag-local-identifier"]

    # Mean of every axis within the burst, repeated for every sample of the burst
    burst_mean = data.groupby(keys, sort=False)[axes].transform("mean")

    # Absolute deviation of every sample from the burst mean
    deviations = data[keys+["timestamp","Individual"]].copy()
    for axis in axes:
        deviations[axis] = (data[axis]-burst_mean[axis]).abs()

    # One row per burst: first timestamp, individual and the mean deviations
    bursts = deviations.groupby(keys, sort=False, as_index=False).agg(
        timestamp = ("timestamp",'first'),
        Individual = ("Individual",'first'),
        X_dev = ("X_mps",'mean'),
        Y_dev = ("Y_mps",'mean'),
        Z_dev = ("Z_mps",'mean')
    )

    # The bursts are in order of first appearance; a stable sort on the order
    # in which the minutes first appear puts all tags of one minute together
    minute_rank = pd.factorize(bursts["time"])[0]
    bursts = bursts.iloc[np.argsort(minute_rank, kind="stable")]

    # Combine the values into a dataframe
    data = pd.DataFrame({
        "timestamp": bursts["timestamp"].to_numpy(),
        "tag-local-identifier": bursts["tag-local-identifier"].to_numpy(),
        "Individual": bursts["Individual"].to_numpy(),
        "ODBA": (bursts["X_dev"]+bursts["Y_dev"]+bursts["Z_dev"]).to_numpy()
    })

    data["Aviary"] = Aviary
    data["Day"] = data["timestamp"].dt.date

    # Save the data
    data.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# CalculateODBA reads these module-level names: the input pickle with the
# expanded ACC samples, the output pickle (both inside data_folder) and the
# study name stored in the "Aviary" column
file_name_in = "DataAff_ACC.pkl"
file_name_out = "DataAff_ODBA.pkl"
Aviary = "Affenberg"

# Execute the function and print the run time
start = datetime.datetime.now()
CalculateODBA()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# Module-level names read by CalculateODBA (input pickle, output pickle and
# study name) - here for the CareCenter study
file_name_in = "DataCC_ACC.pkl"
file_name_out = "DataCC_ODBA.pkl"
Aviary = "CareCenter"

# Execute the function and print the run time
start = datetime.datetime.now()
CalculateODBA()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# Module-level names read by CalculateODBA (input pickle, output pickle and
# study name) - here for the CASCB study
file_name_in = "DataCASCB_ACC.pkl"
file_name_out = "DataCASCB_ODBA.pkl"
Aviary = "CASCB"

# Execute the function and print the run time
start = datetime.datetime.now()
CalculateODBA()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Add weather variables, location and flight information

In [None]:
# Define the function nearest
def nearest(items, pivot):
    """Return the element of `items` with the smallest absolute difference to `pivot`.

    When several elements are equally close, the first one is returned.
    An empty `items` raises a ValueError (from `min`).
    """
    def gap(candidate):
        return abs(candidate - pivot)

    return min(items, key=gap)

In [None]:
# Define function
def AddVariables():
    """Link every ODBA value to a GPS burst and add weather and flight summaries.

    Uses the module-level names `data_folder`, `file_name_in_ODBA` (pickle with
    the ODBA data), `file_name_in_GPS` (pickle with the GPS data) and
    `file_name_out` (pickle that is written).

    Steps:
    1. Keep ODBA rows whose tag/day combination also occurs in the GPS data.
    2. For every remaining ODBA row, look up the closest earlier (or equal) GPS
       timestamp of the same tag and copy the "Burst_A" value of that GPS row.
    3. Summarise the GPS data per burst (whole burst, last 5 rows of the burst,
       climbing rows, gliding rows, and the gliding/climbing segment found in
       the last 5 rows).
    4. Merge the summaries with the ODBA data and keep the rows where the ODBA
       timestamp lies 0 to <120 seconds after the last GPS timestamp of the burst.

    NOTE(review): the GPS pickle is expected to contain at least the columns
    used below (Day, Burst_A, BurstID, Aviary, Individual, FlyingID, GlidingID,
    ClimbingID, Altitude, ClimbingRate, ground-speed, AirSpeed_PL,
    WindSupport_PL, WindDirection_PL, WindSpeed_PL, Temp_SL, ...); their
    meaning and units are defined where that file is created.
    """

    # Load ODBA data
    data_O = pd.read_pickle(data_folder+file_name_in_ODBA)

    # Load GPS data
    data_G = pd.read_pickle(data_folder+file_name_in_GPS)

    # Remove ACC data that is outside the subset of GPS data
    # (matching is done on a "<tag>_<day>" key)
    data_G["IndDay"] = data_G["tag-local-identifier"].map(str)+"_"+data_G["Day"].map(str)
    data_O["IndDay"] = data_O["tag-local-identifier"].map(str)+"_"+data_O["Day"].map(str)
    data_O = data_O[data_O["IndDay"].isin(data_G["IndDay"])]

    # Add a new column to data to enter Burst_A
    data_O["Burst_A"] = np.nan
    
    # Find the nearest start time for every burst and enter the start time in the new column
    # (UniqueNumber is a running row number that is used to address single ODBA rows)
    data_O.reset_index(drop=True, inplace=True)
    data_O["UniqueNumber"] = range(len(data_O))

    for ind in data_G["tag-local-identifier"].unique():

        data_O_sub = data_O[data_O["tag-local-identifier"]==ind]
        data_G_sub = data_G[data_G["tag-local-identifier"]==ind]
        
        for row in data_O_sub["UniqueNumber"]:
            # Array with the timestamp of this ODBA row
            # NOTE(review): this selects from the complete table for every row,
            # which is slow for large tables
            First_timestamp = data_O.loc[data_O["UniqueNumber"]==row,"timestamp"].unique()
            # Skip ODBA rows more than 10 minutes before the first GPS timestamp of the tag
            if (First_timestamp < (min(data_G_sub["timestamp"])-datetime.timedelta(minutes=10))):
                continue
                
            # Skip ODBA rows more than 10 minutes after the last GPS timestamp of the tag
            if (First_timestamp > (max(data_G_sub["timestamp"])+datetime.timedelta(minutes=10))):
                continue

            # GPS timestamps of the tag that are not later than the ODBA timestamp
            TimeSTPs = data_G_sub[data_G_sub["timestamp"]<=First_timestamp[0]]["timestamp"].unique()
           
            if len(TimeSTPs)<1:
                continue
                
            # Closest of these GPS timestamps, replaced by the Burst_A value(s) of the GPS rows at that timestamp
            Nearest = nearest(items=TimeSTPs,pivot=First_timestamp)
            Nearest = data_G_sub.loc[data_G_sub["timestamp"]==Nearest,"Burst_A"].unique()
            
            if pd.isnull(Nearest):
                continue
                        
            data_O.loc[data_O["UniqueNumber"]==row,"Burst_A"] = Nearest

    # Absolute time difference in minutes between Burst_A and the ODBA timestamp
    data_O["DiffTime"]=abs((pd.to_datetime(data_O["Burst_A"]) - pd.to_datetime(data_O["timestamp"])).dt.total_seconds()/60)
    

    # Replace Burst_A by NA if the timedifference is 12 minutes or more
    data_O.loc[~(data_O["DiffTime"].isna()|(data_O["DiffTime"]<12)),"Burst_A"] = np.nan

    # Remove the data where Burst_A is NA
    data_O = data_O[~(data_O["Burst_A"].isna())]

    # Make a table with the information from the GPS data
    # (per burst: means of the weather variables and the last timestamp and location)
    data_G2 = data_G.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay"],as_index=False).agg(
        WindSupportPL_Mean = ("WindSupport_PL",'mean'),
        WindDirectionPL_Mean = ("WindDirection_PL",'mean'),
        TempSL_Mean = ("Temp_SL",'mean'),
        WindSpeedPL_Mean = ("WindSpeed_PL",'mean'),
        TimestampG_End = ("timestamp",'last'),
        Lat_End = ("location-lat",'last'),
        Long_End = ("location-long",'last')
    )

    # Summarise the behaviour (flying, gliding, climbing) and weather at the end of the burst
    # (the last 5 rows of every burst are used)
    data_G3 = data_G.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay"],as_index=False).tail(5)
    # NOTE(review): Sink_End is calculated from the same column and in the same
    # way as ClimbingRate_End - confirm that this is intended
    data_G3 = data_G3.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay"],as_index=False).agg(
        FlyingID = ("FlyingID",'mean'),
        GlidingID = ("GlidingID",'mean'),
        ClimbingID = ("ClimbingID",'mean'),
        Altitude_End = ("Altitude",'mean'),
        WindSupportPL_End = ("WindSupport_PL",'mean'),
        WindSpeedPL_End = ("WindSpeed_PL",'mean'),
        TempSL_End = ("Temp_SL",'mean'),
        #TimestampG_End = ("timestamp",'last'),
        ClimbingRate_End = ("ClimbingRate",'mean'),
        Sink_End = ("ClimbingRate",'mean'),
        GlidingSpeed_End = ("ground-speed",'mean'),
        GlidingAirspeed_End = ("AirSpeed_PL",'mean')
    )

    # Summarise flight properties during climbing bits
    # (rows with a ClimbingID)
    data_G4 = data_G[~(data_G["ClimbingID"].isna())]
    data_G4 = data_G4.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay"],as_index=False).agg(
        ClimbingRate_Mean = ("ClimbingRate",'mean')
    )

    # Summarise flight properties during gliding bits
    # (rows with a GlidingID)
    data_G5 = data_G[~(data_G["GlidingID"].isna())]
    data_G5 = data_G5.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay"],as_index=False).agg(
        Sink_Mean = ("ClimbingRate",'mean'),
        GlidingSpeed_Mean = ("ground-speed",'mean'),
        GlidingAirspeed_Mean = ("AirSpeed_PL",'mean')
    )

    # Summarise the data for the last gliding segment of the burst (only if the gliding segment is at the end)
    # The bursts with a GlidingID in the end-of-burst summary are merged back
    # onto the GPS rows (on all shared columns) to select the rows of that segment
    data_G3g = data_G3[~(data_G3["GlidingID"].isna())]
    data_G3g = data_G3g[["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay","GlidingID"]]
    # NOTE(review): Burst_A of data_G is converted here, after data_G3 was
    # derived from it - confirm that both sides of the merge below have the same dtype
    data_G["Burst_A"] = data_G.Burst_A.astype('datetime64[ns]')
    data_G3g = pd.merge(data_G,data_G3g)
    
    data_G3g = data_G3g.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay","GlidingID"],as_index=False).agg(
        Sink_Segm = ("ClimbingRate",'mean'),
        GlidingSpeed_Segm = ("ground-speed",'mean'),
        GlidingAirspeed_Segm = ("AirSpeed_PL",'mean'),
        WindSupportPL_Segm = ("WindSupport_PL",'mean')
    )
    
    # Summarise the data for the last climbing segment of the burst (only if the climbing segment is at the end)
    data_G3c = data_G3[~(data_G3["ClimbingID"].isna())]
    data_G3c = data_G3c[["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay","ClimbingID"]]
    data_G3c = pd.merge(data_G,data_G3c)
    
    data_G3c = data_G3c.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","IndDay","ClimbingID"],as_index=False).agg(
        ClimbingRate_Segm = ("ClimbingRate",'mean')
    )

    # Add the information to the ODBA data
    data_O["Burst_A"] = data_O.Burst_A.astype('datetime64[ns]')

    # Outer merges keep bursts without ODBA value and ODBA values without burst;
    # the rows that do not fit are removed further below
    data_O2 = pd.merge(data_O,data_G2,on=["IndDay","Burst_A","tag-local-identifier","Aviary","Day","Individual"],how="outer")

    data_O2 = pd.merge(data_O2,data_G3,on=["IndDay","Burst_A","tag-local-identifier","Aviary","Day","Individual","BurstID"],how="outer")
    data_O2 = pd.merge(data_O2,data_G4,on=["IndDay","Burst_A","tag-local-identifier","Aviary","Day","Individual","BurstID"],how="outer")
    data_O2 = pd.merge(data_O2,data_G5,on=["IndDay","Burst_A","tag-local-identifier","Aviary","Day","Individual","BurstID"],how="outer")
    data_O2 = pd.merge(data_O2,data_G3g,on=["IndDay","Burst_A","tag-local-identifier","Aviary","Day","Individual","BurstID","GlidingID"],how="outer")
    data_O2 = pd.merge(data_O2,data_G3c,on=["IndDay","Burst_A","tag-local-identifier","Aviary","Day","Individual","BurstID","ClimbingID"],how="outer")

    # Seconds between the last GPS timestamp of the burst and the ODBA timestamp
    data_O2["DiffTime_"] = (data_O2["timestamp"] - data_O2["TimestampG_End"]).dt.total_seconds()

    # Remove data that does not fit to a burst
    # (keep ODBA values measured 0 to <120 seconds after the end of the GPS burst)
    data_O2 = data_O2[~(data_O2["DiffTime_"].isna())]
    data_O2 = data_O2[data_O2["DiffTime_"]>=0]
    data_O2 = data_O2[data_O2["DiffTime_"]<120]
    
    # Add a column for climbing vs gliding
    # ("Climbing" is assigned last, so it wins when both IDs are present)
    data_O2["FlightType"] = np.nan
    data_O2.loc[~(data_O2["GlidingID"].isna()),"FlightType"] = "Gliding"
    data_O2.loc[~(data_O2["ClimbingID"].isna()),"FlightType"] = "Climbing"

    #-----------------#
    #- Save the data -#
    #-----------------#
    # Save the data into a new pkl file
    data_O2.to_pickle(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# AddVariables reads these module-level names: the pickle with the ODBA data,
# the pickle with the GPS data and the output pickle (all inside data_folder)
file_name_in_ODBA = "DataAff_ODBA.pkl"
file_name_in_GPS = "DataAff_TempWind_S.pkl"
file_name_out = "DataAff_ODBA_Temp.pkl"

# Execute the function and print the run time
start = datetime.datetime.now()
AddVariables()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# Module-level names read by AddVariables (ODBA pickle, GPS pickle and output
# pickle inside data_folder) - here for the CareCenter study
file_name_in_ODBA = "DataCC_ODBA.pkl"
file_name_in_GPS = "DataCC_TempWind_S.pkl"
file_name_out = "DataCC_ODBA_Temp.pkl"

# Execute the function and print the run time
start = datetime.datetime.now()
AddVariables()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# Module-level names read by AddVariables (ODBA pickle, GPS pickle and output
# pickle inside data_folder) - here for the CASCB study
file_name_in_ODBA = "DataCASCB_ODBA.pkl"
file_name_in_GPS = "DataCASCB_TempWind_S.pkl"
file_name_out = "DataCASCB_ODBA_Temp.pkl"

# Execute the function and print the run time
start = datetime.datetime.now()
AddVariables()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Combine the ACC files

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack the per-study pickle files of one output type into a single CSV.

    Reads every file matching data_folder + "Data*" + pattern + ".pkl" and
    writes the row-wise concatenation to data_folder + "All" + pattern + ".csv".

    Parameters
    ----------
    pattern : str
        File-name suffix (without extension), e.g. "_ODBA_Temp".

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # glob returns matches in arbitrary, file-system dependent order; sort them
    # so that the row order of the combined file is reproducible
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".pkl"))

    # Fail with an explicit message rather than pandas' generic
    # "No objects to concatenate"
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".pkl")

    # Load the files of all studies lazily and merge them together
    data = pd.concat(pd.read_pickle(f) for f in all_files)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*_ODBA_Temp.pkl" into
# data_folder+"All_ODBA_Temp.csv"
start = datetime.datetime.now()
CombineAll(pattern="_ODBA_Temp")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

## Calculations on data

### Survival

In [None]:
# Define a function to make sure all objects are removed afterwards
def function_survival(Released):
    """Build one GPS fix per bird and day for the survival analysis.

    Reads the module-level names file_name_in (input pickle), file_name_out
    (output csv), aviary (list of "Aviary" values used to select rows of
    Release_Death.csv) and Aviary (label written to the output), all relative
    to data_folder.

    Steps: keep GPS fixes with coordinates, drop fixes before release or after
    death, drop duplicated timestamps, keep birds whose first fix is in 2019 or
    2020, drop fixes from 1 July of the following year onwards, and keep the
    first fix of every day.

    Parameters
    ----------
    Released : bool
        Currently not used by the function; kept so existing calls still work.
    """
    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)

    # Load the release-death data
    ReleaseDeath = pd.read_csv(data_folder + "Release_Death.csv",sep=",",low_memory=False)

    # Only keep columns that are necessary for further steps in the analyses
    columns_to_keep = [
        "timestamp",
        "location-long",
        "location-lat",
        "tag-local-identifier",
        "individual-local-identifier",
        "gps:satellite-count",
        "sensor-type"
    ]
    data = data[columns_to_keep]

    # Only keep GPS data
    data = data[data["sensor-type"]=="gps"]

    # Only keep locations that are not NA; copy so that the column assignments
    # below act on an independent frame instead of a slice of the input
    data = data.dropna(subset=["location-long","location-lat"]).copy()

    # Convert the timestamps
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")

    # Add the timing of release and death to the data (looked up by tag, only
    # from the rows of the release-death table that belong to this study)
    events = ReleaseDeath[ReleaseDeath["Aviary"].isin(aviary)].set_index("tag.local.identifier")
    for event in ("Release","Death"):
        data[event] = pd.to_datetime(data["tag-local-identifier"].map(events[event].to_dict()),format="%d-%m-%Y %H:%M")

    # Only keep data after release and before death; a missing release or
    # death time does not restrict the data
    after_release = data["Release"].isna() | (data["timestamp"] >= data["Release"])
    before_death = data["Death"].isna() | (data["timestamp"] <= data["Death"])
    data = data[after_release & before_death]

    # Sort the data, then remove duplicated timestamps (the first row of each
    # duplicate set, i.e. the one with the lowest satellite count, is kept)
    data = data.sort_values(["tag-local-identifier","timestamp","gps:satellite-count"], ascending=[True,True,True])
    data = data.drop_duplicates(["timestamp","tag-local-identifier"],keep="first")

    # Add the start-year of the data (year of the first fix of each tag)
    first_year = data.groupby("tag-local-identifier")["timestamp"].min().dt.year
    data["StartYear"] = data["tag-local-identifier"].map(first_year.to_dict())

    # Only keep individuals from 2019 and 2020
    data = data[data["StartYear"].isin([2019,2020])].copy()

    # 1 July of the year after tagging, computed once for every row
    cutoff = pd.to_datetime((data["StartYear"].astype(int)+1).astype(str)+"-07-01",format="%Y-%m-%d")

    # Remove data from 1 July in the year after tagging onwards
    keep = data["timestamp"]<cutoff
    data = data[keep].copy()
    cutoff = cutoff[keep]

    # Set mortality after 1 July in the year after tagging to NA.
    # Series.where keeps the column a datetime column (NaT where missing)
    # instead of writing datetimes into a float NaN column.
    data["Death2"] = data["Death"].where(data["Death"]<=cutoff)

    # Add the aviary
    data["Aviary"] = Aviary

    # Add an individual ID
    data["Individual"] = data["Aviary"].map(str)+"_"+data["tag-local-identifier"].map(str)

    # Add Day without time
    data["Day"] = data["timestamp"].dt.date

    # Subset the data to daily (first non-missing value of every column per day)
    data = data.groupby(["tag-local-identifier","Day"],as_index=False).first()

    # Save the data into a new csv file
    data.to_csv(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# function_survival reads these four module-level names: `aviary` selects the
# rows of Release_Death.csv, `Aviary` is the label written to the output
file_name_in = "Second_DataAff_All_Second.pkl"
file_name_out = "DataAff_Survival.csv"
aviary = ["Affenberg_2019","Affenberg_2020"]
Aviary = "Affenberg"

# Execute the function and print the run time
# (the Released argument is not used inside function_survival)
start = datetime.datetime.now()
function_survival(Released=True)
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# function_survival reads these four module-level names: `aviary` selects the
# rows of Release_Death.csv, `Aviary` is the label written to the output
file_name_in = "Second_DataCC_All_Second.pkl"
file_name_out = "DataCC_Survival.csv"
aviary = ["CareCenter_2019","CareCenter_2020"]
Aviary = "CareCenter"

# Execute the function and print the run time
# (the Released argument is not used inside function_survival)
start = datetime.datetime.now()
function_survival(Released=True)
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# function_survival reads these four module-level names: `aviary` selects the
# rows of Release_Death.csv, `Aviary` is the label written to the output
file_name_in = "Second_DataCASCB_All_Second.pkl"
file_name_out = "DataCASCB_Survival.csv"
aviary = ["CASCB_East","CASCB_West"]
Aviary = "CASCB"

# Execute the function and print the run time
# (the Released argument is not used inside function_survival, so passing
# False here gives the same processing as for the other studies)
start = datetime.datetime.now()
function_survival(Released=False)
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack the per-study CSV files of one output type into a single CSV.

    Reads every file matching data_folder + "Data*" + pattern + ".csv" and
    writes the row-wise concatenation to data_folder + "All_" + pattern + ".csv".

    Parameters
    ----------
    pattern : str
        File-name suffix (without extension), e.g. "Survival".

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # glob returns matches in arbitrary, file-system dependent order; sort them
    # so that the row order of the combined file is reproducible
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))

    # Fail with an explicit message rather than pandas' generic
    # "No objects to concatenate"
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files lazily and merge them together
    data = pd.concat(pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*Survival.csv" into
# data_folder+"All_Survival.csv"
start = datetime.datetime.now()
CombineAll(pattern="Survival")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Number of days to cover segment

In [None]:
# Define a function
def DaysInSegment():
    """Compute, per individual, the time in days between its first and last flight fix.

    Reads data_folder + file_name_in (pickle) and writes
    data_folder + file_name_out + "DaysInSegment.csv"; both names are
    module-level variables set by the calling cell.
    """
    # Load data
    data = pd.read_pickle(data_folder+file_name_in)

    # Remove data that is not during flight. notna() is used instead of
    # np.isnan so that the filter also works when FlyingID is not stored as float
    data = data[data["FlyingID"].notna()]

    # Get the first and last timestamp in the data.
    # NOTE: 'first'/'last' follow row order, so this relies on the input being
    # sorted by timestamp within each individual.
    data_ts = data.groupby(["tag-local-identifier","Individual","Aviary"],as_index=False).agg(
        Timestamp_first = ("timestamp",'first'),
        Timestamp_last = ("timestamp",'last')
    )

    # Calculate the time difference in days
    seconds_per_day = 3600*24
    data_ts["Days"] = (data_ts["Timestamp_last"]-data_ts["Timestamp_first"]).dt.total_seconds()/seconds_per_day

    # Save the data
    data_ts.to_csv(data_folder+file_name_out+"DaysInSegment.csv")

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# DaysInSegment reads file_name_in and file_name_out as module-level names and
# writes data_folder+file_name_out+"DaysInSegment.csv"; it does not read Aviary
file_name_in = "DataAff_TempWind_S.pkl"
file_name_out = "DataAff"
Aviary="Affenberg"

# Execute the function and print the run time
start = datetime.datetime.now()
DaysInSegment()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# DaysInSegment reads file_name_in and file_name_out as module-level names and
# writes data_folder+file_name_out+"DaysInSegment.csv"; it does not read Aviary
file_name_in = "DataCC_TempWind_S.pkl"
file_name_out = "DataCC"
Aviary="CareCenter"

# Execute the function and print the run time
start = datetime.datetime.now()
DaysInSegment()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# DaysInSegment reads file_name_in and file_name_out as module-level names and
# writes data_folder+file_name_out+"DaysInSegment.csv"; it does not read Aviary
file_name_in = "DataCASCB_TempWind_S.pkl"
file_name_out = "DataCASCB"
Aviary = "CASCB"

# Execute the function and print the run time
start = datetime.datetime.now()
DaysInSegment()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack the per-study CSV files of one output type into a single CSV.

    Reads every file matching data_folder + "Data*" + pattern + ".csv" and
    writes the row-wise concatenation to data_folder + "All_" + pattern + ".csv".

    Parameters
    ----------
    pattern : str
        File-name suffix (without extension), e.g. "DaysInSegment".

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # glob returns matches in arbitrary, file-system dependent order; sort them
    # so that the row order of the combined file is reproducible
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))

    # Fail with an explicit message rather than pandas' generic
    # "No objects to concatenate"
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files lazily and merge them together
    data = pd.concat(pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*DaysInSegment.csv" into
# data_folder+"All_DaysInSegment.csv"
start = datetime.datetime.now()
CombineAll(pattern="DaysInSegment")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Daily distance

In [None]:
# Define a function
def DailyDist():
    """Compute, per bird and day, the distance between the first and last fix.

    Reads two pickles relative to data_folder: file_name_in_1 holds the fixes
    the distance is measured on, file_name_in_2 is only used to select which
    tag/day combinations are kept. Writes
    data_folder + file_name_out + "DailyDistance.csv". The file names and
    Aviary are module-level variables set by the calling cell.
    """
    # Load data
    data = pd.read_pickle(data_folder+file_name_in_1)
    data_s = pd.read_pickle(data_folder+file_name_in_2)

    # Build a tag/day key in both frames. The separator keeps the tag and the
    # day from running together into an ambiguous string.
    data_s["ID"] = data_s["tag-local-identifier"].map(str) + "_" + data_s["Day"].map(str)
    data["ID"] = data["tag-local-identifier"].map(str) + "_" + data["Day"].map(str)

    data["Aviary"] = Aviary
    # NOTE(review): function_survival builds Individual as Aviary + "_" + tag,
    # here there is no "_". Left unchanged because the format used by the other
    # inputs is not visible here -- confirm which form the analyses expect.
    data["Individual"] = data["Aviary"].map(str) + data["tag-local-identifier"].map(str)

    # Only keep the tag/day combinations that occur in the second file
    data = data[data["ID"].isin(data_s["ID"])]

    # Get the start and end of the daily flight
    data2 = data.groupby(["Day","tag-local-identifier","Individual","Aviary"],as_index=False).agg(
        Lat_first = ("location-lat",'first'),
        Lat_last = ("location-lat",'last'),
        Long_first = ("location-long",'first'),
        Long_last = ("location-long",'last')
    )

    # Get the distance (km) between the first location and the last location of the day
    data2["Distance"] = [
        geopy.distance.distance((lat_a, long_a), (lat_b, long_b)).km
        for lat_a, long_a, lat_b, long_b in zip(
            data2["Lat_first"], data2["Long_first"], data2["Lat_last"], data2["Long_last"]
        )
    ]

    # Save the data
    data2.to_csv(data_folder+file_name_out+"DailyDistance.csv")

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# DailyDist reads these module-level names: file_name_in_1 holds the fixes the
# distance is measured on, file_name_in_2 selects the tag/day combinations to
# keep, Aviary is written to the output
file_name_in_1 = "DataAff_TempWind_R.pkl"
file_name_in_2 = "DataAff_TempWind_S.pkl"
file_name_out = "DataAff"
Aviary="Affenberg"

# Execute the function and print the run time
start = datetime.datetime.now()
DailyDist()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# DailyDist reads these module-level names: file_name_in_1 holds the fixes the
# distance is measured on, file_name_in_2 selects the tag/day combinations to
# keep, Aviary is written to the output
file_name_in_1 = "DataCC_TempWind_R.pkl"
file_name_in_2 = "DataCC_TempWind_S.pkl"
file_name_out = "DataCC"
Aviary="CareCenter"

# Execute the function and print the run time
start = datetime.datetime.now()
DailyDist()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# DailyDist reads these module-level names: file_name_in_1 holds the fixes the
# distance is measured on, file_name_in_2 selects the tag/day combinations to
# keep, Aviary is written to the output
file_name_in_1 = "DataCASCB_TempWind_R.pkl"
file_name_in_2 = "DataCASCB_TempWind_S.pkl"
file_name_out = "DataCASCB"
Aviary = "CASCB"

# Execute the function and print the run time
start = datetime.datetime.now()
DailyDist()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack the per-study CSV files of one output type into a single CSV.

    Reads every file matching data_folder + "Data*" + pattern + ".csv" and
    writes the row-wise concatenation to data_folder + "All_" + pattern + ".csv".

    Parameters
    ----------
    pattern : str
        File-name suffix (without extension), e.g. "DailyDistance".

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # glob returns matches in arbitrary, file-system dependent order; sort them
    # so that the row order of the combined file is reproducible
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))

    # Fail with an explicit message rather than pandas' generic
    # "No objects to concatenate"
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files lazily and merge them together
    data = pd.concat(pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*DailyDistance.csv" into
# data_folder+"All_DailyDistance.csv"
start = datetime.datetime.now()
CombineAll(pattern="DailyDistance")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Route straightness

In [None]:
# Define a function
def _segment_path_km(lats, longs):
    """Return the summed geodesic distance (km) between consecutive fixes."""
    fixes = list(zip(lats, longs))
    return sum(geopy.distance.distance(a, b).km for a, b in zip(fixes, fixes[1:]))


def EfficiencySegment():
    """Compute the route straightness of every individual over its flight fixes.

    Straightness is the distance between the first and last flight fix divided
    by the summed distance between all consecutive flight fixes. Reads
    data_folder + file_name_in (pickle) and writes
    data_folder + file_name_out + "EfficiencySegment.csv"; both names are
    module-level variables set by the calling cell.
    """
    # Load data
    data = pd.read_pickle(data_folder+file_name_in)

    # Remove data that is not during flight. notna() is used instead of
    # np.isnan so that the filter also works when FlyingID is not stored as float
    data = data[data["FlyingID"].notna()]

    # Get the first and last flight location of every individual
    # ('first'/'last' follow row order, i.e. the order of the input file)
    data_dist = data.groupby(["tag-local-identifier","Individual","Aviary"],as_index=False).agg(
        Lat_first = ("location-lat",'first'),
        Lat_last = ("location-lat",'last'),
        Long_first = ("location-long",'first'),
        Long_last = ("location-long",'last')
    )

    #-------------------------------#
    #- Straight distance - segment -#
    #-------------------------------#

    # Calculate the distance between the first and last location for every individual
    data_dist["Dist_Segm"] = [
        geopy.distance.distance((lat_a, long_a), (lat_b, long_b)).km
        for lat_a, long_a, lat_b, long_b in zip(
            data_dist["Lat_first"], data_dist["Long_first"], data_dist["Lat_last"], data_dist["Long_last"]
        )
    ]

    #---------------------------------------#
    #- Cumulative distance - segment - all -#
    #---------------------------------------#

    # Calculate the cumulative distance for every tag using all flight fixes.
    # One groupby pass replaces re-filtering the whole data set per individual.
    path_km = {
        tag: _segment_path_km(fixes["location-lat"], fixes["location-long"])
        for tag, fixes in data.groupby("tag-local-identifier")
    }

    # Add the cumulative distances to the data
    data_dist["CumDist_Segm_All"] = [path_km[tag] for tag in data_dist["tag-local-identifier"]]

    # Calculate the straightness (NaN for an individual with a single fix,
    # where both distances are 0)
    data_dist["Straightness_Segm"] = data_dist["Dist_Segm"]/data_dist["CumDist_Segm_All"]

    # Save the data
    data_dist.to_csv(data_folder+file_name_out+"EfficiencySegment.csv")

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# EfficiencySegment reads file_name_in and file_name_out as module-level names
# and writes data_folder+file_name_out+"EfficiencySegment.csv"; it does not
# read Aviary
file_name_in = "DataAff_TempWind_S.pkl"
file_name_out = "DataAff"
Aviary="Affenberg"

# Execute the function and print the run time
start = datetime.datetime.now()
EfficiencySegment()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# EfficiencySegment reads file_name_in and file_name_out as module-level names
# and writes data_folder+file_name_out+"EfficiencySegment.csv"; it does not
# read Aviary
file_name_in = "DataCC_TempWind_S.pkl"
file_name_out = "DataCC"
Aviary="CareCenter"

# Execute the function and print the run time
start = datetime.datetime.now()
EfficiencySegment()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# EfficiencySegment reads file_name_in and file_name_out as module-level names
# and writes data_folder+file_name_out+"EfficiencySegment.csv"; it does not
# read Aviary
file_name_in = "DataCASCB_TempWind_S.pkl"
file_name_out = "DataCASCB"
Aviary = "CASCB"

# Execute the function and print the run time
start = datetime.datetime.now()
EfficiencySegment()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack the per-study CSV files of one output type into a single CSV.

    Reads every file matching data_folder + "Data*" + pattern + ".csv" and
    writes the row-wise concatenation to data_folder + "All_" + pattern + ".csv".

    Parameters
    ----------
    pattern : str
        File-name suffix (without extension), e.g. "EfficiencySegment".

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # glob returns matches in arbitrary, file-system dependent order; sort them
    # so that the row order of the combined file is reproducible
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))

    # Fail with an explicit message rather than pandas' generic
    # "No objects to concatenate"
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files lazily and merge them together
    data = pd.concat(pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*EfficiencySegment.csv" into
# data_folder+"All_EfficiencySegment.csv"
start = datetime.datetime.now()
CombineAll(pattern="EfficiencySegment")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Daily flight time

In [None]:
# Define a function that finds the start and end of migration flights

def FlightTime():
    """Summarise, per bird and day, the daily flight time and the day length.

    Reads data_folder + file_name_in (pickle) and writes
    data_folder + file_name_out + "DailyFlightTime.csv"; both names are
    module-level variables set by the calling cell.
    """
    fixes = pd.read_pickle(data_folder+file_name_in)

    # One row per bird and day: the first recorded start and end of the flight
    # and the mean day length
    group_keys = ["Day","tag-local-identifier","Individual","Aviary"]
    summary_spec = {
        "StartFlight": ("StartFlight",'first'),
        "EndFlight": ("EndFlight",'first'),
        "DayLength": ("DayLength",'mean'),
    }
    daily = fixes.groupby(group_keys,as_index=False).agg(**summary_spec)

    # Hours between the start and the end of the flight
    airborne = daily["EndFlight"] - daily["StartFlight"]

    # Add the flight time and divide the day length by 60 (the original
    # comment calls the result "day length in hours")
    daily = daily.assign(
        DailyFlightTime=airborne.dt.total_seconds()/3600,
        DayLength=daily["DayLength"]/60,
    )

    # Write the result
    daily.to_csv(data_folder+file_name_out+"DailyFlightTime.csv")

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# FlightTime reads both module-level names and writes
# data_folder+file_name_out+"DailyFlightTime.csv"
file_name_in = "DataAff_TempWind_S.pkl"
file_name_out = "DataAff"

# Execute the function and print the run time
start = datetime.datetime.now()
FlightTime()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# FlightTime reads both module-level names and writes
# data_folder+file_name_out+"DailyFlightTime.csv"
file_name_in = "DataCC_TempWind_S.pkl"
file_name_out = "DataCC"

# Execute the function and print the run time
start = datetime.datetime.now()
FlightTime()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# FlightTime reads both module-level names and writes
# data_folder+file_name_out+"DailyFlightTime.csv"
file_name_in = "DataCASCB_TempWind_S.pkl"
file_name_out = "DataCASCB"

# Execute the function and print the run time
start = datetime.datetime.now()
FlightTime()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack the per-study CSV files of one output type into a single CSV.

    Reads every file matching data_folder + "Data*" + pattern + ".csv" and
    writes the row-wise concatenation to data_folder + "All_" + pattern + ".csv".

    Parameters
    ----------
    pattern : str
        File-name suffix (without extension), e.g. "DailyFlightTime".

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # glob returns matches in arbitrary, file-system dependent order; sort them
    # so that the row order of the combined file is reproducible
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))

    # Fail with an explicit message rather than pandas' generic
    # "No objects to concatenate"
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files lazily and merge them together
    data = pd.concat(pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*DailyFlightTime.csv" into
# data_folder+"All_DailyFlightTime.csv"
start = datetime.datetime.now()
CombineAll(pattern="DailyFlightTime")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Daily cross country speed

In [None]:
# Define a function
def _daily_path_km(lats, longs):
    """Return the summed geodesic distance (km) between consecutive fixes."""
    fixes = list(zip(lats, longs))
    return sum(geopy.distance.distance(a, b).km for a, b in zip(fixes, fixes[1:]))


def CrossCountrySpeedDay():
    """Compute daily distances, duration and wind support for every bird and day.

    For each Day/tag/Individual/Aviary group the output holds the first and
    last position and timestamp, the straight-line and cumulative distance
    (km), the time difference (s), and wind speed, direction, support and
    crosswind derived from the mean U and V wind components at pressure level.

    Reads data_folder + file_name_in (pickle) and writes
    data_folder + file_name_out + "CrossCountrySpeedDay.csv"; both names are
    module-level variables set by the calling cell.
    """
    # Load data
    data = pd.read_pickle(data_folder+file_name_in)

    # Get a data frame with the first and last timestamps and locations for every day
    #    And with the average U and V wind component at pressure level
    data_2 = data.groupby(["Day","tag-local-identifier","Individual","Aviary"],as_index=False).agg(
        StartLat = ("location-lat",'first'),
        EndLat = ("location-lat",'last'),
        StartLong = ("location-long",'first'),
        EndLong = ("location-long",'last'),
        StartTime = ("timestamp",'first'),
        EndTime = ("timestamp",'last'),
        U_Wind_PL_Mean=("U_Wind_PL",'mean'),
        V_Wind_PL_Mean=("V_Wind_PL",'mean')
    )

    # Calculate the straight-line distance between the first and last location of each day
    data_2["StraightDist"] = [
        geopy.distance.distance((lat_a, long_a), (lat_b, long_b)).km
        for lat_a, long_a, lat_b, long_b in zip(
            data_2["StartLat"], data_2["StartLong"], data_2["EndLat"], data_2["EndLong"]
        )
    ]

    # Calculate the cumulative distance over all fixes of each day and tag.
    # One groupby pass replaces re-filtering the whole data set for every row.
    path_km = {
        key: _daily_path_km(fixes["location-lat"], fixes["location-long"])
        for key, fixes in data.groupby(["Day","tag-local-identifier"])
    }
    data_2["CumDist"] = [path_km[key] for key in zip(data_2["Day"], data_2["tag-local-identifier"])]

    # Calculate the time difference in seconds
    data_2["DiffTime"] = (data_2["EndTime"]-data_2["StartTime"]).dt.total_seconds()

    # Calculate wind support

    # Calculate wind speed and direction

    # The U component of the wind is the west-east component
    # The V component of the wind is the south-north component

    # Calculate horizontal windspeed
    data_2["WindSpeed_PL"] = (data_2["U_Wind_PL_Mean"]**2 + data_2["V_Wind_PL_Mean"]**2)**0.5

    # Calculate wind direction
    # Formula from https://www.eol.ucar.edu/content/wind-direction-quick-reference
    # arctan2 calculates the direction from which the wind comes. +180 -> wind direction
    data_2["WindDirection_PL"] = np.arctan2(-data_2["U_Wind_PL_Mean"],-data_2["V_Wind_PL_Mean"])*(180/np.pi)+180

    # Make all angles smaller than 360.
    # arctan2 returns values in [-180, 180] degrees, so after adding 180 no
    # angle should exceed 360; the branch is kept as a safeguard. It now
    # corrects the WindDirection_PL column of data_2 (the previous version
    # assigned whole rows into the unrelated frame `data`).
    too_large = data_2["WindDirection_PL"]>360
    if too_large.any():
        print("Some angles are larger than 360")
        data_2.loc[too_large,"WindDirection_PL"] -= 360

    # Calculate the overall heading of the stork from the difference in
    # longitude and latitude between the first and last location of the day
    X_diff = data_2["EndLong"]-data_2["StartLong"]
    Y_diff = data_2["EndLat"]-data_2["StartLat"]
    data_2["heading"] = np.arctan2(-X_diff,-Y_diff)*(180/np.pi)+180

    # Calculate windsupport and crosswind

    # Calculate the angle between the direction of the stork (heading) and the direction of the wind
    data_2["Angle_heading_WindPL"] = data_2["heading"] - data_2["WindDirection_PL"]

    # Determine the a-angle to do the calculations with
    data_2["Angle_a"] = abs(data_2["Angle_heading_WindPL"])

    # Calculate the w-angle
    data_2["Angle_w"] = 90-data_2["Angle_a"]

    # Calculate windsupport
    # Formula from https://doi.org/10.1186/2051-3933-1-4
    # Windsupport is in the direction of the heading
    data_2["WindSupport_PL"] = data_2["WindSpeed_PL"]*np.sin(data_2["Angle_w"]*np.pi/180)

    # Calculate crosswind
    # Formula from https://doi.org/10.1186/2051-3933-1-4
    data_2["CrossWind_PL"] = data_2["WindSpeed_PL"]*np.cos(data_2["Angle_w"]*np.pi/180)

    # Save the data
    data_2.to_csv(data_folder+file_name_out+"CrossCountrySpeedDay.csv")

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# CrossCountrySpeedDay reads both module-level names and writes
# data_folder+file_name_out+"CrossCountrySpeedDay.csv"
file_name_in = "DataAff_TempWind_S.pkl"
file_name_out = "DataAff"

# Execute the function and print the run time
start = datetime.datetime.now()
CrossCountrySpeedDay()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# CrossCountrySpeedDay reads both module-level names and writes
# data_folder+file_name_out+"CrossCountrySpeedDay.csv"
file_name_in = "DataCC_TempWind_S.pkl"
file_name_out = "DataCC"

# Execute the function and print the run time
start = datetime.datetime.now()
CrossCountrySpeedDay()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# CrossCountrySpeedDay reads both module-level names and writes
# data_folder+file_name_out+"CrossCountrySpeedDay.csv"
file_name_in = "DataCASCB_TempWind_S.pkl"
file_name_out = "DataCASCB"

# Execute the function and print the run time
start = datetime.datetime.now()
CrossCountrySpeedDay()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack the per-study CSV files of one output type into a single CSV.

    Reads every file matching data_folder + "Data*" + pattern + ".csv" and
    writes the row-wise concatenation to data_folder + "All_" + pattern + ".csv".

    Parameters
    ----------
    pattern : str
        File-name suffix (without extension), e.g. "CrossCountrySpeedDay".

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # glob returns matches in arbitrary, file-system dependent order; sort them
    # so that the row order of the combined file is reproducible
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))

    # Fail with an explicit message rather than pandas' generic
    # "No objects to concatenate"
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files lazily and merge them together
    data = pd.concat(pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*CrossCountrySpeedDay.csv" into
# data_folder+"All_CrossCountrySpeedDay.csv"
start = datetime.datetime.now()
CombineAll(pattern="CrossCountrySpeedDay")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Average climbing and gliding properties

In [None]:
# Define function
def _summarise_bursts(segments, rate_mean, rate_sd, speed_mean, speed_sd):
    """Aggregate flight fixes per burst; the four names label the rate/speed columns."""
    spec = {
        rate_mean: ("ClimbingRate",'mean'),
        "N_Locations": ("ClimbingRate",'count'),
        "Min_Timestamp": ("timestamp", 'min'),
        "Max_Timestamp": ("timestamp",'max'),
        rate_sd: ("ClimbingRate",'std'),
        speed_mean: ("ground-speed",'mean'),
        speed_sd: ("ground-speed",'std'),
        "Mean_TempSL": ("Temp_SL",'mean'),
        "SD_TempSL": ("Temp_SL",'std'),
        "Mean_Elevation": ("Elevation",'mean'),
        "SD_Elevation": ("Elevation",'std'),
        "Last_Elevation": ("Elevation",'last'),
        "First_Elevation": ("Elevation",'first'),
        "Mean_Altitude": ("Altitude",'mean'),
        "SD_Altitude": ("Altitude",'std'),
        "Last_Altitude": ("Altitude",'last'),
        "First_Altitude": ("Altitude",'first'),
        "Mean_WindSpeedPL": ("WindSpeed_PL",'mean'),
        "SD_WindSpeedPL": ("WindSpeed_PL",'std'),
        "Mean_WindSupportPL": ("WindSupport_PL",'mean'),
        "SD_WindSupportPL": ("WindSupport_PL",'std'),
        "Mean_AirSpeedPL": ("AirSpeed_PL",'mean'),
        "SD_AirSpeedPL": ("AirSpeed_PL",'std'),
        "Mean_Heading": ("heading",'mean'),
        "SD_Heading": ("heading",'std'),
        "Mean_WindDirectionPL": ("WindDirection_PL",'mean'),
        # Was 'mean' (a copy of the line above). The column name says SE, so the
        # standard error is computed.
        # NOTE(review): if a standard deviation was intended (as for the other
        # spread columns), use 'std' and consider renaming the column.
        "SE_WindDirectionPL": ("WindDirection_PL",'sem'),
    }
    return segments.groupby(
        ["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A"],as_index=False
    ).agg(**spec)


def CalculateAVGClimbingGliding():
    """Compute per-burst summary statistics for climbing and for gliding fixes.

    Rows with a ClimbingID go into the climbing table, rows with a GlidingID
    into the gliding table; both are summarised with the same statistics and
    differ only in the names of the rate and speed columns.

    Reads data_folder + file_name_in (pickle) and writes
    data_folder + file_name_out + "_avgClimbingB.csv" and "_avgGlidingB.csv";
    both names are module-level variables set by the calling cell.
    """
    #---------------------------------#
    #- Preparation of the data frame -#
    #---------------------------------#

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)

    #----------------#
    #- Calculations -#
    #----------------#

    # Calculate the averages per burst
    avg_ClimbingB = _summarise_bursts(
        data[~data.ClimbingID.isna()],
        "Mean_ClimbingRate", "SD_ClimbingRate", "Mean_ClimbingSpeed", "SD_ClimbingSpeed"
    )
    avg_GlidingB = _summarise_bursts(
        data[~data.GlidingID.isna()],
        "Mean_SinkingSpeed", "SD_SinkingSpeed", "Mean_GlidingSpeed", "SD_GlidingSpeed"
    )

    # Save the data
    avg_ClimbingB.to_csv(data_folder+file_name_out+"_avgClimbingB.csv")
    avg_GlidingB.to_csv(data_folder+file_name_out+"_avgGlidingB.csv")

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# CalculateAVGClimbingGliding reads both module-level names and writes
# file_name_out+"_avgClimbingB.csv" and file_name_out+"_avgGlidingB.csv"
file_name_in = "DataAff_TempWind_S.pkl"
file_name_out = "DataAff"

# Execute the function and print the run time
start = datetime.datetime.now()
CalculateAVGClimbingGliding()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# CalculateAVGClimbingGliding reads both module-level names and writes
# file_name_out+"_avgClimbingB.csv" and file_name_out+"_avgGlidingB.csv"
file_name_in = "DataCC_TempWind_S.pkl"
file_name_out = "DataCC"

# Execute the function and print the run time
start = datetime.datetime.now()
CalculateAVGClimbingGliding()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# CalculateAVGClimbingGliding reads both module-level names and writes
# file_name_out+"_avgClimbingB.csv" and file_name_out+"_avgGlidingB.csv"
file_name_in = "DataCASCB_TempWind_S.pkl"
file_name_out = "DataCASCB"

# Execute the function and print the run time
start = datetime.datetime.now()
CalculateAVGClimbingGliding()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Stack the per-study CSV files of one output type into a single CSV.

    Reads every file matching data_folder + "Data*" + pattern + ".csv" and
    writes the row-wise concatenation to data_folder + "All_" + pattern + ".csv".

    Parameters
    ----------
    pattern : str
        File-name suffix (without extension), e.g. "avgClimbingB".

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    # glob returns matches in arbitrary, file-system dependent order; sort them
    # so that the row order of the combined file is reproducible
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))

    # Fail with an explicit message rather than pandas' generic
    # "No objects to concatenate"
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load the data files of all studies lazily and merge them together
    data = pd.concat(pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*avgClimbingB.csv" into
# data_folder+"All_avgClimbingB.csv"
start = datetime.datetime.now()
CombineAll(pattern="avgClimbingB")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Execute the function and print the run time
# Combines every data_folder+"Data*avgGlidingB.csv" into
# data_folder+"All_avgGlidingB.csv"
start = datetime.datetime.now()
CombineAll(pattern="avgGlidingB")
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

### Altitude at which storks leave thermals

In [None]:
# Define function
def AltThermal():
    """Extract the altitude at the start and end of every climbing segment.

    Climbing segments that are still in progress when the burst ends are
    removed, because their last altitude is set by the end of the recording
    rather than by the climb itself.

    Reads data_folder + file_name_in (pickle) and writes
    data_folder + file_name_out + "_LastAltitude.csv"; both names are
    module-level variables set by the calling cell.
    """
    #---------------------------------#
    #- Preparation of the data frame -#
    #---------------------------------#

    # Load the data
    data = pd.read_pickle(data_folder+file_name_in)

    #----------------#
    #- Calculations -#
    #----------------#

    # Find the altitude at the start and end of climbing segments.
    # ClimbingID is a grouping key, so rows without a ClimbingID are dropped
    # by groupby and only climbing segments remain.
    last_Altitude = data.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A","ClimbingID"],as_index=False).agg(
        Min_Timestamp = ("timestamp",'min'),
        Max_Timestamp = ("timestamp",'max'),
        Last_Altitude = ("Altitude",'last'),
        First_Altitude = ("Altitude",'first')
    )

    # Get the time at the start and end of each burst
    # NOTE(review): this groups by BurstID only and therefore assumes BurstID is
    # unique across individuals and days -- confirm where BurstID is created.
    BurstStartEnds = data.groupby(["BurstID"],as_index=False).agg(
        StartBurst = ("timestamp",'min'),
        EndBurst = ("timestamp",'max')
    )

    # Add the start and end times of the burst to the data
    last_Altitude = last_Altitude.merge(BurstStartEnds,on=["BurstID"],how="left")

    # Remove the climbing segments that run until the end of the burst: only
    # keep segments that end at least 5 seconds before the burst does.
    # The previous condition compared against EndBurst + 5 s, which every
    # segment satisfies (a segment cannot end after its own burst), so nothing
    # was removed.
    # NOTE(review): this changes which segments are written -- confirm that
    # dropping truncated climbs is the intended behaviour.
    tolerance = datetime.timedelta(seconds=5)
    last_Altitude = last_Altitude[last_Altitude["Max_Timestamp"]<=(last_Altitude["EndBurst"]-tolerance)]

    #-------------#
    #- Save data -#
    #-------------#

    # Save the data
    last_Altitude.to_csv(data_folder+file_name_out+"_LastAltitude.csv")

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Define objects
# AltThermal reads both module-level names and writes
# data_folder+file_name_out+"_LastAltitude.csv"
file_name_in = "DataAff_TempWind_S.pkl"
file_name_out = "DataAff"

# Execute the function and print the run time
start = datetime.datetime.now()
AltThermal()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Define objects
# AltThermal reads both module-level names and writes
# data_folder+file_name_out+"_LastAltitude.csv"
file_name_in = "DataCC_TempWind_S.pkl"
file_name_out = "DataCC"

# Execute the function and print the run time
start = datetime.datetime.now()
AltThermal()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Define objects
# AltThermal reads both module-level names and writes
# data_folder+file_name_out+"_LastAltitude.csv"
file_name_in = "DataCASCB_TempWind_S.pkl"
file_name_out = "DataCASCB"

# Execute the function and print the run time
start = datetime.datetime.now()
AltThermal()
print(datetime.datetime.now())
print(datetime.datetime.now()-start)

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Concatenate the per-study "Data*<pattern>.csv" files into "All_<pattern>.csv".

    Files are read from and written to the global `data_folder`.

    Parameters
    ----------
    pattern : str
        Suffix (without ".csv") shared by the per-study files.

    Raises
    ------
    ValueError
        If no file matches "Data*<pattern>.csv".
    """

    # Find all file names; glob returns them in arbitrary order, so sort them
    # to get the same row order in the combined file on every machine
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files
    data_all = (pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Merge the files together
    data = pd.concat(data_all)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Combine the per-study files, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
CombineAll(pattern="LastAltitude")
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

## Additional calculations for supplementary information

### Migration timing west of Black Forest

In [None]:
# Write a function
def TimingCC():
    """Write the first timestamp per individual for storks of the selected aviaries.

    Reads the globals `data_folder`, `file_name_in`, `file_name_out` and `aviary`;
    the result is saved as a csv file at data_folder+file_name_out.
    """

    # Load data
    tracks = pd.read_pickle(data_folder+file_name_in)

    # Look up which tags belong to the requested aviaries according to Release_Death.csv
    release_info = pd.read_csv(data_folder + "Release_Death.csv",sep=",",low_memory=False)
    selected = release_info[release_info["Aviary"].isin(aviary)]
    tag_to_aviary = selected.set_index("tag.local.identifier")["Aviary"].to_dict()

    # Keep only the rows of those tags (all other tags map to NaN and are dropped)
    tracks["Aviary2"] = tracks["tag-local-identifier"].map(tag_to_aviary)
    tracks.dropna(subset=["Aviary2"],inplace=True)

    # Per individual, take the first timestamp in row order
    first_fix = tracks.groupby(["Aviary","tag-local-identifier","Individual"],as_index=False).agg(
        Segment = ("timestamp",'first')
    )

    # Save the data into a new csv file
    first_fix.to_csv(data_folder+file_name_out)

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input file, output file and aviaries to keep; TimingCC() reads these as globals
file_name_in, file_name_out = "DataCC_TempWind_S.pkl", "DataCC_MigTimingCC.csv"
aviary = ["CareCenter_2019","CareCenter_2020"]

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
TimingCC()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input file, output file and aviaries to keep; TimingCC() reads these as globals
file_name_in, file_name_out = "DataCASCB_TempWind_S.pkl", "DataCASCB_MigTimingCC.csv"
aviary = ["CASCB_West"]

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
TimingCC()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Concatenate the per-study "Data*<pattern>.csv" files into "All_<pattern>.csv".

    Files are read from and written to the global `data_folder`.

    Parameters
    ----------
    pattern : str
        Suffix (without ".csv") shared by the per-study files.

    Raises
    ------
    ValueError
        If no file matches "Data*<pattern>.csv".
    """

    # Find all file names; glob returns them in arbitrary order, so sort them
    # to get the same row order in the combined file on every machine
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files
    data_all = (pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Merge the files together
    data = pd.concat(data_all)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Combine the per-study files, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
CombineAll(pattern="MigTimingCC")
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Number of days between release and migration

In [None]:
# Write a function
def DaysRelMig():
    """Write, per individual, the time between release and the first timestamp in the data.

    Reads the globals `data_folder`, `file_name_in` and `file_name_out`. The output
    csv holds the release time ("Release"), the first timestamp in row order
    ("Segment"), their difference ("TimeDiff") and that difference rounded to
    whole days ("DayDiff").
    """

    # Load data
    data = pd.read_pickle(data_folder+file_name_in)

    # Per individual, get the release time and the first timestamp
    data = data.groupby(["Aviary","tag-local-identifier","Individual"],as_index=False).agg(
        Release = ("Release",'first'),
        Segment = ("timestamp",'first')
    )

    # Calculate the time difference, and the same difference rounded to whole days
    # ('D' is the canonical day alias; the lowercase 'd' is deprecated in recent pandas)
    data["TimeDiff"] = data["Segment"]-data["Release"]
    data["DayDiff"] = data["TimeDiff"].dt.round('D').dt.days

    # Save the data into a new csv file
    data.to_csv(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file names; DaysRelMig() reads these as globals
file_name_in, file_name_out = "DataAff_TempWind_S.pkl", "DataAff_DaysRelMig.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
DaysRelMig()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file names; DaysRelMig() reads these as globals
file_name_in, file_name_out = "DataCC_TempWind_S.pkl", "DataCC_DaysRelMig.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
DaysRelMig()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Concatenate the per-study "Data*<pattern>.csv" files into "All_<pattern>.csv".

    Files are read from and written to the global `data_folder`.

    Parameters
    ----------
    pattern : str
        Suffix (without ".csv") shared by the per-study files.

    Raises
    ------
    ValueError
        If no file matches "Data*<pattern>.csv".
    """

    # Find all file names; glob returns them in arbitrary order, so sort them
    # to get the same row order in the combined file on every machine
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files
    data_all = (pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Merge the files together
    data = pd.concat(data_all)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Combine the per-study files, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
CombineAll(pattern="DaysRelMig")
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Daily distance and southward displacement

In [None]:
# Write a function
def DailyDistDisp():
    """Write the daily distance and latitudinal displacement per stork and day.

    Reads the globals `data_folder`, `file_name_in` and `file_name_out`. For every
    (Aviary, Day, tag, Individual) group the first and last position in row order
    are taken, and three columns are derived from them:
    "Dist" (geodesic distance between the two positions, km), "Disp" (first minus
    last latitude, degrees) and "DistSouth" (geodesic distance between the two
    latitudes along the mean longitude, km).
    """

    # Load the data and reduce it to the first and last position per stork and day
    daily = pd.read_pickle(data_folder+file_name_in).groupby(
        ["Aviary","Day","tag-local-identifier","Individual"],as_index=False
    ).agg(
        FirstLat = ("location-lat",'first'),
        LastLat = ("location-lat",'last'),
        FirstLong = ("location-long",'first'),
        LastLong = ("location-long",'last')
    )

    # Daily distance: from the first to the last position of the day
    daily["Dist"] = [
        geopy.distance.distance((lat_a,long_a),(lat_b,long_b)).km
        for lat_a,long_a,lat_b,long_b in zip(daily["FirstLat"],daily["FirstLong"],daily["LastLat"],daily["LastLong"])
    ]

    # Daily latitudinal displacement (positive when the last position is further south)
    daily["Disp"] = daily["FirstLat"] - daily["LastLat"]

    # Daily north-south distance: both latitudes are placed on the mean longitude
    south_km = []
    for lat_a,long_a,lat_b,long_b in zip(daily["FirstLat"],daily["FirstLong"],daily["LastLat"],daily["LastLong"]):
        mid_long = (long_a + long_b)/2
        south_km.append(geopy.distance.distance((lat_a,mid_long),(lat_b,mid_long)).km)
    daily["DistSouth"] = south_km

    # Save the data into a new csv file
    daily.to_csv(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file names; DailyDistDisp() reads these as globals
file_name_in, file_name_out = "DataAff_TempWind_R.pkl", "DataAff_DailyDistDisp.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
DailyDistDisp()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file names; DailyDistDisp() reads these as globals
file_name_in, file_name_out = "DataCC_TempWind_R.pkl", "DataCC_DailyDistDisp.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
DailyDistDisp()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input and output file names; DailyDistDisp() reads these as globals
file_name_in, file_name_out = "DataCASCB_TempWind_R.pkl", "DataCASCB_DailyDistDisp.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
DailyDistDisp()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Concatenate the per-study "Data*<pattern>.csv" files into "All_<pattern>.csv".

    Files are read from and written to the global `data_folder`.

    Parameters
    ----------
    pattern : str
        Suffix (without ".csv") shared by the per-study files.

    Raises
    ------
    ValueError
        If no file matches "Data*<pattern>.csv".
    """

    # Find all file names; glob returns them in arbitrary order, so sort them
    # to get the same row order in the combined file on every machine
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files
    data_all = (pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Merge the files together
    data = pd.concat(data_all)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Combine the per-study files, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
CombineAll(pattern="DailyDistDisp")
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Thermalling conditions in Spain

In [None]:
# Most Affenberg storks stop around 41.7 degrees latitude

In [None]:
# Write a function
def ThermallingSpain():
    """Write the mean climbing rate per stork and migration day.

    Reads the globals `data_folder`, `file_name_in` and `file_name_out`.
    Only rows between StartFlight and EndFlight + 5 minutes are used, and only
    days on which the first and last of those positions are at least 50 km apart.
    Rows without a ClimbingID are excluded before summarising.
    """

    # Load data
    data = pd.read_pickle(data_folder+file_name_in)

    # Keep only data during flight (the main migration flight)
    in_flight = (
        data["StartFlight"].notna()
        & data["EndFlight"].notna()
        & (data["timestamp"]>=data["StartFlight"])
        & (data["timestamp"]<(data["EndFlight"]+datetime.timedelta(minutes=5)))
    )
    data = data[in_flight]

    # First and last position per stork and day. Named aggregation gives flat
    # columns with the group keys included in every pandas version, whereas
    # agg(['first','last']) with as_index=False changed behaviour between versions.
    Subset = data.groupby(["Day","tag-local-identifier"],as_index=False).agg(
        FirstLat = ("location-lat",'first'),
        FirstLong = ("location-long",'first'),
        LastLat = ("location-lat",'last'),
        LastLong = ("location-long",'last')
    )

    # Keep only migration days (>= 50 km between first and last position)
    Subset["Dist"] = [
        geopy.distance.distance((lat_a,long_a),(lat_b,long_b)).km
        for lat_a,long_a,lat_b,long_b in zip(Subset["FirstLat"],Subset["FirstLong"],Subset["LastLat"],Subset["LastLong"])
    ]
    Subset = Subset[Subset["Dist"]>=50]

    # An inner merge on the (day, tag) keys keeps exactly the rows of migration days
    # and preserves their order; the keys are unique in Subset, so no row is duplicated
    data = data.merge(Subset[["Day","tag-local-identifier"]],on=["Day","tag-local-identifier"],how="inner")

    # Summarise the data
    data = data[data["ClimbingID"].notna()]
    data = data.groupby(["Aviary","Day","tag-local-identifier","Individual"],as_index=False).agg(
        Min_Timestamp = ("timestamp",'min'),
        First_Latitude = ("location-lat",'first'),
        Mean_ClimbingRate = ("ClimbingRate",'mean')
    )

    # Save the data into a new csv file
    data.to_csv(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file names; ThermallingSpain() reads these as globals
file_name_in, file_name_out = "DataAff_TempWind_R.pkl", "DataAff_ThermallingSpain.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
ThermallingSpain()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file names; ThermallingSpain() reads these as globals
file_name_in, file_name_out = "DataCC_TempWind_R.pkl", "DataCC_ThermallingSpain.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
ThermallingSpain()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input and output file names; ThermallingSpain() reads these as globals
file_name_in, file_name_out = "DataCASCB_TempWind_R.pkl", "DataCASCB_ThermallingSpain.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
ThermallingSpain()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Concatenate the per-study "Data*<pattern>.csv" files into "All_<pattern>.csv".

    Files are read from and written to the global `data_folder`.

    Parameters
    ----------
    pattern : str
        Suffix (without ".csv") shared by the per-study files.

    Raises
    ------
    ValueError
        If no file matches "Data*<pattern>.csv".
    """

    # Find all file names; glob returns them in arbitrary order, so sort them
    # to get the same row order in the combined file on every machine
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files
    data_all = (pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Merge the files together
    data = pd.concat(data_all)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Combine the per-study files, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
CombineAll(pattern="ThermallingSpain")
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Write a function
def ThermallingSpain_burst():
    """Write the mean climbing rate per stork, migration day and burst.

    Reads the globals `data_folder`, `file_name_in` and `file_name_out`.
    Only rows between StartFlight and EndFlight + 5 minutes are used, and only
    days on which the first and last of those positions are at least 50 km apart.
    Rows without a ClimbingID are excluded before summarising per
    (Aviary, Day, tag, Individual, BurstID, Burst_A).
    """

    # Load data
    data = pd.read_pickle(data_folder+file_name_in)

    # Keep only data during flight (the main migration flight)
    in_flight = (
        data["StartFlight"].notna()
        & data["EndFlight"].notna()
        & (data["timestamp"]>=data["StartFlight"])
        & (data["timestamp"]<(data["EndFlight"]+datetime.timedelta(minutes=5)))
    )
    data = data[in_flight]

    # First and last position per stork and day. Named aggregation gives flat
    # columns with the group keys included in every pandas version, whereas
    # agg(['first','last']) with as_index=False changed behaviour between versions.
    Subset = data.groupby(["Day","tag-local-identifier"],as_index=False).agg(
        FirstLat = ("location-lat",'first'),
        FirstLong = ("location-long",'first'),
        LastLat = ("location-lat",'last'),
        LastLong = ("location-long",'last')
    )

    # Keep only migration days (>= 50 km between first and last position)
    Subset["Dist"] = [
        geopy.distance.distance((lat_a,long_a),(lat_b,long_b)).km
        for lat_a,long_a,lat_b,long_b in zip(Subset["FirstLat"],Subset["FirstLong"],Subset["LastLat"],Subset["LastLong"])
    ]
    Subset = Subset[Subset["Dist"]>=50]

    # An inner merge on the (day, tag) keys keeps exactly the rows of migration days
    # and preserves their order; the keys are unique in Subset, so no row is duplicated
    data = data.merge(Subset[["Day","tag-local-identifier"]],on=["Day","tag-local-identifier"],how="inner")

    # Summarise the data
    data = data[data["ClimbingID"].notna()]
    data = data.groupby(["Aviary","Day","tag-local-identifier","Individual","BurstID","Burst_A"],as_index=False).agg(
        Min_Timestamp = ("timestamp",'min'),
        First_Latitude = ("location-lat",'first'),
        Mean_ClimbingRate = ("ClimbingRate",'mean')
    )

    # Save the data into a new csv file
    data.to_csv(data_folder+file_name_out)

In [None]:
#=================#
#=== Affenberg ===#
#=================#

# Input and output file names; ThermallingSpain_burst() reads these as globals
file_name_in, file_name_out = "DataAff_TempWind_R.pkl", "DataAff_ThermalBurstSpain.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
ThermallingSpain_burst()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#==================#
#=== CareCenter ===#
#==================#

# Input and output file names; ThermallingSpain_burst() reads these as globals
file_name_in, file_name_out = "DataCC_TempWind_R.pkl", "DataCC_ThermalBurstSpain.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
ThermallingSpain_burst()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
#=============#
#=== CASCB ===#
#=============#

# Input and output file names; ThermallingSpain_burst() reads these as globals
file_name_in, file_name_out = "DataCASCB_TempWind_R.pkl", "DataCASCB_ThermalBurstSpain.csv"

# Run the step, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
ThermallingSpain_burst()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Combine the data for the different studies
def CombineAll(pattern):
    """Concatenate the per-study "Data*<pattern>.csv" files into "All_<pattern>.csv".

    Files are read from and written to the global `data_folder`.

    Parameters
    ----------
    pattern : str
        Suffix (without ".csv") shared by the per-study files.

    Raises
    ------
    ValueError
        If no file matches "Data*<pattern>.csv".
    """

    # Find all file names; glob returns them in arbitrary order, so sort them
    # to get the same row order in the combined file on every machine
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".csv"))
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".csv")

    # Load all data files
    data_all = (pd.read_csv(f,sep=',',low_memory=False) for f in all_files)

    # Merge the files together
    data = pd.concat(data_all)

    # Save the data into a new csv file
    data.to_csv(data_folder+"All_"+pattern+".csv")

In [None]:
# Combine the per-study files, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
CombineAll(pattern="ThermalBurstSpain")
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### Get data to annotate for general patterns

In [None]:
# Combine the data for the different studies
def CombineAll2(pattern):
    """Concatenate the per-study "Data*<pattern>.pkl" files into "All_<pattern>.pkl".

    Files are read from and written to the global `data_folder`.

    Parameters
    ----------
    pattern : str
        Suffix (without ".pkl") shared by the per-study pickle files.

    Raises
    ------
    ValueError
        If no file matches "Data*<pattern>.pkl".
    """

    # Find all file names; glob returns them in arbitrary order, so sort them
    # to get the same row order in the combined file on every machine
    all_files = sorted(glob.glob(data_folder+"Data"+"*"+pattern+".pkl"))
    if not all_files:
        raise ValueError("No files found matching "+data_folder+"Data*"+pattern+".pkl")

    # Load all data files
    data_all = (pd.read_pickle(f) for f in all_files)

    # Merge the files together
    data = pd.concat(data_all)

    # Save the data into a new pkl file
    data.to_pickle(data_folder+"All_"+pattern+".pkl")

In [None]:
# Combine the per-study pickle files, then print the finishing time and the elapsed run time
start = datetime.datetime.now()
CombineAll2(pattern="TempWind_S")
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Find the first and last date on which storks were in the segment
data_ = pd.read_pickle(data_folder+"All_TempWind_S.pkl")

# Print the first dates for 2019 and 2020, then the last dates for 2019 and 2020
for _label,_agg in (("First","min"),("Last","max")):
    for _year in (2019,2020):
        _stamps = data_[data_["StartYear"]==_year]["timestamp"]
        print(_label+" date "+str(_year)+": ",getattr(_stamps,_agg)())

In [None]:
# Find the first and last date on which storks from specific studies were in the segment
# (Affenberg: first dates for 2019 and 2020, then last dates for 2019 and 2020)
for _label,_agg in (("First","min"),("Last","max")):
    for _year in (2019,2020):
        _stamps = data_[(data_["StartYear"]==_year)&(data_["Aviary"]=="Affenberg")]["timestamp"]
        print(_label+" date "+str(_year)+" Aff: ",getattr(_stamps,_agg)())

In [None]:
# First and last timestamp of the CASCB storks with start year 2020
_cascb_2020 = data_[(data_["StartYear"]==2020)&(data_["Aviary"]=="CASCB")]["timestamp"]
print("First date 2020 CASCB: ",_cascb_2020.min())
print("Last date 2020 CASCB: ",_cascb_2020.max())

In [None]:
# Make files for 2019 and 2020 with the track of single storks within the segment for the date sequence between 29 July and 25 September

#-------------#
#- Affenberg -#
#-------------#

#-------------------------#
#--- 2019 - stork 7017 ---#
#-------------------------#

# Load data
data = pd.read_pickle(data_folder+"DataAff_TempWind_S.pkl")

# Subset the data to only contain one stork (copy, so that columns can be added safely)
data = data[data["tag-local-identifier"]==7017].copy()

# Remove burst data: keep only fixes recorded more than 1 s after the previous fix
# (the first fix has no previous fix, so its difference is NaN and it is dropped as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2019

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2019
DateSeq = pd.date_range(datetime.datetime(2019,7,29)-datetime.timedelta(days=1),datetime.datetime(2019,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many calendar days each fix lies after the day of the first fix.
# NOTE: whole elapsed 24-h periods (as used before) put a fix at 09:00 on the day after
# a 10:00 start on the start date itself, i.e. before the first fix.
first_day = data["timestamp"].min().normalize()
data["DaysDiff"] = (data["timestamp"].dt.normalize()-first_day).dt.days

# Keep the original timestamps and extract the time of day once (not once per row)
data["timestamp_O"] = data["timestamp"]
times_of_day = data["timestamp"].dt.time

# Make one copy of the track per start date, with the timestamps moved to that date
shifted_tracks = []
for start_date in DateSeq:
    new_timestamps = pd.to_datetime(pd.Series(
        [datetime.datetime.combine(start_date+datetime.timedelta(days=int(d)),t) for d,t in zip(data["DaysDiff"],times_of_day)],
        index=data.index
    ))
    shifted_tracks.append(data.assign(timestamp=new_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S.%f')))

# Combine the copies in one go (DataFrame.append was removed in pandas 2.0)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2019_stork7017.csv")

In [None]:
#-------------------------#
#--- 2020 - stork 7017 ---#
#-------------------------#

# Load data
data = pd.read_pickle(data_folder+"DataAff_TempWind_S.pkl")

# Subset the data to only contain one stork (copy, so that columns can be added safely)
data = data[data["tag-local-identifier"]==7017].copy()

# Remove burst data: keep only fixes recorded more than 1 s after the previous fix
# (the first fix has no previous fix, so its difference is NaN and it is dropped as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2020

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2020
DateSeq = pd.date_range(datetime.datetime(2020,7,29)-datetime.timedelta(days=1),datetime.datetime(2020,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many calendar days each fix lies after the day of the first fix.
# NOTE: whole elapsed 24-h periods (as used before) put a fix at 09:00 on the day after
# a 10:00 start on the start date itself, i.e. before the first fix.
first_day = data["timestamp"].min().normalize()
data["DaysDiff"] = (data["timestamp"].dt.normalize()-first_day).dt.days

# Keep the original timestamps and extract the time of day once (not once per row)
data["timestamp_O"] = data["timestamp"]
times_of_day = data["timestamp"].dt.time

# Make one copy of the track per start date, with the timestamps moved to that date
shifted_tracks = []
for start_date in DateSeq:
    new_timestamps = pd.to_datetime(pd.Series(
        [datetime.datetime.combine(start_date+datetime.timedelta(days=int(d)),t) for d,t in zip(data["DaysDiff"],times_of_day)],
        index=data.index
    ))
    shifted_tracks.append(data.assign(timestamp=new_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S.%f')))

# Combine the copies in one go (DataFrame.append was removed in pandas 2.0)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2020_stork7017.csv")

In [None]:
#-------------------------#
#--- 2019 - stork 7026 ---#
#-------------------------#

# Load data
data = pd.read_pickle(data_folder+"DataAff_TempWind_S.pkl")

# Subset the data to only contain one stork (copy, so that columns can be added safely)
data = data[data["tag-local-identifier"]==7026].copy()

# Remove burst data: keep only fixes recorded more than 1 s after the previous fix
# (the first fix has no previous fix, so its difference is NaN and it is dropped as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2019

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2019
DateSeq = pd.date_range(datetime.datetime(2019,7,29)-datetime.timedelta(days=1),datetime.datetime(2019,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many calendar days each fix lies after the day of the first fix.
# NOTE: whole elapsed 24-h periods (as used before) put a fix at 09:00 on the day after
# a 10:00 start on the start date itself, i.e. before the first fix.
first_day = data["timestamp"].min().normalize()
data["DaysDiff"] = (data["timestamp"].dt.normalize()-first_day).dt.days

# Keep the original timestamps and extract the time of day once (not once per row)
data["timestamp_O"] = data["timestamp"]
times_of_day = data["timestamp"].dt.time

# Make one copy of the track per start date, with the timestamps moved to that date
shifted_tracks = []
for start_date in DateSeq:
    new_timestamps = pd.to_datetime(pd.Series(
        [datetime.datetime.combine(start_date+datetime.timedelta(days=int(d)),t) for d,t in zip(data["DaysDiff"],times_of_day)],
        index=data.index
    ))
    shifted_tracks.append(data.assign(timestamp=new_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S.%f')))

# Combine the copies in one go (DataFrame.append was removed in pandas 2.0)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2019_stork7026.csv")

In [None]:
#-------------------------#
#--- 2020 - stork 7026 ---#
#-------------------------#

# Load data
data = pd.read_pickle(data_folder+"DataAff_TempWind_S.pkl")

# Subset the data to only contain one stork (copy, so that columns can be added safely)
data = data[data["tag-local-identifier"]==7026].copy()

# Remove burst data: keep only fixes recorded more than 1 s after the previous fix
# (the first fix has no previous fix, so its difference is NaN and it is dropped as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2020

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2020
DateSeq = pd.date_range(datetime.datetime(2020,7,29)-datetime.timedelta(days=1),datetime.datetime(2020,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many calendar days each fix lies after the day of the first fix.
# NOTE: whole elapsed 24-h periods (as used before) put a fix at 09:00 on the day after
# a 10:00 start on the start date itself, i.e. before the first fix.
first_day = data["timestamp"].min().normalize()
data["DaysDiff"] = (data["timestamp"].dt.normalize()-first_day).dt.days

# Keep the original timestamps and extract the time of day once (not once per row)
data["timestamp_O"] = data["timestamp"]
times_of_day = data["timestamp"].dt.time

# Make one copy of the track per start date, with the timestamps moved to that date
shifted_tracks = []
for start_date in DateSeq:
    new_timestamps = pd.to_datetime(pd.Series(
        [datetime.datetime.combine(start_date+datetime.timedelta(days=int(d)),t) for d,t in zip(data["DaysDiff"],times_of_day)],
        index=data.index
    ))
    shifted_tracks.append(data.assign(timestamp=new_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S.%f')))

# Combine the copies in one go (DataFrame.append was removed in pandas 2.0)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2020_stork7026.csv")

In [None]:
#-------------------------#
#--- 2019 - stork 7988 ---#
#-------------------------#

# Load data
data = pd.read_pickle(data_folder+"DataAff_TempWind_S.pkl")

# Subset the data to only contain one stork (copy, so that columns can be added safely)
data = data[data["tag-local-identifier"]==7988].copy()

# Remove burst data: keep only fixes recorded more than 1 s after the previous fix
# (the first fix has no previous fix, so its difference is NaN and it is dropped as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2019

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2019
DateSeq = pd.date_range(datetime.datetime(2019,7,29)-datetime.timedelta(days=1),datetime.datetime(2019,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many calendar days each fix lies after the day of the first fix.
# NOTE: whole elapsed 24-h periods (as used before) put a fix at 09:00 on the day after
# a 10:00 start on the start date itself, i.e. before the first fix.
first_day = data["timestamp"].min().normalize()
data["DaysDiff"] = (data["timestamp"].dt.normalize()-first_day).dt.days

# Keep the original timestamps and extract the time of day once (not once per row)
data["timestamp_O"] = data["timestamp"]
times_of_day = data["timestamp"].dt.time

# Make one copy of the track per start date, with the timestamps moved to that date
shifted_tracks = []
for start_date in DateSeq:
    new_timestamps = pd.to_datetime(pd.Series(
        [datetime.datetime.combine(start_date+datetime.timedelta(days=int(d)),t) for d,t in zip(data["DaysDiff"],times_of_day)],
        index=data.index
    ))
    shifted_tracks.append(data.assign(timestamp=new_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S.%f')))

# Combine the copies in one go (DataFrame.append was removed in pandas 2.0)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2019_stork7988.csv")

In [None]:
#-------------------------#
#--- 2020 - stork 7988 ---#
#-------------------------#

# Load data
data = pd.read_pickle(data_folder+"DataAff_TempWind_S.pkl")

# Subset the data to only contain one stork (copy, so that columns can be added safely)
data = data[data["tag-local-identifier"]==7988].copy()

# Remove burst data: keep only fixes recorded more than 1 s after the previous fix
# (the first fix has no previous fix, so its difference is NaN and it is dropped as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2020

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2020
DateSeq = pd.date_range(datetime.datetime(2020,7,29)-datetime.timedelta(days=1),datetime.datetime(2020,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many calendar days each fix lies after the day of the first fix.
# NOTE: whole elapsed 24-h periods (as used before) put a fix at 09:00 on the day after
# a 10:00 start on the start date itself, i.e. before the first fix.
first_day = data["timestamp"].min().normalize()
data["DaysDiff"] = (data["timestamp"].dt.normalize()-first_day).dt.days

# Keep the original timestamps and extract the time of day once (not once per row)
data["timestamp_O"] = data["timestamp"]
times_of_day = data["timestamp"].dt.time

# Make one copy of the track per start date, with the timestamps moved to that date
shifted_tracks = []
for start_date in DateSeq:
    new_timestamps = pd.to_datetime(pd.Series(
        [datetime.datetime.combine(start_date+datetime.timedelta(days=int(d)),t) for d,t in zip(data["DaysDiff"],times_of_day)],
        index=data.index
    ))
    shifted_tracks.append(data.assign(timestamp=new_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S.%f')))

# Combine the copies in one go (DataFrame.append was removed in pandas 2.0)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2020_stork7988.csv")

In [None]:
#---------#
#- CASCB -#
#---------#

#-------------------------#
#--- 2019 - stork 8030 ---#
#-------------------------#

# Load data
data = pd.read_pickle(data_folder+"DataCASCB_TempWind_S.pkl")

# Subset the data to only contain one stork (copy, so that columns can be added safely)
data = data[data["tag-local-identifier"]==8030].copy()

# Remove burst data: keep only fixes recorded more than 1 s after the previous fix
# (the first fix has no previous fix, so its difference is NaN and it is dropped as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2019

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2019
DateSeq = pd.date_range(datetime.datetime(2019,7,29)-datetime.timedelta(days=1),datetime.datetime(2019,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many calendar days each fix lies after the day of the first fix.
# NOTE: whole elapsed 24-h periods (as used before) put a fix at 09:00 on the day after
# a 10:00 start on the start date itself, i.e. before the first fix.
first_day = data["timestamp"].min().normalize()
data["DaysDiff"] = (data["timestamp"].dt.normalize()-first_day).dt.days

# Keep the original timestamps and extract the time of day once (not once per row)
data["timestamp_O"] = data["timestamp"]
times_of_day = data["timestamp"].dt.time

# Make one copy of the track per start date, with the timestamps moved to that date
shifted_tracks = []
for start_date in DateSeq:
    new_timestamps = pd.to_datetime(pd.Series(
        [datetime.datetime.combine(start_date+datetime.timedelta(days=int(d)),t) for d,t in zip(data["DaysDiff"],times_of_day)],
        index=data.index
    ))
    shifted_tracks.append(data.assign(timestamp=new_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S.%f')))

# Combine the copies in one go (DataFrame.append was removed in pandas 2.0)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2019_stork8030.csv")

In [None]:
#-------------------------#
#--- 2020 - stork 8030 ---#
#-------------------------#

# Load data
data = pd.read_pickle(data_folder+"DataCASCB_TempWind_S.pkl")

# Subset the data to only contain one stork (copy, so that columns can be added safely)
data = data[data["tag-local-identifier"]==8030].copy()

# Remove burst data: keep only fixes recorded more than 1 s after the previous fix
# (the first fix has no previous fix, so its difference is NaN and it is dropped as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2020

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2020
DateSeq = pd.date_range(datetime.datetime(2020,7,29)-datetime.timedelta(days=1),datetime.datetime(2020,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many calendar days each fix lies after the day of the first fix.
# NOTE: whole elapsed 24-h periods (as used before) put a fix at 09:00 on the day after
# a 10:00 start on the start date itself, i.e. before the first fix.
first_day = data["timestamp"].min().normalize()
data["DaysDiff"] = (data["timestamp"].dt.normalize()-first_day).dt.days

# Keep the original timestamps and extract the time of day once (not once per row)
data["timestamp_O"] = data["timestamp"]
times_of_day = data["timestamp"].dt.time

# Make one copy of the track per start date, with the timestamps moved to that date
shifted_tracks = []
for start_date in DateSeq:
    new_timestamps = pd.to_datetime(pd.Series(
        [datetime.datetime.combine(start_date+datetime.timedelta(days=int(d)),t) for d,t in zip(data["DaysDiff"],times_of_day)],
        index=data.index
    ))
    shifted_tracks.append(data.assign(timestamp=new_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S.%f')))

# Combine the copies in one go (DataFrame.append was removed in pandas 2.0)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2020_stork8030.csv")

In [None]:
#-------------------------#
#--- 2019 - stork 7999 ---#
#-------------------------#
# This cell repeats the track of one stork once for every start date in DateSeq,
# with the timestamps shifted to that start date, and saves the result as a csv file.

# Load data
data = pd.read_pickle(data_folder+"DataCASCB_TempWind_S.pkl")

# Subset the data to only contain one stork
# (copy, so that the column assignments below act on an independent data frame)
data = data[data["tag-local-identifier"]==7999].copy()

# Remove burst data: keep only fixes taken more than 1 second after the previous fix
# (the first fix has no previous fix, so it is removed as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2019

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2019
DateSeq = pd.date_range(datetime.datetime(2019,7,29)-datetime.timedelta(days=1),datetime.datetime(2019,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many whole days the timestamp is from the minimum timestamp
data["DaysDiff"] = (data["timestamp"] - data["timestamp"].min()).dt.days

# Keep the original timestamps in a separate column
data["timestamp_O"] = data["timestamp"]

# The day offsets and the times of the day are the same for every start date,
# so they are extracted only once instead of once per row
day_offsets = [datetime.timedelta(days=int(days)) for days in data["DaysDiff"]]
times_of_day = list(data["timestamp"].dt.time)

# Make one shifted copy of the track for every start date
shifted_tracks = []
for i in DateSeq:

    # Change the timestamp: start date + day offset, combined with the original time of the day
    shifted = data.copy()
    shifted["timestamp"] = [datetime.datetime.combine(i+offset,time_of_day) for offset,time_of_day in zip(day_offsets,times_of_day)]
    shifted["timestamp"] = shifted["timestamp"].dt.strftime('%Y-%m-%d %H:%M:%S.%f')

    # Collect the shifted copy
    shifted_tracks.append(shifted)

# Append all copies at once (DataFrame.append is deprecated and copies all data on every call)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2019_stork7999.csv")

In [None]:
#-------------------------#
#--- 2020 - stork 7999 ---#
#-------------------------#
# This cell repeats the track of one stork once for every start date in DateSeq,
# with the timestamps shifted to that start date, and saves the result as a csv file.

# Load data
data = pd.read_pickle(data_folder+"DataCASCB_TempWind_S.pkl")

# Subset the data to only contain one stork
# (copy, so that the column assignments below act on an independent data frame)
data = data[data["tag-local-identifier"]==7999].copy()

# Remove burst data: keep only fixes taken more than 1 second after the previous fix
# (the first fix has no previous fix, so it is removed as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2020

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2020
DateSeq = pd.date_range(datetime.datetime(2020,7,29)-datetime.timedelta(days=1),datetime.datetime(2020,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many whole days the timestamp is from the minimum timestamp
data["DaysDiff"] = (data["timestamp"] - data["timestamp"].min()).dt.days

# Keep the original timestamps in a separate column
data["timestamp_O"] = data["timestamp"]

# The day offsets and the times of the day are the same for every start date,
# so they are extracted only once instead of once per row
day_offsets = [datetime.timedelta(days=int(days)) for days in data["DaysDiff"]]
times_of_day = list(data["timestamp"].dt.time)

# Make one shifted copy of the track for every start date
shifted_tracks = []
for i in DateSeq:

    # Change the timestamp: start date + day offset, combined with the original time of the day
    shifted = data.copy()
    shifted["timestamp"] = [datetime.datetime.combine(i+offset,time_of_day) for offset,time_of_day in zip(day_offsets,times_of_day)]
    shifted["timestamp"] = shifted["timestamp"].dt.strftime('%Y-%m-%d %H:%M:%S.%f')

    # Collect the shifted copy
    shifted_tracks.append(shifted)

# Append all copies at once (DataFrame.append is deprecated and copies all data on every call)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2020_stork7999.csv")

In [None]:
#-------------------------#
#--- 2019 - stork 8046 ---#
#-------------------------#
# This cell repeats the track of one stork once for every start date in DateSeq,
# with the timestamps shifted to that start date, and saves the result as a csv file.

# Load data
data = pd.read_pickle(data_folder+"DataCASCB_TempWind_S.pkl")

# Subset the data to only contain one stork
# (copy, so that the column assignments below act on an independent data frame)
data = data[data["tag-local-identifier"]==8046].copy()

# Remove burst data: keep only fixes taken more than 1 second after the previous fix
# (the first fix has no previous fix, so it is removed as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2019

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2019
DateSeq = pd.date_range(datetime.datetime(2019,7,29)-datetime.timedelta(days=1),datetime.datetime(2019,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many whole days the timestamp is from the minimum timestamp
data["DaysDiff"] = (data["timestamp"] - data["timestamp"].min()).dt.days

# Keep the original timestamps in a separate column
data["timestamp_O"] = data["timestamp"]

# The day offsets and the times of the day are the same for every start date,
# so they are extracted only once instead of once per row
day_offsets = [datetime.timedelta(days=int(days)) for days in data["DaysDiff"]]
times_of_day = list(data["timestamp"].dt.time)

# Make one shifted copy of the track for every start date
shifted_tracks = []
for i in DateSeq:

    # Change the timestamp: start date + day offset, combined with the original time of the day
    shifted = data.copy()
    shifted["timestamp"] = [datetime.datetime.combine(i+offset,time_of_day) for offset,time_of_day in zip(day_offsets,times_of_day)]
    shifted["timestamp"] = shifted["timestamp"].dt.strftime('%Y-%m-%d %H:%M:%S.%f')

    # Collect the shifted copy
    shifted_tracks.append(shifted)

# Append all copies at once (DataFrame.append is deprecated and copies all data on every call)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2019_stork8046.csv")

In [None]:
#-------------------------#
#--- 2020 - stork 8046 ---#
#-------------------------#
# This cell repeats the track of one stork once for every start date in DateSeq,
# with the timestamps shifted to that start date, and saves the result as a csv file.

# Load data
data = pd.read_pickle(data_folder+"DataCASCB_TempWind_S.pkl")

# Subset the data to only contain one stork
# (copy, so that the column assignments below act on an independent data frame)
data = data[data["tag-local-identifier"]==8046].copy()

# Remove burst data: keep only fixes taken more than 1 second after the previous fix
# (the first fix has no previous fix, so it is removed as well)
data["TimeDifference"] = data["timestamp"].diff().dt.total_seconds()
data = data[data["TimeDifference"]>1]

# Remove unnecessary columns
data = data[["timestamp","location-long","location-lat","height-above-ellipsoid","Aviary","heading"]].copy()

# Add the year
data["Year"] = 2020

# Make a sequence of start dates: every day from one day before 29-7 until one day after 25-9 in 2020
DateSeq = pd.date_range(datetime.datetime(2020,7,29)-datetime.timedelta(days=1),datetime.datetime(2020,9,25)+datetime.timedelta(days=1),freq="D")

# Calculate how many whole days the timestamp is from the minimum timestamp
data["DaysDiff"] = (data["timestamp"] - data["timestamp"].min()).dt.days

# Keep the original timestamps in a separate column
data["timestamp_O"] = data["timestamp"]

# The day offsets and the times of the day are the same for every start date,
# so they are extracted only once instead of once per row
day_offsets = [datetime.timedelta(days=int(days)) for days in data["DaysDiff"]]
times_of_day = list(data["timestamp"].dt.time)

# Make one shifted copy of the track for every start date
shifted_tracks = []
for i in DateSeq:

    # Change the timestamp: start date + day offset, combined with the original time of the day
    shifted = data.copy()
    shifted["timestamp"] = [datetime.datetime.combine(i+offset,time_of_day) for offset,time_of_day in zip(day_offsets,times_of_day)]
    shifted["timestamp"] = shifted["timestamp"].dt.strftime('%Y-%m-%d %H:%M:%S.%f')

    # Collect the shifted copy
    shifted_tracks.append(shifted)

# Append all copies at once (DataFrame.append is deprecated and copies all data on every call)
data_append = pd.concat(shifted_tracks)

# Save the data
data_append.to_csv(data_folder+"TestWindDiff/TestWindDiff_2020_stork8046.csv")

### General wind pattern

In [None]:
def MergeWindData():
    """Merge all wind test files into one data frame and save it as a pkl file.

    The files are searched in the folder "TestWindDiff" inside data_folder and
    must be named "TestWindDiff_<4 digits>_stork<4 digits>.csv-<anything>.csv".
    """

    # Wildcard for the four digits of the year and of the stork tag
    four_digits = "[0-9]"*4

    # Build the search pattern and look up the matching file names
    pattern = data_folder+"TestWindDiff/TestWindDiff_"+four_digits+"_stork"+four_digits+".csv-"+"*"+".csv"
    file_names = glob.glob(pattern)

    # Read every file and stack them below each other
    frames = [pd.read_csv(file_name,sep=',',low_memory=False) for file_name in file_names]
    merged = pd.concat(frames)

    # Store the merged data in a new file
    merged.to_pickle(data_folder+"TestWindDiff/TestWindDiff_All.pkl")

In [None]:
# Run the merge of the wind files; afterwards print the finishing time and the run time
start = datetime.datetime.now()
MergeWindData()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
# Define function
def CalculateWind():
    """Add wind speed, wind direction and wind support to the merged wind data.

    Reads "TestWindDiff/TestWindDiff_All.pkl" from data_folder, renames the
    U and V wind columns, converts the timestamps and adds the columns
    WindSpeed_PL, WindDirection_PL, Angle_heading_WindPL, Angle_a, Angle_w and
    WindSupport_PL. The result overwrites the pkl file and is also saved as
    "TestWindDiff/TestWindDiff_All.csv".
    """

    # Load data
    data = pd.read_pickle(data_folder+"TestWindDiff/TestWindDiff_All.pkl")

    # Rename columns
    data.rename({
        "ECMWF ERA5 PL U Wind":"U_Wind_PL",
        "ECMWF ERA5 PL V Wind":"V_Wind_PL"
    }, axis=1, inplace=True)

    # Convert timestamps
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")

    #--------------------------------------#
    #- Calculate wind speed and direction -#
    #--------------------------------------#

    # The U component of the wind is the west-east component
    # The V component of the wind is the south-north component

    # Calculate horizontal windspeed
    data["WindSpeed_PL"] = (data["U_Wind_PL"]**2 + data["V_Wind_PL"]**2)**0.5

    # Calculate wind direction
    # Formula from https://www.eol.ucar.edu/content/wind-direction-quick-reference
    # arctan2 calculates the direction from which the wind comes. +180 -> wind direction
    data["WindDirection_PL"] = np.arctan2(-data["U_Wind_PL"],-data["V_Wind_PL"])*(180/np.pi)+180

    # Make all angles that are larger than 360 smaller than 360
    # Only the wind direction column is changed; subtracting 360 from the whole
    # row would also change the timestamps, the heading and the wind speeds
    too_large = data["WindDirection_PL"]>360
    if too_large.any():
        print("Some angles are larger than 360")
        data.loc[too_large,"WindDirection_PL"] = data.loc[too_large,"WindDirection_PL"] - 360

    #---------------------------------------#
    #- Calculate windsupport and crosswind -#
    #---------------------------------------#

    # Calculate the angle between the direction of the stork (heading) and the direction of the wind
    data["Angle_heading_WindPL"] = data["heading"] - data["WindDirection_PL"]

    # Determine the a-angle to do the calculations with
    data["Angle_a"] = abs(data["Angle_heading_WindPL"])

    # Calculate the w-angle
    data["Angle_w"] = 90-data["Angle_a"]

    # Calculate windsupport
    # Formula from https://doi.org/10.1186/2051-3933-1-4
    # Windsupport is in the direction of the heading
    # (positive when the wind blows along the heading, negative when it blows against it)
    data["WindSupport_PL"] = data["WindSpeed_PL"]*np.sin(data["Angle_w"]*np.pi/180)

    #-----------------#
    #- Save the data -#
    #-----------------#

    # Save the data into a new file
    data.to_pickle(data_folder+"TestWindDiff/TestWindDiff_All.pkl")
    data.to_csv(data_folder+"TestWindDiff/TestWindDiff_All.csv")

In [None]:
# Run the wind calculations; afterwards print the finishing time and the run time
start = datetime.datetime.now()
CalculateWind()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### General temperature pattern

In [None]:
def MergeTempData():
    """Merge all temperature test files into one data frame and save it as a pkl file.

    The files are searched in the folder "TestTempDiff" inside data_folder and
    must be named "TestTempDiff_<4 digits>_stork<4 digits>.csv-<anything>.csv".
    """

    # Wildcard for the four digits of the year and of the stork tag
    four_digits = "[0-9]"*4

    # Build the search pattern and look up the matching file names
    pattern = data_folder+"TestTempDiff/TestTempDiff_"+four_digits+"_stork"+four_digits+".csv-"+"*"+".csv"
    file_names = glob.glob(pattern)

    # Read every file and stack them below each other
    frames = [pd.read_csv(file_name,sep=',',low_memory=False) for file_name in file_names]
    merged = pd.concat(frames)

    # Store the merged data in a new file
    merged.to_pickle(data_folder+"TestTempDiff/TestTempDiff_All.pkl")

In [None]:
# Run the merge of the temperature files; afterwards print the finishing time and the run time
start = datetime.datetime.now()
MergeTempData()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
def CalcAvgTemp():
    """Calculate the average temperature per day and year for the merged temperature data.

    Reads "TestTempDiff/TestTempDiff_All.pkl" from data_folder, converts the
    timestamps, adds the column "Day" and saves the full (not averaged) data
    as "TestTempDiff/TestTempDiff_All.csv".

    Returns:
        Data frame with one row per combination of Day and Year and the
        columns Temp (mean temperature), FTS (first timestamp of the group)
        and YD (day of the year).
    """

    # Load data
    data = pd.read_pickle(data_folder+"TestTempDiff/TestTempDiff_All.pkl")

    # Transform timestamps and add the date without time
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")
    data["Day"] = data["timestamp"].dt.date

    # Calculate average temperatures per day
    data2 = data.groupby(["Day","Year"],as_index=False).agg(
        Temp = ("ECMWF ERA5 SL Temperature (2 m above Ground)",'mean'),
        FTS = ("timestamp",'first')
    )

    # Add day of the year
    data2["YD"] = data2["FTS"].dt.strftime("%j").astype(int)

    # Save the data into a new file
    # NOTE(review): this file contains the full data and not the daily averages;
    # confirm that this is what the scripts that read the file expect
    data.to_csv(data_folder+"TestTempDiff/TestTempDiff_All.csv")

    # Return the daily averages, so that they are not thrown away
    return data2

In [None]:
# Run the temperature calculations; afterwards print the finishing time and the run time
start = datetime.datetime.now()
CalcAvgTemp()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

### General pattern boundary layer height

In [None]:
def MergeBLHData():
    """Merge all boundary layer height test files into one data frame and save it as a pkl file.

    The files are searched in the folder "TestBLHDiff" inside data_folder and
    must be named "TestBLHDiff_<4 digits>_stork<4 digits>.csv-<anything>.csv".
    """

    # Wildcard for the four digits of the year and of the stork tag
    four_digits = "[0-9]"*4

    # Build the search pattern and look up the matching file names
    pattern = data_folder+"TestBLHDiff/TestBLHDiff_"+four_digits+"_stork"+four_digits+".csv-"+"*"+".csv"
    file_names = glob.glob(pattern)

    # Read every file and stack them below each other
    frames = [pd.read_csv(file_name,sep=',',low_memory=False) for file_name in file_names]
    merged = pd.concat(frames)

    # Store the merged data in a new file
    merged.to_pickle(data_folder+"TestBLHDiff/TestBLHDiff_All.pkl")

In [None]:
# Run the merge of the boundary layer height files; afterwards print the finishing time and the run time
start = datetime.datetime.now()
MergeBLHData()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")

In [None]:
def CalcAvgBLH():
    """Calculate the average boundary layer height per day and year for the merged data.

    Reads "TestBLHDiff/TestBLHDiff_All.pkl" from data_folder, converts the
    timestamps, adds the column "Day" and saves the full (not averaged) data
    as "TestBLHDiff/TestBLHDiff_All.csv".

    Returns:
        Data frame with one row per combination of Day and Year and the
        columns BLH (mean boundary layer height), FTS (first timestamp of the
        group) and YD (day of the year).
    """

    # Load data
    data = pd.read_pickle(data_folder+"TestBLHDiff/TestBLHDiff_All.pkl")

    # Transform timestamps and add the date without time
    data["timestamp"] = pd.to_datetime(data["timestamp"],format="%Y-%m-%d %H:%M:%S.%f")
    data["Day"] = data["timestamp"].dt.date

    # Calculate average boundary layer height per day
    data2 = data.groupby(["Day","Year"],as_index=False).agg(
        BLH = ("ECMWF ERA5 SL Boundary Layer Height",'mean'),
        FTS = ("timestamp",'first')
    )

    # Add day of the year
    data2["YD"] = data2["FTS"].dt.strftime("%j").astype(int)

    # Save the data into a new file
    # NOTE(review): this file contains the full data and not the daily averages;
    # confirm that this is what the scripts that read the file expect
    data.to_csv(data_folder+"TestBLHDiff/TestBLHDiff_All.csv")

    # Return the daily averages, so that they are not thrown away
    return data2

In [None]:
# Run the boundary layer height calculations; afterwards print the finishing time and the run time
start = datetime.datetime.now()
CalcAvgBLH()
print(datetime.datetime.now(), datetime.datetime.now()-start, sep="\n")