In [10]:
import numpy as np
import pandas as pd
from my_globals import *
import random

In [11]:
def invalid_animal_to_na(animal_string):
    """Return ``animal_string`` if it is listed in ``APENHEUL_ANIMALS``, else ``np.nan``."""
    return animal_string if animal_string in APENHEUL_ANIMALS else np.nan


def invalid_timeslot_to_na(timeslot_string):
    """Return ``timeslot_string`` if it is listed in ``TIMESLOTS``, else ``np.nan``."""
    return timeslot_string if timeslot_string in TIMESLOTS else np.nan


def min_to_sec(min):
    """Convert a "minutes.seconds" value to a number of seconds.

    The value is turned into text and split on the first ".": the part before
    it is read as whole minutes, the part after it as whole seconds
    (``"2.30"`` -> 150, ``"2.5"`` -> 125). Text without a "." is read as whole
    minutes. The string ``"nan"`` gives ``None``; any other value whose text
    is ``"nan"`` is returned unchanged.

    NOTE(review): a sheet value of 2.50 that arrives here as the float 2.5 is
    counted as 2 min 5 s, not 2 min 50 s - confirm the expected input format.
    """
    minutes_text, dot, seconds_text = str(min).partition(".")
    if dot:
        return int(minutes_text) * 60 + int(seconds_text)
    if minutes_text != "nan":
        return int(minutes_text) * 60
    return None if min == "nan" else min


def to_upper_but_fillna(s):
    """Upper-case ``s``, mapping the placeholder string ``"nan"`` to ``None``."""
    return None if s == "nan" else s.upper()


def calc_new_schedule(
    observations: pd.DataFrame, old_schedule: pd.DataFrame
) -> pd.DataFrame:
    """Subtract the observed time from the time remaining in the schedule.

    Observations are summed per Animal/Timeslot pair. Each sum is subtracted
    from the schedule row with the same pair; when several rows match, the one
    with the lowest time remaining is used. A row whose new time remaining is
    two minutes (120 s) or less is removed. A NaN result leaves the row as is.

    Args:
        observations: frame with the columns "Animal", "Timeslot" and
            "Time (s)".
        old_schedule: frame with the columns "ANIMAL", "TIMESLOT" and
            "TIME REMAINING". Rows are handled positionally: the first two
            columns are compared with the observation keys and the last
            column is updated. The frame passed in is not modified.

    Returns:
        A new DataFrame with the columns ``COLUMN_HEADERS`` holding the
        updated schedule.
    """
    min_time_remaining = 2 * 60  # seconds; rows at or below this are removed

    # aggregate Time (s) per Animal/Timeslot combination
    observed_totals = (
        observations.groupby(["Animal", "Timeslot"])["Time (s)"].sum().reset_index()
    )

    # merge duplicate ANIMAL/TIMESLOT rows by summing their TIME REMAINING
    if old_schedule.duplicated(subset=["ANIMAL", "TIMESLOT"]).any():
        print(
            f"Duplicate ANIMAL/TIMESLOT combinations were found in {CSV_SCHEDULE_NAME} and merged"
        )
        # work on a copy so the caller's DataFrame keeps its original column
        merged = old_schedule.copy()
        merged["TIME REMAINING"] = merged["TIME REMAINING"].astype(float).astype(int)
        old_schedule = (
            merged.groupby(["ANIMAL", "TIMESLOT"])["TIME REMAINING"]
            .sum()
            .reset_index()
        )

    # copy=True: the array is edited below and must never alias the input frame
    schedule_rows = old_schedule.to_numpy(copy=True)

    for obs in observed_totals.to_numpy():
        # positions of the schedule rows with this animal/timeslot combination
        matching_rows = np.flatnonzero(
            np.all(obs[:2] == schedule_rows[:, :2], axis=1)
        )
        if matching_rows.size == 0:
            # nothing to subtract from (e.g. the row was removed earlier)
            print(f"no schedule row found for observation {obs[:2]}, skipped")
            continue

        # first matching row with the lowest time remaining (column index 2)
        target = min(matching_rows, key=lambda row: schedule_rows[row, 2])

        new_time_remaining = schedule_rows[target, -1] - obs[-1]
        if new_time_remaining > min_time_remaining:
            schedule_rows[target, -1] = new_time_remaining
        elif new_time_remaining == new_time_remaining:  # False only for NaN
            print(
                f"removed {schedule_rows[target,:]}, as there is less than 2 minutes of observations time left"
            )
            schedule_rows = np.delete(schedule_rows, target, axis=0)

    return pd.DataFrame(schedule_rows, columns=COLUMN_HEADERS)


def get_indexes_for_timeslots():
    """Map every timeslot to a shuffled list of row indexes.

    The index for animal number ``a`` and timeslot number ``t`` is
    ``a * len(TIMESLOTS) + t``. Each timeslot's list is shuffled with
    ``random.shuffle``; no seed is set here, so the order differs between
    runs unless the caller seeds ``random``.

    Returns:
        dict keyed by the entries of ``TIMESLOTS``; each value is the
        shuffled list of indexes for that timeslot.
    """
    slot_count = len(TIMESLOTS)
    animal_count = len(APENHEUL_ANIMALS)

    res = {}
    for slot_number, timeslot in enumerate(TIMESLOTS):
        slot_indexes = [
            animal_number * slot_count + slot_number
            for animal_number in range(animal_count)
        ]
        random.shuffle(slot_indexes)
        res[timeslot] = slot_indexes
    return res

def reorder(list, index_list):
    """Return the items of ``list`` picked in the order given by ``index_list``."""
    return [list[position] for position in index_list]

In [12]:
# Load the starting schedule (';'-separated) and the observation sheet.
schedule = pd.read_csv(f"{CSV_INITIAL_SCHEDULE_NAME}", sep=";")

# Blank out unknown animals/timeslots, drop incomplete rows, keep columns 2-4
# and turn everything into text for the converters below.
# NOTE(review): dropna() runs before the column selection, so a missing value
# in any column of the sheet removes the row - confirm that is intended.
true_obs = (
    pd.read_excel(f"{XLSX_TRUE_OBS_NAME}")
    .assign(
        Animal=lambda sheet: sheet["Animal"].apply(invalid_animal_to_na),
        Timeslot=lambda sheet: sheet["Timeslot"].apply(invalid_timeslot_to_na),
    )
    .dropna()
    .iloc[:, 2:5]
    .astype(str)
)

# Upper-case the keys and convert the "minutes.seconds" text to whole seconds.
true_obs = true_obs.assign(
    **{
        "Animal": true_obs["Animal"].apply(to_upper_but_fillna),
        "Timeslot": true_obs["Timeslot"].apply(to_upper_but_fillna),
        "Time (s)": true_obs["Time (s)"]
        .apply(min_to_sec)
        .astype(float)
        .astype(int),
    }
)

In [13]:
# Subtract the observed time per Animal/Timeslot from the loaded schedule;
# calc_new_schedule removes rows that end up with two minutes or less.
new_schedule = calc_new_schedule(true_obs, schedule)

In [14]:
# Build the order in which schedule rows are visited: round after round,
# every timeslot contributes a group of NO_OBS_PER_TIMESLOT shuffled indexes.
# NOTE(review): the indexes assume new_schedule still has one row per
# animal/timeslot in the original order; rows removed by calc_new_schedule
# would shift that - confirm before relying on the result.
indexes = get_indexes_for_timeslots()

sorted_schedule = []
split_residuals = []
for idx_list in indexes.values():
    # number of groups per timeslot; at least one so a timeslot with fewer
    # than NO_OBS_PER_TIMESLOT indexes is kept instead of silently dropped
    split_length = max(1, int(len(idx_list) / NO_OBS_PER_TIMESLOT))
    # deal the indexes round-robin over the groups
    split_list = [idx_list[i::split_length] for i in range(split_length)]
    for group in split_list:
        # keep every surplus index of a group, not just the first one
        split_residuals += group[NO_OBS_PER_TIMESLOT:]
    sorted_schedule.append([group[:NO_OBS_PER_TIMESLOT] for group in split_list])

# shape: (timeslots, groups, NO_OBS_PER_TIMESLOT)
sorted_schedule = np.array(sorted_schedule)
# hstack puts group k of every timeslot side by side in row k
reshaped_simpler = np.hstack(sorted_schedule)
# flatten row by row; -1 lets numpy work out the length
reordering_index = reshaped_simpler.reshape(-1)

In [18]:
# Full visiting order: the grouped indexes followed by the residual ones, so
# the index list and the schedule rows are equally long.
reordering_index_list = reordering_index.tolist() + split_residuals
new_schedule_list = reorder(new_schedule.to_numpy().tolist(), reordering_index_list)

# remove number in front of timeslot code (keeps the text after the first '.')
for row in new_schedule_list:
    timeslot_parts = row[1].split('.')
    row[1] = timeslot_parts[1]
print(new_schedule_list)

[['TA', 'EM', 10800], ['MU', 'EM', 10800], ['BI', 'EM', 10800], ['SA', 'LM', 10800], ['KE', 'LM', 10800], ['FA', 'LM', 10800], ['SG', 'EA', 10800], ['HA', 'EA', 10800], ['KA', 'EA', 10800], ['FA', 'LA', 10800], ['TA', 'LA', 10800], ['KA', 'LA', 10800], ['SA', 'EM', 10800], ['SG', 'EM', 10800], ['AS', 'EM', 10800], ['TU', 'LM', 10800], ['SW', 'LM', 10800], ['NO', 'LM', 10800], ['SW', 'EA', 10800], ['BI', 'EA', 10800], ['AS', 'EA', 10800], ['TU', 'LA', 10800], ['SG', 'LA', 10800], ['AS', 'LA', 10800], ['KA', 'EM', 10800], ['TU', 'EM', 10800], ['HA', 'EM', 10800], ['TA', 'LM', 10800], ['MU', 'LM', 10800], ['BI', 'LM', 10800], ['MU', 'EA', 10800], ['FA', 'EA', 10800], ['KE', 'EA', 10800], ['NO', 'LA', 10800], ['MU', 'LA', 10800], ['SA', 'LA', 10800], ['SW', 'EM', 10800], ['KE', 'EM', 10800], ['NO', 'EM', 10800], ['SG', 'LM', 10800], ['HA', 'LM', 10800], ['KA', 'LM', 10800], ['TU', 'EA', 10800], ['NO', 'EA', 10800], ['SA', 'EA', 10800], ['HA', 'LA', 10800], ['BI', 'LA', 10800], ['KE', 'LA',

In [19]:
# Write the reordered schedule as ';'-separated text, one row per line.
# np.savetxt's default header is empty, so no header row is written.
# NOTE(review): the schedule loaded at the top is parsed by pd.read_csv with a
# header row - confirm whether this file is expected to carry one as well.
np.savetxt(f"{CSV_SCHEDULE_NAME}", new_schedule_list, delimiter=";", fmt="% s")