In [1]:
import pandas as pd
import math
import events
import os

MINUTES = 15
SRC_DIR = 'dataset/bob_all_processed_mins'
OUT_DIR = f'dataset/transformed_minutes/interval_{str(MINUTES)}m'
os.makedirs(OUT_DIR, exist_ok=True)
files = os.scandir(SRC_DIR)

In [2]:
files = os.scandir(SRC_DIR)

In [3]:
def has_event(record: pd.Series, e: str):
    event = f'{e}.next.dif'
    return not (record[event] == 'NA' or record[event] == '' or record[event] == None or math.isnan(record[event]))

def has_event_indexes(df: pd.DataFrame, e: str):
    event = f'{e}.next.dif'
    indexes = events.get_event_indexes(df, event)
    return len(indexes) > 0, indexes

def to_out_filename(name: str, queencell: bool, feeding: bool, honey: bool, treatment: bool, died: bool, swarming: bool):
    queencell = 'q' if queencell else ''
    feeding = 'f' if feeding else ''
    honey = 'h' if honey else ''
    treatment = 't' if treatment else ''
    died = 'd' if died else ''
    swarming = 's' if swarming else ''
    return f'{name.split(".")[0]}-{queencell}{feeding}{honey}{treatment}{died}{swarming}.csv'

In [4]:
for file in files:
    df = pd.read_csv(file.path, dtype={
        't_i_1': float,
        't_i_2': float,
        't_i_3': float,
        't_i_4': float,
        't_o': float,
        'weight_kg': float,
        "weight_delta": float,
        'numeric.time': float,
        'h': float,
        't': float,
        'p': float,
    }, low_memory=False, parse_dates=['time'], index_col='time', date_format='%Y-%m-%d %H:%M:%S')

    df = df.infer_objects(copy=False)

    
    print(f'Processing {file.name}:')
    
    has_queencell = events.populate_event_column(df, 'queencell')
    has_feeding = events.populate_event_column(df, 'feeding')
    has_honey = events.populate_event_column(df, 'honey')
    has_treatment = events.populate_event_column(df, 'treatment')
    has_died = events.populate_event_column(df, 'died')
    has_swarming = events.populate_event_column(df, 'swarming')
    
    print(f'  Queencell: {has_queencell}')
    print(f'  Feeding: {has_feeding}')
    print(f'  Honey: {has_honey}')
    print(f'  Treatment: {has_treatment}')
    print(f'  Died: {has_died}')
    print(f'  Swarming: {has_swarming}')
    
    # odf = pd.DataFrame(columns=columns)
    odf = df.resample(f'{MINUTES}min').agg({
        'X.1': 'first',
        'X': 'first',
        'key': 'first',

        't_i_1': 'mean',
        't_i_2': 'mean',
        't_i_3': 'mean',
        't_i_4': 'mean',
        't_i_5': 'mean',
        't_o': 'mean',
        
        'weight_kg': 'mean',
        
        'h': 'mean',
        't': 'mean',
        'p': 'mean',
        
        'year': 'first',
        'month': 'first',
        'day': 'first',
        'hour': 'first',
        'minute': 'first',
        
        'queencell.next.dif': 'first',
        'feeding.next.dif': 'first',
        'honey.next.dif': 'first',
        'treatment.next.dif': 'first',
        'died.next.dif': 'first',
        'swarming.next.dif': 'first',
        
        'swarming': 'max',
        'queencell': 'max',
        'feeding': 'max',
        'honey': 'max',
        'treatment': 'max',
        'died': 'max',
    })
    odf['time'] = odf.index
    
    out_filename = to_out_filename(file.name, has_queencell, has_feeding, has_honey, has_treatment, has_died, has_swarming)
    odf.to_csv(f'{OUT_DIR}/{out_filename}', index=True, index_label='time')

Processing 2020_79.csv:
  Queencell: False
  Feeding: True
  Honey: False
  Treatment: True
  Died: False
  Swarming: True
Processing 2022_97.csv:
  Queencell: False
  Feeding: False
  Honey: False
  Treatment: False
  Died: False
  Swarming: False
Processing 2022_152.csv:
  Queencell: False
  Feeding: False
  Honey: True
  Treatment: False
  Died: False
  Swarming: False
Processing 2022_82.csv:
  Queencell: False
  Feeding: False
  Honey: False
  Treatment: False
  Died: False
  Swarming: False
Processing 2022_96.csv:
  Queencell: False
  Feeding: False
  Honey: False
  Treatment: False
  Died: False
  Swarming: False
Processing 2022_69.csv:
  Queencell: False
  Feeding: False
  Honey: False
  Treatment: False
  Died: False
  Swarming: False
Processing 2020_87.csv:
  Queencell: True
  Feeding: False
  Honey: True
  Treatment: False
  Died: False
  Swarming: False
Processing 2020_93.csv:
  Queencell: False
  Feeding: False
  Honey: False
  Treatment: False
  Died: False
  Swarming: Fal