In [1]:
import os
import glob
import math
import pandas as pd
import numpy as np
import itertools
import random
import requests
import xml.etree.ElementTree as ET
import csv

import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.patches as patches

import tensorflow as tf
from tensorflow import keras
from keras import backend as K
from keras.utils import np_utils
from keras.utils.vis_utils import model_to_dot
from keras.callbacks import EarlyStopping
from IPython.display import SVG

#import tensorflow_probability as tfp

from sklearn.metrics import classification_report, confusion_matrix
from sklearn import preprocessing
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer, normalize
from sklearn.preprocessing import MinMaxScaler
import warnings

warnings.filterwarnings('ignore')

from ipywidgets import FloatProgress
from IPython.display import display

#fig_width = 12
# Global Matplotlib text defaults used by every figure in this notebook.
plt.rcParams.update({
    "font.size": 40,
    "axes.labelsize": 40,
    "axes.labelweight": "bold",
})
#plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
#plt.rcParams.keys()

In [2]:
from load_utils import time_change, setup_dir, find_xml_filenames, find_csv_filenames

In [3]:
# Root folders of the exported data, one per species; the cells below read
# their 'labels' and 'raw' subfolders.
omizu_path = r"G:\JaimeMorales\Codes\omizunagidori\export\Omizunagidori"
umineko_path = r"G:\JaimeMorales\Codes\omizunagidori\export\Umineko"
# Output locations: database root, label CSV folder, and one accelerometer
# folder per species (each holding one subfolder per year).
database_path = r"G:\JaimeMorales\Codes\omizunagidori\database"
database_l_path = r"G:\JaimeMorales\Codes\omizunagidori\database\labels"
database_o_acc_path = r"G:\JaimeMorales\Codes\omizunagidori\database\omizunagidori"
database_u_acc_path = r"G:\JaimeMorales\Codes\omizunagidori\database\umineko"

In [4]:
def _compact_timestamp(stamp):
    """Turn '2018-08-27T10:00:00.000Z' into '20180827 10:00:00.000'."""
    return stamp.replace('-', '').replace('T', ' ').replace('Z', '')


def make_labels(paths, label_wr_dir = r'G:\JaimeMorales\Codes\biodata\database\labels', fn_end = 17):
    """Convert XML label files into CSV label tables.

    For every XML file in ``paths`` a file ``<prefix>_labels.csv`` is written
    to ``label_wr_dir``, where ``<prefix>`` is the first ``fn_end`` characters
    of the XML file name. Each ``labellist`` element found in the XML becomes
    one row built from the text of its first three children, written under
    the header ``event_type,start,end``. In the two timestamp fields the
    characters '-' and 'Z' are removed and 'T' is replaced by a space.

    Parameters
    ----------
    paths : iterable of str
        Paths of the XML label files to convert.
    label_wr_dir : str
        Existing folder that receives the CSV files.
    fn_end : int
        Number of leading characters of the XML file name kept as the prefix
        of the CSV file name.

    Returns
    -------
    None
        The function only writes files and prints one line per file.
    """
    header = ["event_type", "start", "end"]

    for label_path in paths:
        label_fn = os.path.basename(label_path)
        filename = os.path.join(label_wr_dir, label_fn[:fn_end] + '_labels.csv')
        # Parse before opening the output so a broken XML does not leave an
        # empty CSV behind.
        root = ET.parse(label_path).getroot()

        # newline='' hands line-ending control to the csv module; without it
        # text mode on Windows turns the writer's '\r\n' into '\r\r\n', which
        # shows up as an empty row after every record.
        with open(filename, "w", newline='') as f:
            csv_writer = csv.writer(f)
            csv_writer.writerow(header)
            for labellist in root.iter("labellist"):
                csv_writer.writerow([
                    labellist[0].text,
                    _compact_timestamp(labellist[1].text),
                    _compact_timestamp(labellist[2].text),
                ])

        print('created labels for >>> ',filename)

    return

# Create label files

In [5]:
# Export the Omizunagidori XML annotations as CSV label tables.
# fn_end=17 keeps a prefix as long as 'Omizunagidori2018' for the output name.
label_path_o = os.path.join(omizu_path, 'labels')
label_paths_o = find_xml_filenames(label_path_o)
make_labels(paths=label_paths_o, label_wr_dir=database_l_path, fn_end=17)

created labels for >>>  G:\JaimeMorales\Codes\omizunagidori\database\labels\Omizunagidori2018_labels.csv
created labels for >>>  G:\JaimeMorales\Codes\omizunagidori\database\labels\Omizunagidori2019_labels.csv
created labels for >>>  G:\JaimeMorales\Codes\omizunagidori\database\labels\Omizunagidori2020_labels.csv
created labels for >>>  G:\JaimeMorales\Codes\omizunagidori\database\labels\Omizunagidori2021_labels.csv
created labels for >>>  G:\JaimeMorales\Codes\omizunagidori\database\labels\Omizunagidori2022_labels.csv


In [6]:
# Export the Umineko XML annotations as CSV label tables.
# fn_end=11 keeps a prefix as long as 'Umineko2018' for the output name.
label_path_u = os.path.join(umineko_path, 'labels')
label_paths_u = find_xml_filenames(label_path_u)
make_labels(paths=label_paths_u, label_wr_dir=database_l_path, fn_end=11)

created labels for >>>  G:\JaimeMorales\Codes\omizunagidori\database\labels\Umineko2018_labels.csv
created labels for >>>  G:\JaimeMorales\Codes\omizunagidori\database\labels\Umineko2019_labels.csv
created labels for >>>  G:\JaimeMorales\Codes\omizunagidori\database\labels\Umineko2022_labels.csv


# Separate sensors

## Omizunagidori filenames

### 2018 file names

In [7]:
# Omizunagidori 2018: raw exports to read and the folder the per-bird tables go to.
# find_csv_filenames comes from load_utils; presumably it returns the raw CSV
# paths matching the given year - TODO confirm.
O_Y18_raw_path = os.path.join(omizu_path, 'raw')
O_Y18_raw_paths = find_csv_filenames(O_Y18_raw_path, suffix=".csv", year='2018')
O_Y18_save_folder = os.path.join(database_o_acc_path, '2018')

### 2019 file names

In [8]:
# Omizunagidori 2019: raw exports to read and the folder the per-bird tables go to.
O_Y19_raw_path = os.path.join(omizu_path, 'raw')
O_Y19_raw_paths = find_csv_filenames(O_Y19_raw_path, suffix=".csv", year='2019')
O_Y19_save_folder = os.path.join(database_o_acc_path, '2019')

### 2020 file names

In [9]:
# Omizunagidori 2020: raw exports to read and the folder the per-bird tables go to.
O_Y20_raw_path = os.path.join(omizu_path, 'raw')
O_Y20_raw_paths = find_csv_filenames(O_Y20_raw_path, suffix=".csv", year='2020')
O_Y20_save_folder = os.path.join(database_o_acc_path, '2020')

### 2021 file names

In [10]:
# Omizunagidori 2021: raw exports to read and the folder the per-bird tables go to.
O_Y21_raw_path = os.path.join(omizu_path, 'raw')
O_Y21_raw_paths = find_csv_filenames(O_Y21_raw_path, suffix=".csv", year='2021')
O_Y21_save_folder = os.path.join(database_o_acc_path, '2021')

### 2022 file names

In [11]:
# Omizunagidori 2022: raw exports to read and the folder the per-bird tables go to.
O_Y22_raw_path = os.path.join(omizu_path, 'raw')
O_Y22_raw_paths = find_csv_filenames(O_Y22_raw_path, suffix=".csv", year='2022')
O_Y22_save_folder = os.path.join(database_o_acc_path, '2022')

### ALL Omizunagidori files

In [12]:
# Parallel lists: entry i of each list belongs to the same year (2018 ... 2022).
O_all_save_folder = [
    O_Y18_save_folder,
    O_Y19_save_folder,
    O_Y20_save_folder,
    O_Y21_save_folder,
    O_Y22_save_folder,
]
O_all_raw_paths = [
    O_Y18_raw_paths,
    O_Y19_raw_paths,
    O_Y20_raw_paths,
    O_Y21_raw_paths,
    O_Y22_raw_paths,
]

## Umineko filenames

### 2018 file names

In [13]:
# Umineko 2018: raw exports to read and the folder the per-bird tables go to.
U_Y18_raw_path = os.path.join(umineko_path, 'raw')
U_Y18_raw_paths = find_csv_filenames(U_Y18_raw_path, suffix=".csv", year='2018')
U_Y18_save_folder = os.path.join(database_u_acc_path, '2018')

### 2019 file names

In [14]:
# Umineko 2019: raw exports to read and the folder the per-bird tables go to.
U_Y19_raw_path = os.path.join(umineko_path, 'raw')
U_Y19_raw_paths = find_csv_filenames(U_Y19_raw_path, suffix=".csv", year='2019')
U_Y19_save_folder = os.path.join(database_u_acc_path, '2019')

### 2022 file names

In [15]:
# Umineko 2022: raw exports to read and the folder the per-bird tables go to.
U_Y22_raw_path = os.path.join(umineko_path, 'raw')
U_Y22_raw_paths = find_csv_filenames(U_Y22_raw_path, suffix=".csv", year='2022')
U_Y22_save_folder = os.path.join(database_u_acc_path, '2022')

### All Umineko files

In [16]:
# Parallel lists: entry i of each list belongs to the same year (2018, 2019, 2022).
U_all_save_folder = [
    U_Y18_save_folder,
    U_Y19_save_folder,
    U_Y22_save_folder,
]
U_all_raw_paths = [
    U_Y18_raw_paths,
    U_Y19_raw_paths,
    U_Y22_raw_paths,
]

# Sensor specific dataframe creation

In [17]:
def separate_by_sensor(filename, save_folder, sensor='acc', time_format="%Y%m%d_%H:%M:%S.%f"):
    """Extract one bird's sensor table from a raw export and save it as CSV.

    The raw CSV is read, its ``timestamp`` column is converted with
    ``time_format``, and (for ``sensor == 'acc'``) the logger, GPS, gyroscope,
    magnetometer, illumination, pressure and temperature columns are dropped.
    The result is written to ``<save_folder>/<name>_acc.csv``, read back, and
    passed through ``time_change`` (imported from ``load_utils``; its effect
    is not visible in this notebook).

    Parameters
    ----------
    filename : str
        Path of the raw CSV. Characters ``[-11:-4]`` of the path, with '_'
        and '00' removed, become the output name (e.g. '_lb0001' -> 'lb01').
    save_folder : str
        Folder that receives the per-bird CSV.
    sensor : str
        Only 'acc' triggers column removal; any other value keeps all
        columns. The output file is suffixed '_acc.csv' in both cases.
    time_format : str
        Format handed to ``pd.to_datetime`` for the ``timestamp`` column.

    Returns
    -------
    Whatever ``time_change`` returns for the re-read table.
    """
    non_acc_columns = [
        'logger_id', 'latitude', 'longitude', 'gps_status',
        'gyro_x', 'gyro_y', 'gyro_z',
        'mag_x', 'mag_y', 'mag_z',
        'illumination', 'pressure', 'temperature',
    ]

    raw_df = pd.read_csv(filename, parse_dates=["timestamp"])
    raw_df["timestamp"] = pd.to_datetime(raw_df["timestamp"], format=time_format)

    sensor_df = raw_df.drop(non_acc_columns, axis=1) if sensor == 'acc' else raw_df

    bird_name = filename[-11:-4].replace('_', '').replace('00', '')
    save_name = os.path.join(save_folder, bird_name + '_acc.csv')
    sensor_df.to_csv(save_name, index=False)

    # The table is deliberately re-read from disk before the time conversion,
    # so time_change sees exactly what was stored.
    reloaded_df = pd.read_csv(save_name, parse_dates=["timestamp"])
    return time_change(reloaded_df)
    
def join_by_year(read_dir):
    """Merge all per-bird CSV files of one year folder into a single table.

    Every ``.csv`` file in ``read_dir`` is read (``timestamp`` parsed as a
    date) and concatenated, except the files this pipeline itself generates
    in that folder: the merged table and label list written here, and the
    ``l_<year>_acc.csv`` file written by ``divide_years``. Skipping them
    keeps the function re-runnable: previously a second run re-read its own
    outputs, which duplicated rows or failed on the label file that has no
    ``timestamp`` column.

    The ``activity_class`` column and all rows containing a missing value
    are dropped, the index is renumbered, and two files are written to
    ``read_dir`` (``<year>`` is the last component of ``read_dir``):

    * ``all_bird_df_Y<year>_WL.csv`` - the merged table
    * ``label_df_Y<year>.csv`` - the distinct values of ``label`` in order
      of first appearance

    Parameters
    ----------
    read_dir : str
        Year folder holding the per-bird CSV files.

    Returns
    -------
    pandas.DataFrame
        The merged, cleaned table.

    Raises
    ------
    ValueError
        If ``read_dir`` contains no per-bird CSV file.
    """
    year = os.path.split(read_dir)[-1]
    merged_name = 'all_bird_df_Y' + year + '_WL.csv'
    labels_name = 'label_df_Y' + year + '.csv'
    generated = {merged_name, labels_name, 'l_' + year + '_acc.csv'}

    all_bird_lst = []
    # os.listdir returns names in arbitrary order; sorting fixes the row
    # order and therefore the order in which labels first appear.
    for file in sorted(os.listdir(read_dir)):
        if not file.endswith(".csv") or file in generated:
            continue
        path = os.path.join(read_dir, file)
        print(path)
        all_bird_lst.append(pd.read_csv(path, parse_dates=["timestamp"]))

    if not all_bird_lst:
        raise ValueError('no per-bird CSV files found in ' + read_dir)

    all_bird_df = pd.concat(all_bird_lst)

    birds = list(all_bird_df.drop_duplicates(subset=['animal_tag'], keep='first')['animal_tag'])
    print(birds)

    all_bird_df = (
        all_bird_df
        .drop(['activity_class'], axis=1)
        .dropna()
        .reset_index(drop=True)
    )
    all_bird_df.to_csv(os.path.join(read_dir, merged_name), index=False)

    labels_df = all_bird_df.drop_duplicates(subset=['label'], keep='first')['label']
    print('labels' + year + ':')
    print(list(labels_df))
    labels_df.to_csv(os.path.join(read_dir, labels_name), index=False)

    return all_bird_df

## Create acc files - OMIZUNAGIDORI

In [18]:
# Build the per-bird accelerometer tables for every Omizunagidori year, merge
# each year folder, then store one combined time-converted table and the list
# of distinct labels.
O_all_bird_Y_df_l = []
year_df_t_l = []

for i, save_folder in enumerate(O_all_save_folder):
    bird_df_t_l = []
    for raw_path in O_all_raw_paths[i]:
        print('Acc sensor df from: ', raw_path)
        print('Saved to: ', save_folder)
        bird_acc_df = separate_by_sensor(raw_path, save_folder)
        bird_df_t_l.append(bird_acc_df)
    year_df_t = pd.concat(bird_df_t_l)
    year_df_t_l.append(year_df_t)
    all_bird = join_by_year(save_folder)
    O_all_bird_Y_df_l.append(all_bird)

# Combined time-converted table of all years, without activity_class and
# without incomplete rows.
Omizu_all_t_df = (
    pd.concat(year_df_t_l)
    .drop(columns=['activity_class'])
    .dropna()
)
Omizu_all_t_df.to_csv(os.path.join(database_o_acc_path, 'Omizu_all_t_df.csv'), index=False)

# Distinct labels over all years, in order of first appearance.
Omizu_all_df = pd.concat(O_all_bird_Y_df_l)
O_labels_df = (
    Omizu_all_df
    .drop_duplicates(subset=['label'], keep='first')[['label']]
    .reset_index(drop=True)
)
O_labels_df.to_csv(os.path.join(database_l_path, 'O_labels_df.csv'), index=False)

Acc sensor df from:  G:\JaimeMorales\Codes\omizunagidori\export\Omizunagidori\raw\Omizunagidori2018_raw_data_9B16995_lb0001.csv
Saved to:  G:\JaimeMorales\Codes\omizunagidori\database\omizunagidori\2018
Acc sensor df from:  G:\JaimeMorales\Codes\omizunagidori\export\Omizunagidori\raw\Omizunagidori2018_raw_data_9B24590_lb0002.csv
Saved to:  G:\JaimeMorales\Codes\omizunagidori\database\omizunagidori\2018
Acc sensor df from:  G:\JaimeMorales\Codes\omizunagidori\export\Omizunagidori\raw\Omizunagidori2018_raw_data_9B34075_lb0003.csv
Saved to:  G:\JaimeMorales\Codes\omizunagidori\database\omizunagidori\2018
Acc sensor df from:  G:\JaimeMorales\Codes\omizunagidori\export\Omizunagidori\raw\Omizunagidori2018_raw_data_9B36347_lb0004.csv
Saved to:  G:\JaimeMorales\Codes\omizunagidori\database\omizunagidori\2018
Acc sensor df from:  G:\JaimeMorales\Codes\omizunagidori\export\Omizunagidori\raw\Omizunagidori2018_raw_data_9B36360_lb0005.csv
Saved to:  G:\JaimeMorales\Codes\omizunagidori\database\omiz

## Create acc files - UMINEKO

In [None]:
# Build the per-bird accelerometer tables for every Umineko year, merge each
# year folder, then store one combined time-converted table and the list of
# distinct labels.
U_all_bird_Y_df_l = []
year_df_t_l = []

for i, save_folder in enumerate(U_all_save_folder):
    bird_df_t_l = []
    for raw_path in U_all_raw_paths[i]:
        print('Acc sensor df from: ', raw_path)
        print('Saved to: ', save_folder)
        bird_acc_df = separate_by_sensor(raw_path, save_folder)
        bird_df_t_l.append(bird_acc_df)
    year_df_t = pd.concat(bird_df_t_l)
    year_df_t_l.append(year_df_t)
    all_bird = join_by_year(save_folder)
    U_all_bird_Y_df_l.append(all_bird)

# Combined time-converted table of all years, without activity_class and
# without incomplete rows.
Umineko_all_t_df = (
    pd.concat(year_df_t_l)
    .drop(columns=['activity_class'])
    .dropna()
)
Umineko_all_t_df.to_csv(os.path.join(database_u_acc_path, 'Umineko_all_t_df.csv'), index=False)

# Distinct labels over all years, in order of first appearance.
umineko_all_df = pd.concat(U_all_bird_Y_df_l)
U_labels_df = (
    umineko_all_df
    .drop_duplicates(subset=['label'], keep='first')[['label']]
    .reset_index(drop=True)
)
U_labels_df.to_csv(os.path.join(database_l_path, 'U_labels_df.csv'), index=False)

# Assign labels, separate years, separate birds

## Year times

In [None]:
def num_labels(acc_df,lab):
    """Return a copy of ``acc_df`` with a numeric label column ``l_val``.

    Each row receives the position of its ``label`` value within
    ``lab.label`` (0, 1, 2, ...); rows whose label does not occur there get
    100. If a label occurs several times in ``lab.label`` the last position
    wins. As before, every remaining missing value in the returned table is
    replaced by 100.

    The input table is no longer modified: the earlier version added
    ``l_val`` to (and filled) the caller's DataFrame through an alias. The
    per-label scan of the whole table is replaced by one mapped lookup.

    Parameters
    ----------
    acc_df : pandas.DataFrame
        Table with a ``label`` column.
    lab : object with a ``label`` attribute (e.g. DataFrame with that column)
        Ordered label names; the position defines the numeric code.

    Returns
    -------
    pandas.DataFrame
        New table with the additional integer column ``l_val``.
    """
    codes = {}
    for code, name in enumerate(lab.label):
        # A missing label name can never equal a row value, so it gets no
        # code of its own; its position is still consumed.
        if not pd.isna(name):
            codes[name] = code

    l_val = (
        acc_df['label']
        .astype(object)      # plain lookup semantics regardless of the stored dtype
        .map(codes)
        .fillna(100)
        .astype('int64')
    )

    new_df = acc_df.assign(l_val=l_val)
    # NOTE(review): this fills missing values in *every* column with 100,
    # exactly as the original did - confirm that is intended for sensor columns.
    return new_df.fillna(100)

def divide_years(acc_df,write_folder,year_times):
    """Tag every row with its year and write one CSV per year.

    For each entry ``year -> [start, end]`` of ``year_times`` the rows with
    ``start <= timestamp < end`` are tagged with ``year`` and written to
    ``<write_folder>/<year>/l_<year>_acc.csv`` (the folder is created when
    missing). The interval is half-open so that a row lying exactly on a
    boundary shared by two consecutive years ends up in one file only; the
    earlier inclusive test wrote such a row to both files.

    Parameters
    ----------
    acc_df : pandas.DataFrame
        Table with a ``timestamp`` column comparable to the values in
        ``year_times``. A ``year`` column is added to this table in place
        ('0' for rows outside every interval); the function returns nothing,
        so this side effect is kept for callers that read it afterwards.
    write_folder : str
        Folder containing (or receiving) one subfolder per year.
    year_times : dict
        Maps a year name (str) to a two-element sequence ``[start, end]``.

    Returns
    -------
    None
    """
    acc_df['year'] = '0'

    for year in list(year_times.keys()):
        start, end = year_times[year][0], year_times[year][1]
        print(start)

        in_year = (acc_df['timestamp'] >= start) & (acc_df['timestamp'] < end)
        acc_df.loc[in_year, 'year'] = year

        year_dir = os.path.join(write_folder, year)
        os.makedirs(year_dir, exist_ok=True)
        acc_df[in_year].to_csv(os.path.join(year_dir, 'l_' + year + '_acc.csv'), index=False)

### Load labels

In [None]:
# Distinct Omizunagidori labels stored earlier; the dict maps row index -> label name.
omizu_labels_csv = os.path.join(database_l_path, 'O_labels_df.csv')
omizu_labels_all_df = pd.read_csv(omizu_labels_csv)
omizu_l_dict = omizu_labels_all_df['label'].to_dict()

In [None]:
# Distinct Umineko labels stored earlier; the dict maps row index -> label name.
umineko_labels_csv = os.path.join(database_l_path, 'U_labels_df.csv')
umineko_labels_all_df = pd.read_csv(umineko_labels_csv)
umineko_l_dict = umineko_labels_all_df['label'].to_dict()

## Omizunagidori

In [None]:
# Year name -> [start, end] of that year's window, consumed by divide_years.
# NOTE(review): the values look like epoch milliseconds; read that way each
# boundary is 1 January 01:01:00 UTC - confirm unit and offset against time_change.
o_year_times = {
    '2018': [1514768460000, 1546304460000],
    '2019': [1546304460000, 1577840460000],
    '2020': [1577840460000, 1609462860000],
    '2021': [1609462860000, 1640998860000],
    '2022': [1640998860000, 1672534860000],
}

In [None]:
# Reload the combined Omizunagidori table, attach numeric label codes and
# split it into one file per year.
# The file name now matches the case used when the table is written
# ('Omizu_all_t_df.csv'); the lower-case spelling only resolved on
# case-insensitive file systems.
Omizu_all_t_df = pd.read_csv(os.path.join(database_o_acc_path,'Omizu_all_t_df.csv'))
omizu_lab_all_df = num_labels(Omizu_all_t_df,omizu_labels_all_df)
divide_years(omizu_lab_all_df,database_o_acc_path,o_year_times)

## Umineko

In [None]:
# Year name -> [start, end] of that year's window, consumed by divide_years.
# NOTE(review): the values look like epoch milliseconds; read that way each
# boundary is 1 January 01:01:00 UTC - confirm unit and offset against time_change.
u_year_times = {
    '2018': [1514768460000, 1546304460000],
    '2019': [1546304460000, 1577840460000],
    '2022': [1640998860000, 1672534860000],
}

In [None]:
# Reload the combined Umineko table, attach numeric label codes and split it
# into one file per year.
# The file name now matches the case used when the table is written
# ('Umineko_all_t_df.csv'); the lower-case spelling only resolved on
# case-insensitive file systems.
Umineko_all_t_df = pd.read_csv(os.path.join(database_u_acc_path,'Umineko_all_t_df.csv'))
umineko_lab_all_df = num_labels(Umineko_all_t_df,umineko_labels_all_df)
divide_years(umineko_lab_all_df,database_u_acc_path,u_year_times)

# Visualize transformed data
## Load DataFrame

In [None]:
# Labelled per-year table to inspect; the location follows the layout written
# by divide_years (<species folder>/<year>/l_<year>_acc.csv).
viz_path = r"G:\JaimeMorales\Codes\omizunagidori\database\omizunagidori\2022\l_2022_acc.csv"

In [None]:
# Load the table at viz_path; the bare name on the last line renders it as the cell output.
viz_df = pd.read_csv(viz_path,parse_dates=["timestamp"])
viz_df

In [None]:
# Keep only the rows recorded for the bird tagged 'LB01'.
is_lb01 = viz_df['animal_tag'] == 'LB01'
bird_df = viz_df.loc[is_lb01]
bird_df

In [None]:
# Drop the rows whose label is 'unknown'.
has_known_label = bird_df['label'] != 'unknown'
bird_l_df = bird_df.loc[has_known_label]
bird_l_df

In [None]:
# Alias for the filtered single-bird table.
# NOTE(review): the plotting cell shown below passes viz_df, not view_df - confirm which one is meant.
view_df = bird_l_df


In [None]:
def plot_two_data_segments(data, start_idx, lenght, label_name = "l_val",
                           save_path = r"G:\JaimeMorales\Codes\omizunagidori\figures\timeseries.svg"):
    """Plot one window of the three acceleration channels and save it as SVG.

    Despite the name, a single axes filling the whole figure is drawn; the
    traces come from ``plot_one_data_segment``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table handed on to ``plot_one_data_segment``.
    start_idx : int
        First row of the window.
    lenght : int
        Number of rows in the window (parameter name kept for compatibility).
    label_name : str
        Forwarded to ``plot_one_data_segment``.
    save_path : str
        Where the SVG is written; defaults to the location used so far.

    Returns
    -------
    None
    """
    # clear=True empties figure 1 when it already exists, so calling the
    # function repeatedly does not stack the new traces on top of old ones.
    fig = plt.figure(num=1, figsize=(50,30), dpi=300, clear=True)
    ax = fig.add_axes([0, 0, 1, 1])
    plot_one_data_segment(ax, data, start_idx, lenght, label_name = label_name)
    fig.savefig(save_path, format = 'svg', dpi=500, bbox_inches = 'tight')
    # NOTE(review): runs after savefig, so it cannot influence the saved file.
    fig.tight_layout()
    fig.show()
    
def plot_one_data_segment(ax, data, start_idx, length, label_name = "label"):
    """Draw acc_x / acc_y / acc_z against timestamp for one window of rows.

    The window starts at ``start_idx`` and spans ``length`` rows. A window
    running past the second-to-last row is shifted back to
    ``len(data) - 1 - length``, and a negative start is raised to 0.

    Parameters
    ----------
    ax : object with ``plot`` and ``tick_params`` (a Matplotlib axes)
    data : pandas.DataFrame
        Needs the columns ``timestamp``, ``acc_x``, ``acc_y`` and ``acc_z``.
    start_idx : int
    length : int
    label_name : str
        Accepted but not used by this function.
    """
    last_row = len(data) - 1
    if start_idx + length > last_row:
        start_idx = last_row - length
    start_idx = max(start_idx, 0)

    window = slice(start_idx, start_idx + length)
    times = data["timestamp"][window]

    for column, colour in (('acc_x', 'red'), ('acc_y', 'green'), ('acc_z', 'blue')):
        ax.plot(times, data[column][window], '-', color=colour, linewidth=12)

    # NOTE(review): the second call (axis='y') passes the x-axis keywords
    # bottom/top/labelbottom; if the y ticks should be hidden as well,
    # left/right/labelleft are probably intended - confirm.
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, which='both', bottom=False, top=False, labelbottom=False)

## Plot dataset

In [None]:
# Plot 2000 rows of the loaded table starting at row 12000 and save the figure.
plot_two_data_segments(viz_df, start_idx=12000, lenght=2000)