In [1]:
import os
import pickle
import warnings
from datetime import datetime

import librosa.display
import numpy as np
import pandas as pa
from matplotlib import pyplot as plt
from sklearn.metrics import recall_score, precision_score

from src import make_dataset
from src.data import load_annotation
from src.data import load_radar, load_water_distance, load_weight_sensor, load_audio
from src.utils import get_framed_label, train_test_split, from_boolean_array_to_intervals

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides warnings that may be relevant to the metrics
# computed further down -- consider narrowing the filter.
warnings.filterwarnings('ignore')

# Load the pickled model that the evaluation cells pass around as 'DEFECATE_MODEL'.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load model files that come from a trusted source.
with open('../models/trained_models/defecation-rf-sources-extended-embedding-0202.pkl', 'rb') as f:
    defecate_model = pickle.load(f)

1. Get the use IDs that have low precision but whose predictions look good.
2. Write a function that gets the x, y, and the prediction for a given use ID.

In [2]:
# Root of the project's data directory.  The default is the original author's
# location; set the DOWNLOAD_PROJECT_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    'DOWNLOAD_PROJECT_DATA_DIR',
    "C:/Users/Jiajun/Desktop/download-project/data",
).rstrip('/')

# Base dataset configuration (no use IDs and no category selected).
config = {
    'USE_IDS': [],
    'DATAFRAME_PATH': DATA_DIR + "/raw/data_frames",
    'ANNOTATION_PATH': DATA_DIR + "/processed/Annotation.csv",
    'FEATURE_NAMES': ['Max', 'Min', 'Mean', 'Median', 'LogVariance', 'LinearTrend'],
    'SOURCE_NAMES': ['TotalWeight', 'RadarSum', 'AudioDelay4'],
    'WINDOW_SECONDS': 2,
    'HOP_SECONDS': 1,
    'CATEGORY': '',
}

# Load the annotations from the file at config['ANNOTATION_PATH'].
annotations = load_annotation.get_annotation(config['ANNOTATION_PATH'])

In [28]:
# Dataset configuration for the "Defecation" category.  It is derived from
# `config` (defined in an earlier cell) so that the data paths, feature/source
# names and windowing parameters are declared in one place only; just the
# category differs.  The list values are copied so the two configs never share
# mutable state.
defecate_train_config = dict(
    config,
    USE_IDS=[],
    FEATURE_NAMES=list(config['FEATURE_NAMES']),
    SOURCE_NAMES=list(config['SOURCE_NAMES']),
    CATEGORY="Defecation",
)

# Inputs for EvalDefecate: the dataset config, the use IDs to score, the loaded
# model, and the probability threshold above which a frame counts as positive.
eval_config = {
    'DEFECATE_TRAIN_CONFIG': defecate_train_config,
    'USE_IDS': [1870, 1875, 1882, 1890, 1944, 1947, 1955, 1994, 1995, 1999, 1839],
    'DEFECATE_MODEL': defecate_model,
    'THRESHOLD': 0.3
}

In [31]:
# NOTE(review): EvalDefecate is defined in a later cell of this notebook
# (In [30]); on a fresh kernel that cell has to be run before this one.
# Build the evaluator from eval_config and score every use ID listed in it.
# `res` maps each use ID to its recall, precision and true/predicted intervals.
eval_defecate = EvalDefecate(eval_config)
res = eval_defecate.eval_all()

updating 1870
updating 1875
updating 1882
updating 1890
updating 1944
updating 1947
updating 1955
updating 1994
updating 1995
updating 1999
updating 1839


In [35]:
# Display the results as a table; the transpose puts one use ID per row and
# one result field (recall, precision, intervals) per column.
pa.DataFrame(res).T

Unnamed: 0,recall,precision,true_interval,pred_interval
1870,1.0,0.6,"[[37.0, 47.0], [49.0, 52.0]]","[[37.88, 48.88], [50.88, 52.88]]"
1875,0.57,0.8,"[[71.0, 79.0]]","[[74.92, 74.92], [77.92, 80.92]]"
1882,0.89,0.73,"[[11.0, 21.0], [31.0, 34.0]]","[[9.92, 16.92], [18.92, 18.92], [32.92, 33.92]]"
1890,1.0,0.4,"[[40.0, 44.0]]","[[39.92, 43.92]]"
1944,1.0,0.5,"[[15.0, 19.0]]","[[15.92, 18.92]]"
1947,1.0,1.0,"[[12.0, 21.0]]","[[12.92, 18.92]]"
1955,0.86,0.86,"[[25.0, 34.0]]","[[27.92, 33.92]]"
1994,1.0,0.44,"[[16.0, 21.0], [25.0, 28.0]]","[[15.92, 20.92], [25.92, 27.92]]"
1995,0.94,0.54,"[[22.0, 27.0], [75.0, 85.0], [87.0, 93.0]]","[[20.96, 32.96], [34.96, 36.96], [78.96, 81.96..."
1999,0.78,0.95,"[[16.0, 26.0], [27.0, 37.0], [55.0, 63.0]]","[[15.92, 24.92], [28.92, 28.92], [32.92, 35.92..."


In [30]:
class EvalDefecate:
    """Score a defecation model frame-by-frame, one use ID at a time.

    ``eval_config`` keys:

    * ``'DEFECATE_TRAIN_CONFIG'`` -- dataset config handed to
      ``make_dataset.RandomForestExtended``; its ``'USE_IDS'`` entry is
      overridden (on a copy) for each use ID that is evaluated.
    * ``'USE_IDS'`` -- iterable of use IDs to evaluate.
    * ``'DEFECATE_MODEL'`` -- fitted model exposing ``predict_proba``.
    * ``'THRESHOLD'`` -- a frame is predicted positive when column 1 of
      ``predict_proba`` is strictly greater than this value.
    * ``'ANNOTATIONS'`` (optional) -- annotations indexed by use ID.  Each
      entry is a sequence whose last item is the category label and whose
      first two items are reported as the interval (presumably start/end --
      confirm against ``load_annotation``).  When omitted, the annotations are
      loaded on first use from ``DEFECATE_TRAIN_CONFIG['ANNOTATION_PATH']``
      instead of relying on a notebook-global variable.
    """

    # Annotation label that marks a true interval of interest.
    CATEGORY = "Defecation"

    def __init__(self, eval_config):
        """Store the evaluation settings; no data is loaded here."""
        self.defecate_train_config = eval_config['DEFECATE_TRAIN_CONFIG']
        self.use_ids = eval_config['USE_IDS']
        self.defecate_model = eval_config['DEFECATE_MODEL']
        self.threshold = eval_config['THRESHOLD']
        # None means "load lazily from ANNOTATION_PATH the first time it is needed".
        self._annotations = eval_config.get('ANNOTATIONS')

    def _get_annotations(self):
        """Return the annotations, loading them from disk on first use if needed."""
        if self._annotations is None:
            self._annotations = load_annotation.get_annotation(
                self.defecate_train_config['ANNOTATION_PATH']
            )
        return self._annotations

    def get_x_and_y(self, use_i):
        """Build the features and labels for a single use ID.

        Side effect: stores the first framed timestamp of the dataset in
        ``self.t0`` so predicted intervals can be offset by it later.

        Returns:
            Tuple ``(x_i, y_i)`` as produced by
            ``RandomForestExtended.get_features_and_labels_from_users``.
        """
        # Shallow copy is enough: 'USE_IDS' is replaced, never mutated in place.
        config = self.defecate_train_config.copy()
        config['USE_IDS'] = [use_i]
        dataset_i = make_dataset.RandomForestExtended(config)
        x_i, y_i = dataset_i.get_features_and_labels_from_users()
        self.t0 = dataset_i.framed_timestamps[0][0]
        return x_i, y_i

    def get_ypred(self, use_i):
        """Predict a 0/1 label per frame for a single use ID.

        Side effect: the features and labels of ``use_i`` are kept in
        ``self.x_i`` / ``self.y_i`` (``output`` reads ``self.y_i``).

        Returns:
            Integer array with 1 where the column-1 probability exceeds
            ``self.threshold`` and 0 elsewhere.
        """
        self.x_i, self.y_i = self.get_x_and_y(use_i)
        positive_proba = self.defecate_model.predict_proba(self.x_i)[:, 1]
        ypred_i = (positive_proba > self.threshold).astype(int)
        return ypred_i

    def output(self, use_i):
        """Collect true/predicted labels and intervals for a single use ID.

        Returns:
            Dict with keys ``'true_boolean'``, ``'pred_boolean'``,
            ``'true_interval'`` and ``'pred_interval'``.

        Raises:
            KeyError: if ``use_i`` has no entry in the annotations.
        """
        # Must run first: it refreshes self.y_i and self.t0 for this use ID.
        ypred_i = self.get_ypred(use_i)
        annotations_i = self._get_annotations()[use_i]
        return {
            'true_boolean': self.y_i,
            'pred_boolean': ypred_i,
            'true_interval': [i[:2] for i in annotations_i if i[-1] == self.CATEGORY],
            'pred_interval': from_boolean_array_to_intervals(ypred_i, t0=self.t0)
        }

    def eval_all(self):
        """Evaluate every configured use ID.

        Returns:
            Dict mapping each use ID to its frame-level ``'recall'`` and
            ``'precision'`` (rounded to two decimals) plus the true and
            predicted intervals.
        """
        res = {}
        for use_i in self.use_ids:
            out = self.output(use_i)
            y_true, y_pred = out['true_boolean'], out['pred_boolean']
            res[use_i] = {
                'recall': round(recall_score(y_true, y_pred), 2),
                'precision': round(precision_score(y_true, y_pred), 2),
                'true_interval': out['true_interval'],
                'pred_interval': out['pred_interval']
            }
        return res