In [1]:
import numpy as np
import pandas as pa
import pickle
import librosa.display

from src.utils import get_framed_label, train_test_split, from_boolean_array_to_intervals, get_annotated_intervals
from src.data import load_annotation
from src.data import load_radar, load_water_distance, load_weight_sensor, load_audio
from src import make_dataset

from matplotlib import pyplot as plt

In [2]:
config = {
    'USE_IDS': [],
    'DATAFRAME_PATH': "C:/Users/Jiajun/Desktop/download-project/data/raw/data_frames",
    'ANNOTATION_PATH': "C:/Users/Jiajun/Desktop/download-project/data/processed/Annotation.csv",
    'WINDOW_SECONDS': 3,
    'HOP_SECONDS': 1,
    'CATEGORY': 'Defecation',
}

complete_ids = load_annotation.get_complete_ids(
    annotation_filename = config['ANNOTATION_PATH'],
    category = config['CATEGORY']
)

annotations = load_annotation.get_annotation(config['ANNOTATION_PATH'])

In [3]:
selected_ids = complete_ids[(complete_ids < 1900) & (complete_ids > 1000)]

TRAIN_IDS, TEST_IDS = train_test_split(selected_ids)

#TRAIN_IDS = [987, 960, 954, 964, 968, 979, 976, 993, 953, 982, 984, 995, 985, 958]
#TEST_IDS = [989, 970, 971, 986, 978, 992]

print(f"Category: {config['CATEGORY']}")
print(f"Training {len(TRAIN_IDS)} use_ids: {TRAIN_IDS[:5]}...")
print(f"Testing  {len(TEST_IDS)} use_ids: {TEST_IDS[:5]}...")

Category: Defecation
Training 12 use_ids: [1831, 1863, 1862, 1830, 1881]...
Testing  3 use_ids: [1854, 1882, 1890]...


In [4]:
train_config = config.copy()
test_config = config.copy()

train_config['USE_IDS'] = TRAIN_IDS
test_config['USE_IDS'] = TEST_IDS

dataset = {}
dataset['train'] = make_dataset.Seq2SeqDatasetDefecate(train_config)
dataset['test'] = make_dataset.Seq2SeqDatasetDefecate(test_config)

In [5]:
train_x, train_y = [], []
for i in range(len(dataset['train'])):
    x, y = dataset['train'][i]
    train_x.append(x.numpy())
    train_y.append(y.numpy())
train_x = np.concatenate(train_x)
train_y = np.concatenate(train_y)

In [6]:
test_x, test_y = [], []
for i in range(len(dataset['test'])):
    x, y = dataset['test'][i]
    test_x.append(x.numpy())
    test_y.append(y.numpy())

test_x = np.concatenate(test_x)
test_y = np.concatenate(test_y)

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [8]:
rf = RandomForestClassifier(n_estimators=5)
rf.fit(train_x, train_y)

RandomForestClassifier(n_estimators=5)

In [9]:
print (classification_report(
    y_true=test_y,
    y_pred=np.array(rf.predict_proba(test_x)[:, 1] > 0.3, dtype=int))
)

              precision    recall  f1-score   support

           0       1.00      0.98      0.99       552
           1       0.57      0.92      0.71        13

    accuracy                           0.98       565
   macro avg       0.78      0.95      0.85       565
weighted avg       0.99      0.98      0.98       565



In [10]:
print (classification_report(
    y_true=test_y,
    y_pred=np.array(rf.predict_proba(test_x)[:, 1] > 0.4, dtype=int))
)

              precision    recall  f1-score   support

           0       1.00      0.99      0.99       552
           1       0.65      0.85      0.73        13

    accuracy                           0.99       565
   macro avg       0.82      0.92      0.86       565
weighted avg       0.99      0.99      0.99       565



# Model

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from src.seq2seq.train import train_test_split, train, test
from src.seq2seq.model import LSTMClassifier

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [14]:
# model
NUM_EPOCHS = 5

model = LSTMClassifier(input_dim = 60, hidden_dim = 64, output_dim = 2, num_layers = 2)
optimizer = optim.SGD(model.parameters(), lr = 0.3)
criterion = nn.CrossEntropyLoss()

In [15]:
# training
for epoch in range(NUM_EPOCHS):
    print ("Epoch : {}".format(epoch))
    train(
        dataset = dataset['train'],
        batch_size = 5,
        model = model,
        optimizer = optimizer,
        criterion = criterion
    )
    
    test(
        dataset = dataset['test'],
        model = model,
        criterion = criterion
    )

Epoch : 0
Batch : 1 / Loss : 0.7707870006561279
Batch : 2 / Loss : 0.20072075724601746
Batch : 3 / Loss : 0.16629882156848907
Test loss: 0.13599988569815954
Epoch : 1
Batch : 1 / Loss : 0.1702488213777542
Batch : 2 / Loss : 0.16729328036308289
Batch : 3 / Loss : 0.14816823601722717
Test loss: 0.11231497426827748
Epoch : 2
Batch : 1 / Loss : 0.15201470255851746
Batch : 2 / Loss : 0.1638912409543991
Batch : 3 / Loss : 0.1344224065542221
Test loss: 0.10826782261331876
Epoch : 3
Batch : 1 / Loss : 0.14575885236263275
Batch : 2 / Loss : 0.16236014664173126
Batch : 3 / Loss : 0.12545129656791687
Test loss: 0.09899155174692471
Epoch : 4
Batch : 1 / Loss : 0.1388704478740692
Batch : 2 / Loss : 0.1517849862575531
Batch : 3 / Loss : 0.1161779910326004
Test loss: 0.09926529352863629


In [16]:
# eval
THRESHOLD = 0.3
predictions = np.array([])
labels = np.array([])

for use_i in TEST_IDS:
    eval_config = config.copy()
    eval_config['USE_IDS'] = [use_i]
    feature, label = make_dataset.Seq2SeqDatasetDefecate(eval_config)[0]
    shape = feature.shape
    ypred = model(feature.view(shape[0], 1, shape[1])).squeeze(dim=0)
    m = nn.Softmax(dim=1)
    prediction = m(ypred)[:, 1].long()
    
    predictions = np.concatenate([predictions, prediction.numpy()])
    labels = np.concatenate([labels, label.numpy()])

In [None]:
from sklearn.metrics import classification_report

In [None]:
print (classification_report(
    y_true=labels,
    y_pred=predictions))

In [17]:
predictions

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.