In [11]:
import json
import numpy as np
import pandas as pd
from collections import defaultdict

## Check Official Train/Test Length

In [12]:
official_split_txt = '/workspace/pj_resp/ssast/src/finetune/icbhi/data/official_split.txt'

# Read the whole split file inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open(official_split_txt) as split_file:
    lines = split_file.read().splitlines()

train_set = []
eval_set = []

# Each non-empty line is expected to be "<basename>\t<fold>".
for line in lines:
    stripped = line.strip()
    if not stripped:
        # Blank lines (e.g. padding at the end of the file) carry no entry and
        # would otherwise break the two-field unpacking below.
        continue
    basename, fold = stripped.split('\t')
    if fold == 'train':
        train_set.append(basename)
    else:
        # Any fold value other than 'train' is counted as evaluation data.
        eval_set.append(basename)

In [13]:
# Number of entries in each fold of the official split, and their sum.
len_train_set, len_test_set = len(train_set), len(eval_set)
len_all = len_train_set + len_test_set

# Share of each fold as a percentage, rounded to one decimal place.
train_set_pct = round(len_train_set/len_all*100,1)
test_set_pct = round(len_test_set/len_all*100,1)

print(f'Train Set Length: {len_train_set} ({train_set_pct})')
print(f'Test Set Length: {len_test_set} ({test_set_pct})')
print(f'Total Length: {len_all}')

Train Set Length: 539 (58.6)
Test Set Length: 381 (41.4)
Total Length: 920


## Check Official Train/Test Segment Length

In [14]:
train_json_path = '/workspace/pj_resp/ssast/src/finetune/icbhi/data/icbhi_train.json'
test_json_path = '/workspace/pj_resp/ssast/src/finetune/icbhi/data/icbhi_eval.json'


def _read_json(path):
    """Return the parsed content of the JSON file stored at ``path``."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Parsed train / eval files; later cells read their 'data' lists.
train_json = _read_json(train_json_path)
test_json = _read_json(test_json_path)

In [19]:
# Number of entries in each file's 'data' list, and their sum.
len_train_seg, len_test_seg = len(train_json['data']), len(test_json['data'])
len_all = len_train_seg + len_test_seg

# Share of each split as a percentage, rounded to one decimal place.
train_seg_pct = round(len_train_seg/len_all*100,1)
test_seg_pct = round(len_test_seg/len_all*100,1)

print(f'Train Segment Length: {len_train_seg} ({train_seg_pct})')
print(f'Test Segment Length: {len_test_seg} ({test_seg_pct})')
print(f'Total Length: {len_all}')

Train Segment Length: 4142 (60.0)
Test Segment Length: 2756 (40.0)
Total Length: 6898


## Check Label Ratio

In [16]:
# Per-label entry counts for each split. defaultdict(int) lets a label be
# counted without registering it first and yields 0 for a label never seen.
train_lb_dict = defaultdict(int)
test_lb_dict = defaultdict(int)

# Iterate over the entries themselves rather than indexing with a length
# computed in a different cell, so this cell depends only on the loaded JSON
# and still works when cells are run from a fresh kernel in order.
for entry in train_json['data']:
    label = entry['labels']
    train_lb_dict[label] += 1

for entry in test_json['data']:
    label = entry['labels']
    test_lb_dict[label] += 1

In [17]:
# (display name, key in train_lb_dict) pairs, in the order they are reported.
train_label_rows = [
    ('Normal', 'normal'),
    ('Crackle&Wheezing', 'crackle&wheezing'),
    ('Crackle', 'crackle'),
    ('Wheezing', 'wheezing'),
]

# One line per label: raw count followed by its percentage of all train
# segments, rounded to one decimal place.
for shown_name, label_key in train_label_rows:
    label_count = train_lb_dict[label_key]
    label_pct = round(label_count/len_train_seg*100,1)
    print(f'Train Segment "{shown_name}" count:', label_count, f"({label_pct})")

Train Segment "Normal" count: 2063 (49.8)
Train Segment "Crackle&Wheezing" count: 363 (8.8)
Train Segment "Crackle" count: 1215 (29.3)
Train Segment "Wheezing" count: 501 (12.1)


In [18]:
# (display name, key in test_lb_dict) pairs, in the order they are reported.
test_label_rows = [
    ('Normal', 'normal'),
    ('Crackle&Wheezing', 'crackle&wheezing'),
    ('Crackle', 'crackle'),
    ('Wheezing', 'wheezing'),
]

# One line per label: raw count followed by its percentage of all test
# segments, rounded to one decimal place.
for shown_name, label_key in test_label_rows:
    label_count = test_lb_dict[label_key]
    label_pct = round(label_count/len_test_seg*100,1)
    print(f'Test Segment "{shown_name}" count:', label_count, f"({label_pct})")

Test Segment "Normal" count: 1579 (57.3)
Test Segment "Crackle&Wheezing" count: 143 (5.2)
Test Segment "Crackle" count: 649 (23.5)
Test Segment "Wheezing" count: 385 (14.0)
