In [1]:
import os
import json
import csv

In [45]:
# load configs
# Configs/configs.json is resolved against the absolute form of the current working directory.
configs_path = os.path.join(os.path.abspath(''), "Configs", "configs.json")
with open(configs_path, "r") as json_file:
    configs = json.load(json_file)

In [10]:
# load results
# Every row of the semicolon-separated results.csv becomes one dict keyed by the header row.
results = []
results_path = os.path.join(os.path.abspath(''), "results.csv")
with open(results_path, newline='') as csv_file:
    reader = csv.DictReader(csv_file, delimiter=';')
    results.extend(reader)

len(results)

800

In [None]:
# clear empty feature results
def clear_empty_feature_results(results):
    """Return the rows of ``results`` in which at least one feature weight is not ``'0'``.

    A row is dropped only when 'lpc_weight', 'lpcc_weight', 'mfcc_weight' and
    'delta_mfcc_weight' all compare equal to the string '0'. The weights are
    checked in that order and checking stops at the first one that differs.
    The input list itself is not modified; kept rows are the same objects.
    """
    weight_keys = ('lpc_weight', 'lpcc_weight', 'mfcc_weight', 'delta_mfcc_weight')
    return [
        row
        for row in results
        if not all(row[key] == '0' for key in weight_keys)
    ]
# Rebind `results` to the filtered list and display how many rows remain.
results = clear_empty_feature_results(results)
len(results)

In [None]:
# clear empty configs
def clear_empty_configs(configs):
    """Return the entries of ``configs`` in which at least one feature weight is not 0.

    A config is dropped only when 'lpc_weight', 'lpcc_weight', 'mfcc_weight' and
    'delta_mfcc_weight' all compare equal to the number 0. The weights are
    checked in that order and checking stops at the first one that differs.
    The input list itself is not modified; kept configs are the same objects.
    """
    weight_keys = ('lpc_weight', 'lpcc_weight', 'mfcc_weight', 'delta_mfcc_weight')
    return [
        entry
        for entry in configs
        if not all(entry[key] == 0 for key in weight_keys)
    ]
# Rebind `configs` to the filtered list and display how many configs remain.
configs = clear_empty_configs(configs)
len(configs)

In [None]:
# get config for result
def get_config_for_result(result):
    """Return the first entry of the global ``configs`` whose parameters equal those of ``result``.

    Each of the ten compared fields of ``result`` is converted with ``int()`` and
    compared to the config's value, in the order listed below; comparison of a
    config stops at its first differing field. Returns None if no config matches.
    """
    # amount_of_frames;size_of_frame;lpc_weight;lpc_order;mfcc_weight;mfcc_order;lpcc_weight;lpcc_order;delta_mfcc_weight;delta_mfcc_order
    compared_keys = (
        'amount_of_frames', 'size_of_frame',
        'lpc_weight', 'lpc_order',
        'mfcc_weight', 'mfcc_order',
        'lpcc_weight', 'lpcc_order',
        'delta_mfcc_weight', 'delta_mfcc_order',
    )
    for candidate in configs:
        if all(candidate[key] == int(result[key]) for key in compared_keys):
            return candidate
    return None

In [None]:
# get results for config
def get_results_for_config(config):
    """Return every row of the global ``results`` whose parameters equal those of ``config``.

    For each row the ten compared fields are converted with ``int()`` and compared
    to the config's values in a fixed order; comparison of a row stops at its
    first differing field. Matching rows are returned in their order in ``results``.
    """
    compared_keys = (
        'amount_of_frames', 'size_of_frame',
        'lpc_weight', 'lpc_order',
        'mfcc_weight', 'mfcc_order',
        'lpcc_weight', 'lpcc_order',
        'delta_mfcc_weight', 'delta_mfcc_order',
    )
    return [
        row
        for row in results
        if all(config[key] == int(row[key]) for key in compared_keys)
    ]

In [None]:
# Store on each config its matching results; only the first 300 results are relevant.
for config in configs:
    config['results'] = get_results_for_config(config)[:300]

In [None]:
# check if there are always 300 results for each config
# Collect the configs whose result count differs from 300, then report each one.
false_configs = [config for config in configs if len(config['results']) != 300]
for false_config in false_configs:
    print(f"Config {false_config['id']} has {len(false_config['results'])} results")

len(false_configs)

In [None]:
# new results (with only the first 300 results for each config)
# Concatenate the per-config result lists in config order.
new_results = [result for config in configs for result in config['results']]

results = new_results
len(results)

In [3]:
# get absolute accuracy of result
def get_absolute_accuracy(result) -> float:
    """Return as a float the value of ``result['speaker_<id>']``, where ``<id>`` is ``result['correct_speaker_id']``."""
    return float(result[f"speaker_{result['correct_speaker_id']}"])

In [None]:
# Try the helper on the first result row and print the value it returns.
test_accuracy = get_absolute_accuracy(results[0])
print(test_accuracy)

In [None]:
# # check which amount_of_frames is better (10000 or 15000)
# results_with_10000_frames = []
# results_with_15000_frames = []
# for result in results:
#     if result['amount_of_frames'] == '10000':
#         results_with_10000_frames.append(result)
#     elif result['amount_of_frames'] == '15000':
#         results_with_15000_frames.append(result)
#     else:
#         print('Error: amount_of_frames is not 10000 or 15000')

# # get average accuracy for each amount_of_frames
# avg_accuracy_10000_frames = 0
# avg_accuracy_15000_frames = 0
# for result in results_with_10000_frames:
#     avg_accuracy_10000_frames += get_absolute_accuracy(result)
# for result in results_with_15000_frames:
#     avg_accuracy_15000_frames += get_absolute_accuracy(result)
# avg_accuracy_10000_frames /= len(results_with_10000_frames)
# avg_accuracy_15000_frames /= len(results_with_15000_frames)

# avg_accuracy_10000_frames, avg_accuracy_15000_frames


In [None]:
# results = results_with_15000_frames

In [None]:
# # check which size_of_frame is better (400 or 600)
# results_with_400_frames = []
# results_with_600_frames = []
# for result in results:
#     if result['size_of_frame'] == '400':
#         results_with_400_frames.append(result)
#     elif result['size_of_frame'] == '600':
#         results_with_600_frames.append(result)
#     else:
#         print('Error: size_of_frame is not 400 or 600')

# # get average accuracy for each size_of_frame
# avg_accuracy_400_frames = 0
# avg_accuracy_600_frames = 0
# for result in results_with_400_frames:
#     avg_accuracy_400_frames += get_absolute_accuracy(result)
# for result in results_with_600_frames:
#     avg_accuracy_600_frames += get_absolute_accuracy(result)
# avg_accuracy_400_frames /= len(results_with_400_frames)
# avg_accuracy_600_frames /= len(results_with_600_frames)

# avg_accuracy_400_frames, avg_accuracy_600_frames

In [None]:
# results = results_with_600_frames

In [None]:
# get best 10 configs (avg accuracy)
# Each config is scored by the mean of get_absolute_accuracy over config['results'];
# the ten highest-scoring configs end up in best_configs, best first.
scored_configs = []
for config in configs:
    config_results = config['results']
    if not config_results:
        # No results means no defined average; report and skip instead of dividing by zero.
        print(f"Config {config['id']} has no results and is skipped")
        continue

    # Plain left-to-right accumulation (rather than sum()) so the float result does not
    # depend on the interpreter's summation strategy.
    avg_accuracy = 0
    for result in config_results:
        avg_accuracy += get_absolute_accuracy(result)
    avg_accuracy /= len(config_results)

    scored_configs.append({
        'id': config['id'],
        'avg_accuracy': avg_accuracy
    })

# One stable sort replaces re-sorting and rescanning a running top 10 for every config;
# configs with equal averages keep their relative order from `configs`.
best_configs = sorted(scored_configs, key=lambda k: k['avg_accuracy'], reverse=True)[:10]


# print best configs
for best_config in best_configs:
    print(f"Config {best_config['id']} has avg accuracy of {best_config['avg_accuracy']}")

In [14]:
# load results
# Rebinds `results` to a fresh list read from results.csv, replacing whatever it held before.
results = []
results_path = os.path.join(os.path.abspath(''), "results.csv")
with open(results_path, newline='') as csv_file:
    reader = csv.DictReader(csv_file, delimiter=';')
    results.extend(reader)

len(results)

# average accuracy per neural network, best first
# A single pass accumulates a running sum and row count per 'neural_network_id', instead of
# rescanning all of `results` for every new id and re-sorting after every row.
accuracy_sums = {}
result_counts = {}
for result in results:
    network_id = result['neural_network_id']
    # Rows are added in file order, so each network's sum is built in the same order
    # as a row-by-row scan over its results.
    accuracy_sums[network_id] = accuracy_sums.get(network_id, 0) + get_absolute_accuracy(result)
    result_counts[network_id] = result_counts.get(network_id, 0) + 1

# Dicts keep insertion order, so networks enter the sort in order of first appearance;
# the sort is stable, so networks with equal averages keep that order.
neural_networks = sorted(
    (
        {
            'id': network_id,
            'avg_accuracy': accuracy_sums[network_id] / result_counts[network_id]
        }
        for network_id in accuracy_sums
    ),
    key=lambda k: k['avg_accuracy'],
    reverse=True,
)

# print best neural networks (at most the first 100)
for neural_network in neural_networks[:100]:
    print(f"Neural network {neural_network['id']} has avg accuracy of {neural_network['avg_accuracy']}")

Neural network ea8c5f43-b64b-4533-8f98-5f904d5f5cc3 has avg accuracy of 0.8137045796820161
Neural network f037a739-053e-43f2-9cbf-dcac31f5e61e has avg accuracy of 0.79853250949079
Neural network 087501f9-7e39-4679-8811-f66b056a7b17 has avg accuracy of 0.7958374340391782
Neural network af1c9af9-c0f4-4016-9f50-5e38cfd21d3e has avg accuracy of 0.7953441940098319
Neural network 28abdb1c-baee-430c-98cb-9b6580fa3901 has avg accuracy of 0.7918943995092471
Neural network 9c5faf80-d03d-4ebf-8d33-427fae1323d7 has avg accuracy of 0.7881798688641435
Neural network b180672a-1f8b-410c-a29d-b3805b39d739 has avg accuracy of 0.7849120016774915
Neural network 9e5a1937-4600-4185-ad1d-052ab9d75b99 has avg accuracy of 0.7845734005928071
Neural network 58d7f36b-446d-4eaa-b43f-1e750ccb8b09 has avg accuracy of 0.7807185085773548
Neural network 6bdb36dd-82a1-40de-988f-8dc82a953e84 has avg accuracy of 0.7630860904857508
Neural network b6ec8c07-90fb-46b4-b015-2e408b0a5a57 has avg accuracy of 0.05
