In [1]:
import time
import requests
from tqdm import tqdm
from sklearn.metrics import classification_report
from scipy import spatial
from sklearn.model_selection import ShuffleSplit
import numpy as np

from IPython.display import clear_output

In [2]:
api_url = "https://voice-api.dapt.tech"
api_url_stage = "https://staging-voice-api.dapt.tech"
ml_api = "http://52.86.72.117:5000/features"
ml_api_compare = "http://52.86.72.117:5000/compare"

In [3]:
user_ids = [18, 19, 20, 21, 22, 23, 36, 38, 39, 40, 42, 43, 44, 46, 47, 48, 49, 50, 52, 53, 56, 57, 58, 61, 62, 64, 66, 68, 69, 70, 71, 72, 78, 79, 80, 82, 83, 84, 85, 86]  # new one started from 36; 80(?); 45 wrong read # another new from 
user_ids_stage = [10, 23, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38]

In [9]:
# get admin tokens
username = 'admin'  # put username here
password = 'A7mQP4ukB4zdc4cL'  # put password here

params = {
    "username": username,
    'password': password
}

r = requests.post(f'{api_url}/api/v1/auth/login/', json=params)
token = r.json().get('access')

r = requests.post(f'{api_url_stage}/api/v1/auth/login/', json=params)
token_stage = r.json().get('access')

In [10]:
headers = {"Authorization": f"Bearer {token}"}
headers_stage = {"Authorization": f"Bearer {token_stage}"}

In [11]:
# get user info

users = []

def get_user_info(user_id, api_url, headers):
    user_info_url = f"{api_url}/api/v1/user/{user_id}"
    r = requests.get(user_info_url, headers=headers)
    res = r.json()
    audio_samples = res.get("audio_samples")
    imprint = list(cdict['file'] for cdict in audio_samples if cdict["sample_type"] == "1")[0]
    checkin = list(cdict['file'] for cdict in audio_samples if cdict["sample_type"] == "2")[0]
    checkout = list(cdict['file'] for cdict in audio_samples if cdict["sample_type"] == "3")[0]
    user_info = {
        'id': r.json().get('id'),
        'imprint': imprint,
        'checkin': checkin,
        'checkout': checkout
    }
    return user_info

# get from prod
for user_id in tqdm(user_ids): 
    user_info = get_user_info(user_id, api_url, headers)
    users.append(user_info)
    # print(f'got {user_id}')

100%|██████████| 40/40 [00:31<00:00,  1.28it/s]


In [12]:
# get from stage
for user_id in tqdm(user_ids_stage): 
    user_info = get_user_info(user_id, api_url_stage, headers_stage)
    users.append(user_info)
    # print(f'got {user_id}')

100%|██████████| 13/13 [00:09<00:00,  1.31it/s]


In [13]:
# get features

users_features = []

def get_features(api_url, file_url, mean_on=False):
    params = {
        "url": file_url,
        'mean': mean_on
    }
    r = requests.get(api_url, params=params)
    res = r.json()
    features = res['features']
    return features

for user in tqdm(users):
    try:
        user_id = user['id']
        # print(f'User {user_id}')
        imprint = get_features(ml_api, user['imprint'], mean_on=True)
        # print(' got imprint')
        checkin = get_features(ml_api, user['checkin'])
        # print(' got checkin')
        checkout = get_features(ml_api, user['checkout'])
        # print(' got checkout')
        user_features = {
            'id': user['id'],
            'imprint': imprint,
            'checkin': checkin,
            'checkout': checkout
        }
        users_features.append(user_features)
    except Exception as e:
        pass
        # print(' got exception!!!')
    # print('\n')

100%|██████████| 53/53 [10:53<00:00, 12.33s/it]


In [14]:
for user_features in users_features:
    user_features['imprint']['d_vector'] = user_features['imprint']['d_vector'][:256]
    user_features['checkin']['d_vector'] = user_features['checkin']['d_vector'][:256]
    user_features['checkout']['d_vector'] = user_features['checkout']['d_vector'][:256]

In [15]:
# make random splits of users

rs = ShuffleSplit(n_splits=10, test_size=.1)

In [16]:
# compare

verification_thresold = 0.69
target_names = ['verified', 'rejected']


def compare_d_vector_locally(features_1, features_2):
    return 1 - spatial.distance.cosine(features_1['d_vector'], features_2['d_vector'])


def general_test(users_features, rs, feature_for_compare_1, feature_for_compare_2):
    # iterate splits
    
    accuracies = list()
    
    for split_idx, (verif_index, reject_index) in enumerate(rs.split(users_features)):
        clear_output(wait=True)
        print(f'SPLIT {split_idx}')
        labels = list()
        preds = list()

        for idx, user in enumerate(users_features):
            if idx in verif_index:
                labels.append(0)
            else:
                labels.append(1)

            user_id = user['id']    
            similarities = []
            for idx2, user2 in enumerate(users_features):
                if idx2 in verif_index:
                    user_id_2 = user2['id']
                    similarity = compare_d_vector_locally(user[feature_for_compare_1], user2[feature_for_compare_2])
                    similarities.append(similarity)
                else:
                    similarities.append(0)

            # choose most similar
            max_similarity = max(similarities)
            most_similar_idx = similarities.index(max_similarity)

            if max_similarity > verification_thresold:
                if idx in verif_index:
                    # check
                    if most_similar_idx == idx:
                        preds.append(0)
                    else:
                        preds.append(1)
                else:
                    preds.append(0)
            else:
                preds.append(1)
        report = classification_report(labels, preds, target_names=target_names)
        print(report)
        report = classification_report(labels, preds, target_names=target_names, output_dict=True)
        accuracies.append(report.get('accuracy'))
        # time.sleep(2)

    mean_accuracy = np.mean(accuracies)
    print(f'\n\nMean accuracy: {mean_accuracy}')

In [17]:
general_test(users_features, rs, 'imprint', 'checkin')

SPLIT 9
              precision    recall  f1-score   support

    verified       0.93      0.84      0.88        44
    rejected       0.22      0.40      0.29         5

    accuracy                           0.80        49
   macro avg       0.57      0.62      0.58        49
weighted avg       0.85      0.80      0.82        49



Mean accuracy: 0.7959183673469388


In [18]:
general_test(users_features, rs, 'imprint', 'checkout')

SPLIT 9
              precision    recall  f1-score   support

    verified       0.93      0.89      0.91        44
    rejected       0.29      0.40      0.33         5

    accuracy                           0.84        49
   macro avg       0.61      0.64      0.62        49
weighted avg       0.86      0.84      0.85        49



Mean accuracy: 0.826530612244898


In [19]:
general_test(users_features, rs, 'checkin', 'checkout')

SPLIT 9
              precision    recall  f1-score   support

    verified       0.95      0.91      0.93        44
    rejected       0.43      0.60      0.50         5

    accuracy                           0.88        49
   macro avg       0.69      0.75      0.72        49
weighted avg       0.90      0.88      0.89        49



Mean accuracy: 0.883673469387755


In [20]:
general_test(users_features, rs, 'checkout', 'checkin')

SPLIT 9
              precision    recall  f1-score   support

    verified       0.98      0.95      0.97        44
    rejected       0.67      0.80      0.73         5

    accuracy                           0.94        49
   macro avg       0.82      0.88      0.85        49
weighted avg       0.95      0.94      0.94        49



Mean accuracy: 0.9102040816326531


In [21]:
general_test(users_features, rs, 'checkin', 'imprint')

SPLIT 9
              precision    recall  f1-score   support

    verified       0.93      0.91      0.92        44
    rejected       0.33      0.40      0.36         5

    accuracy                           0.86        49
   macro avg       0.63      0.65      0.64        49
weighted avg       0.87      0.86      0.86        49



Mean accuracy: 0.8734693877551021


In [22]:
general_test(users_features, rs, 'checkout', 'imprint')

SPLIT 9
              precision    recall  f1-score   support

    verified       0.95      0.95      0.95        44
    rejected       0.60      0.60      0.60         5

    accuracy                           0.92        49
   macro avg       0.78      0.78      0.78        49
weighted avg       0.92      0.92      0.92        49



Mean accuracy: 0.8775510204081634
