# Segment diarization

This notebook diarizes the WAV files of each video segment by segment, using the segments produced from the `facerec_segment` results.

In [1]:
import os
import numpy as np
import wavefile
import pickle
from SphereDiar.embed import VLAD, amsoftmax_loss
from SphereDiar.meeting_corpus_util import *
from SphereDiar.SphereDiar import *
from librosa.util import frame
from librosa.feature import mfcc
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import scale
from keras.models import load_model
import warnings
#warnings.filterwarnings("ignore", message="Numerical issues were encountered ")

Using TensorFlow backend.


Replaced StandardScaler with MinMaxScaler because StandardScaler produces hundreds of warnings about numerical issues. With MinMaxScaler there are no numerical issues.

In [2]:
def fe(s, fs = 16000):
    """Compute min-max scaled MFCC features for one audio window.

    Args:
        s: audio samples, passed straight to librosa's ``mfcc``.
        fs: sampling rate in Hz, forwarded to ``mfcc`` as ``sr``.

    Returns:
        The result of ``MinMaxScaler().fit_transform`` applied to the MFCC
        matrix (30 coefficients, ``n_fft=512``, ``hop_length=160``).

    NOTE(review): MinMaxScaler rescales every column independently. If
    ``mfcc`` returns an array shaped (n_mfcc, frames), this normalizes each
    time frame across the coefficients, whereas the earlier
    ``scale(mfcc_feat, axis=1)`` variant normalized each coefficient across
    time. Confirm that this axis is the intended one.
    """
    coefficients = mfcc(s, sr=fs, n_mfcc=30, n_fft=512, hop_length=160)
    scaler = MinMaxScaler()
    return scaler.fit_transform(coefficients)

In [3]:
# Lookup from the three-character prefix of a segment file name to the
# file name of the video the segment belongs to.
video_mapping = dict([
    ("175", "5531550228324592939.mp4"),
    ("176", "5534228999422914578.mp4"),
    ("177", "5539381671692122744.mp4"),
    ("178", "5542003749222140011.mp4"),
    ("179", "5544574287152993687.mp4"),
    ("180", "5544620672795594434.mp4"),
    ("181", "5547193787702629969.mp4"),
    ("182", "5549784941472309008.mp4"),
    ("183", "5552368364300855101.mp4"),
    ("184", "5555325449284154780.mp4"),
    ("185", "5555360238519252381.mp4"),
])

# Load the embedding model from disk; VLAD and amsoftmax_loss are handed to
# Keras as custom objects so that the saved model can be deserialized.
model_path = "./SphereDiar/models/current_best.h5"
emb_model = load_model(
    model_path,
    custom_objects={'VLAD': VLAD, 'amsoftmax_loss': amsoftmax_loss},
)

# Diarization object built around the embedding model.
SD = SphereDiar(emb_model)





Instructions for updating:
dim is deprecated, use axis instead










In [4]:
# Names of all sub-directories of ./data that start with "segments",
# in sorted order.
seg_dirs = sorted(
    entry.name
    for entry in os.scandir("./data")
    if entry.is_dir() and entry.name.startswith("segments")
)
seg_dirs

['segments_add1_keep1_cut0.5',
 'segments_add1_keep1_cut0.7',
 'segments_add1_keep2_cut0.5',
 'segments_add1_keep2_cut0.7',
 'segments_add1_keepall_cut0.5',
 'segments_add1_keepall_cut0.7',
 'segments_add2_keep1_cut0.5',
 'segments_add2_keep1_cut0.7',
 'segments_add2_keep2_cut0.5',
 'segments_add2_keep2_cut0.7',
 'segments_add2_keepall_cut0.5',
 'segments_add2_keepall_cut0.7']

In [5]:
# Diarize every WAV segment of every segment directory.
#
# For each directory, `results` maps a video file name to a list of
# (segment file name, speaker labels) tuples; it is pickled into that
# directory as segmented_diarization.pickle. Segments whose clustering raises
# ValueError are stored with empty labels and their paths are collected in
# `failed`.
failed = []
for d in seg_dirs:
    print(f"Dealing with dir {d} next.")
    # One empty result list per video, derived from video_mapping so the
    # list of video file names is maintained in a single place.
    results = {video_name: [] for video_name in video_mapping.values()}
    datapath = f"./data/{d}"
    wavs = sorted(name for name in os.listdir(datapath) if name.endswith(".wav"))
    for wavfile in wavs:
        print(f"Diarizing file {wavfile} now.")
        # NOTE(review): `rate` is never checked; everything below hard-codes
        # 16000 Hz, so the WAVs are assumed to be sampled at 16 kHz.
        (rate, sig) = wavefile.load(f"{datapath}/{wavfile}")
        signal = sig[0]  # presumably the first channel -- TODO confirm
        # Windows of 2000*16 samples with a hop of 500*16 samples
        # (2 s / 0.5 s at the assumed 16 kHz), one window per row.
        S = np.transpose(frame(signal, int(2000*16), int(500*16)))
        X = [fe(s, 16000) for s in S]
        # Swap the last two axes of the stacked per-window features so the
        # array matches the (batch, timesteps, 30) layout used for the model
        # input below.
        X = np.array(np.swapaxes(X, 1, 2))
        X = X.astype(np.float16)
        num_timesteps = X.shape[1]

        if num_timesteps != 201:
            # Wrap the model behind a new input layer with the observed number
            # of timesteps. This rebinds `emb_model` for all later iterations.
            # NOTE(review): Input and Model are not imported by name in the
            # import cell; presumably they arrive via one of the star
            # imports -- verify.
            emb_model.layers.pop(0)
            new_input = Input(batch_shape=(None, num_timesteps, 30))
            new_output = emb_model(new_input)
            emb_model = Model(new_input, new_output)

        embs = emb_model.predict(X)
        try:
            SD.cluster(rounds=10, clust_range=[2, 8], num_cores = -2, embeddings=embs)
            spk_labels = SD.speaker_labels_
        except ValueError:
            # Keep the segment in the results with empty labels and remember
            # its path in `failed`.
            spk_labels = []
            failed.append(f"{datapath}/{wavfile}")
        short_name = wavfile[:3]
        long_name = video_mapping[short_name]
        results[long_name].append((wavfile, spk_labels))
    print("Saving results to a pickle.")
    # Use a context manager so the file is flushed and closed right away.
    with open(f"{datapath}/segmented_diarization.pickle", 'wb') as results_file:
        pickle.dump(results, results_file)

Dealing with dir segments_add1_keep1_cut0.5 next.
Diarizing file 175_001.wav now.
Diarizing file 175_002.wav now.
Diarizing file 175_003.wav now.
Diarizing file 175_004.wav now.
Diarizing file 175_005.wav now.
Diarizing file 175_006.wav now.
Diarizing file 175_007.wav now.
Diarizing file 175_008.wav now.
Diarizing file 175_009.wav now.
Diarizing file 175_010.wav now.
Diarizing file 175_011.wav now.
Diarizing file 175_012.wav now.
Diarizing file 175_013.wav now.
Diarizing file 175_014.wav now.
Diarizing file 175_015.wav now.
Diarizing file 175_016.wav now.
Diarizing file 175_017.wav now.
Diarizing file 175_018.wav now.
Diarizing file 175_019.wav now.
Diarizing file 175_020.wav now.
Diarizing file 175_021.wav now.
Diarizing file 175_022.wav now.
Diarizing file 175_023.wav now.
Diarizing file 175_024.wav now.
Diarizing file 175_025.wav now.
Diarizing file 175_026.wav now.
Diarizing file 175_027.wav now.
Diarizing file 175_028.wav now.
Diarizing file 175_029.wav now.
Diarizing file 175_030

Diarizing file 177_066.wav now.
Diarizing file 177_067.wav now.
Diarizing file 177_068.wav now.
Diarizing file 177_069.wav now.
Diarizing file 177_070.wav now.
Diarizing file 177_071.wav now.
Diarizing file 177_072.wav now.
Diarizing file 177_073.wav now.
Diarizing file 177_074.wav now.
Diarizing file 177_075.wav now.
Diarizing file 177_076.wav now.
Diarizing file 177_077.wav now.
Diarizing file 177_078.wav now.
Diarizing file 177_079.wav now.
Diarizing file 177_080.wav now.
Diarizing file 177_081.wav now.
Diarizing file 177_082.wav now.
Diarizing file 177_083.wav now.
Diarizing file 177_084.wav now.
Diarizing file 177_085.wav now.
Diarizing file 178_001.wav now.
Diarizing file 178_002.wav now.
Diarizing file 178_003.wav now.
Diarizing file 178_004.wav now.
Diarizing file 178_005.wav now.
Diarizing file 178_006.wav now.
Diarizing file 178_007.wav now.
Diarizing file 178_008.wav now.
Diarizing file 178_009.wav now.
Diarizing file 178_010.wav now.
Diarizing file 178_011.wav now.
Diarizin

Diarizing file 182_009.wav now.
Diarizing file 182_010.wav now.
Diarizing file 182_011.wav now.
Diarizing file 182_012.wav now.
Diarizing file 182_013.wav now.
Diarizing file 182_014.wav now.
Diarizing file 182_015.wav now.
Diarizing file 182_016.wav now.
Diarizing file 182_017.wav now.
Diarizing file 182_018.wav now.
Diarizing file 182_019.wav now.
Diarizing file 182_020.wav now.
Diarizing file 182_021.wav now.
Diarizing file 182_022.wav now.
Diarizing file 182_023.wav now.
Diarizing file 182_024.wav now.
Diarizing file 182_025.wav now.
Diarizing file 182_026.wav now.
Diarizing file 182_027.wav now.
Diarizing file 182_028.wav now.
Diarizing file 182_029.wav now.
Diarizing file 182_030.wav now.
Diarizing file 182_031.wav now.
Diarizing file 182_032.wav now.
Diarizing file 182_033.wav now.
Diarizing file 182_034.wav now.
Diarizing file 182_035.wav now.
Diarizing file 182_036.wav now.
Diarizing file 182_037.wav now.
Diarizing file 182_038.wav now.
Diarizing file 182_039.wav now.
Diarizin

Diarizing file 185_044.wav now.
Diarizing file 185_045.wav now.
Diarizing file 185_046.wav now.
Diarizing file 185_047.wav now.
Diarizing file 185_048.wav now.
Diarizing file 185_049.wav now.
Diarizing file 185_050.wav now.
Diarizing file 185_051.wav now.
Diarizing file 185_052.wav now.
Diarizing file 185_053.wav now.
Diarizing file 185_054.wav now.
Diarizing file 185_055.wav now.
Diarizing file 185_056.wav now.
Diarizing file 185_057.wav now.
Diarizing file 185_058.wav now.
Diarizing file 185_059.wav now.
Diarizing file 185_060.wav now.
Diarizing file 185_061.wav now.
Diarizing file 185_062.wav now.
Diarizing file 185_063.wav now.
Diarizing file 185_064.wav now.
Diarizing file 185_065.wav now.
Diarizing file 185_066.wav now.
Diarizing file 185_067.wav now.
Saving results to a pickle.
Dealing with dir segments_add1_keep1_cut0.7 next.
Diarizing file 175_001.wav now.
Diarizing file 175_002.wav now.
Diarizing file 175_003.wav now.
Diarizing file 175_004.wav now.
Diarizing file 175_005.wav

Diarizing file 181_007.wav now.
Diarizing file 181_008.wav now.
Diarizing file 181_009.wav now.
Diarizing file 181_010.wav now.
Diarizing file 181_011.wav now.
Diarizing file 181_012.wav now.
Diarizing file 181_013.wav now.
Diarizing file 181_014.wav now.
Diarizing file 181_015.wav now.
Diarizing file 181_016.wav now.
Diarizing file 181_017.wav now.
Diarizing file 181_018.wav now.
Diarizing file 181_019.wav now.
Diarizing file 181_020.wav now.
Diarizing file 181_021.wav now.
Diarizing file 181_022.wav now.
Diarizing file 181_023.wav now.
Diarizing file 181_024.wav now.
Diarizing file 181_025.wav now.
Diarizing file 181_026.wav now.
Diarizing file 181_027.wav now.
Diarizing file 181_028.wav now.
Diarizing file 181_029.wav now.
Diarizing file 181_030.wav now.
Diarizing file 181_031.wav now.
Diarizing file 181_032.wav now.
Diarizing file 181_033.wav now.
Diarizing file 181_034.wav now.
Diarizing file 181_035.wav now.
Diarizing file 181_036.wav now.
Diarizing file 181_037.wav now.
Diarizin

Saving results to a pickle.
Dealing with dir segments_add1_keep2_cut0.5 next.
Diarizing file 175_001.wav now.
Diarizing file 175_002.wav now.
Diarizing file 175_003.wav now.
Diarizing file 175_004.wav now.
Diarizing file 175_005.wav now.
Diarizing file 175_006.wav now.
Diarizing file 175_007.wav now.
Diarizing file 175_008.wav now.
Diarizing file 175_009.wav now.
Diarizing file 175_010.wav now.
Diarizing file 175_011.wav now.
Diarizing file 175_012.wav now.
Diarizing file 175_013.wav now.
Diarizing file 175_014.wav now.
Diarizing file 175_015.wav now.
Diarizing file 175_016.wav now.
Diarizing file 175_017.wav now.
Diarizing file 175_018.wav now.
Diarizing file 175_019.wav now.
Diarizing file 175_020.wav now.
Diarizing file 175_021.wav now.
Diarizing file 175_022.wav now.
Diarizing file 175_023.wav now.
Diarizing file 175_024.wav now.
Diarizing file 175_025.wav now.
Diarizing file 175_026.wav now.
Diarizing file 175_027.wav now.
Diarizing file 175_028.wav now.
Diarizing file 175_029.wav

Diarizing file 178_028.wav now.
Diarizing file 178_029.wav now.
Diarizing file 178_030.wav now.
Diarizing file 178_031.wav now.
Diarizing file 178_032.wav now.
Diarizing file 178_033.wav now.
Diarizing file 178_034.wav now.
Diarizing file 178_035.wav now.
Diarizing file 178_036.wav now.
Diarizing file 178_037.wav now.
Diarizing file 178_038.wav now.
Diarizing file 178_039.wav now.
Diarizing file 178_040.wav now.
Diarizing file 178_041.wav now.
Diarizing file 178_042.wav now.
Diarizing file 178_043.wav now.
Diarizing file 178_044.wav now.
Diarizing file 178_045.wav now.
Diarizing file 178_046.wav now.
Diarizing file 178_047.wav now.
Diarizing file 178_048.wav now.
Diarizing file 178_049.wav now.
Diarizing file 178_050.wav now.
Diarizing file 178_051.wav now.
Diarizing file 179_001.wav now.
Diarizing file 179_002.wav now.
Diarizing file 179_003.wav now.
Diarizing file 179_004.wav now.
Diarizing file 179_005.wav now.
Diarizing file 179_006.wav now.
Diarizing file 179_007.wav now.
Diarizin

Diarizing file 183_032.wav now.
Diarizing file 183_033.wav now.
Diarizing file 183_034.wav now.
Diarizing file 183_035.wav now.
Diarizing file 183_036.wav now.
Diarizing file 183_037.wav now.
Diarizing file 183_038.wav now.
Diarizing file 183_039.wav now.
Diarizing file 183_040.wav now.
Diarizing file 183_041.wav now.
Diarizing file 183_042.wav now.
Diarizing file 183_043.wav now.
Diarizing file 183_044.wav now.
Diarizing file 183_045.wav now.
Diarizing file 183_046.wav now.
Diarizing file 183_047.wav now.
Diarizing file 183_048.wav now.
Diarizing file 183_049.wav now.
Diarizing file 183_050.wav now.
Diarizing file 183_051.wav now.
Diarizing file 183_052.wav now.
Diarizing file 183_053.wav now.
Diarizing file 183_054.wav now.
Diarizing file 183_055.wav now.
Diarizing file 183_056.wav now.
Diarizing file 183_057.wav now.
Diarizing file 183_058.wav now.
Diarizing file 183_059.wav now.
Diarizing file 183_060.wav now.
Diarizing file 183_061.wav now.
Diarizing file 183_062.wav now.
Diarizin

Diarizing file 177_015.wav now.
Diarizing file 177_016.wav now.
Diarizing file 177_017.wav now.
Diarizing file 177_018.wav now.
Diarizing file 177_019.wav now.
Diarizing file 178_001.wav now.
Diarizing file 178_002.wav now.
Diarizing file 178_003.wav now.
Diarizing file 178_004.wav now.
Diarizing file 178_005.wav now.
Diarizing file 178_006.wav now.
Diarizing file 178_007.wav now.
Diarizing file 178_008.wav now.
Diarizing file 178_009.wav now.
Diarizing file 178_010.wav now.
Diarizing file 178_011.wav now.
Diarizing file 178_012.wav now.
Diarizing file 178_013.wav now.
Diarizing file 178_014.wav now.
Diarizing file 178_015.wav now.
Diarizing file 178_016.wav now.
Diarizing file 178_017.wav now.
Diarizing file 179_001.wav now.
Diarizing file 179_002.wav now.
Diarizing file 179_003.wav now.
Diarizing file 179_004.wav now.
Diarizing file 179_005.wav now.
Diarizing file 179_006.wav now.
Diarizing file 179_007.wav now.
Diarizing file 179_008.wav now.
Diarizing file 179_009.wav now.
Diarizin

Diarizing file 175_031.wav now.
Diarizing file 175_032.wav now.
Diarizing file 175_033.wav now.
Diarizing file 175_034.wav now.
Diarizing file 175_035.wav now.
Diarizing file 175_036.wav now.
Diarizing file 175_037.wav now.
Diarizing file 175_038.wav now.
Diarizing file 175_039.wav now.
Diarizing file 175_040.wav now.
Diarizing file 175_041.wav now.
Diarizing file 175_042.wav now.
Diarizing file 175_043.wav now.
Diarizing file 175_044.wav now.
Diarizing file 175_045.wav now.
Diarizing file 175_046.wav now.
Diarizing file 175_047.wav now.
Diarizing file 175_048.wav now.
Diarizing file 175_049.wav now.
Diarizing file 175_050.wav now.
Diarizing file 175_051.wav now.
Diarizing file 175_052.wav now.
Diarizing file 175_053.wav now.
Diarizing file 175_054.wav now.
Diarizing file 175_055.wav now.
Diarizing file 175_056.wav now.
Diarizing file 175_057.wav now.
Diarizing file 175_058.wav now.
Diarizing file 175_059.wav now.
Diarizing file 175_060.wav now.
Diarizing file 175_061.wav now.
Diarizin

Diarizing file 177_034.wav now.
Diarizing file 177_035.wav now.
Diarizing file 177_036.wav now.
Diarizing file 177_037.wav now.
Diarizing file 177_038.wav now.
Diarizing file 177_039.wav now.
Diarizing file 177_040.wav now.
Diarizing file 177_041.wav now.
Diarizing file 177_042.wav now.
Diarizing file 177_043.wav now.
Diarizing file 177_044.wav now.
Diarizing file 177_045.wav now.
Diarizing file 177_046.wav now.
Diarizing file 177_047.wav now.
Diarizing file 177_048.wav now.
Diarizing file 177_049.wav now.
Diarizing file 177_050.wav now.
Diarizing file 177_051.wav now.
Diarizing file 177_052.wav now.
Diarizing file 177_053.wav now.
Diarizing file 177_054.wav now.
Diarizing file 177_055.wav now.
Diarizing file 177_056.wav now.
Diarizing file 177_057.wav now.
Diarizing file 177_058.wav now.
Diarizing file 177_059.wav now.
Diarizing file 177_060.wav now.
Diarizing file 177_061.wav now.
Diarizing file 177_062.wav now.
Diarizing file 177_063.wav now.
Diarizing file 177_064.wav now.
Diarizin

Diarizing file 179_089.wav now.
Diarizing file 179_090.wav now.
Diarizing file 179_091.wav now.
Diarizing file 179_092.wav now.
Diarizing file 179_093.wav now.
Diarizing file 179_094.wav now.
Diarizing file 179_095.wav now.
Diarizing file 179_096.wav now.
Diarizing file 179_097.wav now.
Diarizing file 179_098.wav now.
Diarizing file 179_099.wav now.
Diarizing file 180_001.wav now.
Diarizing file 180_002.wav now.
Diarizing file 180_003.wav now.
Diarizing file 180_004.wav now.
Diarizing file 180_005.wav now.
Diarizing file 180_006.wav now.
Diarizing file 180_007.wav now.
Diarizing file 180_008.wav now.
Diarizing file 180_009.wav now.
Diarizing file 180_010.wav now.
Diarizing file 180_011.wav now.
Diarizing file 180_012.wav now.
Diarizing file 180_013.wav now.
Diarizing file 180_014.wav now.
Diarizing file 180_015.wav now.
Diarizing file 180_016.wav now.
Diarizing file 180_017.wav now.
Diarizing file 180_018.wav now.
Diarizing file 180_019.wav now.
Diarizing file 180_020.wav now.
Diarizin

Diarizing file 183_023.wav now.
Diarizing file 183_024.wav now.
Diarizing file 183_025.wav now.
Diarizing file 183_026.wav now.
Diarizing file 183_027.wav now.
Diarizing file 183_028.wav now.
Diarizing file 183_029.wav now.
Diarizing file 183_030.wav now.
Diarizing file 183_031.wav now.
Diarizing file 183_032.wav now.
Diarizing file 183_033.wav now.
Diarizing file 183_034.wav now.
Diarizing file 183_035.wav now.
Diarizing file 183_036.wav now.
Diarizing file 183_037.wav now.
Diarizing file 183_038.wav now.
Diarizing file 183_039.wav now.
Diarizing file 183_040.wav now.
Diarizing file 183_041.wav now.
Diarizing file 183_042.wav now.
Diarizing file 183_043.wav now.
Diarizing file 183_044.wav now.
Diarizing file 183_045.wav now.
Diarizing file 183_046.wav now.
Diarizing file 183_047.wav now.
Diarizing file 183_048.wav now.
Diarizing file 183_049.wav now.
Diarizing file 183_050.wav now.
Diarizing file 183_051.wav now.
Diarizing file 183_052.wav now.
Diarizing file 183_053.wav now.
Diarizin

Diarizing file 185_086.wav now.
Diarizing file 185_087.wav now.
Diarizing file 185_088.wav now.
Diarizing file 185_089.wav now.
Diarizing file 185_090.wav now.
Diarizing file 185_091.wav now.
Diarizing file 185_092.wav now.
Diarizing file 185_093.wav now.
Diarizing file 185_094.wav now.
Diarizing file 185_095.wav now.
Diarizing file 185_096.wav now.
Diarizing file 185_097.wav now.
Diarizing file 185_098.wav now.
Diarizing file 185_099.wav now.
Diarizing file 185_100.wav now.
Saving results to a pickle.
Dealing with dir segments_add1_keepall_cut0.7 next.
Diarizing file 175_001.wav now.
Diarizing file 175_002.wav now.
Diarizing file 175_003.wav now.
Diarizing file 175_004.wav now.
Diarizing file 175_005.wav now.
Diarizing file 175_006.wav now.
Diarizing file 175_007.wav now.
Diarizing file 175_008.wav now.
Diarizing file 175_009.wav now.
Diarizing file 175_010.wav now.
Diarizing file 175_011.wav now.
Diarizing file 175_012.wav now.
Diarizing file 175_013.wav now.
Diarizing file 175_014.w

Diarizing file 176_089.wav now.
Diarizing file 176_090.wav now.
Diarizing file 176_091.wav now.
Diarizing file 176_092.wav now.
Diarizing file 176_093.wav now.
Diarizing file 176_094.wav now.
Diarizing file 176_095.wav now.
Diarizing file 176_096.wav now.
Diarizing file 176_097.wav now.
Diarizing file 176_098.wav now.
Diarizing file 176_099.wav now.
Diarizing file 176_100.wav now.
Diarizing file 176_101.wav now.
Diarizing file 176_102.wav now.
Diarizing file 176_103.wav now.
Diarizing file 176_104.wav now.
Diarizing file 176_105.wav now.
Diarizing file 176_106.wav now.
Diarizing file 176_107.wav now.
Diarizing file 176_108.wav now.
Diarizing file 176_109.wav now.
Diarizing file 176_110.wav now.
Diarizing file 176_111.wav now.
Diarizing file 176_112.wav now.
Diarizing file 176_113.wav now.
Diarizing file 176_114.wav now.
Diarizing file 176_115.wav now.
Diarizing file 176_116.wav now.
Diarizing file 176_117.wav now.
Diarizing file 176_118.wav now.
Diarizing file 176_119.wav now.
Diarizin

Diarizing file 180_010.wav now.
Diarizing file 180_011.wav now.
Diarizing file 180_012.wav now.
Diarizing file 180_013.wav now.
Diarizing file 180_014.wav now.
Diarizing file 180_015.wav now.
Diarizing file 180_016.wav now.
Diarizing file 180_017.wav now.
Diarizing file 180_018.wav now.
Diarizing file 181_001.wav now.
Diarizing file 181_002.wav now.
Diarizing file 181_003.wav now.
Diarizing file 181_004.wav now.
Diarizing file 181_005.wav now.
Diarizing file 181_006.wav now.
Diarizing file 181_007.wav now.
Diarizing file 181_008.wav now.
Diarizing file 181_009.wav now.
Diarizing file 181_010.wav now.
Diarizing file 181_011.wav now.
Diarizing file 181_012.wav now.
Diarizing file 181_013.wav now.
Diarizing file 181_014.wav now.
Diarizing file 181_015.wav now.
Diarizing file 181_016.wav now.
Diarizing file 181_017.wav now.
Diarizing file 181_018.wav now.
Diarizing file 181_019.wav now.
Diarizing file 181_020.wav now.
Diarizing file 181_021.wav now.
Diarizing file 181_022.wav now.
Diarizin

Diarizing file 183_030.wav now.
Diarizing file 183_031.wav now.
Diarizing file 183_032.wav now.
Diarizing file 183_033.wav now.
Diarizing file 183_034.wav now.
Diarizing file 183_035.wav now.
Diarizing file 183_036.wav now.
Diarizing file 183_037.wav now.
Diarizing file 183_038.wav now.
Diarizing file 183_039.wav now.
Diarizing file 183_040.wav now.
Diarizing file 183_041.wav now.
Diarizing file 183_042.wav now.
Diarizing file 183_043.wav now.
Diarizing file 183_044.wav now.
Diarizing file 183_045.wav now.
Diarizing file 183_046.wav now.
Diarizing file 183_047.wav now.
Diarizing file 183_048.wav now.
Diarizing file 183_049.wav now.
Diarizing file 183_050.wav now.
Diarizing file 183_051.wav now.
Diarizing file 183_052.wav now.
Diarizing file 183_053.wav now.
Diarizing file 183_054.wav now.
Diarizing file 183_055.wav now.
Diarizing file 183_056.wav now.
Diarizing file 183_057.wav now.
Diarizing file 183_058.wav now.
Diarizing file 183_059.wav now.
Diarizing file 183_060.wav now.
Diarizin

Diarizing file 185_035.wav now.
Diarizing file 185_036.wav now.
Diarizing file 185_037.wav now.
Diarizing file 185_038.wav now.
Diarizing file 185_039.wav now.
Diarizing file 185_040.wav now.
Diarizing file 185_041.wav now.
Diarizing file 185_042.wav now.
Diarizing file 185_043.wav now.
Diarizing file 185_044.wav now.
Diarizing file 185_045.wav now.
Diarizing file 185_046.wav now.
Diarizing file 185_047.wav now.
Diarizing file 185_048.wav now.
Diarizing file 185_049.wav now.
Diarizing file 185_050.wav now.
Diarizing file 185_051.wav now.
Diarizing file 185_052.wav now.
Diarizing file 185_053.wav now.
Diarizing file 185_054.wav now.
Diarizing file 185_055.wav now.
Diarizing file 185_056.wav now.
Diarizing file 185_057.wav now.
Diarizing file 185_058.wav now.
Diarizing file 185_059.wav now.
Diarizing file 185_060.wav now.
Diarizing file 185_061.wav now.
Diarizing file 185_062.wav now.
Diarizing file 185_063.wav now.
Diarizing file 185_064.wav now.
Diarizing file 185_065.wav now.
Diarizin

KeyboardInterrupt: 

In [7]:
# Persist the list of failed segment paths; the context manager makes sure
# the file is flushed and closed.
with open("data/failed_diar.pickle", 'wb') as failed_file:
    pickle.dump(failed, failed_file)

In [39]:
# `fails` is not assigned in any cell of this notebook. Presumably it is the
# list that was pickled to data/failed_diar.pickle (TODO confirm); load it
# from there only when it is not already defined in the kernel.
try:
    fails
except NameError:
    with open("data/failed_diar.pickle", 'rb') as fails_file:
        fails = pickle.load(fails_file)

# Start from the directory of the first failed segment so that the retry loop
# begins with the matching `d` and `results` instead of leftover kernel state.
d = os.path.dirname(fails[0])
with open(f"{d}/segmented_diarization.pickle", 'rb') as results_file:
    results = pickle.load(results_file)

In [40]:
# Retry the segments listed in `fails`, this time with K as the upper bound
# of the cluster range.
#
# `d` and `results` must already describe the directory of the first entry of
# `fails`. Whenever the directory changes, the results of the previous
# directory are written to segmented_diarization2.pickle and the original
# results of the new directory are loaded. Segments that fail again are
# collected in `failed` and keep their empty-label entry.
K = 7
failed = []
for fail in fails:
    if d != os.path.dirname(fail):
        with open(f"{d}/segmented_diarization2.pickle", 'wb') as out_file:
            pickle.dump(results, out_file)
        d = os.path.dirname(fail)
        with open(f"{d}/segmented_diarization.pickle", 'rb') as in_file:
            results = pickle.load(in_file)

    print(f"Diarizing file {fail} now.")
    # NOTE(review): `rate` is never checked; everything below hard-codes
    # 16000 Hz, so the WAVs are assumed to be sampled at 16 kHz.
    (rate, sig) = wavefile.load(f"{fail}")
    signal = sig[0]  # presumably the first channel -- TODO confirm
    # Windows of 2000*16 samples with a hop of 500*16 samples, one per row.
    S = np.transpose(frame(signal, int(2000*16), int(500*16)))
    X = [fe(s, 16000) for s in S]
    X = np.array(np.swapaxes(X, 1, 2))
    X = X.astype(np.float16)
    num_timesteps = X.shape[1]

    if num_timesteps != 201:
        # Wrap the model behind a new input layer with the observed number of
        # timesteps. This rebinds `emb_model` for all later iterations.
        emb_model.layers.pop(0)
        new_input = Input(batch_shape=(None, num_timesteps, 30))
        new_output = emb_model(new_input)
        emb_model = Model(new_input, new_output)

    embs = emb_model.predict(X)
    try:
        SD.cluster(rounds=10, clust_range=[2, K], num_cores = -2, embeddings=embs)
        spk_labels = SD.speaker_labels_
    except ValueError:
        spk_labels = []
        failed.append(fail)
        continue
    base = os.path.basename(fail)
    long_name = video_mapping[base[:3]]
    # Replace the empty-label placeholder of this segment; the K used for the
    # retry is stored as a third tuple element.
    j = results[long_name].index((base, []))
    results[long_name][j] = (base, spk_labels, K)

# The loop above only writes results when the directory changes, so the
# directory of the last processed segment still has to be saved here.
if len(fails) > 0:
    with open(f"{d}/segmented_diarization2.pickle", 'wb') as out_file:
        pickle.dump(results, out_file)
    

Diarizing file ./data/segments_add1_keep1_cut0.5/175_008.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/175_018.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/175_023.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/175_027.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/175_045.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/175_064.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/175_085.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/175_092.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/175_099.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/176_010.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/176_019.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/176_036.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/176_039.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/176_049.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.5/176_060.wav now.
Diarizing 

Diarizing file ./data/segments_add1_keep1_cut0.7/179_017.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/179_020.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/179_028.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/179_029.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/179_032.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/180_001.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/180_005.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/181_007.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/181_008.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/181_019.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/181_023.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/181_024.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/181_030.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/181_052.wav now.
Diarizing file ./data/segments_add1_keep1_cut0.7/182_007.wav now.
Diarizing 

Diarizing file ./data/segments_add1_keep2_cut0.7/185_002.wav now.
Diarizing file ./data/segments_add1_keep2_cut0.7/185_004.wav now.
Diarizing file ./data/segments_add1_keep2_cut0.7/185_008.wav now.
Diarizing file ./data/segments_add1_keep2_cut0.7/185_014.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_001.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_003.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_009.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_021.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_022.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_023.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_030.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_039.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_041.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_059.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/175_0

Diarizing file ./data/segments_add1_keepall_cut0.5/180_016.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/180_017.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/180_022.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/180_024.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_004.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_008.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_015.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_022.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_023.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_030.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_038.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_044.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_060.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.5/181_069.wav now.
Diarizing file ./data/segments_add1_keepall_cut0

Diarizing file ./data/segments_add1_keepall_cut0.7/175_026.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_031.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_034.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_035.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_037.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_040.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_042.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_043.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_044.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_046.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_049.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_051.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_053.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/175_055.wav now.
Diarizing file ./data/segments_add1_keepall_cut0

Diarizing file ./data/segments_add1_keepall_cut0.7/178_004.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_006.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_009.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_013.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_024.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_030.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_037.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_040.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_046.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_048.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_049.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_050.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/178_053.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/179_004.wav now.
Diarizing file ./data/segments_add1_keepall_cut0

Diarizing file ./data/segments_add1_keepall_cut0.7/183_065.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/183_067.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/183_069.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/183_071.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/183_073.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/183_075.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/183_086.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/183_103.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/183_105.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/184_004.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/184_006.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/184_007.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/184_009.wav now.
Diarizing file ./data/segments_add1_keepall_cut0.7/184_014.wav now.
Diarizing file ./data/segments_add1_keepall_cut0

Diarizing file ./data/segments_add2_keep1_cut0.5/185_047.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_003.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_006.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_008.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_017.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_027.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_030.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_039.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_044.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_047.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_050.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_061.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_065.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/175_096.wav now.
Diarizing file ./data/segments_add2_keep1_cut0.7/176_001.wav now.
Diarizing 

Diarizing file ./data/segments_add2_keep2_cut0.5/175_023.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/175_027.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/176_002.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/176_003.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/176_030.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/177_037.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/178_020.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/178_025.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/179_005.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/181_020.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/182_011.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/183_001.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/183_010.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/183_021.wav now.
Diarizing file ./data/segments_add2_keep2_cut0.5/183_031.wav now.
Diarizing 

Diarizing file ./data/segments_add2_keepall_cut0.7/176_067.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/176_075.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/176_076.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/176_077.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/176_080.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/176_081.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/177_010.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/177_018.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/177_031.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/177_039.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/177_047.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/177_048.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/177_052.wav now.
Diarizing file ./data/segments_add2_keepall_cut0.7/177_063.wav now.
Diarizing file ./data/segments_add2_keepall_cut0