# Capstone Project
## Part 5 - Diarisation

In this part we now apply the recogniser model to our chunks and append the prediction onto the dataframe.


In this stage we can also clean up short/long phrases and other transcription artifacts.

In [110]:
import pandas as pd
import numpy as np
import joblib
import importlib
import json
import capstone
import sktools
import os

In [161]:
# Locations of the VAD chunk audio (input) and of the final CSV (output).
path = 'data/VAD/'
out_path = 'output/'
sources = path + 'chunks'

# List the directory once and derive both lists from the same sorted listing,
# so file_names[i] always corresponds to file_list[i].
# NOTE(review): the original '.'.join(x.split('.')[0::]) was a no-op, so the
# names keep their '.wav' extension here exactly as before; presumably the
# intent was to strip it -- confirm before changing.
file_names = sorted(x for x in os.listdir(sources) if x.endswith('.wav'))
file_list = [os.path.join(sources, fname) for fname in file_names]

In [112]:
# Put the chunk file names into ascending order, in place.
file_names[:] = sorted(file_names)

In [118]:
# Read the transcription table and keep only the timing and text columns,
# in this fixed order (any other column in the CSV is dropped).
df = pd.read_csv('data/VAD/transcription.csv').loc[
    :, ['min', 'max', 'dur', 'in', 'out', 'transcription']
]
df

Unnamed: 0,min,max,dur,in,out,transcription
0,35,292,256,1120.084922,9344.708496,yeah that is why they're put into your writing...
1,293,610,316,9376.710922,19521.480077,yeah it's very very very hardly yeah I don't e...
2,611,982,370,19553.482504,31426.382682,but the idea is to try to be as objective as p...
3,983,1410,426,31458.385108,45123.421162,yep so so just discussion of what you see here...
4,1411,1510,198,45155.423589,48323.663798,so when you hear the term critical thinking wh...
...,...,...,...,...,...,...
83,24635,25044,408,788379.773287,801468.765667,role play
84,25045,25341,295,801500.768093,810973.486295,if you have a good one seven north in about th...
85,25342,25517,350,811005.488721,816605.913334,here's an article I got on from Harvard Busine...
86,26045,26145,99,833503.194450,836703.437085,yep so you already


#### Verification Data

A set of labels was manually put together in order to be able to evaluate the accuracy of the model.

It must be noted that the model is already somewhat handicapped, as some segments of audio contain overlapping speech.

In [128]:
# Ground-truth labels used to score the models. The CSV is read without a
# header row, so its columns are numbered from 0.
chunk_labels = pd.read_csv(
    'data/VAD/chunk_labels.csv',
    header=None,
)

#### Pickle load

We selected 3 models in the previous chapter, so we will compare results

In [130]:
# Load the three candidate recogniser models.
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
SVC10 = joblib.load('pre_models/SVC10.joblib')
Tree10 = joblib.load('pre_models/Tree10.joblib')
Ada1020 = joblib.load('pre_models/Ada1020.joblib')

# JSON object keys are always strings, so convert them back to integer class
# ids. The mapping is built from the dict that was just loaded (the original
# iterated over an undefined name `labels`), and the file is closed after
# reading.
with open('pre_models/recognizer_labels.json') as labels_file:
    classes = {int(k): v for k, v in json.load(labels_file).items()}

In [131]:
# Display the integer class id -> speaker name mapping loaded above.
classes

{0: 'NA',
 7: 'chaitanya_rao',
 5: 'jin_yang',
 8: 'adi_krishnan',
 6: 'ismael_abufon',
 4: 'shivir_pokharel',
 3: 'vidya_nayak',
 2: 'wyatt_watson',
 1: 'yang_liu'}

In [137]:
# Run the SVC10 model over every chunk file, attach the hand-made labels as
# column `y`, and flag in `res` the rows where the predicted id matches.
SVC10_df = capstone.audio.id_me(file_list, SVC10, classes)
SVC10_df['y'] = chunk_labels
SVC10_df['res'] = SVC10_df['id'].eq(SVC10_df['y'])
SVC10_df

Unnamed: 0,id,speaker,conf,filename,y,res
0,6.0,ismael_abufon,"{1.0: 0.050387596899224806, 3.0: 0.13178294573...",chunk_s0000,3,False
1,7.0,chaitanya_rao,"{1.0: 0.050314465408805034, 2.0: 0.10062893081...",chunk_s0001,7,True
2,7.0,chaitanya_rao,"{1.0: 0.08602150537634409, 2.0: 0.083333333333...",chunk_s0002,7,True
3,7.0,chaitanya_rao,"{1.0: 0.17289719626168223, 2.0: 0.261682242990...",chunk_s0003,7,True
4,7.0,chaitanya_rao,"{1.0: 0.02, 2.0: 0.13, 3.0: 0.03, 5.0: 0.03, 6...",chunk_s0004,7,True
...,...,...,...,...,...,...
83,4.0,shivir_pokharel,"{1.0: 0.00975609756097561, 2.0: 0.017073170731...",chunk_s0083,4,True
84,4.0,shivir_pokharel,"{1.0: 0.020202020202020204, 2.0: 0.00673400673...",chunk_s0084,4,True
85,7.0,chaitanya_rao,"{1.0: 0.06818181818181818, 2.0: 0.261363636363...",chunk_s0085,7,True
86,7.0,chaitanya_rao,"{1.0: 0.09900990099009901, 2.0: 0.099009900990...",chunk_s0086,7,True


In [157]:
# Accuracy = correct predictions / number of evaluated chunks. The denominator
# is taken from the frame itself instead of a hard-coded chunk count, so it
# stays right when the number of chunks changes.
a = SVC10_df['res'].sum()
print('SVC10 Accuracy was : %s , with %s correct predictions' % (a / len(SVC10_df), a))

SVC10 Accuracy was : 0.9545454545454546 , with 84 correct predictions


In [150]:
# Run the Tree10 model over every chunk file, attach the hand-made labels as
# column `y`, and flag in `res` the rows where the predicted id matches.
Tree10_df = capstone.audio.id_me(file_list, Tree10, classes)
Tree10_df['y'] = chunk_labels
Tree10_df['res'] = Tree10_df['id'].eq(Tree10_df['y'])
Tree10_df

Unnamed: 0,id,speaker,conf,filename,y,res
0,3.0,vidya_nayak,"{1.0: 0.03488372093023256, 2.0: 0.127906976744...",chunk_s0000,3,True
1,7.0,chaitanya_rao,"{1.0: 0.09433962264150944, 2.0: 0.106918238993...",chunk_s0001,7,True
2,7.0,chaitanya_rao,"{1.0: 0.08602150537634409, 2.0: 0.096774193548...",chunk_s0002,7,True
3,7.0,chaitanya_rao,"{1.0: 0.12850467289719625, 2.0: 0.212616822429...",chunk_s0003,7,True
4,7.0,chaitanya_rao,"{1.0: 0.03, 2.0: 0.18, 3.0: 0.06, 4.0: 0.02, 5...",chunk_s0004,7,True
...,...,...,...,...,...,...
83,4.0,shivir_pokharel,"{1.0: 0.02926829268292683, 2.0: 0.068292682926...",chunk_s0083,4,True
84,4.0,shivir_pokharel,"{1.0: 0.06060606060606061, 2.0: 0.043771043771...",chunk_s0084,4,True
85,7.0,chaitanya_rao,"{1.0: 0.07386363636363637, 2.0: 0.1875, 3.0: 0...",chunk_s0085,7,True
86,7.0,chaitanya_rao,"{1.0: 0.07920792079207921, 2.0: 0.138613861386...",chunk_s0086,7,True


In [164]:
# Accuracy = correct predictions / number of evaluated chunks. The denominator
# is taken from the frame itself instead of a hard-coded chunk count, so it
# stays right when the number of chunks changes.
a = Tree10_df['res'].sum()
print('Tree10 Accuracy was : %s , with %s correct predictions' % (a / len(Tree10_df), a))

Tree10 Accuracy was : 0.9204545454545454 , with 81 correct predictions


In [158]:
# Run the Ada1020 model over every chunk file, attach the hand-made labels as
# column `y`, and flag in `res` the rows where the predicted id matches.
Ada1020_df = capstone.audio.id_me(file_list, Ada1020, classes)
Ada1020_df['y'] = chunk_labels
Ada1020_df['res'] = Ada1020_df['id'].eq(Ada1020_df['y'])
Ada1020_df

Unnamed: 0,id,speaker,conf,filename,y,res
0,3.0,vidya_nayak,"{1.0: 0.03488372093023256, 2.0: 0.062015503875...",chunk_s0000,3,True
1,7.0,chaitanya_rao,"{1.0: 0.04716981132075472, 2.0: 0.094339622641...",chunk_s0001,7,True
2,7.0,chaitanya_rao,"{1.0: 0.08602150537634409, 2.0: 0.072580645161...",chunk_s0002,7,True
3,7.0,chaitanya_rao,"{1.0: 0.13317757009345793, 2.0: 0.228971962616...",chunk_s0003,7,True
4,7.0,chaitanya_rao,"{1.0: 0.04, 2.0: 0.11, 3.0: 0.03, 5.0: 0.02, 6...",chunk_s0004,7,True
...,...,...,...,...,...,...
83,4.0,shivir_pokharel,"{1.0: 0.03170731707317073, 2.0: 0.034146341463...",chunk_s0083,4,True
84,4.0,shivir_pokharel,"{1.0: 0.026936026936026935, 2.0: 0.04377104377...",chunk_s0084,4,True
85,7.0,chaitanya_rao,"{1.0: 0.10795454545454546, 2.0: 0.261363636363...",chunk_s0085,7,True
86,7.0,chaitanya_rao,"{1.0: 0.06930693069306931, 2.0: 0.128712871287...",chunk_s0086,7,True


In [159]:
# Accuracy = correct predictions / number of evaluated chunks. The denominator
# is taken from the frame itself instead of a hard-coded chunk count, so it
# stays right when the number of chunks changes.
a = Ada1020_df['res'].sum()
print('Ada1020 Accuracy was : %s , with %s correct predictions' % (a / len(Ada1020_df), a))

Ada1020 Accuracy was : 0.9545454545454546 , with 84 correct predictions


### DF Merging

The selected winner is merged with the transcription df to form the final output.

In [162]:
# Copy the Ada1020 prediction columns onto the transcription frame; values
# are matched on the row index.
for column in ('id', 'speaker'):
    df[column] = Ada1020_df[column]
df

Unnamed: 0,min,max,dur,in,out,transcription,id,speaker
0,35,292,256,1120.084922,9344.708496,yeah that is why they're put into your writing...,3.0,vidya_nayak
1,293,610,316,9376.710922,19521.480077,yeah it's very very very hardly yeah I don't e...,7.0,chaitanya_rao
2,611,982,370,19553.482504,31426.382682,but the idea is to try to be as objective as p...,7.0,chaitanya_rao
3,983,1410,426,31458.385108,45123.421162,yep so so just discussion of what you see here...,7.0,chaitanya_rao
4,1411,1510,198,45155.423589,48323.663798,so when you hear the term critical thinking wh...,7.0,chaitanya_rao
...,...,...,...,...,...,...,...,...
83,24635,25044,408,788379.773287,801468.765667,role play,4.0,shivir_pokharel
84,25045,25341,295,801500.768093,810973.486295,if you have a good one seven north in about th...,4.0,shivir_pokharel
85,25342,25517,350,811005.488721,816605.913334,here's an article I got on from Harvard Busine...,7.0,chaitanya_rao
86,26045,26145,99,833503.194450,836703.437085,yep so you already,7.0,chaitanya_rao


#### Save Output

In [163]:
# to_csv does not create missing directories, so make sure the output folder
# exists before writing. The target file name is unchanged.
os.makedirs(out_path, exist_ok=True)
df.to_csv(f'{out_path}_output.csv')

In [107]:
# Development aid: re-import the local `capstone` module so that edits made to
# its source take effect in the running kernel without a restart.
importlib.reload(capstone)

<module 'capstone' from '/Users/irav/Documents/IOD-Local/1_Modules-Labs/_Projects/Capstone/capstone.py'>

>>