# Распознавание речи с помощью модели nnet3 (Kaldi)

In [1]:
from pathlib import Path
import pandas as pd
from kaldi.asr import NnetLatticeFasterRecognizer
from kaldi.decoder import LatticeFasterDecoderOptions
from kaldi.nnet3 import NnetSimpleComputationOptions
from kaldi.util.table import SequentialMatrixReader, CompactLatticeWriter

## Определение параметров

In [2]:
# Input/output data locations, relative to the notebook's working directory.
# TRANS_PATH is the tab-separated file that decoded text is written to.
TRANS_PATH = "data/transcriptions"
S2U_PATH = "data/spk2utt"
SCP_PATH = "data/example_segments.scp"
# Directory that holds the model files and the feature-extraction configs.
MODEL_DIR = Path("../model")

In [3]:
# Options for the neural-network (decodable) side of the recognizer.
# NOTE(review): acoustic_scale=1.0 with frame_subsampling_factor=3 looks like
# the usual "chain" model setup -- confirm against the model's training recipe.
decodable_opts = NnetSimpleComputationOptions()
decodable_opts.frame_subsampling_factor = 3
decodable_opts.acoustic_scale = 1.0

# Options for the lattice-generating decoder.
decoder_opts = LatticeFasterDecoderOptions()
decoder_opts.max_active = 7000
decoder_opts.beam = 13

## Определение модели распознавания

In [4]:
# Resolve the three model artefacts inside MODEL_DIR and pass them on as plain
# strings: the acoustic model, the decoding graph and the word symbol table.
model, graph, words = (
    str(MODEL_DIR / file_name)
    for file_name in ('final.mdl', 'HCLG.fst', 'words.txt')
)

In [5]:
# Build the recognizer from the model, graph and symbol-table files, using the
# decoder and decodable options configured above.
asr = NnetLatticeFasterRecognizer.from_files(
    model,
    graph,
    words,
    decoder_opts=decoder_opts,
    decodable_opts=decodable_opts,
)

## Выполнение распознавания

In [11]:
# Read specifier for the features: a shell pipeline that computes MFCCs for the
# segments listed in SCP_PATH and streams them to stdout as an archive.
feats_rspec = "ark:compute-mfcc-feats --config={} scp:{} ark:- |".format(
    MODEL_DIR / 'conf/mfcc.conf', SCP_PATH)
# Read specifier for the i-vectors: the same MFCC pipeline, piped further into
# the online i-vector extractor.
ivectors_rspec = (
    feats_rspec
    + "ivector-extract-online2 --config={} ark:{} ark:- ark:- |".format(
        MODEL_DIR / 'conf/ivector_extractor.conf', S2U_PATH)
)
# Write specifier for the lattices: gzip-compressed archive in the working dir.
lat_wspec = "ark:| gzip -c > lat.gz"

# The transcription file is opened once for the whole run instead of once per
# utterance, and explicitly as UTF-8 so the Cyrillic text is written the same
# way regardless of the system locale (pandas reads it back as UTF-8).
# Append mode is kept on purpose: re-running this cell adds rows to the
# existing file, so remove the file first if a fresh result is wanted.
with SequentialMatrixReader(feats_rspec) as feats_reader, \
        SequentialMatrixReader(ivectors_rspec) as ivectors_reader, \
        CompactLatticeWriter(lat_wspec) as lat_writer, \
        open(TRANS_PATH, 'a', encoding='utf-8') as trans_file:
    for (fkey, feats), (ikey, ivectors) in zip(feats_reader, ivectors_reader):
        # Both streams must describe the same utterance; an explicit raise is
        # used because `assert` is removed when Python runs with -O.
        if fkey != ikey:
            raise ValueError(
                "feature/i-vector key mismatch: {!r} != {!r}".format(fkey, ikey))
        out = asr.decode((feats, ivectors))
        lat_writer[fkey] = out['lattice']
        # One line per utterance: "<key>\t<lower-cased text>".
        trans_file.write('{}\t{}\n'.format(fkey, out['text'].lower()))

In [12]:
# Load the tab-separated transcription file (no header row) into a DataFrame
# with named columns, then display it as the cell's output.
transcriptions = pd.read_csv(
    TRANS_PATH,
    sep='\t',
    names=['Сегмент', 'Транскрибация'],
    header=None,
)
transcriptions

Unnamed: 0,Сегмент,Транскрибация
0,example.0-2-0000746-0000832,пять
1,example.0-2-0001277-0001377,бом
2,example.1-2-0000078-0000700,наталья благодарю вас за ожиданьем по данному ...
3,example.1-2-0000700-0000886,постарайтесь пожалуйста омовением
4,example.1-2-0000940-0001262,я в свою очередь с вами прощайте всего дом про...
