**Links:**
+ [Tutorial](https://machinelearningmastery.com/calculate-bleu-score-for-text-python/) on calculating the BLEU score for text in Python

### Individual N-Gram Scores

In [1]:
# n-gram individual BLEU
from nltk.translate.bleu_score import sentence_bleu

# Toy sentence pair for the demo: the two strings differ only in the last word.
reference = 'this is a cat'
candidate = 'this is a test'

def compute_bleu(candidate, reference, gram=1):
    """Return the individual ``gram``-order BLEU score of one sentence pair.

    Args:
        candidate: Hypothesis sentence as a plain string.
        reference: Reference sentence as a plain string.
        gram: 1-based n-gram order that receives the whole weight.  It is
            not range-checked: it is used as ``weights[gram - 1]``, so 0
            wraps around to the 4-gram slot and values above 4 raise
            ``IndexError``.

    Returns:
        The value ``sentence_bleu`` returns for the single-order weights.
    """
    # split() without an argument collapses runs of whitespace, so doubled
    # or trailing spaces do not turn into empty-string tokens.
    candidate_tokens = candidate.split()
    # sentence_bleu is given a list of tokenized references (here just one).
    reference_tokens = [reference.split()]
    weights = [0, 0, 0, 0]
    weights[gram - 1] = 1
    return sentence_bleu(reference_tokens, candidate_tokens, weights=weights)
        
# Score every n-gram order exactly once (orders 1..4) so the per-order lines
# and the mean are built from the same values.  The previous mean iterated
# range(4), i.e. gram=0..3, and only came out right because gram=0 wrapped
# around to the 4-gram weight through negative indexing.
scores = [compute_bleu(candidate, reference, gram=n) for n in range(1, 5)]
for n, score in enumerate(scores, 1):
    print('Individual %d-gram: %f' % (n, score))

print("Mean: %f" % (sum(scores) / len(scores)))

Individual 1-gram: 0.750000
Individual 2-gram: 0.666667
Individual 3-gram: 0.500000
Individual 4-gram: 0.000000
Mean: 0.479167


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


### Cumulative N-Gram Scores

In [2]:
from nltk.translate.bleu_score import sentence_bleu
# Pre-tokenized inputs: one candidate and a list holding a single reference.
candidate = ['this', 'is', 'a', 'test']
reference = [['this', 'is', 'small', 'test']]
# The same weight on each of the four n-gram orders.
score = sentence_bleu(reference, candidate, weights=(0.25,) * 4)
print(score)

1.0547686614863434e-154


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


### Compute BLEU from metadata.csv

In [3]:
from nltk.translate.bleu_score import sentence_bleu


# Location of the metadata file scored in this cell (adjacent string literals
# are joined into one path).
meta = (
    "/root/PycharmProjects/Tensorflow-2.X/"
    "07 - DeepSpeech Vad Transcriber + Create Wavs/"
    "audio/Going to public places/metadata.csv"
)



def compute_bleu(candidate, reference, gram=1):
    """Return the individual ``gram``-order BLEU score of one sentence pair.

    Args:
        candidate: Hypothesis sentence as a plain string.
        reference: Reference sentence as a plain string.
        gram: n-gram order that receives the whole weight, from 1 to 4.

    Returns:
        The value ``sentence_bleu`` returns for the single-order weights.

    Raises:
        ValueError: If ``gram`` is outside 1..4.
    """
    # Reject bad orders up front: without this check 0 and negative values
    # silently select another order through negative indexing.
    if not 1 <= gram <= 4:
        raise ValueError(f"gram must be between 1 and 4, got {gram!r}")
    # split() without an argument collapses runs of whitespace, so doubled
    # or trailing spaces do not turn into empty-string tokens.
    candidate_tokens = candidate.split()
    # sentence_bleu is given a list of tokenized references (here just one).
    reference_tokens = [reference.split()]
    weights = [0, 0, 0, 0]
    weights[gram - 1] = 1
    return sentence_bleu(reference_tokens, candidate_tokens, weights=weights)

# Scores are bucketed by how the row starts: "MV" rows go to bleu1 and
# "SA" rows go to bleu2.
total = {"bleu1":[], "bleu2":[]}
with open(meta, "r", encoding="utf-8") as file:
    lines = file.read().splitlines()

for line in lines:
    if line.startswith('MV'):
        key = "bleu1"
    elif line.startswith('SA'):
        key = "bleu2"
    else:
        continue  # rows with any other prefix are not scored
    # Each scored row has four '|'-separated fields: name, text, transcript
    # and a stored bleu value.  The stored value is ignored because the
    # score is recomputed here.
    _name, text, transcript, _stored_bleu = line.split('|')
    total[key].append(compute_bleu(transcript, text, gram=1))

for key, scores in total.items():
    if scores:
        print(f"Total {key}: {sum(scores) / len(scores)}")
    else:
        # Averaging an empty bucket would raise ZeroDivisionError.
        print(f"Total {key}: n/a (no matching rows)")

# Scores noted from an earlier run; kept as a bare string literal, which
# the notebook echoes as the cell's output value.
"""
Total bleu1: 0.5347381994290764
Total bleu2: 0.6913090188969255
"""

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Total bleu1: 0.6125758244644451
Total bleu2: 0.73698545859529


'\nTotal bleu1: 0.5347381994290764\nTotal bleu2: 0.6913090188969255\n'

### Compute new BLEU with trained model

In [4]:
import os

import deepspeech
import pydub
from nltk.translate.bleu_score import sentence_bleu

class DeepSpeech:
    """Small wrapper around ``deepspeech.Model`` with an external scorer enabled."""

    def __init__(self, model, scorer, alpha=0.931289039105002, beta=1.1834137581510284):
        """Load the model and attach the scorer.

        Args:
            model: Model file path handed to ``deepspeech.Model``.
            scorer: Scorer file path handed to ``enableExternalScorer``.
            alpha: First value forwarded to ``setScorerAlphaBeta``.
            beta: Second value forwarded to ``setScorerAlphaBeta``.
        """
        self.model = deepspeech.Model(model)
        self.model.enableExternalScorer(scorer)
        self.model.setScorerAlphaBeta(alpha, beta)

    def transcribe(self, audio):
        """Return the transcript the model produces for one WAV file.

        Args:
            audio: WAV source accepted by ``pydub.AudioSegment.from_wav``.

        Returns:
            The result of ``Model.stt`` for the decoded samples.
        """
        audio_segment = pydub.AudioSegment.from_wav(audio)
        # DeepSpeech expects 16-bit mono audio at the model's own sample
        # rate.  Resampling alone would let stereo or 8/24/32-bit files
        # through, so channel count and sample width are normalised too.
        # These conversions are no-ops for files already in that format.
        audio_segment = (
            audio_segment
            .set_frame_rate(self.model.sampleRate())
            .set_channels(1)
            .set_sample_width(2)
        )
        samples = audio_segment.get_array_of_samples()
        return self.model.stt(samples)

def counter(num=1, length=3):
    """Format ``num`` as a zero-padded string, e.g. 1 -> '001'.

    Args:
        num (int): Number to format.
        length (int): Minimum width of the result; shorter numbers are
            left-padded with zeros.

    Returns:
        (str) The padded number, e.g. ``counter(7, 4) == '0007'``.  A number
        with more digits than ``length`` is returned in full rather than
        having its leading digits cut off.
    """
    return str(num).zfill(length)

def compute_bleu(candidate, reference, gram=1):
    """Return the individual ``gram``-order BLEU score of one sentence pair.

    Args:
        candidate: Hypothesis sentence as a plain string.
        reference: Reference sentence as a plain string.
        gram: n-gram order that receives the whole weight, from 1 to 4.

    Returns:
        The value ``sentence_bleu`` returns for the single-order weights.

    Raises:
        ValueError: If ``gram`` is outside 1..4.
    """
    # Reject bad orders up front: without this check 0 and negative values
    # silently select another order through negative indexing.
    if not 1 <= gram <= 4:
        raise ValueError(f"gram must be between 1 and 4, got {gram!r}")
    # split() without an argument collapses runs of whitespace, so doubled
    # or trailing spaces do not turn into empty-string tokens.
    candidate_tokens = candidate.split()
    # sentence_bleu is given a list of tokenized references (here just one).
    reference_tokens = [reference.split()]
    weights = [0, 0, 0, 0]
    weights[gram - 1] = 1
    return sentence_bleu(reference_tokens, candidate_tokens, weights=weights)

# PATHS
model_path = 'model/output_graph.tflite'
scorer_path = 'model/output_graph.scorer'
base = "/root/PycharmProjects/Tensorflow-2.X/07 - DeepSpeech Vad Transcriber + Create Wavs/audio/Good manners"
meta = f"{base}/metadata.csv"
# VARIABLES
total = {"bleu1":[], "bleu2":[]}
model = DeepSpeech(model_path, scorer_path)

with open(meta, "r", encoding="utf-8") as file:
    lines = file.read().splitlines()

# Keep the usual 3-digit progress counter, but widen it when the file has
# more rows than three digits can show.
width = max(3, len(str(len(lines))))

for count, line in enumerate(lines, 1):
    if line.startswith('MV'):
        key = "bleu1"
    elif line.startswith('SA'):
        key = "bleu2"
    else:
        key = None  # row is not scored

    # Only parse and transcribe rows that are scored.  This skips blank or
    # unrelated rows instead of failing on them, and avoids running the
    # model on audio whose score would be thrown away.
    if key is not None:
        name, text, _, _ = line.split('|')
        audio_path = f"{base}/wavs/{name}.wav"
        transcript = model.transcribe(audio_path)
        total[key].append(compute_bleu(transcript, text, gram=1))

    # Countdown of remaining rows, redrawn in place on a single line.
    print(f"\rProcess: {counter(len(lines) - count, length=width)}", end="\r")


for key, scores in total.items():
    if scores:
        print(f"Total {key}: {sum(scores) / len(scores)}")
    else:
        # Averaging an empty bucket would raise ZeroDivisionError.
        print(f"Total {key}: n/a (no matching rows)")

TensorFlow: v2.3.0-6-g23ad988
DeepSpeech: v0.9.3-0-gf2e9c85


Total bleu1: 0.8120169619005472
Total bleu2: 0.9720834323256764
