# Experiments

In [98]:
import gradio as gr
import librosa
import torch
import jiwer
import numpy as np
import pandas as pd
# import warnings

from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, HubertForCTC, Speech2TextProcessor, Speech2TextForConditionalGeneration
# from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForMaskedLM

# warnings.filterwarnings("ignore")

The `Wav2Vec2Processor` bundles a feature extractor and a tokenizer in one object; whether it performs feature extraction or tokenization depends on how `__call__` is invoked. More info [here](https://huggingface.co/transformers/model_doc/wav2vec2.html#transformers.Wav2Vec2Processor.__call__)

In [161]:
# Deprecated way of loading wav2vec2 (tokenizer + masked-LM head), kept commented out for reference.
# NOTE(review): this is the only place in the notebook where `tokenizer` is assigned, so the
# cells that call `tokenizer(...)` need these two lines (and the matching commented-out import)
# re-enabled before they can run -- confirm which path is intended.
# tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
# model = Wav2Vec2ForMaskedLM.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")

# Load a CTC model together with its processor into the global names `processor` and `model`.
# Exactly one processor/model pair should be active: the wav2vec2 pair is commented out,
# the HuBERT pair below is the one currently in use.
# processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-robust-ft-libri-960h")
# model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-robust-ft-libri-960h")
processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")



Use ffmpeg to convert the MP3 into a FLAC file resampled to 16 kHz (`-ar 16000`), keeping only the first 45 seconds (`-t 45`): <br>
```sh
ffmpeg -i apple/Q4\ FY21\ Apple\ Quarterly\ Earnings\ Cal.mp3 -ar 16000  -t 45 apple/Q4\ FY21\ Apple\ Quarterly\ Earnings\ Cal45sec_1.flac
```
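Use ffmpeg to convert the MP3 into a FLAC file resampled to 16 kHz, keeping only a specific time window, here 10 seconds starting at 00:00:05 (`-ss 00:00:05 -t 00:00:10`). Note that this command writes to the same output file name as the previous one: <br>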
```sh
ffmpeg -i apple/Q4\ FY21\ Apple\ Quarterly\ Earnings\ Cal.mp3 -ar 16000  -ss 00:00:05 -t 00:00:10 apple/Q4\ FY21\ Apple\ Quarterly\ Earnings\ Cal45sec_1.flac
```

In [148]:
# define speech-to-text function for Hubert and wav2vec2 with processor and CTC
def asr_transcript(audio_file):
    """Transcribe an audio file block by block with the global CTC model.

    The file is read through ``librosa.stream`` in blocks of 20 frames of
    16000 samples. Each block goes through the module-level ``processor`` and
    ``model``, is decoded greedily (argmax over the logits), lower-cased and
    appended to the result followed by ``". "``.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        str: The concatenated transcript; an empty string if the stream
        yields no block.
    """
    pieces = []

    # 20 frames x 16000 samples per block. That is 20 seconds only when the
    # file itself is sampled at 16 kHz, which is also the rate declared to the
    # processor below; nothing in this function resamples the audio.
    stream = librosa.stream(
        audio_file, block_length=20, frame_length=16000, hop_length=16000
    )

    for speech in stream:
        if speech.ndim > 1:
            # librosa lays multi-channel audio out as (channels, samples), so the
            # down-mix has to run over axis 0. The earlier
            # `speech[:, 0] + speech[:, 1]` indexed the sample axis instead.
            # librosa.stream is expected to down-mix by default, so this is
            # only a safeguard -- TODO confirm for the installed version.
            speech = librosa.to_mono(speech)

        input_values = processor(speech, sampling_rate=16_000, return_tensors="pt").input_values
        with torch.no_grad():
            logits = model(input_values).logits
            predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        pieces.append(transcription.lower() + ". ")

    return "".join(pieces)


In [19]:
# define speech-to-text function for Hubert and wav2vec2 with processor and CTC with librosa load
def asr_transcript(audio_file):
    """Transcribe a whole audio file in one forward pass of the global CTC model.

    The complete file is loaded with ``librosa.load(sr=16000)``, passed through
    the module-level ``processor`` and ``model``, and decoded greedily (argmax
    over the logits).

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        str: The lower-cased transcription followed by ``". "``.
    """
    # The entire recording is held in memory; there is no chunking here.
    waveform, _ = librosa.load(audio_file, sr=16000)

    features = processor(waveform, sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model(features.input_values).logits.argmax(dim=-1)

    text = processor.batch_decode(predicted_ids)[0]
    return text.lower() + ". "


In [None]:
asr_transcript(r"../../../speech2text/apple/Audio/2021-Oct-29-AAPL.OQ-139435924054.flac")

In [162]:
# wav2vec2_full_tr = asr_transcript(r"../../../speech2text/apple/Audio/2021-Oct-29-AAPL.OQ-139435924054.flac")

In [None]:
# with open("../../../speech2text/apple/hubert_2021-Oct-29-AAPL.OQ-139435924054.txt",'wt') as file:
#     file.write(wav2vec2_full_tr)

In [118]:
asr_transcript(r"../../../speech2text/ytclipcc/audio.flac")

"do you want to know what it is tthe matrix is everywhereit is all around us even now in this very room you can see it when you look out your window or when you turn on your television you can feel it when you go to work when you go to church when you pay your taxes it is the world that has been pulled over your eyes to blind you from the truth what truth that you are a slave nio like everyone else you were born into bondage born into a prison that you cannot smell or taste or touch a prison for your mine unfortunately no one can be told what the matric is you have to see it for yourself this is your last chance after this there is no turning back you take the blue pill the story ends you wake up in your bed and believe whatever you want  you take the red pill you stay in wonderland and i show you how deep the rabbit home goes remember all i'm offering is the truth nothing more. "

# define speech-to-text function with wav2vec2 tokenizer and Wav2Vec2ForMaskedLM
def asr_transcript(audio_file):
    """Transcribe an audio file block by block using the deprecated tokenizer path.

    The file is read through ``librosa.stream`` in blocks of 20 frames of
    16000 samples. Each block is encoded with the module-level ``tokenizer``,
    run through the module-level ``model`` and decoded greedily (argmax over
    the logits). The text is appended as returned by the tokenizer (no
    lower-casing), followed by ``"."`` with no trailing space.

    NOTE(review): no active line in the visible notebook assigns ``tokenizer``
    (it only appears in commented-out loading code) -- confirm it is defined
    before calling this function.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        str: The concatenated transcript; an empty string if the stream
        yields no block.
    """
    pieces = []

    # 20 frames x 16000 samples per block; nothing in this function resamples
    # the audio, so the block duration depends on the file's own sample rate.
    stream = librosa.stream(
        audio_file, block_length=20, frame_length=16000, hop_length=16000
    )

    for speech in stream:
        if speech.ndim > 1:
            # librosa lays multi-channel audio out as (channels, samples), so the
            # down-mix has to run over axis 0. The earlier
            # `speech[:, 0] + speech[:, 1]` indexed the sample axis instead.
            speech = librosa.to_mono(speech)

        input_values = tokenizer(speech, return_tensors="pt").input_values
        with torch.no_grad():
            logits = model(input_values).logits
            predicted_ids = torch.argmax(logits, dim=-1)
        transcription = tokenizer.batch_decode(predicted_ids)[0]
        pieces.append(transcription + ".")

    return "".join(pieces)


In [163]:
# Switch the global `processor` / `model` pair to the Speech2Text checkpoint.
# Both names are rebound here, so any code that reads them after this cell has
# run works with this checkpoint rather than the one loaded earlier.
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-large-librispeech-asr")
model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-large-librispeech-asr")

In [164]:
# Transcribe the earnings-call recording block by block with the Speech2Text
# model currently bound to the global `processor` / `model`.
transcript = ""
stream = librosa.stream(
    # "../../../speech2text/apple/Audio/2021-Oct-29-AAPL.OQ-139435924054-14m-16m12s.flac", block_length=20, frame_length=16000, hop_length=16000
    "../../../speech2text/apple/Audio/2021-Oct-29-AAPL.OQ-139435924054.flac", block_length=20, frame_length=16000, hop_length=16000
)
for speech in stream:
    if speech.ndim > 1:
        # librosa lays multi-channel audio out as (channels, samples), so the
        # down-mix has to run over axis 0. The earlier
        # `speech[:, 0] + speech[:, 1]` indexed the sample axis instead.
        speech = librosa.to_mono(speech)

    inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")
    # Pass the audio features positionally: they are not token ids, and the
    # first positional parameter of `generate` accepts the model's main input.
    generated_ids = model.generate(inputs["input_features"], attention_mask=inputs["attention_mask"])

    transcription = processor.batch_decode(generated_ids)[0]
    # Each block's text is terminated with ". ", so these periods mark block
    # boundaries, not sentence boundaries.
    transcript += transcription + ". "
# transcript

  input_lengths = (input_lengths - 1) // 2 + 1


In [None]:
# Save the Speech2Text transcript held in `transcript`. The encoding is given
# explicitly so the file does not depend on the platform's default encoding.
with open("../../../speech2text/apple/s2t_2021-Oct-29-AAPL.OQ-139435924054.txt", 'wt', encoding="utf-8") as file:
    file.write(transcript)

In [1]:
import pandas as pd

In [2]:
# Load the saved transcript into a one-column frame whose column is named `text`.
tr = pd.read_csv(
    "../../../speech2text/apple/apple_tr.txt",
    names=['text'],
)

In [36]:
from word2number import w2n

In [13]:
# Convert number words to numbers, one whitespace-separated token at a time,
# for characters 3000-5000 of the first transcript row. Because every token is
# converted on its own, a multi-word number such as "TWENTY SIX" yields two
# separate entries rather than one combined value.
str_conv = []
for word in tr.text.str[3000:5000].values[0].split():
    try:
        str_conv.append(w2n.word_to_num(word))
    except ValueError:
        # The token is not a number word: keep it unchanged. Catching only
        # ValueError (instead of a bare `except:`) lets KeyboardInterrupt and
        # genuine programming errors surface.
        str_conv.append(word)

In [7]:
tr.text.str[3000:5000].values[0]

"N SHORTAGES AND COVED RELATED MANUFACTURING DISRUPTIONS EVEN SO WE SET AN ALL TIME RECORD FOR MAC.AND QUARTERLY RECORDS FOR IPHONE IPAD WERABLES HOME AND ACCESSORIES REPRESENTING THIRTY PER CENT YEAR OVER YEAR GROWTH IN PRODUCTS OUR SERVICES BUSINESS PERFORMED BETTER THAN WE EXPECTED WHERE WE HIT AN ALL TIME RECORD OF EIGHTEEN POINT THREE BILLION DOLLARS AND GREW TWENTY SIX PER CENT YEAR OVER YEAR AND WE SET.RDERLY RECORDS IN EVERY GEOGRAPHIC SEGMENT WITH STRONG DOUBLE DIGIT GROWTH ACROSS THE BOARD DURING FISCAL TWENTY TWENTY ONE WE EARNED NEARLY ONE THIRD OF OUR REVENUE FROM EMERGING MARKETS AND DOUBLED OUR BUSINESS IN INDIA AND VIA NAM WE ARE OPTIMISTIC ABOUT THE FUTURE ESPECIALLY AS WE SEE ST.DEMAND FOR OUR NEW PRODUCTS AT THE END OF THE SEPTEMBER QUARTER WE INTRODUCE OUR I PHONE THIRTEEN LINE UP AS WELL AS THE APPLE WATCH SERIES SEVEN I PAD AND I PAD MANY ALL OF WHICH REPRESENT SIGNIFICANT ADVANCES THE IPHONE THIRTEEN AND IPHONE THIRTEEN MANY ALONGSIDE THE IPHONE THIRTEEN PRO AND 

In [36]:
tr.text.str[3000:5000].tolist()

["N SHORTAGES AND COVED RELATED MANUFACTURING DISRUPTIONS EVEN SO WE SET AN ALL TIME RECORD FOR MAC.AND QUARTERLY RECORDS FOR IPHONE IPAD WERABLES HOME AND ACCESSORIES REPRESENTING THIRTY PER CENT YEAR OVER YEAR GROWTH IN PRODUCTS OUR SERVICES BUSINESS PERFORMED BETTER THAN WE EXPECTED WHERE WE HIT AN ALL TIME RECORD OF EIGHTEEN POINT THREE BILLION DOLLARS AND GREW TWENTY SIX PER CENT YEAR OVER YEAR AND WE SET.RDERLY RECORDS IN EVERY GEOGRAPHIC SEGMENT WITH STRONG DOUBLE DIGIT GROWTH ACROSS THE BOARD DURING FISCAL TWENTY TWENTY ONE WE EARNED NEARLY ONE THIRD OF OUR REVENUE FROM EMERGING MARKETS AND DOUBLED OUR BUSINESS IN INDIA AND VIA NAM WE ARE OPTIMISTIC ABOUT THE FUTURE ESPECIALLY AS WE SEE ST.DEMAND FOR OUR NEW PRODUCTS AT THE END OF THE SEPTEMBER QUARTER WE INTRODUCE OUR I PHONE THIRTEEN LINE UP AS WELL AS THE APPLE WATCH SERIES SEVEN I PAD AND I PAD MANY ALL OF WHICH REPRESENT SIGNIFICANT ADVANCES THE IPHONE THIRTEEN AND IPHONE THIRTEEN MANY ALONGSIDE THE IPHONE THIRTEEN PRO AND

In [79]:
# Open the 45-second clip as a block stream: 10 frames of 16000 samples per block.
# NOTE(review): this binds `stream` to a block generator, whereas the commented-out
# `librosa.load` alternative below would bind it to a single waveform array --
# confirm which form the cell that consumes `stream` expects.
stream = librosa.stream(
    r"../../../speech2text/apple/Q4 FY21 Apple Quarterly Earnings Cal45sec_1.flac",
    block_length=10,
    frame_length=16000,
    hop_length=16000,
)
# stream, _ = librosa.load(
#         r"../../../speech2text/apple/Q4 FY21 Apple Quarterly Earnings Cal45sec_1.flac", sr=16000,
#     )

In [77]:
# One-shot inference on whatever `stream` currently holds, using the global
# `tokenizer` and `model`; only the first decoded sequence is kept.
# NOTE(review): `tokenizer` is not assigned by any active line in the visible
# notebook, and `stream` must be something the tokenizer accepts as raw audio --
# confirm both before running this cell.
input_values = tokenizer(stream, return_tensors="pt", padding='longest').input_values
with torch.no_grad():
    logits = model(input_values).logits
    predicted_ids = logits.argmax(dim=-1)
transcription = tokenizer.batch_decode(predicted_ids)[0]
transcript = transcription + "."

In [78]:
transcript

"GOOD DAY AND WELCOME TO THE APPLE C FOR FISCAL YEAR TWENTY TWENTY ONE EARNINGS CONFERENCE CALL TO DAY'S CALL IS BEING RECORDED AT THIS TIME FOR OPENING REMARKS AND INTRODUCTIONS I WOULD LIKE TO TURN THE CALL OVER TO TAGES CALAN DIRECTOR OF INVESTIDRELATIONS AND CORPORATE FINANCE PLEASE GO AHEAD THANK YOU GOOD AFTERNOON AND THANK YOU FOR JOINING US SPEAKING TO DAY FIRST IS APPLE C E O TIM COOK AND HE'LL BE FOLLOWED BY C F O LUCA MAISTRY AFTER THAT WE'LL OPEN THE CALL TO QUESTIONS FROM ANALYSTS PLEASE NOTE THAT SOME OF THE INFORMATION YOU'LL HEAR DURING OUR DISCUSSION TO DAY WILL CONSIST OF FORWARD LOOKING STATEMENTS INCLUDING WITHOUT LIMITATION THOSE REGARDING REVENUE GROSS MARGIN OPERATING EXPENSES OTHER."

# 2. DEFINE GRADIO INTERFACE

In [4]:
# Build the upload-and-transcribe UI. `fn` is bound to whichever `asr_transcript`
# definition is current when this cell runs; the function receives the uploaded
# audio as a file path and its return value fills the text box.
gradio_ui = gr.Interface(
    fn=asr_transcript,
    inputs=gr.inputs.Audio(label="Upload Audio File", type="filepath"),
    outputs=gr.outputs.Textbox(label="Auto-Transcript"),
    title="Speech-to-Text with HuggingFace+Wav2Vec2",
    description="Upload an audio clip, and let AI do the hard work of transcribing",
)


## 2.1 LAUNCH

In [None]:
#gradio_ui.launch(share=True)
gradio_ui.launch()

## WER

In [110]:
gt = '''
Good day, and welcome to the Apple q four Fiscal Year 2021 Earnings Conference Call. Today's call is being recorded. At this time, for opening remarks and introductions, 
I would like to turn the call over to Tejas Gala, Director of Investor Relations and Corporate Finance. Please go ahead. Thank you. Good afternoon, and thank you for joining us. 
Speaking today first is Apple's CEO, Tim Cook; and he'll be followed by CFO, Luca Maestri. After that, we'll open the call to questions from analysts.  
Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including, without limitation, those regarding revenue, gross margin, operating expenses, other
'''
hypothesis_s2t = '''
good day and welcome to the apple cue for fiscal year twenty one earning's comforts call to days call is being recorded at this time for opening remarks and introductions 
i like to turn the call over to pages college director of investor relations in corpus finance please go ahead thank you good afternoon and think. he rejoining us speaking 
to day first is apple seat yeo tim cook and he'll be followed by cepho lukamiestry after that we'll open the call to questions from annalist please note that some of the information 
you'll hear during our discussion to day will consist of forward looking statements including without limitation. those regarding revenue gross margin operating expenses other.
'''
hypothesis_hubert = '''
good day and welcome to the appl cue four fiscal year twenty twenty one earnings conference call to day's call is being recorded at this time for opening remarks and introductions 
i would like to turn the call over to tages cala director of invested relations in corporete finance please go ahead thank you good afternoon and thank you for joining us speaking 
to day first is apple se e o tim cook and he'll be followed by c f o lucca myistry after that we'll open the call to questions from annalysts please note that some of the information 
you'll hear during our discussion to day will consist of forward looking statements including without limitation those regarding revenue gross margin operating expenses other in.
'''
hypothesis_wav2vec = '''
good day and welcome to the apple cu for fiscal year twenty twenty one earnings conference call to day's call is being recorded at this time for opening remarks and introductions 
i would like to turn the call over to tajuscarlan director of investor relations and corporate finance please go ahead thank you good afternoon and thank you for joining us speaking 
to day first is apple c  o tim cook and he'll be followed by c f o luka maistry after that we'll open the call to questions from analist please note that some of the information you'll hear 
during our discussion to day will consist of forward looking statements including without limitation those regarding revenue gross margin operating expenses other in.
'''

In [111]:
# Text normalisation applied to both the reference (`gt`) and every hypothesis
# before scoring, so that case, punctuation and spacing differences are not
# counted as word errors.
transformation = jiwer.Compose([
    jiwer.ToLowerCase(),
    jiwer.RemoveWhiteSpace(replace_by_space=True),
    jiwer.Strip(),
    jiwer.ExpandCommonEnglishContractions(),
    jiwer.RemoveEmptyStrings(),
    jiwer.RemovePunctuation(),
    jiwer.RemoveMultipleSpaces(),
    jiwer.ReduceToListOfListOfWords(word_delimiter=' '),
])

# Word error rate in percent (two decimals) for each model against `gt`,
# displayed as alternating label / value entries.
tuple(
    entry
    for label, hypothesis in (
        ('s2t:', hypothesis_s2t),
        ('hubert:', hypothesis_hubert),
        ('wav2vec:', hypothesis_wav2vec),
    )
    for entry in (
        label,
        round(
            jiwer.wer(
                gt,
                hypothesis,
                truth_transform=transformation,
                hypothesis_transform=transformation,
            ) * 100,
            ndigits=2,
        ),
    )
)

('s2t:', 25.21, 'hubert:', 23.53, 'wav2vec:', 20.17)

In [112]:
gt = '''
This fiscal year, we reported three hundred sixty six billion of revenue, which represents thirty three percent annual growth. We also achieved more than twenty percent growth across all of our product 
categories and in every geographic segment. And today, Apple is reporting another very strong quarter. Demand was very robust, and we set a new September quarter record of 
eighty three point four billion, up twenty nine percent from last year and in line with what we discussed on our last call despite larger than expected supply constraints.  
We estimate these constraints had around a six billion revenue dollar impact, driven primarily by industry wide silicon shortages and COVID related manufacturing disruptions. 
Even so, we set an all time record for Mac and quarterly records for iPhone, iPad, Wearables, Home and Accessories, representing thirty percent year over year growth in products.  
Our Services business performed better than we expected, where we hit an all time record of eighteen point three billion and grew twenty six percent year over year.
'''
hypothesis_s2t = '''
this does for year we reported three hundred and sixty six billion dollars in revenue which represents thirty three per cent annual growth we also achieved more than twenty 
per cent growth across all our product categories and in every geographic segment and to day apples reporting a nut. very strong quarter demand was very robust and we set a 
new september quarter record of eighty three point four billion dollars up twenty nine per cent from last year and in line with what we discussed on our last call despite larger 
than expected supply constraints we estimate these constraints. franks had around a six million dollar revenue dollar impact driven primarily by industry wide silicate shortages 
and covet related manufacturing disruptions even so we set an all time record for mac and quarterly records for iphone i paid wearable's home in accessories representing thirty for. 
you're of your growth in products our services business performed better than we expected where we had an all time record of eighteen point three million dollars and grew twenty six per cent year over year.

'''
hypothesis_hubert = '''
this fiscal year we reported three hundred and sixty six billion dollars in revenue which represents thirty three per cent annual growth we also achieved more than twenty per cent 
growth across all ofour product categories and in every geographic segmet and today apple is reporting another very strong quarter demand was very robust and we set a new september 
quarter record of eighty three point four billion dollars up twenty nine per cent from last year and in line with what we discussed on our last call despite larger than expected supply 
constraints we estimate these constraints had around a six billion dollar revenue dollar impact driven primarily by industry wide sillican shortages and coved related manufacturing disruptions 
even so we set an all time record for mak and quarterly records for iphone ipad waraple's home and accessories representing thirty per cent yeary year growth in products our services business 
performed better than we expected where we hit an all time record of eighteen point three billion dollars and grew twenty six per cent year over year.
'''
hypothesis_wav2vec = '''
this fiscal year we reported three hundred and sixty six billion dollars in revenue which represents thirty three per cent annual growth we also achieved more than twenty per cent growth 
across all of our product categories and in every geographic segment and today apple is reporting another very strong quarter demand was very robust and we set a new september quarter record of 
eighty three point four billion dollars of twenty nine per cent from last year and in line with what we discussed on our last call despite larger than expected supply constraints we estimate these 
constraints had around a six billion dollar revenue dollar impact driven primarily by industry wide silican shortages and covet related manufacturing disruptions even so we set an all time record for 
macc and quarterly records for iphone ipad at weraple's home and accessories representing thirty per cent year oer year growth in products our services business performed better than we expected where 
we head an all time record of eighteen point three billion dollars and grew twenty six per cent year over year.
'''

In [113]:
# Text normalisation applied to both the reference (`gt`) and every hypothesis
# before scoring; this cell rebuilds the pipeline so it can be run on its own.
transformation = jiwer.Compose([
    jiwer.ToLowerCase(),
    jiwer.RemoveWhiteSpace(replace_by_space=True),
    jiwer.Strip(),
    jiwer.ExpandCommonEnglishContractions(),
    jiwer.RemoveEmptyStrings(),
    jiwer.RemovePunctuation(),
    jiwer.RemoveMultipleSpaces(),
    jiwer.ReduceToListOfListOfWords(word_delimiter=' '),
])

# Word error rate in percent (two decimals) for each model against the current
# `gt`, displayed as alternating label / value entries.
tuple(
    entry
    for label, hypothesis in (
        ('s2t:', hypothesis_s2t),
        ('hubert:', hypothesis_hubert),
        ('wav2vec:', hypothesis_wav2vec),
    )
    for entry in (
        label,
        round(
            jiwer.wer(
                gt,
                hypothesis,
                truth_transform=transformation,
                hypothesis_transform=transformation,
            ) * 100,
            ndigits=2,
        ),
    )
)

('s2t:', 22.35, 'hubert:', 14.71, 'wav2vec:', 14.71)

In [114]:
gt = '''
We set a September quarter revenue record of eighty three point four billion, an increase of nearly nineteen billion or twenty nine percent from a year ago despite larger than expected supply constraints.  
We also reached new q four records in every geographic segment with strong double digit growth in each one of them.
And it was a record September quarter for both products and services. On the products side, revenue was sixty five point one billion, up thirty percent over a year ago as
we experienced better-than-expected demand for our products despite supply constraints that we estimated at around six billion.  
We grew in each of our product categories with an all time record for Mac and September quarter records for iPhone, for iPad and for Wearables, Home and Accessories. This level of sales performance,
combined with the unmatched loyalty of our customers and the strength of our ecosystem, drove our installed base of active devices to a new all time record.  
Our Services set an all time revenue record of eighteen point three billion, up twenty six percent over a year ago, with September quarter records in every geographic segment and in every Services category.  
Company gross margin was forty two point two percent, down one hundred and ten basis points from last quarter due to higher costs and a different mix of products,
partially offset by leverage. Products gross margin was thirty four point three percent, down one hundred seventy basis points sequentially as higher cost structures were partially
offset by leverage and mix. Services gross margin was seventy point five percent, up seventy basis points sequentially, mainly due to a different mix.  
Net income of twenty point six billion and diluted earnings per share of one point two four, both grew over sixty percent year over year and were September quarter records.
'''
hypothesis_s2t = '''
we set a september quarter revenue record of eighty three point four billion and increase of nearly nineteen billion or twenty nine per cent from a year ago despised larger 
than expected supply constrains we also reached new cuefal records in every geographic segment with strong double double doubled. jit drawled in each one of them and he was a
regular september quarter for both products and services on the product side revenue was sixty five point one billion a thirty per cent over a year ago as we experienced better
than expected demand for our products despite. by constraints that we estimated at the round six million dollars we grew in each of our present categories with an all time
record for mac and september quarter records for iphon for eye path and for wearable's home and accessories this level of safe performance come. with the immense loyalty of
our customers and the strength of our equal system drove our installed days of active devices to a new old american our services sat an old time revenue record of eighteen point 
three million up twenty six per cent over a year ago with september. quarter records in every geographic segment and in every serviceous category company gross marching was forty 
two point two per cent down one hundred ten basis points from last quarter due to higher costs and at different mix of products partially offset by leverage products grows margin. 
was thirty four point three per cent down one hundred seventy paces point sequentially a higher costructures were partially offset by leverage and mixed services grows marching was 
seventy point five per cent up seventy paces point sequentially mainly due to a different mix nerienka. of twenty point six billion and a unit earnings for share of a dollar and twenty 
four cents both grew over sixty per cent year over year and were september quarter records.
'''
hypothesis_hubert = '''
we set a september quarter revenue record of eighty three point four billion an increase of nearly nineteen billion or twenty nine per cent from a year ago despite larger than expected 
supply constraints we also reached new cue four records in every geographic segment with strong double digit growth in each one of them and it was a reckled september quarter for 
both products and services on the product's side revenue was sixty five point one billion up thirty per cent over a year ago as we experienced better than expected demand for our 
products despite supply constraints that we estimated at a round six billion dollars we grew in each of our produrt categories with an old time record for mak and september quarter 
records for iphon for ypad and for wearables home and accessories this level of sales performance combined with the unmatched loyalty of our customers and the strength of our eco system 
drove our installed base of active devices to a new ald time record our services set an old time revenue record of eighteen point three billion up twenty six per cent over a year ago with 
september quarter records in every geographic segment and in every services category company gross margin was forty two point two per cent down one hundred ten basis points from last quarter 
due to higher costs and at different mix of products partially offset by leverage product's gross margin was thirty four point three per cent down one hundred seventy basis point sequentially 
as higher cost structures were partially offset by leverages and mik services gross margin was seventy point five per cent up seventy basis point sequentially mainly due to a different mix net 
income of twenty point six billion and diluted earnings per share of a dollar and twenty four cents both grew over sixty per cent year over year and were september quarter record.
'''
hypothesis_wav2vec = '''
we sat a september quarter revenue record of eighty three point four billion an increase of nearly nineteen billion or twenty nine per cent from a year ago despied larger than expected supply 
constrains we also reached new cuw four records in every geographic segment with strong double digit groath in each one of them and it was a recs september quarter for both products and services 
on the product side revenue was sixty five point one billion up thirty per cent over a year ago as we experienced better than expected demand for our products despite suppl constrains that we 
estimated at around six billion dollars we grew in each of our produc categories with an alltime record for mack and september quarter records for iyphone for iypad and for warabols home and 
accessories this level of sale performancs combined with the amatch loyalty of our customers and the strength of our eco system drove our installed base of active devices to a new altime 
record our services sat an all time revenue record of eighteen point three billion up twenty six per cent over a year ago with september uarter records in every geographic segment and in 
every services category company gross margin was forty two point two per cent down one hundred  ten basis points from last quarter due to higher costs and at different mix of products 
partially offset by leverage products gross margin was thirty four point three per cent down one hundred  seventy basis point sequentially as higher cost tructures were partially ofset 
by leverage and mix services gross margin was seventy point five per cent up seventy basis point sequentially mainely due to a different mix net income of twenty point six billion and diluded 
earnings per share of a dollar and twenty four cent both grew over sixty per cent year over year and were september quarter records.
'''

In [115]:
# Text normalisation applied to both the reference (`gt`) and every hypothesis
# before scoring; this cell rebuilds the pipeline so it can be run on its own.
transformation = jiwer.Compose([
    jiwer.ToLowerCase(),
    jiwer.RemoveWhiteSpace(replace_by_space=True),
    jiwer.Strip(),
    jiwer.ExpandCommonEnglishContractions(),
    jiwer.RemoveEmptyStrings(),
    jiwer.RemovePunctuation(),
    jiwer.RemoveMultipleSpaces(),
    jiwer.ReduceToListOfListOfWords(word_delimiter=' '),
])

# Word error rate in percent (two decimals) for each model against the current
# `gt`, displayed as alternating label / value entries.
tuple(
    entry
    for label, hypothesis in (
        ('s2t:', hypothesis_s2t),
        ('hubert:', hypothesis_hubert),
        ('wav2vec:', hypothesis_wav2vec),
    )
    for entry in (
        label,
        round(
            jiwer.wer(
                gt,
                hypothesis,
                truth_transform=transformation,
                hypothesis_transform=transformation,
            ) * 100,
            ndigits=2,
        ),
    )
)

('s2t:', 25.49, 'hubert:', 15.36, 'wav2vec:', 18.3)

In [116]:
gt_ex = "Hi, I am at the Upsilon gas station on twenty seventh and Pratt Avenue and my credit card has been declined three times. I know I have room on the card because I paid it off last week."
ht_ex = "Hi I am at the up silent gas station on twenty seventh and brat avenue and my credit card has been designed three times I know have room on the cart because I paid off last week"

In [117]:
np.array([' '.join(i) for i in transformation(gt_ex)]), np.array([' '.join(i) for i in transformation(ht_ex)])

(array(['hi i am at the upsilon gas station on twenty seventh and pratt avenue and my credit card has been declined three times i know i have room on the card because i paid it off last week'],
       dtype='<U181'),
 array(['hi i am at the up silent gas station on twenty seventh and brat avenue and my credit card has been designed three times i know have room on the cart because i paid off last week'],
       dtype='<U177'))

In [133]:
jiwer.compute_measures(truth=gt_ex,hypothesis=ht_ex,truth_transform=transformation,hypothesis_transform=transformation)

{'wer': 0.18421052631578946,
 'mer': 0.1794871794871795,
 'wil': 0.27169274537695587,
 'wip': 0.7283072546230441,
 'hits': 32,
 'substitutions': 4,
 'deletions': 2,
 'insertions': 1}

**WER Formula**

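$$\mathrm{WER} = \frac{S + D + I}{H + S + D}$$

where $S$, $D$, $I$ and $H$ are the numbers of substitutions, deletions, insertions and hits. For the example above: $(4 + 2 + 1) / (32 + 4 + 2) = 7 / 38 \approx 0.1842$.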

In [107]:
round(jiwer.wer(truth=gt_ex,hypothesis=ht_ex,truth_transform=transformation,hypothesis_transform=transformation)*100,ndigits=2)

18.42