In [None]:
# Copy the bundled package archives from the input dataset into the working directory.
!cp -r ../input/python-packages2 ./

In [None]:
# Unpack each archive and install its contents from local files only:
# --no-index disables package-index lookups and -f ./ adds the working directory as a link source.
!tar xvfz ./python-packages2/jiwer.tgz
!pip install ./jiwer/jiwer-2.3.0-py3-none-any.whl -f ./ --no-index
!tar xvfz ./python-packages2/normalizer.tgz
!pip install ./normalizer/bnunicodenormalizer-0.0.24.tar.gz -f ./ --no-index
!tar xvfz ./python-packages2/pyctcdecode.tgz
# The files below are installed one at a time with --no-deps, so pip does not
# try to resolve anything beyond the listed files.
# NOTE(review): the numpy wheel is tagged cp37, so it presumably needs a CPython 3.7 kernel — confirm.
!pip install ./pyctcdecode/attrs-22.1.0-py2.py3-none-any.whl -f ./ --no-index --no-deps
!pip install ./pyctcdecode/exceptiongroup-1.0.0rc9-py3-none-any.whl -f ./ --no-index --no-deps
!pip install ./pyctcdecode/hypothesis-6.54.4-py3-none-any.whl -f ./ --no-index --no-deps
!pip install ./pyctcdecode/numpy-1.21.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl -f ./ --no-index --no-deps
!pip install ./pyctcdecode/pygtrie-2.5.0.tar.gz -f ./ --no-index --no-deps
!pip install ./pyctcdecode/sortedcontainers-2.4.0-py2.py3-none-any.whl -f ./ --no-index --no-deps
!pip install ./pyctcdecode/pyctcdecode-0.4.0-py2.py3-none-any.whl -f ./ --no-index --no-deps

# KenLM Python bindings, installed from the bundled source archive.
!tar xvfz ./python-packages2/pypikenlm.tgz
!pip install ./pypikenlm/pypi-kenlm-0.1.20220713.tar.gz -f ./ --no-index --no-deps



In [None]:
pip install transformers==4.20.0 

In [None]:
import os
import numpy as np
from tqdm.auto import tqdm
from glob import glob
from transformers import AutoFeatureExtractor, pipeline
import pandas as pd
import librosa
import IPython
from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset
import torch
import gc
import wave
from scipy.io import wavfile
import scipy.signal as sps
import pyctcdecode
from transformers import Wav2Vec2ProcessorWithLM
from bnunicodenormalizer import Normalizer 

tqdm.pandas()
import warnings
warnings.filterwarnings("ignore")


# Load Model

In [None]:
class CFG:
    """Filesystem locations of the model and processor used for inference."""
    # Passed as `model=` when the speech-recognition pipeline is built.
    my_model_name = '../input/yellowking-dlsprint-model/YellowKing_model'
    # Directory loaded by Wav2Vec2ProcessorWithLM.from_pretrained below.
    processor_name = '../input/yellowking-dlsprint-model/YellowKing_processor'
    
# The processor provides the feature extractor, tokenizer and decoder
# that are later handed to the pipeline.
processor = Wav2Vec2ProcessorWithLM.from_pretrained(CFG.processor_name)

In [None]:
# Build the speech-recognition pipeline from the local model path, reusing the
# feature extractor, tokenizer and decoder of the already-loaded processor.
# Run on GPU 0 when CUDA is available; otherwise fall back to CPU (device=-1)
# instead of failing on a CPU-only runtime.
my_asrLM = pipeline(
    "automatic-speech-recognition",
    model=CFG.my_model_name,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    decoder=processor.decoder,
    device=0 if torch.cuda.is_available() else -1,
)

# Function declarations

In [None]:
def infer(audio_path):
    """Transcribe one audio file and return the predicted text.

    The file is loaded with librosa at the sampling rate of the processor's
    feature extractor and handed to the `my_asrLM` pipeline.

    args:
      audio_path : path to the audio file
    returns:
      the 'text' entry of the pipeline's prediction
    """
    target_rate = processor.feature_extractor.sampling_rate
    waveform, _ = librosa.load(audio_path, sr=target_rate)
    prediction = my_asrLM(waveform, chunk_length_s=112, stride_length_s=None)
    return prediction['text']


def batch_infer(audio_paths, batch_size):
    '''
    Transcribes a batch of audio files by calling infer on each path in turn.
    args:
      audio_paths  : paths of the audio files to transcribe <list of string>
      batch_size   : kept in the signature for callers; not used in the body
    returns:
      predicted texts, in the same order as audio_paths <list of string>
    '''
    return [infer(audio_file) for audio_file in audio_paths]


bnorm = Normalizer()


def normalize(sen):
    """Normalize a sentence word by word with `bnorm`.

    The sentence is split on whitespace, each word is passed through the
    normalizer, and words whose 'normalized' result is None are dropped.
    The remaining words are joined back with single spaces.
    """
    kept_words = []
    for token in sen.split():
        fixed = bnorm(token)['normalized']
        if fixed is not None:
            kept_words.append(fixed)
    return " ".join(kept_words)

def dari(sentence):
    """Ensure a sentence ends with the danda terminator (U+0964).

    args:
      sentence : text to terminate <string>
    returns:
      the sentence with a trailing danda appended when it was missing.
      Empty or non-string input is printed for inspection and returned
      unchanged, so a single bad row does not abort a whole run.
    """
    # Written as an escape so the terminator survives any re-encoding of the
    # source file; the previous literal was a mis-decoded form of this character.
    danda = "\u0964"

    # Explicit guard instead of a bare `except:` around the indexing.
    if not isinstance(sentence, str) or not sentence:
        print(sentence)
        return sentence

    if not sentence.endswith(danda):
        sentence += danda
    return sentence


def directory_infer(folder_path, batch_size):
    """Transcribe every `.wav` file in a folder and return the results as a DataFrame.

    args:
      folder_path : directory whose `.wav` files are transcribed
      batch_size  : number of files passed to batch_infer per progress-bar step
    returns:
      DataFrame with columns 'file_name' and 'transcriptions'; each
      transcription is passed through normalize and then dari.
    """
    wav_names = [entry for entry in os.listdir(folder_path) if entry.endswith('.wav')]

    predicted = []
    for start in tqdm(range(0, len(wav_names), batch_size)):
        chunk = wav_names[start:start + batch_size]
        chunk_paths = [os.path.join(folder_path, name) for name in chunk]
        predicted.extend(batch_infer(chunk_paths, batch_size))

    frame = pd.DataFrame({'file_name': wav_names, 'transcriptions': predicted})
    frame['transcriptions'] = frame['transcriptions'].apply(normalize)
    frame['transcriptions'] = frame['transcriptions'].apply(dari)

    return frame

In [None]:
# district_serial = {
#     'rangpur':1,
#     'kishoreganj':2,
#     'narail':3,
#     'chittagong':4,
#     'narsingdi':5,
#     'tangail':6,
#     'habiganj':7,
#     'barishal':8,
#     'sandwip':10,
#     'sylhet':9,
# }
# BATCH_SIZE = 16

# for district in district_serial:
#     input_path = os.path.join("/kaggle/input/only-dis/only_dis/only_dis", district)
#     print("====================================================================")
#     print("Starting:", district)
    
#     submission = directory_infer(input_path, BATCH_SIZE)
#     submission.to_csv(f"2.{district_serial[district]}: {district}-yellowking_inference.csv", index=False)
    
#     print("Exporting:", f"2.{district_serial[district]}: {district}-yellowking_inference.csv")
#     print("====================================================================")
#     print()

# Transcribe every .wav file in the test folder and save the predictions as CSV.
BATCH_SIZE = 16  # number of files passed to batch_infer per progress-bar step
input_path = "/kaggle/input/final-splits/final_splits/test"
submission = directory_infer(input_path, BATCH_SIZE)
submission.to_csv("test_inference.csv", index=False)
# Last expression of the cell: display the predictions table.
submission

In [None]:
import pandas as pd

# Read the reference sheet and rename its 'transcriptions' column to
# 'Ground_Truth' in one chained expression.
df = (
    pd.read_excel('/kaggle/input/final-splits/final_splits/test/test.xlsx')
    .rename(columns={'transcriptions': 'Ground_Truth'})
)
df

In [None]:
# Join the reference rows with the predictions on file name, tag the model,
# then select the report columns in order and give them their final names.
merged_df = (
    pd.merge(df, submission, on="file_name")
    .assign(Model_Name='Yellowking')
    [['Model_Name','district','file_name','External_ID','transcriptions', 'Ground_Truth']]
    .rename(columns={'transcriptions': 'Prediction','district': 'District'})
)

merged_df