In [1]:
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from model.config import *
# For adjusting volume 
import soundfile as sf 
import pyloudnorm as pyln 
import numpy as np
import os 
from datasets import load_metric
import librosa 
import edit_distance as ed
from tqdm import tqdm

In [2]:
# Lower native log verbosity (intended to hide the "no GPU / CUDA" messages).
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is a TensorFlow setting and it is assigned
# after the imports in the first cell — confirm it still has an effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [3]:
# Silence warnings before pandas is imported.
# Note: the filter ignores every UserWarning in the session, not only the pandas ones.
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)
import pandas as pd 

In [4]:
# Show DataFrames without truncation: None lifts the limit on the number of
# columns, the number of rows and the width of each column.
pd.set_option('display.max_columns', None)  # or a fixed cap such as 1000
pd.set_option('display.max_rows', None)  # or a fixed cap such as 1000
pd.set_option('display.max_colwidth', None)  # or a fixed cap such as 199

In [5]:
def segmentLargeArray(inputTensor,chunksize=200000):
    """Split a 2-D tensor into consecutive chunks along its second axis.

    Args:
        inputTensor: Object with a 2-D ``shape`` that supports ``[:, a:b]``
            slicing (the caller in this notebook passes a torch tensor).
        chunksize: Maximum number of columns per chunk; must be positive.

    Returns:
        list: The slices ``inputTensor[:, k*chunksize:(k+1)*chunksize]`` in
        order. The last chunk may be shorter than ``chunksize``; the list is
        empty when the input has no columns.

    Raises:
        ValueError: If ``chunksize`` is not positive.
    """
    if chunksize <= 0:
        raise ValueError(f"chunksize must be positive, got {chunksize}")
    tensor_length = inputTensor.shape[1]
    # Stop at tensor_length (not tensor_length + 1): the extra step used to
    # append a zero-width chunk whenever the length was an exact multiple of
    # chunksize.
    return [
        inputTensor[:, start:start + chunksize]
        for start in range(0, tensor_length, chunksize)
    ]

In [6]:
def adjust_volume(ip_tensor,sr):
    """Loudness-normalize an audio tensor to a fixed target of 0 dB.

    Despite the historical "peak" naming, this is loudness normalization: the
    integrated loudness is measured with a pyloudnorm BS.1770 meter and the
    signal is scaled to the same target level irrespective of the volume of
    the original recording.

    Args:
        ip_tensor: CPU torch tensor holding the audio. It is transposed before
            being measured (presumably it is laid out as (channels, samples),
            as passed in from ``torchaudio.load`` — confirm for other callers).
        sr: Sampling rate of the audio, used to build the meter.

    Returns:
        torch.Tensor: The normalized audio with the same layout and dtype as
        the input. When the measured loudness is not finite (for example
        digital silence) the input tensor is returned unchanged.
    """
    data = ip_tensor.numpy()
    meter = pyln.Meter(sr)  # BS.1770 meter
    loudness = meter.integrated_loudness(np.transpose(data))
    if not np.isfinite(loudness):
        # No usable loudness measurement: applying a gain would fill the
        # signal with inf/NaN, so leave the audio untouched.
        return ip_tensor
    normalized_audio = pyln.normalize.loudness(data, loudness, 0)
    # Keep the input dtype so downstream torch modules receive the same
    # precision that was loaded, whatever scalar promotion NumPy applied.
    normalized_audio = np.asarray(normalized_audio, dtype=data.dtype)
    return torch.from_numpy(normalized_audio)

In [7]:
def _decode_chunk(chunk, model, processor):
    """Run the model on one audio chunk and decode its greedy (argmax) token ids."""
    # Inference only: without no_grad every forward pass would build and keep
    # an autograd graph, wasting memory on long files.
    with torch.no_grad():
        logits = model(chunk.to(DEVICE)).logits
    # Only the first row of the batch is decoded.
    pred_ids = torch.argmax(logits, dim=-1)[0]
    return processor.decode(pred_ids)


def predict_from_speech(ip_file,model,processor):
    """Transcribe one audio file with a CTC model.

    The audio is loaded, loudness-normalized with ``adjust_volume``, resampled
    to 16 kHz and decoded greedily. Inputs with at least 200000 samples after
    resampling (12.5 s at 16 kHz) are split into 100000-sample chunks that are
    decoded independently.

    Args:
        ip_file: Path of the audio file, as accepted by ``torchaudio.load``.
        model: Model whose forward output exposes ``.logits``.
        processor: Object providing ``decode(pred_ids)``.

    Returns:
        str: The decoded transcription. For chunked inputs the per-chunk
        transcriptions are concatenated without a separator.
    """
    speech_array, sampling_rate = torchaudio.load(ip_file)
    speech_array = adjust_volume(speech_array,sampling_rate)
    resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
    resampled_array = resampler(speech_array).squeeze()
    # squeeze() drops the channel axis of mono audio; restore a (1, samples)
    # layout so the model always receives a batch dimension.
    if len(resampled_array.shape) == 1:
        resampled_array = resampled_array.reshape([1,resampled_array.shape[0]])

    if resampled_array.shape[1] >= 200000:
        list_of_segments = segmentLargeArray(resampled_array,chunksize=100000)
        # Zero-width chunks carry no audio and are skipped.
        return ''.join(
            _decode_chunk(segment, model, processor)
            for segment in list_of_segments
            if segment.size()[1] > 0
        )
    return _decode_chunk(resampled_array, model, processor)

In [93]:
# Spot-check: transcribe the audio file stored at row label 32 of test_df.
# NOTE: test_df, model and processor are created in a later cell of this
# notebook; that cell has to be run first on a fresh kernel.
predict_from_speech(test_df.loc[32,'path'],model,processor) 

200000
The input file is longer than 10 seconds


'त्यसैले सिसुलुले अफ्रिका नेसनल काङ्ग्रेसमादबिएर वस्तुभन्दा बरावर व्यवहार हुने पार्ठीमा प्रवेश गरेको बताए।'

In [8]:
def load_evaluation_metrics():
    """Load the word- and character-error-rate metrics via ``load_metric``.

    Returns:
        tuple: ``(wer_metric, cer_metric)``; the "cer" metric is requested at
        revision "master".
    """
    return load_metric("wer"), load_metric("cer",revision="master")

In [9]:
def calculateWER(actual_label, predicted_label):
    """Compute the word error rate (WER) of a prediction against a reference.

    Both strings are split on whitespace and aligned with the Levenshtein
    algorithm, where substitutions (S), deletions (D) and insertions (I) all
    cost 1:

        WER = (S + D + I) / N,   with N = S + D + C = number of reference words

    (C is the number of correctly matched words.) Because every edit costs 1,
    S + D + I is exactly the final edit distance, so no backtrace is needed.

    Args:
        actual_label: Reference transcription.
        predicted_label: Predicted transcription.

    Returns:
        float: WER rounded to 3 decimals. It can exceed 1 when the prediction
        contains many insertions. When the reference has no words the error
        count is divided by 1 instead of 0, so the result is 0.0 for an empty
        prediction and the number of predicted words otherwise.
    """
    actual_words = actual_label.split()
    predicted_words = predicted_label.split()

    # previous_row[j] is the edit distance between the reference words seen so
    # far and the first j predicted words. With no reference words consumed,
    # reaching j predicted words takes j insertions.
    previous_row = list(range(len(predicted_words) + 1))
    for i, actual_word in enumerate(actual_words, start=1):
        # Column 0: all i reference words were deleted.
        current_row = [i]
        for j, predicted_word in enumerate(predicted_words, start=1):
            if actual_word == predicted_word:
                # Words match: carry the diagonal cost over unchanged.
                current_row.append(previous_row[j - 1])
            else:
                sub_cost = previous_row[j - 1] + 1
                ins_cost = current_row[j - 1] + 1
                del_cost = previous_row[j] + 1
                current_row.append(min(sub_cost, ins_cost, del_cost))
        previous_row = current_row

    error_count = previous_row[-1]
    # Guard against an empty reference, which used to raise ZeroDivisionError.
    reference_length = max(len(actual_words), 1)
    return round(error_count / reference_length, 3)

In [10]:
def evaluate_metrics_own(model,processor,test_df):
    """Evaluate a model with the hand-written WER and edit-distance CER.

    Every file in ``test_df['path']`` is transcribed with
    ``predict_from_speech`` and compared with ``test_df['labels']``. WER comes
    from ``calculateWER``; CER is the character edit distance divided by the
    length of the reference label. The reported averages are the means of the
    per-file values. No external metric is loaded by this function.

    Args:
        model: Model passed through to ``predict_from_speech``.
        processor: Processor passed through to ``predict_from_speech``.
        test_df: DataFrame with ``path`` and ``labels`` columns. It is
            modified in place (pass a copy to keep the original untouched).

    Returns:
        tuple: ``(result, test_df)`` where ``result`` maps 'Average WER',
        'Average WER Accuracy', 'Average CER' and 'Average CER Accuracy' to
        floats (NaN when ``test_df`` has no rows) and ``test_df`` has the
        added columns ``predicted_label``, ``wer``, ``wer_accuracy``, ``cer``
        and ``cer_accuracy``.
    """
    test_files_count = test_df.shape[0]
    print(f"There are total {test_files_count} files")

    predicted_labels = []
    wers = []
    cers = []
    # Iterate over the column values positionally so the frame does not need
    # a 0..n-1 index.
    file_label_pairs = zip(test_df['path'], test_df['labels'])
    for ip_file, actual_label in tqdm(file_label_pairs, total=test_files_count):
        predicted_label = predict_from_speech(ip_file=ip_file,model=model,processor=processor)
        sm = ed.SequenceMatcher(predicted_label,actual_label)
        # max(..., 1) avoids a ZeroDivisionError on an empty reference label.
        cer = sm.distance() / max(len(actual_label), 1)
        wer = calculateWER(actual_label=actual_label, predicted_label=predicted_label)
        predicted_labels.append(predicted_label)
        wers.append(wer)
        cers.append(cer)

    wer_accuracies = [1 - wer for wer in wers]
    cer_accuracies = [1 - cer for cer in cers]

    # Assign whole columns at once; this keeps the metric columns float-typed
    # from the start instead of writing floats into int-initialized columns.
    test_df['predicted_label'] = predicted_labels
    test_df['wer'] = wers
    test_df['wer_accuracy'] = wer_accuracies
    test_df['cer'] = cers
    test_df['cer_accuracy'] = cer_accuracies

    def _average(values):
        # Mean of the per-file values; undefined (NaN) when nothing was scored.
        return sum(values) / test_files_count if test_files_count else float('nan')

    result = {
        'Average WER': _average(wers),
        'Average WER Accuracy': _average(wer_accuracies),
        'Average CER': _average(cers),
        'Average CER Accuracy': _average(cer_accuracies),
    }
    return result, test_df 

In [11]:
def evaluate_metrics(model,processor,test_df):
    """Evaluate a model with the "wer" and "cer" metrics from ``load_metric``.

    Every file in ``test_df['path']`` is transcribed with
    ``predict_from_speech`` and scored against ``test_df['labels']`` one file
    at a time. The reported "average" values are obtained from a single
    ``compute`` call over all predictions and references together, not by
    averaging the per-file values.

    Args:
        model: Model passed through to ``predict_from_speech``.
        processor: Processor passed through to ``predict_from_speech``.
        test_df: DataFrame with ``path`` and ``labels`` columns. It is
            modified in place (pass a copy to keep the original untouched).

    Returns:
        tuple: ``(result, test_df)`` where ``result`` maps 'Average WER',
        'Average WER Accuracy', 'Average CER' and 'Average CER Accuracy' to
        floats (NaN when ``test_df`` has no rows) and ``test_df`` has the
        added columns ``predicted_label``, ``wer``, ``wer_accuracy``, ``cer``
        and ``cer_accuracy``.
    """
    print(f"=> Loading Metrics")
    wer_metric, cer_metric = load_evaluation_metrics()
    test_files_count = test_df.shape[0]
    print(f"There are total {test_files_count} files")

    actual_labels = []
    predicted_labels = []
    wers = []
    cers = []
    # Iterate over the column values positionally so the frame does not need
    # a 0..n-1 index.
    file_label_pairs = zip(test_df['path'], test_df['labels'])
    for ip_file, actual_label in tqdm(file_label_pairs, total=test_files_count):
        predicted_label = predict_from_speech(ip_file=ip_file,model=model,processor=processor)
        wers.append(wer_metric.compute(predictions=[predicted_label],references=[actual_label]))
        cers.append(cer_metric.compute(predictions=[predicted_label],references=[actual_label]))
        predicted_labels.append(predicted_label)
        actual_labels.append(actual_label)

    # Assign whole columns at once; this keeps the metric columns float-typed
    # from the start instead of writing floats into int-initialized columns.
    test_df['predicted_label'] = predicted_labels
    test_df['wer'] = wers
    test_df['wer_accuracy'] = [1 - wer for wer in wers]
    test_df['cer'] = cers
    test_df['cer_accuracy'] = [1 - cer for cer in cers]

    if predicted_labels:
        average_wer = wer_metric.compute(predictions=predicted_labels, references=actual_labels)
        average_cer = cer_metric.compute(predictions=predicted_labels,references=actual_labels)
    else:
        # Nothing was scored, so the overall metrics are undefined.
        average_wer = average_cer = float('nan')

    result = {
        'Average WER': average_wer,
        'Average WER Accuracy': 1 - average_wer,
        'Average CER': average_cer,
        'Average CER Accuracy': 1 - average_cer,
    }
    return result, test_df 

In [66]:
 # can be changed to relative paths
# Raw strings keep every backslash of the Windows paths literal; the previous
# literals only worked because sequences such as "\P" or "\m" are invalid
# escapes, which newer Python versions warn about.
model_path = r'D:\Programming\Projects\major_project\Codes\ASR\wav2vec_trained_models\nepali-wav2vec-v2\models\model-lt-4sec-first-5000'
processor_path = r'D:\Programming\Projects\major_project\Codes\ASR\wav2vec_trained_models\nepali-wav2vec-v2\processors\processor_0.1_dropout_lt_4sec_first_5000'
print("=> Loading the trained model and processor")
model = Wav2Vec2ForCTC.from_pretrained(model_path).to(DEVICE)
processor = Wav2Vec2Processor.from_pretrained(processor_path)
# Test set: one CSV row per audio file with its file stem ("path") and
# reference transcription ("labels").
test_dataset_path = r'D:\Programming\Projects\major_project\Codes\ASR\data\transcript_durations\dataset_duration_gt_10sec.csv'
# Directory holding the audio files. A raw string cannot end with a
# backslash, so the trailing separator is appended separately.
audio_path = r'D:\Programming\Projects\major_project\Codes\ASR\data\audio' + '\\'
test_df = pd.read_csv(test_dataset_path,usecols=["path","labels"])
# Turn each file stem into the full path of its .flac file.
test_df["path"] = audio_path + test_df["path"] + ".flac"

=> Loading the trained model and processor


Special tokens have been added in the vocabulary, make sure the associated word embedding are fine-tuned or trained.


In [16]:
# Evaluate every saved model/processor pair on the first 100 test files and
# store one result CSV per model.
model_dir = r"D:\Programming\Projects\major_project\Codes\ASR\wav2vec_trained_models\nepali-wav2vec-v2\models" + "\\"
processor_dir = r"D:\Programming\Projects\major_project\Codes\ASR\wav2vec_trained_models\nepali-wav2vec-v2\processors" + "\\"
# os.listdir gives no ordering guarantee; sort so the i-th model is always
# paired with the same processor.
# NOTE(review): pairing by sorted position assumes both folders sort into the
# same order — confirm against the actual directory names.
models = sorted(os.listdir(model_dir))
processors = sorted(os.listdir(processor_dir))
if len(models) != len(processors):
    raise ValueError(
        f"Found {len(models)} models but {len(processors)} processors; cannot pair them"
    )

# Test set: file stems and reference transcriptions.
test_dataset_path = r'D:\Programming\Projects\major_project\Codes\ASR\data\transcript_durations\dataset_duration_gt_10sec.csv'
# Directory holding the audio files (trailing separator appended separately
# because a raw string cannot end with a backslash).
audio_path = r'D:\Programming\Projects\major_project\Codes\ASR\data\audio' + '\\'
test_df = pd.read_csv(test_dataset_path,usecols=["path","labels"])
test_df["path"] = audio_path + test_df["path"] + ".flac"
test_df_100 = test_df[:100]
print(f'Test Dataset size: {test_df_100.shape}')
test_csv_path = r"D:\Programming\Projects\major_project\Codes\ASR\wav2vec_trained_models\test_result_csvs" + "\\"

# Per-model summary metrics, keyed by the model's directory name.
results = {}
for i, (model_name, processor_name) in enumerate(zip(models, processors)):
    print(f"For model {i}")
    # os.listdir returns bare names, so prepend the directory to get a
    # loadable path.
    model = Wav2Vec2ForCTC.from_pretrained(model_dir + model_name).to(DEVICE)
    processor = Wav2Vec2Processor.from_pretrained(processor_dir + processor_name)
    print("Evaluating metrics ... ")
    # Score a fresh copy of the 100-file subset each time; test_df itself is
    # left untouched for the following cells.
    result, result_df = evaluate_metrics(model,processor,test_df_100.copy())
    results[model_name] = result
    print("Saving CSV file ... ")
    result_df.to_csv(f'{test_csv_path}{model_name}.csv',index=False)


(100, 2)

In [67]:
# Sanity check: (rows, columns) of the loaded test set.
test_df.shape

(213, 2)

In [68]:
# Preview the first rows: full audio path and reference transcription.
test_df.head()

Unnamed: 0,path,labels
0,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_2099_0456476554.flac,विश्व विषयक मन्त्रीस्तरीय अन्तर्राष्ट्रिय सम्मेलनमा भाग लिन बङ्लादेशको राजधानी ढाका प्रस्थान गर्नुअघि त्रिभुवन अन्तराष्ट्रिय विमानस्थलमा सञ्चारकर्मी बिच उनले सो कुरा बताए
1,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_0546_2868510042.flac,राजा ज्ञानेन्द्र शाहले दाइजो बकस दिएको घरजग्गा जबरजस्ती सरकारी ट्रस्टमा राखिएको भन्दै पूर्वअधिराजकुमारी प्रेरणाराज्यलक्ष्मी सिंह न्यायका लागि सर्वोच्च अदालत पुगिन्
2,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_3997_2276753961.flac,आन्तरिक राजस्व कार्यालय भैरहवा अन्तर्गत रुपन्देहीको सिद्धार्थनगर नगरपालिका लुम्बिनी नगरपालिका तिलोत्तमा नगरपालिकामा मूल्य अभिवृद्धि करमा दर्ता छ
3,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_2099_4853806205.flac,वेल्स उन्नाइस सय अन्ठानब्बेमा सान डिएगोमा बसाई सरे र एउटा आवासिय बजारले त्यहाँबाट हटाएपछि फ्लोरिडाको सेन्ट पिटर्सबर्गमा नै पुनर्स्थापित भए
4,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_3960_8205149857.flac,प्रथम सगरमाथा आरोही एडमन्ड हिलारीको सहयोगमा बनेको कुन्दे अस्पतालले पर्यटक तथा खुम्बु क्षेत्रका स्थानीय बासीको उपचारमा ठुलो योगदान दिएको छ


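The cells below are ad-hoc sanity checks: they compare the WER from the `datasets` metric with the hand-written `calculateWER`, then run both evaluation functions on the test set.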

In [16]:
# Library WER on a toy pair: the prediction has 4 words, the reference 3.
# One substitution plus one insertion over 3 reference words gives 2/3.
wer_metric = load_metric("wer")
wer = wer_metric.compute(predictions=["विश्व विषयक मन्त्रीस्तरीय अन्तर्राष्ट्रि"], references=["विश्व विषयक मन्त्रीस्तरी"])
wer

0.6666666666666666

In [24]:
# Hand-written WER on the same two strings, but with the roles swapped relative
# to the library check: here the 4-word string is the reference, so one
# substitution plus one deletion over 4 reference words gives 0.5.
print(calculateWER(actual_label="विश्व विषयक मन्त्रीस्तरीय अन्तर्राष्ट्रि",predicted_label="विश्व विषयक मन्त्रीस्तरी"))

0.5


In [96]:
# Evaluate with the library metrics. A copy is passed so that test_df itself
# keeps only its original columns.
result1, test_df1 = evaluate_metrics(model,processor,test_df.copy())
print(f"Result: {result1}")
test_df1.head()

=> Loading Metrics
There are total 213 files


100%|██████████| 213/213 [20:25<00:00,  5.75s/it]

Result: {'Average WER': 0.44130434782608696, 'Average WER Accuracy': 0.558695652173913, 'Average CER': 0.10123588648153799, 'Average CER Accuracy': 0.898764113518462}





Unnamed: 0,path,labels,predicted_label,wer,wer_accuracy,cer,cer_accuracy
0,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_2099_0456476554.flac,विश्व विषयक मन्त्रीस्तरीय अन्तर्राष्ट्रिय सम्मेलनमा भाग लिन बङ्लादेशको राजधानी ढाका प्रस्थान गर्नुअघि त्रिभुवन अन्तराष्ट्रिय विमानस्थलमा सञ्चारकर्मी बिच उनले सो कुरा बताए,विश्व विषयक मन्त्रिस्तरिय अन्तर्राष्ट्रिय सम्मेलनमा भाग लिन बङ्नादेशको राजधानी ढाका पनस्तान गर्नुअघि त्रिभुवन र अन्तर्राष्ट्रिय विभानस्थलमा सञचारकर्मीबिच उनले सो कुरा बताय,0.380952,0.619048,0.082353,0.917647
1,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_0546_2868510042.flac,राजा ज्ञानेन्द्र शाहले दाइजो बकस दिएको घरजग्गा जबरजस्ती सरकारी ट्रस्टमा राखिएको भन्दै पूर्वअधिराजकुमारी प्रेरणाराज्यलक्ष्मी सिंह न्यायका लागि सर्वोच्च अदालत पुगिन्,राजा ज्ञानेन्द्र शाहले दाइजो बकस दिएको घरजग्गा जबजती सरकारी ड्रस्टमा राखिएको भन्दै पूर्व अधिराजकुमारी प्रेरणा राज्य लक्ष्मी सिंहन्यायका लागि सर्वच अतलात पुगिन्,0.5,0.5,0.08589,0.91411
2,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_3997_2276753961.flac,आन्तरिक राजस्व कार्यालय भैरहवा अन्तर्गत रुपन्देहीको सिद्धार्थनगर नगरपालिका लुम्बिनी नगरपालिका तिलोत्तमा नगरपालिकामा मूल्य अभिवृद्धि करमा दर्ता छ,आन्तरिक राजस्व कार्यलय भैरवा अन्तर्गत रूपनदेहीको सिद्धार्थनगरनगरपालिका लुम्बिनी नगरपालिका तिलोत्तमा नगरपालिकामा मूल्य अभिवृद्धि कर्मादर्ता छ,0.411765,0.588235,0.048611,0.951389
3,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_2099_4853806205.flac,वेल्स उन्नाइस सय अन्ठानब्बेमा सान डिएगोमा बसाई सरे र एउटा आवासिय बजारले त्यहाँबाट हटाएपछि फ्लोरिडाको सेन्ट पिटर्सबर्गमा नै पुनर्स्थापित भए,वेल्स उन्नाइस सय अन्छनब्बेमा सान डिएगोमा बसाई सरे र एउटा आवश्य बजारले त्यहाँ बाट हटाएपछि फ्लोरिडागो सेन पिटर्सबर्गमा नै पुनर्स्थापित भए,0.3,0.7,0.065217,0.934783
4,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_3960_8205149857.flac,प्रथम सगरमाथा आरोही एडमन्ड हिलारीको सहयोगमा बनेको कुन्दे अस्पतालले पर्यटक तथा खुम्बु क्षेत्रका स्थानीय बासीको उपचारमा ठुलो योगदान दिएको छ,प्रथम सगर माथा आरोही एडमन हिलारीको सहयोगमा बनेको कुन्दै अस्पतालले पर्यटक तथा खुम्बु क्षेत्रका स्थानीय बासीको उपचारमा ठुलो योगदान दिएको छ,0.2,0.8,0.029197,0.970803


In [97]:
# Evaluate the same model with the hand-written metrics, again on a copy of
# test_df, for comparison with result1.
result2, test_df2 = evaluate_metrics_own(model,processor,test_df.copy())
print(f"Result: {result2}")
test_df2.head()

=> Loading Metrics
There are total 213 files


100%|██████████| 213/213 [20:40<00:00,  5.82s/it]

Result: {'Average WER': 0.4905023474178404, 'Average WER Accuracy': 0.5094976525821596, 'Average CER': 0.15350433114167802, 'Average CER Accuracy': 0.8464956688583223}





Unnamed: 0,path,labels,predicted_label,wer,wer_accuracy,cer,cer_accuracy
0,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_2099_0456476554.flac,विश्व विषयक मन्त्रीस्तरीय अन्तर्राष्ट्रिय सम्मेलनमा भाग लिन बङ्लादेशको राजधानी ढाका प्रस्थान गर्नुअघि त्रिभुवन अन्तराष्ट्रिय विमानस्थलमा सञ्चारकर्मी बिच उनले सो कुरा बताए,विश्व विषयक मन्त्रिस्तरिय अन्तर्राष्ट्रिय सम्मेलनमा भाग लिन बङ्नादेशको राजधानी ढाका पनस्तान गर्नुअघि त्रिभुवन र अन्तर्राष्ट्रिय विभानस्थलमा सञचारकर्मीबिच उनले सो कुरा बताय,0.381,0.619,0.082353,0.917647
1,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_0546_2868510042.flac,राजा ज्ञानेन्द्र शाहले दाइजो बकस दिएको घरजग्गा जबरजस्ती सरकारी ट्रस्टमा राखिएको भन्दै पूर्वअधिराजकुमारी प्रेरणाराज्यलक्ष्मी सिंह न्यायका लागि सर्वोच्च अदालत पुगिन्,राजा ज्ञानेन्द्र शाहले दाइजो बकस दिएको घरजग्गा जबजती सरकारी ड्रस्टमा राखिएको भन्दै पूर्व अधिराजकुमारी प्रेरणा राज्य लक्ष्मी सिंहन्यायका लागि सर्वच अतलात पुगिन्,0.5,0.5,0.08589,0.91411
2,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_3997_2276753961.flac,आन्तरिक राजस्व कार्यालय भैरहवा अन्तर्गत रुपन्देहीको सिद्धार्थनगर नगरपालिका लुम्बिनी नगरपालिका तिलोत्तमा नगरपालिकामा मूल्य अभिवृद्धि करमा दर्ता छ,आन्तरिक राजस्व कार्यलय भैरवा अन्तर्गत रूपनदेहीको सिद्धार्थनगरनगरपालिका लुम्बिनी नगरपालिका तिलोत्तमा नगरपालिकामा मूल्य अभिवृद्धि कर्मादर्ता छ,0.412,0.588,0.048611,0.951389
3,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_2099_4853806205.flac,वेल्स उन्नाइस सय अन्ठानब्बेमा सान डिएगोमा बसाई सरे र एउटा आवासिय बजारले त्यहाँबाट हटाएपछि फ्लोरिडाको सेन्ट पिटर्सबर्गमा नै पुनर्स्थापित भए,वेल्स उन्नाइस सय अन्छनब्बेमा सान डिएगोमा बसाई सरे र एउटा आवश्य बजारले त्यहाँ बाट हटाएपछि फ्लोरिडागो सेन पिटर्सबर्गमा नै पुनर्स्थापित भए,0.3,0.7,0.065217,0.934783
4,D:\Programming\Projects\major_project\Codes\ASR\data\audio\nep_3960_8205149857.flac,प्रथम सगरमाथा आरोही एडमन्ड हिलारीको सहयोगमा बनेको कुन्दे अस्पतालले पर्यटक तथा खुम्बु क्षेत्रका स्थानीय बासीको उपचारमा ठुलो योगदान दिएको छ,प्रथम सगर माथा आरोही एडमन हिलारीको सहयोगमा बनेको कुन्दै अस्पतालले पर्यटक तथा खुम्बु क्षेत्रका स्थानीय बासीको उपचारमा ठुलो योगदान दिएको छ,0.2,0.8,0.029197,0.970803


In [98]:
# Save both per-file result tables to CSV in the working directory, without
# the index column.
test_df1.to_csv('metrics_result1.csv',index=False)
test_df2.to_csv('metrics_result2.csv',index=False)