# Load Model, Real Predictions

 - load wav file, STT

In [None]:
# 필요 import 문

import urllib3
import json
import base64
import kss
import torch
from torch import nn
import torch.nn.functional as F
from transformers import BertModel, BertTokenizer
import gluonnlp as nlp
from tqdm.notebook import tqdm
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from kobert_tokenizer import KoBERTTokenizer

In [None]:
## STT 관련 필요 코드

openApiURL = "http://aiopen.etri.re.kr:8000/WiseASR/Recognition"
accessKey = "624e30a6-6843-4f56-a417-3685c5ceda16"  # replace with your key
languageCode = "korean"
http = urllib3.PoolManager()

In [None]:
##### STT 변환 함수 정의 #####

def wav_to_stt(filename, openApiURL, languageCode, accessKey, http):
    result = []

    with open(filename, "rb") as file:
        audioContents = base64.b64encode(file.read()).decode("utf8")

    requestJson = {
        "argument": {
            "language_code": languageCode,
            "audio": audioContents
        }
    }

    response = http.request(
        "POST",
        openApiURL,
        headers={
            "Content-Type": "application/json; charset=UTF-8",
            "Authorization": accessKey
        },
        body=json.dumps(requestJson)
    )

    try:
        response_body = json.loads(response.data.decode('utf-8'))
    except json.JSONDecodeError:
        print("Failed to decode the response as JSON:")
        raise

    stt_result = response_body.get('return_object', {}).get('recognized', '')
    result = stt_result

    return result

In [None]:
def stt_to_kss(result):
    sentences = kss.split_sentences(result)
    return sentences

In [22]:
##### KoBERT 모델 관련 코드 #####

tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)
vocab = nlp.vocab.BERTVocab.from_sentencepiece(tokenizer.vocab_file, padding_token='[PAD]')

class BERTClassifier(nn.Module):
    def __init__(self, bert, hidden_size=768, num_classes=2, dr_rate=None, params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size, num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids=token_ids, token_type_ids=segment_ids.long(), attention_mask=attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

['네, 카드번호와 명의자 통화 부탁드립니다.', '확인되었습니다.', '저는 추가로 넣어드렸습니다.', '더 필요한 사항 없으십니까?', '태양교육상담사 김민재였습니다.', '감사합니다.']
Sentence 1: 네, 카드번호와 명의자 통화 부탁드립니다.
Predicted Label: 1
Predicted Probabilities:
 [0.00862525 0.9913748 ]
----------
Sentence 2: 확인되었습니다.
Predicted Label: 1
Predicted Probabilities:
 [0.00456362 0.9954364 ]
----------
Sentence 3: 저는 추가로 넣어드렸습니다.
Predicted Label: 0
Predicted Probabilities:
 [0.98966867 0.01033128]
----------
Sentence 4: 더 필요한 사항 없으십니까?
Predicted Label: 1
Predicted Probabilities:
 [0.01941818 0.9805818 ]
----------
Sentence 5: 태양교육상담사 김민재였습니다.
Predicted Label: 1
Predicted Probabilities:
 [0.00408685 0.9959131 ]
----------
Sentence 6: 감사합니다.
Predicted Label: 1
Predicted Probabilities:
 [0.00311304 0.9968869 ]
----------
Predicted Labels: tensor([1, 1, 0, 1, 1, 1], device='cuda:0')
Predicted Probabilities:
 tensor([[0.0086, 0.9914],
        [0.0046, 0.9954],
        [0.9897, 0.0103],
        [0.0194, 0.9806],
        [0.0041, 0.9959],
        [0.0031, 0.9969]], device

In [None]:
# 예측 후 확률값을 출력하는 함수

def predict_and_print(sentences):
    model.eval()

    # Tokenize all sentences at once
    tokenized_sent = tokenizer(
        sentences,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128
    )

    token_ids = tokenized_sent['input_ids'].to(device)
    segment_ids = tokenized_sent['token_type_ids'].to(device)
    valid_length = torch.tensor([len(tokenizer.tokenize(sentence)) for sentence in sentences]).to(device)

    with torch.no_grad():
        logits = model(token_ids, valid_length, segment_ids)
        probabilities = F.softmax(logits, dim=1)
        _, predicted = torch.max(logits, 1)

    # Print the results
    for i, (sentence, label, probs) in enumerate(zip(sentences, predicted, probabilities)):
        print(f"Sentence {i + 1}: {sentence}")
        print("Predicted Label:", label.item())
        print("Predicted Probabilities:\n", probs.cpu().numpy())
        print("----------")

    print("Predicted Labels:", predicted)
    print("Predicted Probabilities:\n", probabilities)


In [None]:
# 그래픽 카드 사용

if torch.cuda.is_available():
    device = 'cuda:0'
#     print('현재 가상환경 GPU 사용 가능상태')
else:
    device = 'cpu'
#     print('GPU 사용 불가능 상태')

In [None]:
# model 정의

model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)

In [None]:
# Load the model weights
model_path = './vp_text_classification_model.pt'  # Path to your trained model checkpoint
model.load_state_dict(torch.load(model_path))

In [None]:
# wav 파일 불러오기
filename = './counsel_merge_1 (mp3cut.net).wav'

In [None]:
# wav to stt, stt to kss 진행

stt_result = wav_to_stt(filename, openApiURL, languageCode, accessKey, http)
sentences = stt_to_kss(stt_result)
# print(sentences)
predict_and_print(sentences)

- predicted_probs는 각 클래스에 대한 확률을 나타냄
- 예를 들어, `[0.9985, 0.0015]`와 같은 출력
  - 첫 번째 클래스의 확률이 99.85%이고 두 번째 클래스의 확률이 0.15%임을 의미