In [1]:
import librosa
import numpy as np

import os
import time

import boto3
from botocore.exceptions import ClientError

import gluonnlp as nlp

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from transformers import AutoTokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model
from kobert.utils import get_tokenizer

from torchvision.models.resnet import ResNet, BasicBlock

In [2]:
import pandas as pd

In [3]:
# BERT 모델, Vocabulary 불러오기
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size=768,
                 num_classes=6,  ##클래스 수 조정##
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size, num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids=token_ids, token_type_ids=segment_ids.long(),
                              attention_mask=attention_mask.float().to(token_ids.device), return_dict=False)
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)


In [4]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, vocab, max_len,
                 pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, vocab=vocab, pad=pad, pair=pair)

        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i],))

    def __len__(self):
        return (len(self.labels))


In [5]:
class ResNetModel(ResNet):
    def __init__(self, num_classes=6):
        super(ResNetModel, self).__init__(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
        self.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [6]:
device = torch.device('cpu')

In [7]:
# Setting prediction parameters
max_len = 60
batch_size = 64
learning_rate = 5e-5

print("Loading BERT model...")
# Load pre-trained model (weights)
bertmodel, vocab = get_pytorch_kobert_model()

# Load tokenizer from a local directory
# kobert_tokenizer = AutoTokenizer.from_pretrained("kobert_tokenizer", use_fast=False)
# tok = kobert_tokenizer.tokenize
print("Loading BERT tokenizer...")
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

PATH = '/Users/yoohajun/Library/Mobile Documents/com~apple~CloudDocs/Hajun/Graduate_project/fastapi/KoBERT/'
kobert_model = BERTClassifier(bertmodel, dr_rate=0.5)
kobert_model.load_state_dict(torch.load(PATH + 'model_state_dict.pt', map_location=device))

Loading BERT model...
using cached model. /Users/yoohajun/Library/Mobile Documents/com~apple~CloudDocs/Hajun/Graduate_project/fastapi/Diffusion/.cache/kobert_v1.zip
using cached model. /Users/yoohajun/Library/Mobile Documents/com~apple~CloudDocs/Hajun/Graduate_project/fastapi/Diffusion/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece
Loading BERT tokenizer...
using cached model. /Users/yoohajun/Library/Mobile Documents/com~apple~CloudDocs/Hajun/Graduate_project/fastapi/Diffusion/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


<All keys matched successfully>

In [8]:
# Define the path to the saved model
model_path = '/Users/yoohajun/Library/Mobile Documents/com~apple~CloudDocs/Hajun/Graduate_project/fastapi/resnet-model/pytorch_resnet.pt'
# Load the saved model
checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
resnet_model = ResNetModel(num_classes=6)
resnet_model.load_state_dict(checkpoint['model_state_dict'])


<All keys matched successfully>

In [9]:
def scale_to_range(arr, target_range=(0, 1)):
    # Calculate exponential values for each element
    exp_arr = np.exp(arr)

    # Calculate the sum of all exponential values
    exp_sum = np.sum(exp_arr)

    # Calculate probability for each element by dividing its exponential value by the sum
    probs = exp_arr / exp_sum

    # Scale probabilities to target range
    scaled = (probs * (target_range[1] - target_range[0])) + target_range[0]

    return scaled

# decision diffusion을 수행할 때 range scaling을 하는 이유는 균일하게 특성을 통일해야한다고 생각했지만
# 오히려 scaling을 하게 되면 그 특성을 손상시키게 된다
# 정확도가 89 -> 82 로 낙하 (기본 Decision Tree)

In [10]:
def softmax(vals, idx):
    valscpu = vals.cpu().detach().squeeze(0)
    a = 0
    for i in valscpu:
        a += np.exp(i)
    return ((np.exp(valscpu[idx])) / a).item() * 100


In [11]:
def text_predict(predict_sentence, model=kobert_model):
    # print("predictsentence start:", predict_sentence)
    start = time.time()
    text_label = ['regular', 'help', 'robbery', 'sexual', 'theft', 'violence']
    data = [predict_sentence]
    # dataset_another = [data]

    transform = nlp.data.BERTSentenceTransform(tok, max_len, pad=True, pair=False)
    tokenized = transform(data)
    model.eval()

    # print([tokenized[0]])
    # token_ids = torch.tensor([tokenized[0]]).to(device)
    # segment_ids = torch.tensor([tokenized[2]]).to(device)
    token_ids = torch.tensor(np.array([tokenized[0]])).to(device)
    valid_length = [tokenized[1]]
    segment_ids = torch.tensor(np.array([tokenized[2]])).to(device)

    result = model(token_ids, valid_length, segment_ids)
    # print(result)
    idx = result.argmax().cpu().item()
    out_prob = result.detach().cpu().numpy()[0]

    # scaled_prob = scale_to_range(out_prob)
    # print(out_prob)
    # print("대사의 카테고리는:", text_label[idx])
    # print("대사 신뢰도는:", "{:.2f}%".format(softmax(result, idx)))
    end = time.time() - start
    # print("text predict 걸린 시간:", end)
    return out_prob

In [12]:
def audio_predict(file_location, model=resnet_model):
    try:
        audio_data, sr = librosa.load(file_location, sr=44100, duration=5)

        # Define label names
        label_names = ['regular', 'help', 'robbery', 'sexual', 'theft', 'violence']

        # Calculate the spectrogram of the audio data
        spec = librosa.feature.melspectrogram(y=audio_data, sr=sr)

        # Convert the spectrogram to decibels
        spec_db = librosa.power_to_db(spec, ref=np.max)

        # Add an additional channel to the spectrogram
        spec_db = np.repeat(spec_db[:, :, np.newaxis], 4, axis=2)

        # Resize the spectrogram to match the input shape of the model
        spec_resized = np.resize(spec_db, (1, 4, 128, 128))

        # Normalize the spectrogram by z-score
        mean = np.mean(spec_resized)
        std = np.std(spec_resized)
        spec_resized = (spec_resized - mean) / std

        # Convert the spectrogram to a tensor and move it to the device
        spectrogram_tensor = torch.tensor(spec_resized, dtype=torch.float).to(device)

        # Set the model to evaluation mode
        model.eval()

        # Predict the probabilities for each class
        with torch.no_grad():
            out = model(spectrogram_tensor)

        # Get the index of the class with the highest probability
        predicted_class_index = torch.argmax(out, dim=1)

        label_index = predicted_class_index.item()

        # print("음성의 카테고리는:", label_names[label_index])
        # print("음성 신뢰도는:", "{:.2f}%".format(softmax(out, label_index)))
        prob = out.detach().cpu().numpy()[0]
        # scaled_prob = scale_to_range(prob)

        return prob

    except Exception as e:
        print(f'Error: {e}')
        return None

In [13]:
df = pd.read_csv('./audio_text_diffusion_train.csv', encoding='utf8')

In [14]:
df

Unnamed: 0.1,Unnamed: 0,audio_id,audio_dir,note,category_02
0,0,15.실내_906628_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
1,1,15.실내_903897_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
2,2,15.실내_918134_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
3,3,15.실내_654528_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
4,4,15.실내_661859_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
...,...,...,...,...,...
402,402,3.절도범죄_1240207_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,소매치기 잡아라,절도범죄
403,403,3.절도범죄_1240211_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,소매치기 잡아라,절도범죄
404,404,3.절도범죄_1240983_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,저 놈 잡아라,절도범죄
405,405,3.절도범죄_1240989_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,저 놈 잡아라,절도범죄


In [15]:
# 음성 파일 id, 음성 파일 경로, note, 음성 파일 븐류 -> 1차 csv

# 오디오 확률 리스트, 텍스트 확률 리스트, 라벨

# 리스트 nested 해제 , 라벨

# diffusion layer ml 학습 - decision tree

In [16]:
# define a function to apply to each row of the DataFrame
def add_prediction(row):
    # call the audio_predict function with the audio_dir value
    audio_prediction = audio_predict(row['audio_dir'])
    # add the prediction values as a new column to the DataFrame
    if audio_prediction is not None:
        # add the prediction values as a new column to the DataFrame
        # temp = scale_to_range(audio_prediction)
        # temp_arr = temp.tolist()
        # row['audio_prediction'] = temp_arr
        row['audio_prediction'] = audio_prediction.tolist()
    else:
        row['audio_prediction'] = None

    text_prediction = text_predict(row['note'])

    if text_prediction is not None:
        # add the prediction values as a new column to the DataFrame
        row['text_prediction'] = text_prediction.tolist()
    else:
        row['text_prediction'] = None

    return row


In [17]:
df

Unnamed: 0.1,Unnamed: 0,audio_id,audio_dir,note,category_02
0,0,15.실내_906628_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
1,1,15.실내_903897_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
2,2,15.실내_918134_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
3,3,15.실내_654528_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
4,4,15.실내_661859_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,,실내
...,...,...,...,...,...
402,402,3.절도범죄_1240207_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,소매치기 잡아라,절도범죄
403,403,3.절도범죄_1240211_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,소매치기 잡아라,절도범죄
404,404,3.절도범죄_1240983_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,저 놈 잡아라,절도범죄
405,405,3.절도범죄_1240989_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,저 놈 잡아라,절도범죄


In [18]:
# fill NaN values with empty string
df = df.fillna('empty')

In [19]:
from tqdm import tqdm
tqdm.pandas()
# apply the add_prediction function to each row of the DataFrame
df = df.progress_apply(add_prediction, axis=1)

  0%|          | 0/407 [00:00<?, ?it/s][W NNPACK.cpp:79] Could not initialize NNPACK! Reason: Unsupported hardware.
  spec_resized = (spec_resized - mean) / std
  audio_data, sr = librosa.load(file_location, sr=44100, duration=5)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error: [Errno 2] No such file or directory: '/Users/yoohajun/Desktop/grad_audio/diffusion/train/15.실내_688645_label.wav'


100%|██████████| 407/407 [02:35<00:00,  2.61it/s]


In [20]:
# print the resulting DataFrame with the new 'prediction' column
df.head()

Unnamed: 0.1,Unnamed: 0,audio_id,audio_dir,note,category_02,audio_prediction,text_prediction
0,0,15.실내_906628_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[-11.508453369140625, -8.252114295959473, -23....","[1.985169768333435, -1.2518784999847412, 1.072..."
1,1,15.실내_903897_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[17.464433670043945, -21.854764938354492, -7.6...","[1.985169768333435, -1.2518784999847412, 1.072..."
2,2,15.실내_918134_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[3.1379899978637695, 1.7926372289657593, -12.4...","[1.985169768333435, -1.2518784999847412, 1.072..."
3,3,15.실내_654528_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[28.44495964050293, -16.67407989501953, -4.198...","[1.985169768333435, -1.2518784999847412, 1.072..."
4,4,15.실내_661859_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[nan, nan, nan, nan, nan, nan]","[1.985169768333435, -1.2518784999847412, 1.072..."


In [21]:
df.to_csv('./audio_text_label.csv')

In [22]:
df

Unnamed: 0.1,Unnamed: 0,audio_id,audio_dir,note,category_02,audio_prediction,text_prediction
0,0,15.실내_906628_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[-11.508453369140625, -8.252114295959473, -23....","[1.985169768333435, -1.2518784999847412, 1.072..."
1,1,15.실내_903897_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[17.464433670043945, -21.854764938354492, -7.6...","[1.985169768333435, -1.2518784999847412, 1.072..."
2,2,15.실내_918134_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[3.1379899978637695, 1.7926372289657593, -12.4...","[1.985169768333435, -1.2518784999847412, 1.072..."
3,3,15.실내_654528_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[28.44495964050293, -16.67407989501953, -4.198...","[1.985169768333435, -1.2518784999847412, 1.072..."
4,4,15.실내_661859_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[nan, nan, nan, nan, nan, nan]","[1.985169768333435, -1.2518784999847412, 1.072..."
...,...,...,...,...,...,...,...
402,402,3.절도범죄_1240207_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,소매치기 잡아라,절도범죄,"[-1.3769102096557617, 4.8432111740112305, -11....","[-0.8364552855491638, -0.4790562391281128, -0...."
403,403,3.절도범죄_1240211_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,소매치기 잡아라,절도범죄,"[-4.723093509674072, -8.860213279724121, -11.5...","[-0.8364552855491638, -0.4790562391281128, -0...."
404,404,3.절도범죄_1240983_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,저 놈 잡아라,절도범죄,"[-4.010353088378906, -3.603227138519287, -10.2...","[-1.1488778591156006, 0.18310293555259705, -0...."
405,405,3.절도범죄_1240989_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,저 놈 잡아라,절도범죄,"[-15.072626113891602, 3.24332594871521, -8.771...","[-1.1488778591156006, 0.18310293555259705, -0...."


## Decision Diffusion Method with ML

In [23]:
import pandas as pd
import ast

In [24]:
df = pd.read_csv('./audio_text_label.csv', encoding='utf8')

In [25]:
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,audio_id,audio_dir,note,category_02,audio_prediction,text_prediction
0,0,0,15.실내_906628_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[-11.508453369140625, -8.252114295959473, -23....","[1.985169768333435, -1.2518784999847412, 1.072..."
1,1,1,15.실내_903897_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[17.464433670043945, -21.854764938354492, -7.6...","[1.985169768333435, -1.2518784999847412, 1.072..."
2,2,2,15.실내_918134_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[3.1379899978637695, 1.7926372289657593, -12.4...","[1.985169768333435, -1.2518784999847412, 1.072..."
3,3,3,15.실내_654528_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[28.44495964050293, -16.67407989501953, -4.198...","[1.985169768333435, -1.2518784999847412, 1.072..."
4,4,4,15.실내_661859_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[nan, nan, nan, nan, nan, nan]","[1.985169768333435, -1.2518784999847412, 1.072..."


In [26]:
df['audio_prediction'].head()

0    [-11.508453369140625, -8.252114295959473, -23....
1    [17.464433670043945, -21.854764938354492, -7.6...
2    [3.1379899978637695, 1.7926372289657593, -12.4...
3    [28.44495964050293, -16.67407989501953, -4.198...
4                       [nan, nan, nan, nan, nan, nan]
Name: audio_prediction, dtype: object

In [27]:
df['audio_prediction'][0]

'[-11.508453369140625, -8.252114295959473, -23.207284927368164, 3.5422844886779785, 8.454593658447266, -5.967389106750488]'

In [28]:
df['audio_prediction'] = df['audio_prediction'].apply(lambda x: ast.literal_eval(str(x).replace('nan', 'None')))
df['text_prediction'] = df['text_prediction'].apply(lambda x: ast.literal_eval(str(x).replace('nan', 'None')))

In [29]:
del df['Unnamed: 0']
del df['Unnamed: 0.1']

In [30]:
df

Unnamed: 0,audio_id,audio_dir,note,category_02,audio_prediction,text_prediction
0,15.실내_906628_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[-11.508453369140625, -8.252114295959473, -23....","[1.985169768333435, -1.2518784999847412, 1.072..."
1,15.실내_903897_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[17.464433670043945, -21.854764938354492, -7.6...","[1.985169768333435, -1.2518784999847412, 1.072..."
2,15.실내_918134_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[3.1379899978637695, 1.7926372289657593, -12.4...","[1.985169768333435, -1.2518784999847412, 1.072..."
3,15.실내_654528_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[28.44495964050293, -16.67407989501953, -4.198...","[1.985169768333435, -1.2518784999847412, 1.072..."
4,15.실내_661859_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,empty,실내,"[None, None, None, None, None, None]","[1.985169768333435, -1.2518784999847412, 1.072..."
...,...,...,...,...,...,...
402,3.절도범죄_1240207_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,소매치기 잡아라,절도범죄,"[-1.3769102096557617, 4.8432111740112305, -11....","[-0.8364552855491638, -0.4790562391281128, -0...."
403,3.절도범죄_1240211_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,소매치기 잡아라,절도범죄,"[-4.723093509674072, -8.860213279724121, -11.5...","[-0.8364552855491638, -0.4790562391281128, -0...."
404,3.절도범죄_1240983_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,저 놈 잡아라,절도범죄,"[-4.010353088378906, -3.603227138519287, -10.2...","[-1.1488778591156006, 0.18310293555259705, -0...."
405,3.절도범죄_1240989_label,/Users/yoohajun/Desktop/grad_audio/diffusion/t...,저 놈 잡아라,절도범죄,"[-15.072626113891602, 3.24332594871521, -8.771...","[-1.1488778591156006, 0.18310293555259705, -0...."


In [31]:
new_df = df[['audio_prediction', 'text_prediction', 'category_02']]

In [32]:
new_df['audio_prediction'][0]

[-11.508453369140625,
 -8.252114295959473,
 -23.207284927368164,
 3.5422844886779785,
 8.454593658447266,
 -5.967389106750488]

In [33]:
new_df

Unnamed: 0,audio_prediction,text_prediction,category_02
0,"[-11.508453369140625, -8.252114295959473, -23....","[1.985169768333435, -1.2518784999847412, 1.072...",실내
1,"[17.464433670043945, -21.854764938354492, -7.6...","[1.985169768333435, -1.2518784999847412, 1.072...",실내
2,"[3.1379899978637695, 1.7926372289657593, -12.4...","[1.985169768333435, -1.2518784999847412, 1.072...",실내
3,"[28.44495964050293, -16.67407989501953, -4.198...","[1.985169768333435, -1.2518784999847412, 1.072...",실내
4,"[None, None, None, None, None, None]","[1.985169768333435, -1.2518784999847412, 1.072...",실내
...,...,...,...
402,"[-1.3769102096557617, 4.8432111740112305, -11....","[-0.8364552855491638, -0.4790562391281128, -0....",절도범죄
403,"[-4.723093509674072, -8.860213279724121, -11.5...","[-0.8364552855491638, -0.4790562391281128, -0....",절도범죄
404,"[-4.010353088378906, -3.603227138519287, -10.2...","[-1.1488778591156006, 0.18310293555259705, -0....",절도범죄
405,"[-15.072626113891602, 3.24332594871521, -8.771...","[-1.1488778591156006, 0.18310293555259705, -0....",절도범죄


In [34]:
target = new_df['category_02']

In [35]:
# audio_prob_df

In [36]:
audio_prob_df = new_df['audio_prediction'].apply(pd.Series)

  audio_prob_df = new_df['audio_prediction'].apply(pd.Series)


In [37]:
audio_prob_df

Unnamed: 0,0,1,2,3,4,5
0,-11.508453,-8.252114,-23.207285,3.542284,8.454594,-5.967389
1,17.464434,-21.854765,-7.678674,-4.887698,-21.380985,-1.747072
2,3.137990,1.792637,-12.433538,-23.451603,1.165656,8.441647
3,28.444960,-16.674080,-4.198120,-16.081032,-18.207804,-3.304222
4,,,,,,
...,...,...,...,...,...,...
402,-1.376910,4.843211,-11.274698,-16.632980,2.652317,3.715904
403,-4.723094,-8.860213,-11.551517,-16.616116,-9.978647,20.510338
404,-4.010353,-3.603227,-10.285007,-21.263302,-0.790818,12.896317
405,-15.072626,3.243326,-8.771307,-17.382805,6.553047,4.722164


In [38]:
c = ['audio_prob_{}'.format(i) for i in range(6)]
audio_prob_df.columns = c
audio_prob_df

Unnamed: 0,audio_prob_0,audio_prob_1,audio_prob_2,audio_prob_3,audio_prob_4,audio_prob_5
0,-11.508453,-8.252114,-23.207285,3.542284,8.454594,-5.967389
1,17.464434,-21.854765,-7.678674,-4.887698,-21.380985,-1.747072
2,3.137990,1.792637,-12.433538,-23.451603,1.165656,8.441647
3,28.444960,-16.674080,-4.198120,-16.081032,-18.207804,-3.304222
4,,,,,,
...,...,...,...,...,...,...
402,-1.376910,4.843211,-11.274698,-16.632980,2.652317,3.715904
403,-4.723094,-8.860213,-11.551517,-16.616116,-9.978647,20.510338
404,-4.010353,-3.603227,-10.285007,-21.263302,-0.790818,12.896317
405,-15.072626,3.243326,-8.771307,-17.382805,6.553047,4.722164


In [39]:
audio_means = audio_prob_df.mean()

In [40]:
audio_prob_df = audio_prob_df.fillna(value=audio_means)

In [41]:
v = np.column_stack([df.audio_prediction.values.tolist()])
v[0][0]

  arr = asanyarray(v)


[-11.508453369140625,
 -8.252114295959473,
 -23.207284927368164,
 3.5422844886779785,
 8.454593658447266,
 -5.967389106750488]

In [42]:
text_prob_df = new_df['text_prediction'].apply(pd.Series)
c = ['text_prob_{}'.format(i) for i in range(6)]
text_prob_df.columns = c
text_prob_df

Unnamed: 0,text_prob_0,text_prob_1,text_prob_2,text_prob_3,text_prob_4,text_prob_5
0,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765
1,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765
2,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765
3,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765
4,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765
...,...,...,...,...,...,...
402,-0.836455,-0.479056,-0.917581,-0.670842,3.972445,-0.816911
403,-0.836455,-0.479056,-0.917581,-0.670842,3.972445,-0.816911
404,-1.148878,0.183103,-0.968534,-0.681608,3.823746,-0.802174
405,-1.148878,0.183103,-0.968534,-0.681608,3.823746,-0.802174


In [43]:
text_means = text_prob_df.mean()

In [44]:
text_prob_df = text_prob_df.fillna(value=text_means)

In [45]:
# 데이터프레임을 병렬로 연결하기
result = pd.concat([audio_prob_df, text_prob_df, target], axis=1)

In [46]:
result

Unnamed: 0,audio_prob_0,audio_prob_1,audio_prob_2,audio_prob_3,audio_prob_4,audio_prob_5,text_prob_0,text_prob_1,text_prob_2,text_prob_3,text_prob_4,text_prob_5,category_02
0,-11.508453,-8.252114,-23.207285,3.542284,8.454594,-5.967389,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765,실내
1,17.464434,-21.854765,-7.678674,-4.887698,-21.380985,-1.747072,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765,실내
2,3.137990,1.792637,-12.433538,-23.451603,1.165656,8.441647,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765,실내
3,28.444960,-16.674080,-4.198120,-16.081032,-18.207804,-3.304222,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765,실내
4,-0.446208,-3.118892,-10.164642,-16.763775,-2.070044,6.610947,1.985170,-1.251878,1.072886,-1.533072,-1.373477,1.073765,실내
...,...,...,...,...,...,...,...,...,...,...,...,...,...
402,-1.376910,4.843211,-11.274698,-16.632980,2.652317,3.715904,-0.836455,-0.479056,-0.917581,-0.670842,3.972445,-0.816911,절도범죄
403,-4.723094,-8.860213,-11.551517,-16.616116,-9.978647,20.510338,-0.836455,-0.479056,-0.917581,-0.670842,3.972445,-0.816911,절도범죄
404,-4.010353,-3.603227,-10.285007,-21.263302,-0.790818,12.896317,-1.148878,0.183103,-0.968534,-0.681608,3.823746,-0.802174,절도범죄
405,-15.072626,3.243326,-8.771307,-17.382805,6.553047,4.722164,-1.148878,0.183103,-0.968534,-0.681608,3.823746,-0.802174,절도범죄


In [47]:
result.to_csv('./result_probability.csv')

In [48]:
X = result.drop('category_02', axis=1)
y = result['category_02']

In [50]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [51]:
# 탐색할 매개변수들을 딕셔너리 형태로 정의
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [2, 3, 4, 5, 6, 7, 8, 9, 10],
    'min_samples_split': [2, 3, 4, 5],
    'min_samples_leaf': [1, 2, 3, 4, 5],
}
# 분류 알고리즘 생성하기
clf = DecisionTreeClassifier()

# GridSearchCV 객체 생성
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5)



In [52]:
# 분류 알고리즘 학습시키기
grid_search.fit(X_train, y_train)

In [54]:
# 테스트 데이터로 성능 평가하기
accuracy = grid_search.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.8902439024390244


In [55]:
import pickle

pickle.dump(grid_search, open('./DT_model.pkl', 'wb'))

In [57]:
# 예측하기
new_data = [[-9.0, 3.0, -5.0, 2.0, 1.0, 0.0, 10.0, -5.0, 2.0, -3.0, 4.0, -1.0],
            [15.2371187210083, -2.6208269596099854, -12.99371337890625, -14.600499153137207, -12.526419639587402, 5.471261501312256, -1.2606452703475952, -1.0324167013168335, -1.0932425260543823, 4.17419958114624, -0.9211591482162476, -0.3627887964248657],
            [-5.0, -10.0, -2.0, 2.0, -1.0, 3.0, 10.0, -5.0, 2.0, -3.0, 4.0, -1.0]]

prediction = grid_search.predict(new_data)

print(prediction)

['실내' '강제추행(성범죄)' '실내']


