In [None]:
# 구글 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# 코랩 작업장으로 복사
!cp /content/drive/MyDrive/Sonus/processed_clips_final.zip /content/

In [None]:
# 압축 해제
!unzip /content/processed_clips_final.zip -d /content/

In [None]:
import os
import numpy as np
import pandas as pd
import h5py
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
import random
from sklearn.metrics import f1_score
from collections import Counter


# HDF5 파일이 저장된 디렉토리 설정
h5_dir = '/content/processed_clips_final/'
batch_size = 256
epochs = 50
H = 4  # 사용할 고조파 개수


# GPU 설정
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as e:
        print(e)


# 악기 MIDI 번호에서 인덱스 매핑 정의
instrument_mapping = {
    1: 0, 41: 1, 42: 2, 43: 3, 61: 4,
    71: 5, 72: 6, 7: 7, 44: 8, 69: 9, 74: 10
}
instrument_names = {
    0: "Grand Piano", 1: "Violin", 2: "Viola", 3: "Cello", 4: "Horn",
    5: "Bassoon", 6: "Clarinet", 7: "Harpsichord", 8: "Contrabass",
    9: "Oboe", 10: "Flute"
}


# 고조파 생성 함수
def generate_harmonics(note, harmonics=H):
    base_frequency = 440 * (2 ** ((note - 69) / 12))  # MIDI 음높이로부터 기본 주파수 계산
    harmonic_frequencies = [base_frequency * (i + 1) for i in range(harmonics)]
    return np.array(harmonic_frequencies, dtype=np.float32)


# 데이터셋 로드 함수
def load_h5_file(h5_path):
    with h5py.File(h5_path, 'r') as f:
        clip = np.array(f['clip'])
        instruments = np.array(f['instrument']).flatten()
        note = np.array(f['note']).flatten()[0]

        # 다중 레이블로 악기별로 변환
        labels = np.zeros(len(instrument_mapping), dtype=np.float32)
        for instr in instruments:
            idx = instrument_mapping.get(instr, -1)
            if idx != -1:
                labels[idx] = 1

        # 실시간 고조파 생성
        harmonics = generate_harmonics(note, harmonics=H)

    return clip, labels, harmonics


# 각 클래스의 샘플 수를 계산하여 클래스 가중치를 설정하는 함수
def calculate_class_weights(h5_files):
    instrument_counts = Counter()
    for file in h5_files:
        _, labels, _ = load_h5_file(file)
        instrument_counts.update(np.where(labels == 1)[0])  # 라벨이 1인 위치에서 각 클래스 수를 누적

    # 클래스 가중치 계산: 클래스마다 전체 샘플 수 / 해당 클래스의 샘플 수
    total_samples = sum(instrument_counts.values())
    class_weights = {i: total_samples / count for i, count in instrument_counts.items()}
    return class_weights


# 에포크 종료 시 F1 스코어 계산 콜백
class F1ScoreOnEpochEnd(callbacks.Callback):
    def __init__(self, data_generator):
        super().__init__()
        self.data_generator = data_generator

    def on_epoch_end(self, epoch, logs=None):
        y_true, y_pred = [], []

        # 모든 배치에 대해 예측 수행 및 실제 라벨 수집
        # for batch in range(len(self.data_generator)):
        #     [clips, notes, harmonics], labels = self.data_generator[batch]
        #     predictions = self.model.predict([clips, notes, harmonics], verbose=0)

        for batch in range(len(self.data_generator)):
            [clips, harmonics], labels = self.data_generator[batch]
            predictions = self.model.predict([clips, harmonics], verbose=0)

            # 다중 레이블 이진화 처리
            y_true.extend(labels)
            y_pred.extend(predictions)

        # 이진 임계값(0.5) 적용
        y_true = np.array(y_true)
        y_pred = np.array(y_pred) >= 0.5  # 0.5 이상의 예측값을 긍정 클래스로 처리

        # F1 스코어 계산
        f1 = f1_score(y_true, y_pred, average='macro')
        print(f"\nEpoch {epoch + 1} - F1 Score: {f1:.4f}")
        logs['f1_score'] = f1  # 로그에 F1 스코어 추가


class H5DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, h5_files, batch_size=32, shuffle=True):
        self.h5_files = h5_files
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return len(self.h5_files) // self.batch_size

    def __getitem__(self, index):
        batch_files = self.h5_files[index * self.batch_size:(index + 1) * self.batch_size]
        clips, labels, harmonics = [], [], []

        for file in batch_files:
            clip, label, harmonic = load_h5_file(file)
            clips.append(clip)
            labels.append(label)
            harmonics.append(harmonic)

        clips = np.array(clips)
        labels = np.array(labels)
        harmonics = np.array(harmonics)

        # tf.TensorSpec을 사용하여 output_signature 정의
        # TensorFlow에 데이터의 모양과 유형을 알려줍니다.
        output_signature = (
            (tf.TensorSpec(shape=clips.shape[1:], dtype=clips.dtype),  # clips
             tf.TensorSpec(shape=harmonics.shape[1:], dtype=harmonics.dtype)),  # harmonics
            tf.TensorSpec(shape=labels.shape[1:], dtype=labels.dtype)  # labels
        )

        # Return the data as a tuple instead of a list
        return (clips, harmonics), labels # output_signature는 model.fit에서 자동으로 추론됩니다.


    def on_epoch_end(self):
        if self.shuffle:
            random.shuffle(self.h5_files)


# 학습 및 검증 파일을 분리하는 함수
def split_train_val(h5_dir, val_ratio=0.05):
    train_files, val_files = [], []

    for instrument in os.listdir(h5_dir):
        instrument_path = os.path.join(h5_dir, instrument)
        if os.path.isdir(instrument_path):
            files = [os.path.join(instrument_path, f) for f in os.listdir(instrument_path) if f.endswith('.h5')]
            val_size = int(len(files) * val_ratio)
            val_files.extend(files[:val_size])
            train_files.extend(files[val_size:])

    return train_files, val_files


# 멀티 브랜치 CNN 모델 생성
def build_model(input_shape=(256, 46, 1), num_classes=11, harmonics_count=H):
    input_layer = layers.Input(shape=input_shape, name="spectrogram_input")
    x = input_layer
    for _ in range(5):
        branch1 = layers.Conv2D(64, (11, 1), activation='relu', padding='same')(x)
        branch1 = layers.BatchNormalization()(branch1)
        branch1 = layers.MaxPooling2D((2, 2))(branch1)

        branch2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
        branch2 = layers.BatchNormalization()(branch2)
        branch2 = layers.MaxPooling2D((2, 2))(branch2)

        branch3 = layers.Conv2D(64, (1, 11), activation='relu', padding='same')(x)
        branch3 = layers.BatchNormalization()(branch3)
        branch3 = layers.MaxPooling2D((2, 2))(branch3)

        x = layers.Concatenate()([branch1, branch2, branch3])

    flatten_spectrogram = layers.Flatten()(x)
    harmonics_input = layers.Input(shape=(harmonics_count,), name="harmonics_input")
    combined = layers.Concatenate()([flatten_spectrogram, harmonics_input])

    fc1 = layers.Dense(256, activation='relu')(combined)
    fc1 = layers.Dropout(0.5)(fc1)
    fc2 = layers.Dense(128, activation='relu')(fc1)
    output_layer = layers.Dense(num_classes, activation='sigmoid')(fc2)

    model = models.Model(inputs=[input_layer, harmonics_input], outputs=output_layer)
    return model


# 모델 학습 및 평가
def train_model(h5_dir, batch_size, epochs):
    train_files, val_files = split_train_val(h5_dir, val_ratio=0.05)
    train_gen = H5DataGenerator(train_files, batch_size=batch_size)
    val_gen = H5DataGenerator(val_files, batch_size=batch_size, shuffle=False)

    model = build_model(input_shape=(256, 46, 1), num_classes=11, harmonics_count=H)
    model.compile(optimizer=optimizers.Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # 클래스 가중치 계산
    class_weights = calculate_class_weights(train_files)

    lr_reduce = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=1)
    early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

    f1_on_epoch_end = F1ScoreOnEpochEnd(train_gen)

    history = model.fit(train_gen, epochs=epochs,
                        callbacks=[lr_reduce, early_stop, f1_on_epoch_end],
                        validation_data=val_gen,
                        class_weight=class_weights)

    return model, history


# 모델 학습 실행
model, history = train_model(h5_dir, batch_size, epochs)

# 학습 기록을 DataFrame으로 변환
history_df = pd.DataFrame(history.history)

# CSV 파일로 저장
history_df.to_csv("training_history.csv", index=False)


Epoch 1/50


  self._warn_if_super_not_called()


[1m1140/1140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 930ms/step - accuracy: 0.3097 - loss: 11.4185
Epoch 1 - F1 Score: 0.1627
[1m1140/1140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1772s[0m 2s/step - accuracy: 0.3098 - loss: 11.4121 - val_accuracy: 0.2652 - val_loss: 0.3342 - learning_rate: 0.0010 - f1_score: 0.1627
Epoch 2/50
[1m1140/1140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 928ms/step - accuracy: 0.6029 - loss: 1.8235
Epoch 2 - F1 Score: 0.3866
[1m1140/1140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1701s[0m 1s/step - accuracy: 0.6029 - loss: 1.8235 - val_accuracy: 0.7236 - val_loss: 0.1196 - learning_rate: 0.0010 - f1_score: 0.3866
Epoch 3/50
[1m1140/1140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 927ms/step - accuracy: 0.6461 - loss: 1.6284
Epoch 3 - F1 Score: 0.1732
[1m1140/1140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1696s[0m 1s/step - accuracy: 0.6461 - loss: 1.6284 - val_accuracy: 0.6245 - val_loss: 0.5045 - lea

In [None]:
# 또는 SavedModel 형식으로 저장
model.save("instrument_classification_model.keras")

In [None]:
model = load_model('/content/instrument_classification_model.keras')
model.summary()

In [19]:
import os
import h5py
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, f1_score, classification_report

# 모델 불러오기
model = load_model('/content/instrument_classification_model.keras')

# 검증 데이터 경로
validation_dir = '/content/processed_clips_test'

# 악기 MIDI 번호에서 인덱스 매핑 정의
instrument_mapping = {
    1: 0, 41: 1, 42: 2, 43: 3, 61: 4,
    71: 5, 72: 6, 7: 7, 44: 8, 69: 9, 74: 10
}
instrument_names = {
    0: "Grand Piano", 1: "Violin", 2: "Viola", 3: "Cello", 4: "Horn",
    5: "Bassoon", 6: "Clarinet", 7: "Harpsichord", 8: "Contrabass",
    9: "Oboe", 10: "Flute"
}

# 고조파 생성 함수 (학습 코드와 동일하게 설정)
H = 4  # 학습 시 사용했던 고조파 개수와 동일하게 설정
def generate_harmonics(note, harmonics=H):
    base_frequency = 440 * (2 ** ((note - 69) / 12))  # MIDI 음높이로부터 기본 주파수 계산
    harmonic_frequencies = [base_frequency * (i + 1) for i in range(harmonics)]
    return np.array(harmonic_frequencies, dtype=np.float32)

# 임계값 설정 (확률이 이 값을 초과하면 악기가 존재한다고 판단)
threshold = 0.5

# 전체 예측 및 실제 레이블 저장 리스트
all_true_labels = []
all_pred_labels = []

# 검증 데이터의 각 클립에 대해 예측 수행
for filename in os.listdir(validation_dir):
    if filename.endswith('.h5'):
        filepath = os.path.join(validation_dir, filename)

        # HDF5 파일 로드
        with h5py.File(filepath, 'r') as hf:
            clip = hf['clip'][:]
            instrument_label = int(hf['instrument'][0])  # 실제 악기 번호

            # 보조 입력 데이터 불러오기 (예: note 정보)
            note = hf['note'][:]

            # 악기 번호가 매핑에 있는지 확인
            if instrument_label in instrument_mapping:
                true_label_vector = np.zeros(len(instrument_mapping))
                true_label_vector[instrument_mapping[instrument_label]] = 1  # 실제 레이블 벡터 생성

                # clip 차원 조정
                clip = clip.reshape(1, 256, 46, 1)

                # 고조파 생성
                harmonics = generate_harmonics(note[0])  # note is (1,), so take note[0]
                harmonics = harmonics.reshape(1, H)  # Reshape to (1, 4) for batch size 1

                # 예측 수행
                prediction = model.predict([clip, harmonics], verbose=0)[0]  # [0] to get first sample

                # 예측 벡터 생성 (임계값 적용)
                pred_label_vector = (prediction >= threshold).astype(int)

                # 전체 레이블 저장
                all_true_labels.append(true_label_vector)
                all_pred_labels.append(pred_label_vector)

# 정확도 및 F1-score 계산
all_true_labels = np.array(all_true_labels)
all_pred_labels = np.array(all_pred_labels)

# 전체 다중 레이블 분류 성능 보고
print("전체 성능:")
print(classification_report(all_true_labels, all_pred_labels, target_names=[instrument_names[i] for i in range(len(instrument_names))]))

# 악기별 성능 분석
for i, instrument_name in instrument_names.items():
    instrument_true = all_true_labels[:, i]
    instrument_pred = all_pred_labels[:, i]

    accuracy = accuracy_score(instrument_true, instrument_pred)
    f1 = f1_score(instrument_true, instrument_pred)

    print(f"{instrument_name} - 정확도: {accuracy * 100:.2f}%, F1-score: {f1:.2f}")


전체 성능:
              precision    recall  f1-score   support

 Grand Piano       1.00      1.00      1.00      1733
      Violin       0.97      0.74      0.84      1364
       Viola       0.43      0.81      0.56       294
       Cello       1.00      0.92      0.95      1255
        Horn       0.64      0.76      0.70       226
     Bassoon       0.88      0.71      0.79       346
    Clarinet       0.95      0.84      0.89       449
 Harpsichord       0.00      0.00      0.00         0
  Contrabass       0.00      0.00      0.00         0
        Oboe       0.00      0.00      0.00         0
       Flute       0.00      0.00      0.00         0

   micro avg       0.90      0.87      0.89      5667
   macro avg       0.53      0.53      0.52      5667
weighted avg       0.94      0.87      0.90      5667
 samples avg       0.87      0.87      0.87      5667

Grand Piano - 정확도: 99.95%, F1-score: 1.00
Violin - 정확도: 93.26%, F1-score: 0.84
Viola - 정확도: 93.38%, F1-score: 0.56
Cello - 정확도

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
