## 특성 및 모델 변경
- 그동안은 특성을 3차원 데이터로 추출하여 CNN 모델에 적용해 왔으나, 이번에는 특성들을 평균화하여 2차원 데이터로 만들고 다른 모델들을 적용해 보기로 함

In [None]:
def perceptual_sharpness(audio_path, sr=16000, n_fft=512, hop_length=256,
                         fmin=3000.0, fmax=8000.0):
    """Return a high-frequency energy score for an audio signal.

    The magnitude STFT is summed over time for every frequency bin, the bins
    whose centre frequency lies in ``[fmin, fmax]`` Hz are kept, and the sum
    of ``log1p`` of those per-bin totals is returned.

    Args:
        audio_path: The audio time series itself. Despite the name this is
            NOT a file path; the name is kept so existing callers still work.
        sr: Sampling rate of the signal in Hz.
        n_fft: FFT window size passed to ``librosa.stft``.
        hop_length: Hop length passed to ``librosa.stft``.
        fmin: Lower edge of the high-frequency band in Hz.
        fmax: Upper edge of the high-frequency band in Hz.

    Returns:
        A scalar score. Because magnitudes are summed over all frames, the
        value grows with clip length.
    """
    y = audio_path

    # Magnitude spectrogram; librosa.stft puts frequency bins on the
    # second-to-last axis and frames on the last axis.
    D = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))

    # Total magnitude per frequency bin. The previous code summed over the
    # frequency axis instead, so the later slice picked frames 3000-8000
    # (usually an empty range) rather than a frequency band.
    energy = np.sum(D, axis=-1)

    # Select the band by centre frequency in Hz, not by raw array index.
    # NOTE(review): the original comment mentioned 3000-6000 Hz while the code
    # used 8000 as the upper bound; the code's value is kept as the default.
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    in_band = (freqs >= fmin) & (freqs <= fmax)
    high_freq_energy = energy[..., in_band]

    sharpness = np.sum(np.log1p(high_freq_energy))

    return sharpness

In [None]:
def extract_features(file_name, apply_scale=True):
    """Load one audio file and summarise it as a flat feature vector.

    The vector has 58 entries: the mean and variance of each of the 20 MFCCs
    (interleaved as mean_0, var_0, mean_1, var_1, ...), followed by the mean
    and variance of chroma STFT, RMS, spectral centroid, spectral bandwidth,
    spectral roll-off, zero-crossing rate, chroma CENS, tempo and perceptual
    sharpness, in that order.

    Args:
        file_name: Path of the audio file; it is resampled to 16 kHz on load.
        apply_scale: When True (the default, matching the previous behaviour)
            the vector is passed through ``scale`` before being returned.

    Returns:
        A 1-D numpy array of 58 values.
    """
    audio, sr = librosa.load(file_name, sr=16000)

    # Magnitude and power spectrograms. The power must be |STFT|**2; squaring
    # the complex STFT directly (as before) yields complex values, not power.
    spectrogram = librosa.stft(audio, n_fft=512, hop_length=256)
    stft = np.abs(spectrogram)
    power_spectrogram = stft ** 2

    # Log-mel spectrogram -> MFCC. The mel output is already a power
    # spectrogram, so it is converted to dB without squaring it again.
    mel = librosa.feature.melspectrogram(S=power_spectrogram, sr=sr)
    mel = librosa.power_to_db(mel)
    mfccs = librosa.feature.mfcc(S=mel, n_mfcc=20)

    # Pass the real sampling rate: the librosa default (22050) would map the
    # frequency bins of this 16 kHz spectrogram to the wrong pitch classes.
    chroma_stft = librosa.feature.chroma_stft(S=power_spectrogram, sr=sr)
    rms = librosa.feature.rms(y=audio)
    spectral_centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)
    spectral_bandwidths = librosa.feature.spectral_bandwidth(y=audio, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
    zero_crossing_rates = librosa.feature.zero_crossing_rate(y=audio)
    chroma_cens = librosa.feature.chroma_cens(y=audio, sr=sr)
    tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
    ps = perceptual_sharpness(audio)

    # tempo and ps may come back as plain Python numbers, which have no
    # .mean()/.var(); wrap them so every statistic below is computed the
    # same way.
    tempo = np.asarray(tempo, dtype=float)
    ps = np.asarray(ps, dtype=float)

    mfccs_mean = mfccs.mean(axis=1)
    mfccs_var = mfccs.var(axis=1)  # was .mean(), which duplicated the means

    # Interleave as mean_0, var_0, mean_1, var_1, ... (same layout as before).
    mfcc_stats = np.column_stack((mfccs_mean, mfccs_var)).ravel()

    # One (mean, variance) pair per remaining feature, in the original order.
    # NOTE(review): if tempo and ps are single scalars their variance is
    # always 0; the entries are kept so the vector length stays at 58.
    summary_stats = []
    for values in (chroma_stft, rms, spectral_centroids, spectral_bandwidths,
                   spectral_rolloff, zero_crossing_rates, chroma_cens,
                   tempo, ps):
        summary_stats.extend((values.mean(), values.var()))

    features = np.hstack((mfcc_stats, summary_stats))

    if apply_scale:
        # NOTE(review): this scales across the 58 heterogeneous features of a
        # single sample, while the training loop in this file collects
        # unscaled features -- confirm which behaviour is intended.
        features = scale(features)

    return features

In [None]:
# Build the list of test clip ids: one row per entry of the test directory,
# with the file extension removed. The directory is listed once (the previous
# version re-listed it for every element) and the extension is split off
# properly instead of assuming it is exactly four characters long.
test_df = pd.DataFrame([os.path.splitext(name)[0] for name in os.listdir(test_path)])

In [None]:
def load_data(data, column=0, audio_dir=None):
    """Extract a feature vector for every clip id listed in ``data``.

    Args:
        data: Table holding the clip ids (file names without ``.wav``).
        column: Column of ``data`` that contains the ids. The default ``0``
            matches the test list; pass ``'wav_id'`` for the training table.
        audio_dir: Directory containing the ``.wav`` files. Defaults to the
            module-level ``test_path``.

    Returns:
        A list with one ``extract_features`` result per clip, in input order.
    """
    if audio_dir is None:
        audio_dir = test_path

    file_list = data[column]
    train_data = []
    # Give tqdm the total explicitly: it cannot infer a length from enumerate().
    for i, file_name in tqdm(enumerate(file_list), total=len(file_list)):
        features = extract_features(os.path.join(audio_dir, f'{file_name}.wav'))
        train_data.append(features)
        if (i + 1) % 100 == 0:
            print(f'{i+1}번째 파일 완료')
    return train_data

### 트러블 발생
- 약 29,000번째 전후의 데이터에서 float 객체에는 mean을 호출할 수 없다는 오류가 발생함. 원인 확인 필요.

In [None]:

# Extract unscaled feature vectors for every training clip, inline rather than
# through extract_features(), so that a failing file can be reported and
# skipped. Scaling is deliberately left out here: the raw values are collected
# first so outliers can be inspected and removed.
data = train_csv
train_data = []
file_list = data['wav_id']

audio_path = train_audio_path
for i, file_name in tqdm(enumerate(file_list), total=len(file_list)):

    audio, sr = librosa.load(os.path.join(audio_path, f'{file_name}.wav'), sr=16000)

    # Magnitude and power spectrograms. The power must be |STFT|**2; squaring
    # the complex STFT directly yields complex values, not power.
    spectrogram = librosa.stft(audio, n_fft=512, hop_length=256)
    stft = np.abs(spectrogram)
    power_spectrogram = stft ** 2

    # Log-mel spectrogram -> MFCC (the mel power is not squared a second time).
    mel = librosa.feature.melspectrogram(S=power_spectrogram, sr=sr)
    mel = librosa.power_to_db(mel)
    mfccs = librosa.feature.mfcc(S=mel, n_mfcc=20)

    # Pass the real sampling rate instead of relying on librosa's 22050 default.
    chroma_stft = librosa.feature.chroma_stft(S=power_spectrogram, sr=sr)
    rms = librosa.feature.rms(y=audio)
    spectral_centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)
    spectral_bandwidths = librosa.feature.spectral_bandwidth(y=audio, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
    zero_crossing_rates = librosa.feature.zero_crossing_rate(y=audio)
    chroma_cens = librosa.feature.chroma_cens(y=audio, sr=sr)
    tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
    ps = perceptual_sharpness(audio)

    try:
        mfccs_mean = mfccs.mean(axis=1)
        mfccs_var = mfccs.var(axis=1)  # was .mean(), which duplicated the means

        # One (mean, variance) pair per remaining feature, in a fixed order.
        summary_stats = []
        for values in (chroma_stft, rms, spectral_centroids, spectral_bandwidths,
                       spectral_rolloff, zero_crossing_rates, chroma_cens,
                       tempo, ps):
            summary_stats.extend((values.mean(), values.var()))
    except AttributeError as err:
        # A value that is a plain Python number has no .mean()/.var()
        # (presumably tempo for a clip without detectable onsets -- confirm).
        # The file is still skipped, because a later cell drops row 28980 from
        # train_csv to match; the error and file name are printed so the cause
        # can be inspected.
        print(f'{i}번째 파일에서 문제 발생: {file_name} ({err!r})')
        continue

    # Combine: MFCC stats interleaved as mean_0, var_0, mean_1, var_1, ...
    # followed by the remaining 18 summary values (58 in total).
    features = np.hstack((np.column_stack((mfccs_mean, mfccs_var)).ravel(), summary_stats))
    train_data.append(features)

    if i > 0 and i % 100 == 0:
        print(f'{i}번째 파일 완료')
        # Checkpoint everything collected so far as a module-level DataFrame
        # named X_<i // 100> (e.g. X_289 after file 28900), so progress
        # survives a crash later in the loop.
        globals()[f'X_{i // 100}'] = pd.DataFrame(train_data)

In [None]:
# Merge the X_289 checkpoint with the feature vectors collected afterwards and
# view the result as rows of 58 features. train_data is wrapped in a DataFrame
# because pd.concat only accepts Series/DataFrame objects, not a plain list.
features = pd.concat((X_289, pd.DataFrame(train_data))).to_numpy().reshape(-1, 58)
# Printed explicitly: a bare expression in the middle of a cell shows nothing.
print(features.shape)

# Everything except file 28980 is stored as features4.
# Drop the label row of the problematic file 28980 so the labels match the
# saved features.
# NOTE(review): re-running this after reloading the re-saved CSV would drop a
# different row, since the index is not written out -- run it only once.
train_csv = train_csv.drop(index=28980)
np.save('./features4.npy', features)
train_csv.to_csv('e:/Data2/csv/train.csv', index=False)

In [None]:
# Hold out 30% of the samples for testing, stratified on the '감정' labels.
emotion_labels = train_csv['감정']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    emotion_labels,
    test_size=0.3,
    stratify=emotion_labels,
    random_state=0,
)

# Search space for the randomized search.
param_grid = dict(
    n_neighbors=np.arange(1, 15),     # number of neighbours
    weights=['uniform', 'distance'],  # neighbour weighting scheme
    p=[1, 2],                         # Minkowski power parameter
)

# Randomized search over a default KNN classifier: 10 sampled settings,
# 5-fold cross-validation.
knn = KNeighborsClassifier()
random_search_knn = RandomizedSearchCV(
    estimator=knn,
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    random_state=42,
)
random_search_knn.fit(X_train, y_train)

# Score the best estimator found on both the training and the held-out split.
best_knn = random_search_knn.best_estimator_

y_train_pred_knn = best_knn.predict(X_train)
y_pred_knn = best_knn.predict(X_test)

train_accuracy_knn = accuracy_score(y_train, y_train_pred_knn)
test_accuracy_knn = accuracy_score(y_test, y_pred_knn)

print("Train KNN Accuracy:", train_accuracy_knn)
print("Test KNN Accuracy:", test_accuracy_knn)


# Observed result: roughly 38% accuracy.
