#import

In [23]:
import numpy as np
import pandas as pd
import librosa

from sklearn.model_selection import train_test_split

In [24]:
# 資料判斷
df_csv = pd.read_csv("Training Dataset/training datalist.csv")
print("資料資訊")
df_csv.info()

資料資訊
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   ID                         1000 non-null   object 
 1   Sex                        1000 non-null   int64  
 2   Age                        1000 non-null   int64  
 3   Disease category           1000 non-null   int64  
 4   Narrow pitch range         1000 non-null   int64  
 5   Decreased volume           1000 non-null   int64  
 6   Fatigue                    1000 non-null   int64  
 7   Dryness                    1000 non-null   int64  
 8   Lumping                    1000 non-null   int64  
 9   heartburn                  1000 non-null   int64  
 10  Choking                    1000 non-null   int64  
 11  Eye dryness                1000 non-null   int64  
 12  PND                        1000 non-null   int64  
 13  Smoking                    1000 non-null   i

In [25]:
# 挑選出要訓練的Disease category 1、2、3
df_csv = df_csv.loc[df_csv['Disease category'].isin([1, 2, 3, 4, 5]), ['ID', 'Disease category']]

# 在dataframe中加入要訓練的音檔路徑
df_csv['wav_path'] = df_csv['ID'].map("./Training Dataset/training_voice_data{}.wav".format)

print("Disease category in source_df :",df_csv['Disease category'].unique())
print("source_df :\n", df_csv)

Disease category in source_df : [1 2 3 5 4]
source_df :
           ID  Disease category  \
0    1202f15                 1   
1    0600ve0                 2   
2    1001o7l                 2   
3    1201c1t                 1   
4    0402jvt                 1   
..       ...               ...   
995  0G00ftn                 3   
996  1201pkr                 4   
997  0202p64                 3   
998  12021au                 2   
999  04027it                 2   

                                              wav_path  
0    ./Training Dataset/training_voice_data1202f15.wav  
1    ./Training Dataset/training_voice_data0600ve0.wav  
2    ./Training Dataset/training_voice_data1001o7l.wav  
3    ./Training Dataset/training_voice_data1201c1t.wav  
4    ./Training Dataset/training_voice_data0402jvt.wav  
..                                                 ...  
995  ./Training Dataset/training_voice_data0G00ftn.wav  
996  ./Training Dataset/training_voice_data1201pkr.wav  
997  ./Training Datas

In [27]:
training_df, test_df = train_test_split(df_csv, test_size=0.2, random_state=333)

print("training_df shape :", training_df.shape, ", test_df shape :", test_df.shape)

training_df shape : (800, 3) , test_df shape : (200, 3)


In [32]:
# 定義函數
def audio_to_mfccs(filename, sample_rate=44100, offset=0, duration=None):
    # 讀取音訊檔案，並設定取樣率、起始時間、及持續時間
    voice, sample_rate = librosa.load(
        filename, sr=sample_rate, offset=offset, duration=duration
    )

    # 將時間值轉換為 FFT 與 hop length 所需的框架數 (以取樣點表示)
    n_fft = int(16/1000 * sample_rate)  # 將 16 毫秒轉換為取樣點
    hop_length = int(8/1000 * sample_rate)  # 將 8 毫秒轉換為取樣點

    # 計算音訊數據的 MFCC 特徵
    mfcc_feature = librosa.feature.mfcc(
        y=voice, sr=sample_rate, n_mfcc=13, n_fft=n_fft, hop_length=hop_length)

    # 計算 MFCC 的一階和二階差分特徵
    delta_mfcc_feature = librosa.feature.delta(mfcc_feature)

    # 將原始 MFCC 特徵和差分特徵串聯起來，得到所有幀的特徵向量
    mfccs = np.concatenate((mfcc_feature, delta_mfcc_feature))
    mfccs_features = np.transpose(mfccs)  # 將矩陣轉置，使每行代表一個幀

    # 返回特徵向量
    return mfccs_features

In [34]:
training_id = training_df['ID'].tolist()
training_data = pd.DataFrame()
for id in training_id:
    mfccs_feature = audio_to_mfccs(training_df[training_df['ID']==id]['wav_path'].values[0])
    df = pd.DataFrame(mfccs_feature)
    # print("id :",id, ", number of frames :", df.shape[0])

    # 訓練資料標記
    label = training_df[training_df['ID']==id]['Disease category'].values[0]
    if label==1:
        df['c1'] = 1; df['c2'] = 0; df['c3'] = 0 ; df['c4'] = 0; df['c5'] = 0
    elif label==2:
        df['c1'] = 0; df['c2'] = 1; df['c3'] = 0 ; df['c4'] = 0; df['c5'] = 0
    elif label==3:
        df['c1'] = 0; df['c2'] = 0; df['c3'] = 1 ; df['c4'] = 0; df['c5'] = 0
    elif label==4:
        df['c1'] = 0; df['c2'] = 0; df['c3'] = 0 ; df['c4'] = 1; df['c5'] = 0
    elif label==5:
        df['c1'] = 0; df['c2'] = 0; df['c3'] = 0 ; df['c4'] = 0; df['c5'] = 1
    else:
        df['c1'] = np.nan; df['c2'] = np.nan; df['c3'] = np.nan; df['c4'] = np.nan; df['c5'] = np.nan

    training_data = pd.concat([training_data, df])

print("training_data.shape :", training_data.shape)

x_train = training_data.iloc[:, :-5]
y_train = training_data.iloc[:, -5:]
print("x_train.shape, y_train.shape :", x_train.shape, y_train.shape)
print("y_train.columns :", y_train.columns.tolist())

  voice, sample_rate = librosa.load(


FileNotFoundError: [Errno 2] No such file or directory: './Training Dataset/training_voice_data1101uri.wav'