In [None]:
import pydub
import numpy as np
import os
import sounddevice as sd
from pesq import pesq
from copy import deepcopy
import librosa

In [2]:
audios=[]
def find_(name:str):
    result=[]
    for idx,ch in enumerate(name):
        if ch=="_":
            result.append(idx)
    return result
for dirpath, dirnames, filenames in os.walk("./audio1"):
    for file_name in filenames:
        if file_name==".DS_Store":
            continue
        audio_data={}
        audio_data["format"]=file_name[-3:]
        i_=find_(file_name)
        audio_data["type"]=file_name[:i_[0]]
        audio_data["bits"]=file_name[i_[-1]+1:-4]
        full_path=os.path.join(dirpath, file_name)
        dub_temp=pydub.AudioSegment.from_file(full_path, format=audio_data["format"]) 
        samples=dub_temp.get_array_of_samples()
        sample_rate=dub_temp.frame_rate
        audio_data["freq"]=sample_rate
        audio_data["data"]=samples
        audio_data["size"]=os.path.getsize(full_path)
        audios.append(audio_data)

In [3]:
# sd.play(audios[0]["data"], audios[0]["freq"])


In [4]:
for audio in audios:
    if audio["type"]=="原始音乐":
        music_origin=audio
    elif audio["type"]=="原始语音":
        speech_origin=audio

In [5]:
# sd.play(music_origin["data"], music_origin["freq"] )
np.min(np.array(music_origin["data"]))

np.int32(-1932735233)

In [None]:
def compare_with_mel(ref:dict, deg:dict, n_mels=128, frame_len=0.025, hop_len=0.01, eps=1e-10):
    ref_data=np.array(ref["data"], dtype=np.float64)
    deg_data=np.array(deg["data"], dtype=np.float64)
    ref_data=ref_data/np.max(np.abs(ref_data))
    deg_data=deg_data/np.max(np.abs(deg_data)) # 归一化
    if ref["freq"]!=deg["freq"]:
        deg_data=librosa.resample(deg_data, orig_sr=deg["freq"], target_sr=ref["freq"]) # 把采样率设置为相同
    min_len=min(len(ref["data"]), len(deg["data"]))
    ref_data=ref_data[:min_len] # 保证长度相同
    deg_data=deg_data[:min_len]
    n_fft=int(frame_len*ref["freq"])
    hop=int(hop_len*ref["freq"])
    S_orig=librosa.feature.melspectrogram(y=deg_data, sr=ref["freq"], n_mels=n_mels, n_fft=n_fft, hop_length=hop)
    S_deg=librosa.feature.melspectrogram(y=deg_data, sr=deg["freq"], n_mels=n_mels, n_fft=n_fft, hop_length=hop)
    los_S_orig=np.log(S_orig+eps)
    log_S_deg=np.log(S_deg+eps)
    P_signal=np.mean(los_S_orig**2)
    D=los_S_orig-log_S_deg
    P_noise=np.mean(D**2)
    if P_noise==0:
        mel_snr=float('inf')
    else:
        mel_snr=10*np.log10(P_signal/P_noise)
    return mel_snr

In [7]:
for idx,audio in enumerate(audios):
    if audio["type"]=="音乐":
        score=compare_with_mel(ref=music_origin, deg=audio)
        audios[idx]["score"]=score 
    elif audio["type"]=="语音":
        score=compare_with_mel(ref=speech_origin, deg=audio)
        audios[idx]["score"]=score 
    else:
        audios[idx]["score"]=float('inf')

In [8]:
for idx,audio in enumerate(audios):
    print(f"idx:{idx}, Type:{audio["type"]}, format:{audio["format"]}, freq:{audio["freq"]}, score:{audio["score"]}, ")

idx:0, Type:语音, format:wav, freq:16000, score:18.314466718611957, 
idx:1, Type:音乐, format:mp3, freq:44100, score:18.524451023312423, 
idx:2, Type:音乐, format:wav, freq:48000, score:inf, 
idx:3, Type:音乐, format:mp3, freq:44100, score:18.426748281117302, 
idx:4, Type:语音, format:wav, freq:44100, score:27.050768329974176, 
idx:5, Type:语音, format:mp3, freq:44100, score:27.03502176179284, 
idx:6, Type:语音, format:mp3, freq:16000, score:20.066914738866966, 
idx:7, Type:音乐, format:wav, freq:8000, score:7.467198641688583, 
idx:8, Type:音乐, format:wav, freq:32000, score:11.67471811955007, 
idx:9, Type:语音, format:wav, freq:16000, score:20.37355769216617, 
idx:10, Type:语音, format:wav, freq:22050, score:18.685708516542235, 
idx:11, Type:音乐, format:wav, freq:48000, score:inf, 
idx:12, Type:原始音乐, format:wav, freq:48000, score:inf, 
idx:13, Type:语音, format:wav, freq:8000, score:inf, 
idx:14, Type:语音, format:mp3, freq:44100, score:26.964155764511744, 
idx:15, Type:语音, format:wav, freq:44100, score:28.0897

In [14]:
summary_mp3=0
summary_acc=0
summary_wav=0
summary_mp3_size=0
summary_acc_size=0
summary_wav_size=0
for audio in audios:
    if audio["freq"]==44100:
        if audio["format"]=="mp3":
           summary_mp3+=audio["score"]
           summary_mp3_size+=audio["size"]
        elif audio["format"]=="m4a":
            summary_acc+=audio["score"]
            summary_acc_size+=audio["size"]
        elif audio["format"]=="wav":
            summary_wav+=audio["score"]
            summary_wav_size+=audio["score"]
print(f"sc_mp3:{summary_mp3}, sc_acc:{summary_acc}, sc_wav:{summary_wav}")

sc_mp3:181.41308607370675, sc_acc:135.6769137146322, sc_wav:87.36340768897529


In [16]:
sizes=[i_size for i_size in [summary_mp3_size, summary_acc_size, summary_wav_size]]
max_size=max(sizes)
min_size=min(sizes)
print(max_size, min_size)
sizes=[(max_size-i)/(max_size-min_size) for i in sizes]
print(sizes)

1416386 87.36340768897529
[np.float64(0.0), np.float64(0.5181703780819584), np.float64(1.0)]


In [17]:
losses=[float(au) if au!=float('inf') else 50 for au in [summary_mp3, summary_acc, summary_wav] ]
max_loss=max(losses)
min_loss=min(losses)
losses=[(i-min_loss)/(max_loss-min_loss) for i in losses]
print(losses)

[1.0, 0.513701980223894, 0.0]


In [12]:
print(losses)

[0.25503300348223334, 0.25997000029397593, 1.0, 0.25767288514813735, 0.46043451319621886, 0.46006429144551214, 0.29623527477143824, 0.0, 0.09892410900509113, 0.30344483876690453, 0.26376136808730144, 1.0, 1.0, 1.0, 0.458398142143845, 0.484861920354473, 0.4597438636358125, 1.0, 0.257894975978381, 0.14372023160054917, 0.26275446879937586, 0.25288545975208626, 0.45903273538346856, 0.447344905062515, 0.022950281602825803, 0.05270400757719118, 0.4598255328783484, 0.25290911448814385, 0.4604895823967673, 0.024192100084629714, 0.2931805882984699, 1.0, 0.2968453456538577, 0.25215952722538026, 0.25891017329243265, 1.0]


In [None]:
cmp_cmx=[(14.75+1.31)/2, (20.07+1.01)/2, (0.51+0.22)/2]
max_cmx=max(cmp_cmx)
min_cmx=min(cmp_cmx)
cmp_cmx=[(max_cmx-i)/(max_cmx-min_cmx) for i in cmp_cmx]
print(cmp_cmx)


[0.24668304668304683, 0.0, 1.0]


[[0.         0.51817038 1.        ]
 [1.         0.51370198 0.        ]
 [0.24668305 0.         1.        ]
 [1.         2.         3.        ]]


In [None]:
import numpy as np

def entropy_weight_method(data):
    """
    熵权法计算指标权重
    参数:
        data: numpy 数组，行表示样本，列表示指标
    返回:
        weights: 各指标的权重
    """
    # 数据预处理：确保数据为正值（熵权法要求数据非负）
    data = np.array(data, dtype=float)
    if (data < 0).any():
        raise ValueError("数据中包含负值，请确保所有数据非负！")

    # 1. 数据标准化（归一化到 [0,1]）
    # 使用线性归一化：(x - min) / (max - min)
    min_val = np.min(data, axis=0)
    max_val = np.max(data, axis=0)
    # 防止除以零
    denominator = max_val - min_val
    denominator[denominator == 0] = 1  # 如果某列最大值等于最小值，设分母为 1
    normalized_data = (data - min_val) / denominator

    # 2. 计算比重 p_ij
    # 防止除以零，先对归一化数据加一个小值（如 1e-10）
    normalized_data += 1e-10
    row_sums = np.sum(normalized_data, axis=1, keepdims=True)
    p = normalized_data / row_sums

    # 3. 计算熵值 e_j
    m = data.shape[0]  # 样本数
    k = 1 / np.log(m)  # 常数 k = 1/ln(m)
    entropy = -k * np.sum(p * np.log(p), axis=0)

    # 4. 计算权重 w_j
    d = 1 - entropy  # 差异系数
    weights = d / np.sum(d)  # 归一化权重

    return weights

# 示例数据
if __name__ == "__main__":
    # 假设有 4 个样本，3 个指标
     
    apx_mat=np.array([
        sizes, losses, cmp_cmx, [1,2,3]
    ])
    apx_mat=np.transpose(apx_mat)
    print(apx_mat)
    try:
        weights = entropy_weight_method(apx_mat)
        print("各指标权重：", weights)
        final_mp3=float(weights[0]*sizes[0]+weights[1]*losses[0]+weights[2]*cmp_cmx[0]+weights[3]*1 )
        final_acc=float(weights[0]*sizes[1]+weights[1]*losses[1]+weights[2]*cmp_cmx[1]+weights[3]*2 )
        final_wav=float(weights[0]*sizes[2]+weights[1]*losses[2]+weights[2]*cmp_cmx[2]+weights[3]*3 )
        print(f"[mp3, acc,wav]={[final_mp3, final_acc, final_wav]}")
    except ValueError as e:
        print("错误：", e)
        

[[0.         1.         0.24668305 1.        ]
 [0.51817038 0.51370198 0.         2.        ]
 [1.         0.         1.         3.        ]]
各指标权重： [0.21516213 0.32669979 0.24226642 0.21587166]
[mp3, acc,wav]=[0.6023344694170183, 0.7110602923735012, 1.1050435262357652]
