In [25]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import os



target_sr = 22050


def preprocess_audio(audio_path):
    try:
        y, sr = librosa.load(audio_path, sr=None)
        y = librosa.effects.resample(y, sr, target_sr)
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=target_sr, n_mels=128)
        mel_spect_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
        return mel_spect_db
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return None


def augment_audio(y, sr):
    augmented_samples = []
    y_stretched = librosa.effects.time_stretch(y, rate=np.random.uniform(0.8, 1.2))
    augmented_samples.append(y_stretched)
    y_shifted = librosa.effects.pitch_shift(y, sr =sr, n_steps = np.random.randint(-5, 5))
  
    augmented_samples.append(y_shifted)
    noise = np.random.randn(len(y))
    y_noisy = y + 0.005 * noise
    augmented_samples.append(y_noisy)
    return augmented_samples


def segment_mel_spectrogram(mel_spect, segment_length=128, hop_length=64):
    segments = []
    num_segments = (mel_spect.shape[1] - segment_length) // hop_length + 1
    for i in range(num_segments):
        start = i * hop_length
        end = start + segment_length
        segment = mel_spect[:, start:end]
        segments.append(segment)
    return segments


real_dir = r'C:\Users\alisa\Downloads\REAL2'
fake_dir = r'C:\Users\alisa\Downloads\FAKE\target generated'

audio = []
labels = []

# Label: 0 for real, 1 for fake
for filename in os.listdir(real_dir):
    filepath = os.path.join(real_dir, filename)
    if os.path.isfile(filepath):
        audio.append(filepath)
        labels.append(0)

for filename in os.listdir(fake_dir):
    filepath = os.path.join(fake_dir, filename)
    if os.path.isfile(filepath):
        audio.append(filepath)
        labels.append(1)

audio = audio[:1000]+audio[-1000:]
labels = labels[:1000]+labels[-1000:]


audio_validate = audio[35001:36001]+audio[142001:143001]
label_validate = labels[35001:36001]+labels[142001:143001]

In [2]:
print(audio[1])

C:\Users\alisa\Downloads\REAL2\1001_DFA_DIS_XX.wav


In [31]:
X = []
y = []

segment_length = 128
hop_length = 64
sampling_rate = 44100
for file_path, label in zip(audio, labels):
    try:
        y_audio, sr = librosa.load(file_path, sr = sampling_rate)
        augmented_samples = (augment_audio(y_audio, sr))
        for sample in augmented_samples:
            
            mel_spectrogram = librosa.feature.melspectrogram(y=sample, n_mels=128, sr=target_sr)
            mel_spect = librosa.power_to_db(mel_spectrogram, ref=np.max)
            segments = segment_mel_spectrogram(mel_spect, segment_length=segment_length, hop_length=hop_length)
            X.extend(segments)
            y.extend([label] * len(segments))
    except:
        pass
        



  y_audio, sr = librosa.load(file_path, sr = sampling_rate)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [34]:
X = np.array(X)
X = X[..., np.newaxis]  
y = np.array(y)

print(X)
print(y)

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)


#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
from sklearn.model_selection import train_test_split



# Adjust the parameters as needed
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,  # Adjust the test set size
    train_size=0.8  # Specify the training set size
)

[[[[-80.        ]
   [-67.48805237]
   [-62.69728088]
   ...
   [-48.55817413]
   [-49.93937683]
   [-50.37886429]]

  [[-78.67423248]
   [-60.78795242]
   [-53.8396759 ]
   ...
   [-29.51849747]
   [-29.68558502]
   [-31.36261749]]

  [[-80.        ]
   [-61.75588226]
   [-56.44762039]
   ...
   [-26.35385132]
   [-26.41447258]
   [-27.17625809]]

  ...

  [[-80.        ]
   [-80.        ]
   [-80.        ]
   ...
   [-80.        ]
   [-80.        ]
   [-80.        ]]

  [[-80.        ]
   [-80.        ]
   [-80.        ]
   ...
   [-80.        ]
   [-80.        ]
   [-80.        ]]

  [[-80.        ]
   [-80.        ]
   [-80.        ]
   ...
   [-80.        ]
   [-80.        ]
   [-80.        ]]]


 [[[-44.58983612]
   [-45.26303864]
   [-47.99424744]
   ...
   [-47.6952095 ]
   [-48.19197083]
   [-52.68888092]]

  [[-32.85071945]
   [-39.62019348]
   [-37.68228531]
   ...
   [-35.57228851]
   [-38.97397232]
   [-44.63150406]]

  [[-29.62252617]
   [-33.31103134]
   [-35.04387283]
 

In [36]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(segment_length, segment_length, 1), padding='same', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)


history = model.fit(X_train, y_train, epochs=1, validation_data=(X_test, y_test), callbacks=[early_stopping, reduce_lr])


model.save('spectromodel2.keras')





[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 204ms/step - accuracy: 0.9576 - loss: 1.5568 - val_accuracy: 0.9995 - val_loss: 0.5595 - learning_rate: 0.0010


In [39]:
A = []
b = []
for file_path, label in zip(audio_validate, label_validate):
    try:
        y_audio, sr = librosa.load(file_path, sr = sampling_rate)
        augmented_samples = (augment_audio(y_audio, sr))
        for sample in augmented_samples:
            
            mel_spectrogram = librosa.feature.melspectrogram(y=sample, n_mels=128, sr=target_sr)
            mel_spect = librosa.power_to_db(mel_spectrogram, ref=np.max)
            segments = segment_mel_spectrogram(mel_spect, segment_length=segment_length, hop_length=hop_length)
            A.extend(segments)
            b.extend([label] * len(segments))
    except:
        pass
        
A = np.array(A)
A = A[..., np.newaxis]
b = np.array(b)
predictions = model.predict(A)
predicted_labels = (predictions > 0.5).astype(int).flatten()


from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(b, predicted_labels)
print(f'Validation Accuracy: {accuracy:.4f}')
print(classification_report(b, predicted_labels))



  self.gen.throw(typ, value, traceback)


ValueError: math domain error

In [47]:
mshan_allah = []
mshan_allah2 = []
test = r'C:\Users\alisa\Downloads\ElevenLabs_2024-02-21T05_28_34_Me_ivc_s50_sb75_se0_b_m2.wav'


y_audio, sr = librosa.load(test, sr = sampling_rate)


mel_spectrogram = librosa.feature.melspectrogram(y=y_audio, n_mels=128, sr=sr)
mel_spect = librosa.power_to_db(mel_spectrogram, ref=np.max)
segments = segment_mel_spectrogram(mel_spect, segment_length=segment_length, hop_length=hop_length)
mshan_allah.append(segments)
mshan_allah2.append([label] * len(segments))

mshanallah = np.array(mshan_allah)
mshanallah = mshanallah[..., np.newaxis]  
mshanallah2 = np.array(mshan_allah2)

print(mshanallah[:5])
final_label = model.predict(mshanallah)
print(f'Predicted label: {final_label}')

[[[[[-80.       ]
    [-80.       ]
    [-80.       ]
    ...
    [-49.4      ]
    [-50.291542 ]
    [-49.392487 ]]

   [[-80.       ]
    [-80.       ]
    [-80.       ]
    ...
    [-29.640594 ]
    [-28.847553 ]
    [-28.806057 ]]

   [[-80.       ]
    [-80.       ]
    [-80.       ]
    ...
    [-25.664982 ]
    [-24.190214 ]
    [-25.93696  ]]

   ...

   [[-80.       ]
    [-80.       ]
    [-80.       ]
    ...
    [-80.       ]
    [-80.       ]
    [-80.       ]]

   [[-80.       ]
    [-80.       ]
    [-80.       ]
    ...
    [-80.       ]
    [-80.       ]
    [-80.       ]]

   [[-80.       ]
    [-80.       ]
    [-80.       ]
    ...
    [-80.       ]
    [-80.       ]
    [-80.       ]]]


  [[[-80.       ]
    [-79.43311  ]
    [-79.33829  ]
    ...
    [-46.7409   ]
    [-45.75564  ]
    [-54.165688 ]]

   [[-80.       ]
    [-80.       ]
    [-80.       ]
    ...
    [-26.72726  ]
    [-29.924212 ]
    [-31.80526  ]]

   [[-80.       ]
    [-80.       ]
    [-80. 

ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "conv2d_3" is incompatible with the layer: expected axis -1 of input shape to have value 1, but received input with shape (1, 6, 128, 128)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(1, 6, 128, 128, 1), dtype=float32)
  • training=False
  • mask=None