# In [None]:
import os
import glob
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import wave
# Dataset locations
# Root folder of the dataset -- replace with your dataset path.
DATA_DIR = '/Users/E6EKI5G/Desktop/ASVspoof'
# One sub-folder per class, both directly under DATA_DIR.
REAL_DIR, FAKE_DIR = (os.path.join(DATA_DIR, sub) for sub in ('real', 'fake'))

# Feature-extraction parameters
SR = 16000        # sampling rate handed to librosa when loading audio and computing MFCCs
N_MFCC = 40       # number of MFCC coefficients requested per clip
MAX_LENGTH = 400  # MFCC matrices are cut or zero-padded to this many columns; adjust per dataset
# Helper function to check if a file is a valid WAV file
def is_valid_wav(file_path):
    """Return whether *file_path* can be opened by the standard ``wave`` module.

    Args:
        file_path: Path of the file to probe.

    Returns:
        True if ``wave.open`` accepts the file, False otherwise. On failure
        a diagnostic line is printed; no exception is propagated for the
        failure modes listed below.

    Handled failure modes:
        * ``wave.Error`` -- the header is not a supported RIFF/WAVE header.
        * ``EOFError``   -- the file is empty or truncated inside the header.
        * ``OSError``    -- the path is missing, unreadable or a directory.
    """
    try:
        # Opening is sufficient: the wave module parses the header on open,
        # so no frames need to be read to validate the container.
        with wave.open(file_path, 'rb'):
            return True
    except (wave.Error, EOFError, OSError) as e:
        print(f"File {file_path} is not a valid WAV file: {e}")
        return False
# Helper function to load and preprocess audio files
def load_audio(file_path, sr=SR):
    """Load one audio file through librosa after a WAV sanity check.

    Args:
        file_path: Path of the audio file.
        sr: Sampling rate forwarded to ``librosa.load`` (defaults to ``SR``).

    Returns:
        The samples returned by ``librosa.load``, or None when the file
        fails the ``is_valid_wav`` check or loading raises ``EOFError``,
        ``ValueError`` or ``RuntimeError``. A message is printed in that case.
    """
    try:
        if is_valid_wav(file_path):
            # librosa.load returns (samples, rate); only the samples are needed.
            samples, _ = librosa.load(file_path, sr=sr)
            return samples
        # Rejected files are reported through the same handler as load errors.
        raise ValueError(f"File {file_path} is not a valid WAV file")
    except (EOFError, ValueError, RuntimeError) as err:
        print(f"Error loading {file_path}: {err}")
        return None
# Helper function to extract MFCC features
def extract_features(file_path):
    """Compute a fixed-width MFCC matrix for one audio file.

    Args:
        file_path: Path of the audio file.

    Returns:
        The MFCC array from ``librosa.feature.mfcc`` (``N_MFCC``
        coefficients at rate ``SR``) with its second axis forced to
        exactly ``MAX_LENGTH`` columns, or None if the file could not be
        loaded.
    """
    signal = load_audio(file_path)
    if signal is None:
        return None

    coeffs = librosa.feature.mfcc(y=signal, sr=SR, n_mfcc=N_MFCC)
    n_cols = coeffs.shape[1]

    # Too long: keep only the leading MAX_LENGTH columns.
    if n_cols > MAX_LENGTH:
        return coeffs[:, :MAX_LENGTH]

    # Too short (or exact): append zero columns on the right up to MAX_LENGTH.
    missing = MAX_LENGTH - n_cols
    return np.pad(coeffs, ((0, 0), (0, missing)), mode='constant')
# Load dataset and extract features
def load_dataset(real_dir, fake_dir):
    """Build the feature and label arrays from two folders of ``*.wav`` files.

    Args:
        real_dir: Folder whose ``*.wav`` files are labelled 0 (real).
        fake_dir: Folder whose ``*.wav`` files are labelled 1 (fake).

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the result of
        ``extract_features`` for every file that could be processed (real
        files first, then fake), and ``y`` holds the matching labels.
        Files for which ``extract_features`` returns None are skipped.
    """
    X = []
    y = []
    # One pass per class instead of two copy-pasted loops.
    for directory, label in ((real_dir, 0), (fake_dir, 1)):
        # glob returns paths in arbitrary, filesystem-dependent order; sort
        # them so the sample order (and therefore any seeded split made by
        # the caller) is reproducible across runs and machines.
        for file in sorted(glob.glob(os.path.join(directory, '*.wav'))):
            features = extract_features(file)
            if features is not None:
                X.append(features)
                y.append(label)
    return np.array(X), np.array(y)
# Prepare data
X, y = load_dataset(REAL_DIR, FAKE_DIR)
# Append a trailing channel axis of size 1 for the Conv2D layers
X = np.expand_dims(X, axis=-1)
# Hold out 20% of the samples as a test set (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the CNN model: three Conv2D + MaxPooling2D stages, then a dense head
model = models.Sequential()
# The first layer declares the per-sample input shape (all axes but the first)
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
# Classifier head ending in a single sigmoid unit (label 1 = fake)
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))
# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Train the model; 20% of the training samples are held out for validation
model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)
# Evaluate the model on the held-out test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')