In [None]:
pip install librosa numpy scikit-learn tensorflow

In [None]:
import librosa
import numpy as np

def extract_features(file_path, max_pad_len=216, n_mfcc=40):
    """Extract a fixed-width MFCC matrix from an audio file.

    The file is loaded at its native sample rate (``sr=None``), MFCCs are
    computed, and the second axis is then zero-padded on the right or
    truncated so the result always has exactly ``max_pad_len`` columns.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        max_pad_len: Number of columns in the returned matrix.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 40, the
            value this function previously hard-coded; the CNN defined in
            this notebook is built for 40 rows, so change both together.

    Returns:
        A 2-D array with ``n_mfcc`` rows and ``max_pad_len`` columns, or
        ``None`` if loading or feature extraction raised an exception (the
        error is printed rather than propagated so that one bad file does
        not abort a whole dataset scan).
    """
    try:
        # sr=None keeps the file's own sample rate instead of resampling.
        audio, sample_rate = librosa.load(file_path, sr=None)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

        # Positive -> clip is too short and needs right-padding with zeros;
        # zero or negative -> clip is long enough and is cut to max_pad_len.
        pad_width = max_pad_len - mfccs.shape[1]
        if pad_width > 0:
            mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
        else:
            mfccs = mfccs[:, :max_pad_len]
        return mfccs
    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with None.
        print(f"Error processing {file_path}: {e}")
        return None

In [None]:
import os

def load_dataset(data_dir):
    """Load MFCC features and labels from a ``real``/``fake`` directory tree.

    ``data_dir`` must contain two sub-directories named ``real`` and
    ``fake``. Every regular file inside them is passed to
    ``extract_features``; files for which it returns ``None`` are skipped.

    Args:
        data_dir: Root directory holding the ``real`` and ``fake`` folders.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` stacks
        the per-file MFCC matrices and ``labels`` holds 0 for ``real`` and
        1 for ``fake``, in the same order.

    Raises:
        OSError: If either sub-directory is missing or unreadable
            (propagated from ``os.listdir``).
    """
    features, labels = [], []
    for label, sub_dir in enumerate(['real', 'fake']):  # 0 for real, 1 for fake
        sub_dir_path = os.path.join(data_dir, sub_dir)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and any seeded split built on it) reproducible.
        for file_name in sorted(os.listdir(sub_dir_path)):
            file_path = os.path.join(sub_dir_path, file_name)
            # Skip nested directories instead of handing them to the
            # audio loader.
            if not os.path.isfile(file_path):
                continue
            mfccs = extract_features(file_path)
            if mfccs is not None:
                features.append(mfccs)
                labels.append(label)
    return np.array(features), np.array(labels)

In [None]:
from sklearn.model_selection import train_test_split

# Root folder that holds the `real/` and `fake/` sub-directories.
data_dir = '/content/sample_data/dataset'

# Build the feature/label arrays, then append a trailing channel axis to the
# features before splitting.
features, labels = load_dataset(data_dir)
features = features[..., np.newaxis]

# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Small CNN over the MFCC "image": one conv + pool stage, then a dense head.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(40, 216, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit: binary output (0 or 1)

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 10 epochs, reporting metrics on the held-out split after each one.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=10,
)

In [None]:
# Mount Google Drive at /content/drive (needs the Colab-only google.colab package).
# NOTE(review): the paths visible in this notebook all live under
# /content/sample_data, so nothing shown here reads from the mounted drive —
# confirm whether this cell is still required.
from google.colab import drive
drive.mount('/content/drive')

In the code below, set `audio_path` to the path of your own audio sample to check whether it is classified as fake or real.





In [None]:
def detect_fake(audio_file_path, model, threshold=0.5):
    """Classify a single audio file as fake or real.

    Args:
        audio_file_path: Path of the audio file to classify.
        model: Object with a ``predict`` method that accepts a batch of
            shape ``(1, rows, cols, 1)`` and returns one score for it.
        threshold: Scores greater than or equal to this value are reported
            as ``"Fake"``. Defaults to 0.5, the cut-off this function
            previously hard-coded.

    Returns:
        ``"Fake"`` or ``"Real"``, or ``"Error processing file"`` when
        ``extract_features`` returns ``None`` for the file.
    """
    features = extract_features(audio_file_path)
    if features is None:
        return "Error processing file"

    # Add a leading batch axis and a trailing channel axis for the model.
    batch = np.expand_dims(features, axis=(0, -1))
    prediction = model.predict(batch)

    # predict returns an array even for one sample; take the scalar score
    # explicitly instead of branching on the truth value of an array.
    score = float(np.asarray(prediction).reshape(-1)[0])
    return "Fake" if score >= threshold else "Real"

# Path of the sample to classify — replace with the path of your own audio file.
audio_path = '/content/sample_data/dataset/fake/Alg_1_5.wav'

# Run the trained model on that sample and report the verdict.
verdict = detect_fake(audio_path, model)
print(f"The audio is {verdict}")