In [1]:
pip install librosa numpy tensorflow matplotlib



In [2]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import librosa
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import img_to_array

# Function to extract MFCC features from the input audio file
def extract_mfcc_features(file_path, max_pad_len=50, n_mfcc=40, sr=22050):
    """Load an audio file and return a fixed-width MFCC matrix.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.
        max_pad_len: Number of time frames (columns) in the result. Clips with
            fewer frames are zero-padded on the right; clips with more frames
            are truncated.
        n_mfcc: Number of MFCC coefficients (rows) to compute.
        sr: Sample rate requested from ``librosa.load``.

    Returns:
        A 2-D ``numpy.ndarray`` with ``max_pad_len`` columns and one row per
        MFCC coefficient.

    Raises:
        ValueError: If ``max_pad_len`` is negative. A negative value would
            otherwise slice frames off the *end* of the matrix and silently
            return an array of the wrong width.
    """
    if max_pad_len < 0:
        raise ValueError(f"max_pad_len must be non-negative, got {max_pad_len}")

    signal, sample_rate = librosa.load(file_path, sr=sr)  # Load audio
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)

    # Truncate first (a no-op for short clips), then right-pad whatever is missing.
    mfcc = mfcc[:, :max_pad_len]
    missing_frames = max_pad_len - mfcc.shape[1]
    if missing_frames > 0:
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, missing_frames)), mode='constant')

    return mfcc

# Render the MFCC matrix of an audio file as a heatmap
def plot_mfcc(file_path):
    """Display the MFCC matrix of ``file_path`` as a colour-mapped image.

    The matrix is obtained from ``extract_mfcc_features`` using its default
    arguments. Rows (coefficients) run along the y-axis with the first row at
    the bottom, columns (frames) along the x-axis. Nothing is returned; the
    figure is shown via ``plt.show()``.

    NOTE(review): the colorbar ticks carry a "dB" suffix although the values
    are MFCC coefficients — confirm whether that label is intended.
    """
    coefficients = extract_mfcc_features(file_path)

    fig, ax = plt.subplots(figsize=(10, 4))
    heatmap = ax.imshow(coefficients, cmap='viridis', aspect='auto', origin='lower')
    fig.colorbar(heatmap, ax=ax, format='%+2.0f dB')
    ax.set_title("MFCC Output Image")
    ax.set_xlabel("Time Frames")
    ax.set_ylabel("MFCC Coefficients")
    plt.show()

# Turn an audio file into a single-item, 3-channel batch for the VGG16-based model
def preprocess_audio(file_path):
    """Build the model input tensor for one audio file.

    Steps: extract the 2-D MFCC matrix with ``extract_mfcc_features`` (default
    arguments), copy it into three identical channels, pass it through
    ``img_to_array``, prepend a batch axis of size 1, and divide by 255.

    Returns:
        Array of shape ``(1, rows, cols, 3)`` where ``(rows, cols)`` is the
        shape of the MFCC matrix.

    NOTE(review): the division by 255 mirrors 8-bit image scaling, but MFCC
    values are not pixel intensities — confirm the saved model was trained with
    this same scaling before changing it.
    """
    coefficients = extract_mfcc_features(file_path)

    # Three identical copies along a new trailing axis stand in for RGB channels.
    three_channel = np.stack([coefficients] * 3, axis=-1)
    image = img_to_array(three_channel)  # Convert to VGG16 format

    batch = image[np.newaxis, ...]  # Add batch dimension
    return batch / 255.0  # Normalize

# Assemble a frozen VGG16 backbone plus a small binary head (no training happens here)
def build_vgg16_model(input_shape):
    """Create and compile a VGG16-based binary classifier.

    Args:
        input_shape: Shape of one input sample, forwarded to ``VGG16`` as its
            ``input_shape`` argument.

    Returns:
        A compiled ``Sequential`` model: the ImageNet-weighted VGG16
        convolutional base (``include_top=False``, all layers frozen) followed
        by Flatten, Dense(256, relu), Dropout(0.5) and a single sigmoid unit.
        The newly added head layers are created here and have not been trained.
    """
    backbone = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)

    # Freeze pre-trained layers
    for pretrained_layer in backbone.layers:
        pretrained_layer.trainable = False

    classifier = Sequential()
    classifier.add(backbone)
    classifier.add(Flatten())
    classifier.add(Dense(256, activation="relu"))
    classifier.add(Dropout(0.5))
    classifier.add(Dense(1, activation="sigmoid"))  # Binary classification

    classifier.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return classifier

# Load pretrained model (or build an untrained fallback if loading fails)
def load_pretrained_model(model_path="deepfake_audio_model.h5"):
    """Load the saved classifier, falling back to a freshly built model.

    Args:
        model_path: Path of the saved Keras model file. Defaults to
            ``"deepfake_audio_model.h5"`` in the current working directory.

    Returns:
        The model returned by ``tf.keras.models.load_model`` when loading
        succeeds; otherwise a new model from
        ``build_vgg16_model(input_shape=(40, 50, 3))``. The fallback's
        classification head is untrained, so its predictions carry no meaning.

    Only ``Exception`` subclasses trigger the fallback. ``KeyboardInterrupt``
    and ``SystemExit`` propagate instead of being swallowed, and the actual
    failure reason is printed rather than always claiming the file is missing.
    """
    try:
        model = tf.keras.models.load_model(model_path)
    except Exception as exc:
        print(f"Could not load pretrained model from {model_path!r}: {exc}")
        print("Creating a new model (but not trained).")
        model = build_vgg16_model(input_shape=(40, 50, 3))
    else:
        print("Loaded pretrained model successfully!")
    return model

# Predict if the input audio is real or fake
def predict_audio(file_path, model=None, threshold=0.5):
    """Classify one audio file as ``"Fake Audio"`` or ``"Real Audio"``.

    Args:
        file_path: Path of the audio file; forwarded to ``preprocess_audio``.
        model: Optional already-loaded model exposing ``predict``. When omitted,
            ``load_pretrained_model()`` is called, as before. Passing a model
            avoids reloading it for every file.
        threshold: Score strictly above which the clip is labelled fake.

    Returns:
        ``"Fake Audio"`` if the first score of the first prediction exceeds
        ``threshold``, otherwise ``"Real Audio"``.
    """
    if model is None:
        model = load_pretrained_model()

    audio_features = preprocess_audio(file_path)
    scores = model.predict(audio_features)

    # The batch holds one sample and the model emits one score for it.
    fake_score = scores[0][0]
    return "Fake Audio" if fake_score > threshold else "Real Audio"

# Main Execution: User provides input file
if __name__ == "__main__":
    import os  # local import: only this entry-point block needs it

    # Strip stray whitespace that often comes along with a pasted path.
    test_audio = input("Enter the path of the audio file: ").strip()

    if not os.path.isfile(test_audio):
        # Report a bad or empty path up front instead of failing inside the audio loader.
        print(f"Audio file not found: {test_audio!r}")
    else:
        # Display MFCC Image
        print("Displaying MFCC Image for the input audio file...")
        plot_mfcc(test_audio)

        # Make Prediction
        result = predict_audio(test_audio)
        print(f"Prediction: {result}")