In [7]:

pip install datasets==2.14.5



In [21]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [9]:
import os
import librosa
import gc
import ast
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import torchaudio
import sys
import datasets
from datasets import load_dataset, load_metric
from transformers import AutoFeatureExtractor
from transformers import AutoModelForAudioClassification, TrainingArguments, Trainer


In [10]:
# Name of the pretrained checkpoint and the batch size.
# NOTE(review): neither value is referenced by the cells visible below —
# presumably they feed a transformers fine-tuning step elsewhere; confirm.
model_checkpoint = "facebook/wav2vec2-base"
# Backward-compatible alias for the original (misspelled) identifier.
model_chechpoint = model_checkpoint
batch_size = 32

In [11]:
# Load the "accuracy" metric via `load_metric` (imported from `datasets`).
# NOTE(review): `metric` is not used by any later cell visible here, and
# `load_metric` is deprecated in newer `datasets` releases — confirm it is
# still needed before changing the pinned version.
metric = load_metric("accuracy")

In [24]:
# Directories holding the two classes of .wav clips. A single-argument
# os.path.join returns its argument unchanged, so plain strings are equivalent.
fake_path = '/content/dataset/fake data'
real_path = '/content/dataset/real data'

In [25]:
def add_noise(data, noise_factor=0.005):
    """Return `data` with scaled standard-normal noise added.

    One noise sample per element along the first axis (``len(data)``) is
    drawn with ``np.random.randn`` and multiplied by ``noise_factor`` before
    being added to the signal. The input is not modified in place.
    """
    gaussian = np.random.randn(len(data))
    return data + gaussian * noise_factor

def time_stretch(data, rate=1.1):
    """Return `data` time-stretched by `rate` using librosa.

    Args:
        data: audio signal passed straight through to librosa.
        rate: stretch factor forwarded to ``librosa.effects.time_stretch``.

    `rate` is passed by keyword: recent librosa releases accept it only as a
    keyword argument, and older releases accept the keyword form as well.
    """
    return librosa.effects.time_stretch(data, rate=rate)

def pitch_shift(data, sr, n_steps=2):
    """Return `data` pitch-shifted by `n_steps` using librosa.

    Args:
        data: audio signal passed straight through to librosa.
        sr: sampling rate of `data`.
        n_steps: shift amount forwarded to ``librosa.effects.pitch_shift``.

    `sr` and `n_steps` are passed by keyword: recent librosa releases accept
    them only as keyword arguments, and the keyword form also works on older
    releases.
    """
    return librosa.effects.pitch_shift(data, sr=sr, n_steps=n_steps)

def extract_features(file_path, augment=False):
    """Load an audio file and summarise it as one flat feature vector.

    Args:
        file_path: path of the audio file, loaded with ``librosa.load`` at
            its native sampling rate (``sr=None``).
        augment: when true, one of 'none', 'noise', 'stretch' or 'pitch' is
            picked at random and applied to the signal before extraction.

    Returns:
        A 1-D NumPy array of 47 values: the per-coefficient mean of 40 MFCCs
        followed by the overall mean of spectral centroid, spectral
        bandwidth, spectral contrast, spectral rolloff, chroma, tonnetz and
        zero-crossing rate, in that order.
    """
    y, sr = librosa.load(file_path, sr=None)

    if augment:
        aug_choice = np.random.choice(['none', 'noise', 'stretch', 'pitch'])
        if aug_choice == 'noise':
            y = add_noise(y)
        elif aug_choice == 'stretch':
            y = time_stretch(y, rate=np.random.uniform(0.8, 1.2))
        elif aug_choice == 'pitch':
            # randint's upper bound is exclusive, so the shift is in [-3, 2].
            y = pitch_shift(y, sr, n_steps=np.random.randint(-3, 3))

    # Mean of each of the 40 MFCCs (Mel-frequency cepstral coefficients).
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    mfccs_mean = np.mean(mfccs, axis=1)

    # Each remaining descriptor is collapsed to a single scalar mean.
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))
    tonnetz = np.mean(librosa.feature.tonnetz(y=y, sr=sr))
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    # Concatenate into a single vector; the order here defines the column
    # order of the feature matrix used by the models.
    features = np.hstack([
        mfccs_mean,
        spectral_centroid,
        spectral_bandwidth,
        spectral_contrast,
        spectral_rolloff,
        chroma,
        tonnetz,
        zero_crossing_rate,
    ])

    return features

In [26]:
def load_dataset(fake_path, real_path):
    """Build a class-balanced feature matrix from two folders of .wav files.

    Note: this definition shadows the `load_dataset` imported from the
    `datasets` package earlier in the notebook.

    Args:
        fake_path: directory containing the fake clips (label 1).
        real_path: directory containing the real clips (label 0).

    Returns:
        ``(x, y, fake_count, real_count)`` where `x` is an array with one
        `extract_features` vector per processed file (all fake rows first,
        then all real rows), `y` the matching 0/1 labels, and both counts
        equal the number of files processed per class.
    """
    # Sort the listings: os.listdir returns entries in arbitrary order, which
    # would make the truncated subset below differ from run to run.
    fake_files = sorted(name for name in os.listdir(fake_path) if name.endswith('.wav'))
    real_files = sorted(name for name in os.listdir(real_path) if name.endswith('.wav'))

    # Use the same number of files from each class so the data is balanced.
    min_files = min(len(fake_files), len(real_files))

    print(f"processing {min_files} 'fake' and {min_files} 'real' audio files for balance")

    x = []
    y = []

    # (directory, file names, label, progress-bar caption) for each class.
    class_specs = (
        (fake_path, fake_files, 1, "Fake files"),
        (real_path, real_files, 0, "Real files"),
    )
    for directory, file_names, label, caption in class_specs:
        for file_name in tqdm(file_names[:min_files], desc=caption, unit="file"):
            file_path = os.path.join(directory, file_name)
            x.append(extract_features(file_path))
            y.append(label)

    print(f"Number of 'fake' files processed: {min_files}")
    print(f"Number of 'real' files processed: {min_files}")

    return np.array(x), np.array(y), min_files, min_files

In [None]:
# Build the feature matrix `x` and label vector `y` from the two audio
# folders (progress bars are shown by tqdm inside load_dataset). The two
# counts are the number of files processed per class; load_dataset returns
# the same value for both because it balances the classes.
x, y, fake_files_count, real_files_count = load_dataset(fake_path, real_path)

In [None]:
# Bar chart of how many clips of each class were processed.
class_labels = ['Fake', 'Real']
class_counts = [fake_files_count, real_files_count]

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=class_labels, y=class_counts, palette='viridis', ax=ax)
ax.set_title('distribution of fake and real audio files')
ax.set_xlabel('Class')
ax.set_ylabel('Number of files')
plt.show()

In [None]:
  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

clf = RandomForestClassifier(n_estimators= 100, random_state=42)
clf.fit(X_train, y_train)

In [None]:
# Score the random forest on the held-out split.
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
# `.2f` prints two decimals; the previous spec (` 2f`) only set a sign flag
# and width, so the value was printed with six decimals.
print(f"Model Accuracy: {accuracy * 100:.2f}%")


In [None]:
importances = clf.feature_importances_

# Names must line up one-to-one with the vector built by extract_features:
# 40 MFCC means followed by seven scalar descriptors, in the same order.
feature_names = [f'MFCC {i + 1}' for i in range(40)] + [
    'Spectral_Centroid',
    'Spectral_Bandwidth',
    'Spectral_Contrast',
    'Spectral_Rolloff',
    'Chroma',
    'Tonnetz',
    'Zero_Crossing_Rate',
]
# Fail early with a clear message if the feature vector layout changes.
assert len(feature_names) == len(importances), (
    f"{len(feature_names)} feature names for {len(importances)} importances"
)

plt.figure(figsize=(12, 8))
sns.barplot(x=importances, y=feature_names, palette='magma')
plt.title('Feature Importance from Random Forest Classifier')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.show()

In [None]:
import joblib
from sklearn.metrics import classification_report

# Persist the fitted random forest to disk with joblib.
model_filename = "random forest model.pkl"
joblib.dump(clf, model_filename)
print(f"Model saved as {model_filename}")

# Reload it to verify the round trip. joblib files are pickle-based, so only
# load files that come from a trusted source.
loaded_model = joblib.load(model_filename)
print("model loaded successful")

# Re-score the held-out split with the reloaded model. The names are listed
# for labels 0 and 1 in that order (load_dataset uses 0 = real, 1 = fake).
y_pred = loaded_model.predict(X_test)
report = classification_report(y_test, y_pred, target_names=["Real", "Fake"])
print("classification report: \n", report)

def predict_fake_or_real(wav_file, model):
    """Classify one audio file as "Fake" or "Real".

    The file is turned into a feature vector by `extract_features`, reshaped
    into a single-row matrix and handed to ``model.predict``. A predicted
    label of 1 maps to "Fake"; anything else maps to "Real".
    """
    sample = extract_features(wav_file).reshape(1, -1)
    predicted_label = model.predict(sample)[0]
    return "Fake" if predicted_label == 1 else "Real"

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Bidirectional, GlobalMaxPooling1D, Input

# Bar chart of how many clips of each class were processed.
class_labels = ['Fake', 'Real']
class_counts = [fake_files_count, real_files_count]

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=class_labels, y=class_counts, palette='viridis', ax=ax)
ax.set_title('Distribution of Fake and Real Audio Files')
ax.set_xlabel('Class')
ax.set_ylabel('Number of Files')
plt.show()

# Split the feature matrix `x` returned by load_dataset (the notebook never
# defines an upper-case `X`), using the same 80/20 split and seed as before.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Add a trailing axis of length 1 so each sample is (n_features, 1), the
# 2-D per-sample shape that build_model's Input/Conv1D stack is given.
X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)

def build_model(input_shape):
    """Build the (uncompiled) Conv1D + BiLSTM + attention binary classifier.

    Args:
        input_shape: per-sample shape passed to ``Input(shape=...)``.

    Returns:
        A Keras ``Model`` whose output is a single sigmoid unit. The caller
        is responsible for compiling it before training.
    """
    # Attention is not among the Keras layers imported at the top of the
    # cell, so import it here to keep this function self-contained.
    from tensorflow.keras.layers import Attention

    inputs = Input(shape=input_shape)

    # Convolution followed by pooling, which halves the sequence length.
    features = Conv1D(filters=64, kernel_size=3, padding='same', activation='relu')(inputs)
    features = MaxPooling1D(pool_size=2)(features)

    # Bidirectional LSTM that keeps the full output sequence.
    features = Bidirectional(LSTM(64, return_sequences=True))(features)

    # Self-attention: the same sequence is passed as both list elements,
    # then the result is reduced to a single vector by global max pooling.
    attended = Attention()([features, features])
    pooled = GlobalMaxPooling1D()(attended)

    hidden = Dense(128, activation='relu')(pooled)
    hidden = Dropout(0.5)(hidden)
    outputs = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs, outputs)

input_shape = (X_train.shape[1], X_train.shape[2])

# Create and compile the network before using it. The 'accuracy' metric is
# required because the training curves read history.history['accuracy'] and
# ['val_accuracy']; binary cross-entropy matches the single sigmoid output.
model = build_model(input_shape)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

history = model.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.2, verbose=1)

# Threshold the sigmoid outputs at 0.5 and flatten the single-unit output
# column so the predictions have the same 1-D layout as y_test.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy * 100:.2f}%")
# Names are given for labels 0 and 1 in that order; load_dataset encodes
# 0 = real and 1 = fake, so the plotting list ['Fake', 'Real'] is the wrong
# way round for the report.
print(classification_report(y_test, y_pred, target_names=['Real', 'Fake']))

# Side-by-side training curves: accuracy on the left, loss on the right.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

ax_accuracy.plot(history.history['accuracy'], label='Train accuracy')
ax_accuracy.plot(history.history['val_accuracy'], label='val accuracy')
ax_accuracy.set_title('Training and validation accuracy')
ax_accuracy.set_xlabel('Epochs')
ax_accuracy.set_ylabel('Accuracy')
ax_accuracy.legend()

ax_loss.plot(history.history['loss'], label='Train loss')
ax_loss.plot(history.history['val_loss'], label='val loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('loss')
ax_loss.legend()

plt.show()