In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Import Libraries

In [None]:
!pip install resampy

In [86]:
import numpy as np
import pandas as pd
import os
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import IPython
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Load Data

In [87]:
audio_files_path = "/content/drive/MyDrive/kaggle/KAGGLE/AUDIO"

In [88]:
folders = os.listdir(audio_files_path)
print(folders)

['FAKE', 'REAL', '.ipynb_checkpoints']


In [89]:
real_audio = "/content/drive/MyDrive/kaggle/DEMONSTRATION/DEMONSTRATION/linus-original-DEMO.wav"
fake_audio = "/content/drive/MyDrive/kaggle/DEMONSTRATION/DEMONSTRATION/linus-to-musk-DEMO.wav"

# Preprocess

In [172]:
import os
import librosa
import numpy as np
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

def process_file(file_path, folder):
    try:
        audio, sample_rate = librosa.load(file_path, sr=46000, res_type="kaiser_fast")
        mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

        return mfccs_scaled_features, folder
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None, None

def extract_features(folders, audio_files_path):
    data = []
    labels = []

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = []
        for folder in folders:
            files = os.listdir(os.path.join(audio_files_path, folder))
            for file in files:
                file_path = os.path.join(audio_files_path, folder, file)
                futures.append(executor.submit(process_file, file_path, folder))

        for future in tqdm(futures):
            result, label = future.result()
            if result is not None:
                data.append(result)
                labels.append(label)

    return data, labels

# folders, audio_files_path tanımlamalarınızı burada yapın
data, labels = extract_features(folders, audio_files_path)


100%|██████████| 64/64 [07:49<00:00,  7.33s/it]


In [173]:
feature_df = pd.DataFrame({"features": data, "class": labels})
feature_df.head()

Unnamed: 0,features,class
0,"[-378.7221, 93.80587, -4.8884387, 2.7786431, -...",FAKE
1,"[-385.12756, 95.737946, -9.984296, 2.4591713, ...",FAKE
2,"[-317.6355, 128.61046, -13.663625, 2.4752636, ...",FAKE
3,"[-327.00864, 122.894875, -23.583435, 2.6407838...",FAKE
4,"[-305.55045, 129.73132, -14.059689, 2.3538377,...",FAKE


In [174]:
feature_df["class"].value_counts()

FAKE    56
REAL     8
Name: class, dtype: int64

In [175]:
def label_encoder(column):
    le = LabelEncoder().fit(column)
    print(column.name, le.classes_)
    return le.transform(column)

In [176]:
feature_df["class"] = label_encoder(feature_df["class"])

class ['FAKE' 'REAL']


# Feature Scaling

In [177]:
X = np.array(feature_df["features"].tolist())
y = np.array(feature_df["class"].tolist())

In [178]:
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

In [179]:
y_resampled = to_categorical(y_resampled)

In [180]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [181]:
num_labels = len(feature_df["class"].unique())
num_labels

2

In [182]:
input_shape = feature_df["features"][0].shape
input_shape

(40,)

# Model

# Test

In [183]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Activation
from keras.callbacks import EarlyStopping

# Model parametrelerini ayarlayın
input_shape = (X_train.shape[1], 1)  # X_train'in ikinci boyutunu alır
num_labels = y_train.shape[1]  # y_train'deki etiket sayısı

# LSTM modelini oluşturun
model = Sequential()
model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(256, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_labels))
model.add(Activation('softmax'))

# Modeli derleyin
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model özetini göster
model.summary()

# Erken durdurma tanımlayın
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# Modeli eğitin
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    batch_size=32, epochs=50,
                    callbacks=[early_stopping])

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_25 (LSTM)              (None, 40, 128)           66560     
                                                                 
 dropout_41 (Dropout)        (None, 40, 128)           0         
                                                                 
 lstm_26 (LSTM)              (None, 256)               394240    
                                                                 
 dropout_42 (Dropout)        (None, 256)               0         
                                                                 
 dense_32 (Dense)            (None, 128)               32896     
                                                                 
 activation_28 (Activation)  (None, 128)               0         
                                                                 
 dropout_43 (Dropout)        (None, 128)             

In [184]:
def detect_fake(filename):
    sound_signal, sample_rate = librosa.load(filename, res_type="kaiser_fast")
    mfcc_features = librosa.feature.mfcc(y=sound_signal, sr=sample_rate, n_mfcc=40)
    mfccs_features_scaled = np.mean(mfcc_features.T, axis=0)
    mfccs_features_scaled = mfccs_features_scaled.reshape(1, -1)
    result_array = model.predict(mfccs_features_scaled)
    print(result_array)
    result_classes = ["FAKE", "REAL"]
    result = np.argmax(result_array[0])
    print("Result:", result_classes[result])


In [185]:
test_real = "/content/drive/MyDrive/kaggle/DEMONSTRATION/DEMONSTRATION/linus-original-DEMO.wav"
test_fake = "/content/drive/MyDrive/kaggle/DEMONSTRATION/DEMONSTRATION/linus-to-musk-DEMO.wav"
detect_fake(test_real)
detect_fake(test_fake)

[[0.46513253 0.53486747]]
Result: REAL
[[0.5399389 0.4600611]]
Result: FAKE


In [186]:
test_real = "/content/drive/MyDrive/kaggle/DEMONSTRATION/DEMONSTRATION/WhatsApp Ptt 2023-12-23 at 22.30.55.ogg"
test_fake = "/content/drive/MyDrive/kaggle/DEMONSTRATION/DEMONSTRATION/p_12071717_982.wav"
detect_fake(test_real)
detect_fake(test_fake)

[[0.48711225 0.5128878 ]]
Result: REAL
[[0.54055244 0.4594476 ]]
Result: FAKE


In [189]:
test_real = "/content/drive/MyDrive/kaggle/DEMONSTRATION/DEMONSTRATION/WhatsApp Ptt 2023-12-24 at 11.43.47.ogg"
test_fake = "/content/drive/MyDrive/kaggle/DEMONSTRATION/DEMONSTRATION/WhatsApp Ptt 2023-12-23 at 22.30.55.ogg"
detect_fake(test_real)
detect_fake(test_fake)

[[0.4733494 0.5266506]]
Result: REAL
[[0.48711225 0.5128878 ]]
Result: REAL
