In [2]:

import os
import numpy as np
from pydub import AudioSegment
import librosa
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder




In [3]:

# Function to convert MP3 to WAV
def convert_mp3_to_wav(mp3_file, wav_file):
    audio = AudioSegment.from_mp3(mp3_file)
    audio.export(wav_file, format="wav")


In [4]:

# Function to load raw audio waveform
def load_audio_waveform(file_name, max_length=22050):
    audio, sample_rate = librosa.load(file_name, sr=22050)
    if len(audio) > max_length:
        audio = audio[:max_length]
    elif len(audio) < max_length:
        audio = np.pad(audio, (0, max_length - len(audio)))
    return audio


In [5]:

# Load dataset
def load_data(mp3_files, labels, max_length=22050):
    features = []
    for file in mp3_files:
        wav_file = file.replace('.mp3', '.wav')
        convert_mp3_to_wav(file, wav_file)
        features.append(load_audio_waveform(wav_file, max_length))
    return np.array(features), np.array(labels)



In [47]:
real_dir = r'C:\Users\alisa\Downloads\REAL2'

fake_dir = r'C:\Users\alisa\Downloads\FAKE\target generated'


In [61]:
# Iterate through the real files and append to the lists
audio = []
labels = []

for filename in os.listdir(real_dir):
    filepath = os.path.join(real_dir, filename)
    if os.path.isfile(filepath):  # Check if it is a file
        audio.append(filepath)
        labels.append(0)

# Iterate through the fake files and append to the lists
for filename in os.listdir(fake_dir):
    filepath = os.path.join(fake_dir, filename)
    if os.path.isfile(filepath):  # Check if it is a file
        audio.append(filepath)
        labels.append(1)


In [140]:
X = audio[40000:41000]+audio[-1000:]
y = labels[40000:41000]+labels[-1000:]

In [63]:
print(X[:5])

['C:\\Users\\alisa\\Downloads\\REAL2\\LJ009-0079.wav', 'C:\\Users\\alisa\\Downloads\\REAL2\\LJ009-0080.wav', 'C:\\Users\\alisa\\Downloads\\REAL2\\LJ009-0081.wav', 'C:\\Users\\alisa\\Downloads\\REAL2\\LJ009-0082.wav', 'C:\\Users\\alisa\\Downloads\\REAL2\\LJ009-0083.wav']


In [130]:
print(y)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [64]:
print(len(X))

2000


In [148]:
print(len(np.unique(y)))

2


In [149]:
features = []
feature_labels =[]
# Function to extract features from WAV files
def extract_features(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled = np.mean(mfccs.T, axis=0)
    return mfccs_scaled

for file_path,label in zip(X,y):
    try:
        mcffs = extract_features(file_path)
        features.append(mcffs)
        feature_labels.append(label)
    except:
        print("lost")



In [154]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
# Preprocess data
features = np.array(features)
#feature_labels = to_categorical(feature_labels, num_classes=2)
feature_labels = np.array(feature_labels)
if len(np.unique(feature_labels))==2:
    feature_labels = to_categorical(feature_labels, num_classes=2)

feature_labels= feature_labels.astype(int)
# Split data
X_train, X_test, y_train, y_test = train_test_split(features, feature_labels, test_size=0.2, random_state=42)

print(X_train[1])
print(y_train[1])

[-3.1493207e+02  7.0563828e+01  6.8560605e+00  1.5130168e+01
 -1.9282280e+01 -2.1722900e+01 -2.1015669e+01 -1.7776907e+01
 -1.8098547e+01 -2.6576335e+00 -3.0560453e+01 -3.6130447e+00
 -7.9012203e+00 -1.7641392e+00 -4.6680136e+00 -1.0502735e+01
 -6.6614370e+00 -9.0987473e+00 -6.0405788e+00 -5.2730613e+00
 -8.9120111e+00 -1.0833778e+01 -2.7983074e+00 -5.8874660e+00
 -5.6092596e+00 -1.9497608e+00 -1.3693511e+00 -1.6678838e+00
 -5.1933393e+00  9.0673089e-01 -4.3491859e+00 -2.1322532e+00
 -2.6487991e-01  2.0365031e-01 -1.4446510e+00  1.6279507e+00
  7.2388929e-01  6.3565475e-01 -1.9485286e+00  1.0032367e-01]
[[[1 0]
  [0 1]]

 [[0 1]
  [1 0]]]


In [None]:

# Build CNN model
model = Sequential()
model.add(Conv1D(16, kernel_size=3, activation='relu', input_shape=(X.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(32, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [152]:
model = Sequential()
model.add(Dense(256, input_shape=(features.shape[1],), activation='relu'))  # Adjust input_shape
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # Use sigmoid for binary classification

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [155]:

history = model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data = (X_test,y_test), verbose=1)


Epoch 1/50


ValueError: Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(8, 2, 2, 2), output.shape=(8, 1)

In [85]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, alpha=0.0001, solver='adam', verbose=10, random_state=42, tol=0.000000001)

# Train the MLP
mlp.fit(X_train, y_train)

Iteration 1, loss = 0.58240463
Iteration 2, loss = 0.35828875
Iteration 3, loss = 0.23388692
Iteration 4, loss = 0.16546878
Iteration 5, loss = 0.12636992
Iteration 6, loss = 0.10067282
Iteration 7, loss = 0.08365644
Iteration 8, loss = 0.07111476
Iteration 9, loss = 0.06179515
Iteration 10, loss = 0.05425683
Iteration 11, loss = 0.04830263
Iteration 12, loss = 0.04332356
Iteration 13, loss = 0.03908242
Iteration 14, loss = 0.03553496
Iteration 15, loss = 0.03248085
Iteration 16, loss = 0.02976133
Iteration 17, loss = 0.02739471
Iteration 18, loss = 0.02526173
Iteration 19, loss = 0.02343100
Iteration 20, loss = 0.02173302
Iteration 21, loss = 0.02021524
Iteration 22, loss = 0.01887836
Iteration 23, loss = 0.01768269
Iteration 24, loss = 0.01654654
Iteration 25, loss = 0.01554117
Iteration 26, loss = 0.01461284
Iteration 27, loss = 0.01378532
Iteration 28, loss = 0.01297565
Iteration 29, loss = 0.01224776
Iteration 30, loss = 0.01161378
Iteration 31, loss = 0.01097581
Iteration 32, los



In [72]:

# Train model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5015 - loss: 15.5475 - val_accuracy: 0.7150 - val_loss: 0.4640
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5630 - loss: 3.9169 - val_accuracy: 0.8900 - val_loss: 0.4577
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6259 - loss: 1.5838 - val_accuracy: 0.7175 - val_loss: 0.5662
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6907 - loss: 0.7835 - val_accuracy: 0.9725 - val_loss: 0.5043
Epoch 5/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7329 - loss: 0.6601 - val_accuracy: 0.9250 - val_loss: 0.4192
Epoch 6/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7941 - loss: 0.5078 - val_accuracy: 0.9650 - val_loss: 0.2685
Epoch 7/50
[1m50/50[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x1718add45d0>

In [73]:

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9865 - loss: 0.0457 
Test Accuracy: 99.00%


In [86]:
A = audio[39000:40000]+audio[-2000:-1000]
b = labels[39000:40000]+labels[-2000:-1000]

In [87]:
features = []
feature_labels =[]
# Function to extract features from WAV files
def extract_features(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled = np.mean(mfccs.T, axis=0)
    return mfccs_scaled

for file_path,label in zip(A,b):
    try:
        mcffs = extract_features(file_path)
        features.append(mcffs)
        feature_labels.append(label)
    except:
        print("lost")



In [88]:
y_pred = mlp.predict(features)

from sklearn.metrics import accuracy_score
# Evaluate the model
accuracy = accuracy_score(feature_labels, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.5405
