In [2]:
import os
import shutil
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Function to convert audio to waveform image
def save_waveform_image(audio_path, save_path):
    try:
        y, sr = librosa.load(audio_path, sr=None)
        plt.figure(figsize=(14, 5))
        plt.plot(y)
        plt.title('Waveform')
        plt.savefig(save_path)
        plt.close('all')
    except:
        print('fail')
        pass
# Preprocess audio files to waveform images

def preprocess_audio_batch(audio_files, audio_dir, image_dir):
    for file in audio_files:
        if file.endswith(('.wav', '.flac')):
            audio_path = os.path.join(audio_dir, file)
            relative_path = os.path.relpath(audio_path, audio_dir)
            save_path = os.path.join(image_dir, relative_path)
            
            if file.endswith('.wav'):
                save_path = save_path.replace('.wav', '.png')
            elif file.endswith('.flac'):
                save_path = save_path.replace('.flac', '.png')
            
            save_dir = os.path.dirname(save_path)
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            
            save_waveform_image(audio_path, save_path)
# Organize images into train and test directories
def organize_images(image_dir, train_dir, test_dir, test_size=0.2):
    categories = ['real2000', 'fake2000']
    for category in categories:
        category_dir = os.path.join(image_dir, category)
        images = [os.path.join(category_dir, img) for img in os.listdir(category_dir) if img.endswith('.png')]
        train_images, test_images = train_test_split(images, test_size=test_size)
        
        for img_set, set_dir in zip([train_images, test_images], [train_dir, test_dir]):
            category_set_dir = os.path.join(set_dir, category)
            if not os.path.exists(category_set_dir):
                os.makedirs(category_set_dir)
            for img_path in img_set:
                shutil.copy(img_path, category_set_dir)


In [12]:
real_audio_dir =r'C:\Users\alisa\Downloads\10krealaudio'
fake_audio_dir = r'C:\Users\alisa\Downloads\largerfake'
real_image_dir = '10kimages/real2000'
fake_image_dir = '10kimages/fake2000'
image_dir = '10kimages'
train_dir = '10kimages/train'
test_dir = '10kimages/test'


In [3]:
all_files =[f for f in os.listdir(real_audio_dir) if f.endswith(('.wav', '.flac'))]


In [4]:
batch1 = all_files[:2000]
batch2 = all_files[2001:4000]
batch3 = all_files[4001:6000]
batch4 = all_files[6001:8000]
batch5 = all_files[8001:10000]

In [5]:
preprocess_audio_batch(batch1, real_audio_dir, real_image_dir)

  y, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail
fail


In [6]:
preprocess_audio_batch(batch2, real_audio_dir, real_image_dir)

In [7]:
preprocess_audio_batch(batch3, real_audio_dir, real_image_dir)

In [8]:
preprocess_audio_batch(batch4, real_audio_dir, real_image_dir)

In [9]:
preprocess_audio_batch(batch5, real_audio_dir, real_image_dir)

In [13]:
fake_files =[f for f in os.listdir(fake_audio_dir) if f.endswith(('.wav', '.flac'))]
fakebatch1 = fake_files[:2000]
fakebatch2= fake_files[2001:4000]
fakebatch3 = fake_files[4001:6000]
fakebatch4 = fake_files[6001:8000]
fakebatch5 = fake_files[8001:10000]

In [14]:
preprocess_audio_batch(fakebatch1, fake_audio_dir, fake_image_dir)

In [15]:
preprocess_audio_batch(fakebatch2, fake_audio_dir, fake_image_dir)

In [16]:
preprocess_audio_batch(fakebatch3, fake_audio_dir, fake_image_dir)

In [17]:
preprocess_audio_batch(fakebatch4, fake_audio_dir, fake_image_dir)

In [18]:
preprocess_audio_batch(fakebatch5, fake_audio_dir, fake_image_dir)

In [19]:
organize_images(image_dir, train_dir, test_dir)


In [20]:
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.15)
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(224, 224),
                                                    batch_size=32,
                                                    class_mode='binary',
                                                    subset='training')
validation_generator = train_datagen.flow_from_directory(train_dir,
                                                         target_size=(224, 224),
                                                         batch_size=32,
                                                         class_mode='binary',
                                                         subset='validation')

from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)  # Binary classification
model = Model(inputs=base_model.input, outputs=x)

# Freeze the base model
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    epochs=5,
                    steps_per_epoch=train_generator.samples // train_generator.batch_size,
                    validation_steps=validation_generator.samples // validation_generator.batch_size)

# Unfreeze some layers of the base model
for layer in base_model.layers[-3:]:  # Unfreeze the last 4 layers
    layer.trainable = True

# Recompile the model with a lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),  # Lower learning rate
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Continue training (fine-tuning)
history_fine = model.fit(train_generator,
                         validation_data=validation_generator,
                         epochs=10,
                         steps_per_epoch=train_generator.samples // train_generator.batch_size,
                         validation_steps=validation_generator.samples // validation_generator.batch_size)


Found 13569 images belonging to 2 classes.
Found 2393 images belonging to 2 classes.
Epoch 1/5


  self._warn_if_super_not_called()


[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m770s[0m 2s/step - accuracy: 0.7210 - loss: 0.8619 - val_accuracy: 0.9671 - val_loss: 0.1959
Epoch 2/5
[1m  1/424[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10:28[0m 1s/step - accuracy: 0.8750 - loss: 0.3909

  self.gen.throw(typ, value, traceback)


[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8750 - loss: 0.3909 - val_accuracy: 1.0000 - val_loss: 0.1812
Epoch 3/5
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m734s[0m 2s/step - accuracy: 0.8428 - loss: 0.3787 - val_accuracy: 0.9286 - val_loss: 0.2359
Epoch 4/5
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8438 - loss: 0.3931 - val_accuracy: 0.8800 - val_loss: 0.3005
Epoch 5/5
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m731s[0m 2s/step - accuracy: 0.8589 - loss: 0.3445 - val_accuracy: 0.9768 - val_loss: 0.1714
Epoch 1/10
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m782s[0m 2s/step - accuracy: 0.8746 - loss: 0.3185 - val_accuracy: 0.9481 - val_loss: 0.1979
Epoch 2/10
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8750 - loss: 0.2983 - val_accuracy: 0.9200 - val_loss: 0.2332
Epoch 3/10
[1m424/424[0m [32m━━━━━━━

In [25]:
import librosa
import matplotlib.pyplot as plt

# Load the audio file
audio_path = r'C:\Users\alisa\Downloads\record_out.wav'
y, sr = librosa.load(audio_path, sr=None)

# Plot and save the waveform image
plt.figure(figsize=(14, 5))
plt.plot(y)
plt.title('Waveform')
waveform_image_path = 'workplz.png'
plt.savefig(waveform_image_path)
plt.close()


import tensorflow as tf
from tensorflow.keras.preprocessing import image

# Load the image
img = image.load_img(waveform_image_path, target_size=(224,224))

# Convert the image to a numpy array
img_array = image.img_to_array(img)

# Normalize the image
img_array /= 255.0

# Expand dimensions to match the input shape of the model
img_array = np.expand_dims(img_array, axis=0)



model.predict(img_array)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step


array([[0.996511]], dtype=float32)

In [26]:
model.save('10kimagesVGG16.keras')

In [27]:
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(224, 224),
                                                  batch_size=32,
                                                  class_mode='binary')
loss, accuracy = model.evaluate(test_generator)
print(f'Test Accuracy: {accuracy:.2f}')

Found 3992 images belonging to 2 classes.


  self._warn_if_super_not_called()


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 2s/step - accuracy: 0.8911 - loss: 0.2957
Test Accuracy: 0.89


In [28]:
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.15)
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(128, 128),
                                                    batch_size=32,
                                                    class_mode='binary',
                                                    subset='training')
validation_generator = train_datagen.flow_from_directory(train_dir,
                                                         target_size=(128, 128),
                                                         batch_size=32,
                                                         class_mode='binary',
                                                         subset='validation')

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    epochs=20,
                    steps_per_epoch=train_generator.samples // train_generator.batch_size,
                    validation_steps=validation_generator.samples // validation_generator.batch_size)



Found 13569 images belonging to 2 classes.
Found 2393 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20


  self._warn_if_super_not_called()


[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 211ms/step - accuracy: 0.6918 - loss: 0.6173 - val_accuracy: 0.8809 - val_loss: 0.3352
Epoch 2/20
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 298us/step - accuracy: 0.8438 - loss: 0.3444 - val_accuracy: 0.7600 - val_loss: 0.4486
Epoch 3/20


  self.gen.throw(typ, value, traceback)


[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 206ms/step - accuracy: 0.8447 - loss: 0.3742 - val_accuracy: 0.8792 - val_loss: 0.3392
Epoch 4/20
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231us/step - accuracy: 0.9062 - loss: 0.3424 - val_accuracy: 0.9600 - val_loss: 0.2820
Epoch 5/20
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 206ms/step - accuracy: 0.8584 - loss: 0.3447 - val_accuracy: 0.8801 - val_loss: 0.2894
Epoch 6/20
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 249us/step - accuracy: 0.8750 - loss: 0.3614 - val_accuracy: 0.8400 - val_loss: 0.3280
Epoch 7/20
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 206ms/step - accuracy: 0.8706 - loss: 0.3143 - val_accuracy: 0.8944 - val_loss: 0.3130
Epoch 8/20
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201us/step - accuracy: 0.7500 - loss: 0.4193 - val_accuracy: 0.8800 - val_loss: 0.2673
Epoch 9/20
[1m424/424[

In [29]:
model.save('10kimagesCNN.keras')

In [4]:
import librosa
import matplotlib.pyplot as plt

# Load the audio file
audio_path = r'C:\Users\alisa\Downloads\ali.wav'
y, sr = librosa.load(audio_path, sr=None)

# Plot and save the waveform image
plt.figure(figsize=(14, 5))
plt.plot(y)
plt.title('Waveform')
waveform_image_path = 'workplz.png'
plt.savefig(waveform_image_path)
plt.close()


import tensorflow as tf
from tensorflow.keras.preprocessing import image

# Load the image
img = image.load_img(waveform_image_path, target_size=(128,128))

# Convert the image to a numpy array
img_array = image.img_to_array(img)

# Normalize the image
img_array /= 255.0

# Expand dimensions to match the input shape of the model
img_array = np.expand_dims(img_array, axis=0)

model = tf.keras.models.load_model('10kimagesCNN.keras')

model.predict(img_array)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step


array([[1.]], dtype=float32)

In [33]:
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(128, 128),
                                                  batch_size=32,
                                                  class_mode='binary')
loss, accuracy = model.evaluate(test_generator)
print(f'Test Accuracy: {accuracy:.2f}')

Found 3992 images belonging to 2 classes.
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 151ms/step - accuracy: 0.8618 - loss: 0.3545
Test Accuracy: 0.86


In [36]:
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.15)
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(224, 224),
                                                    batch_size=32,
                                                    class_mode='binary',
                                                    subset='training')
validation_generator = train_datagen.flow_from_directory(train_dir,
                                                         target_size=(224, 224),
                                                         batch_size=32,
                                                         class_mode='binary',
                                                         subset='validation')

from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)  # Binary classification
model = Model(inputs=base_model.input, outputs=x)

# Freeze the base model
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    epochs=10,
                    steps_per_epoch=train_generator.samples // train_generator.batch_size,
                    validation_steps=validation_generator.samples // validation_generator.batch_size)

# Unfreeze some layers of the base model
for layer in base_model.layers[-5:]:  # Unfreeze the last 4 layers
    layer.trainable = True

# Recompile the model with a lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),  # Lower learning rate
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Continue training (fine-tuning)
history_fine = model.fit(train_generator,
                         validation_data=validation_generator,
                         epochs=24,
                         steps_per_epoch=train_generator.samples // train_generator.batch_size,
                         validation_steps=validation_generator.samples // validation_generator.batch_size)


Found 13569 images belonging to 2 classes.
Found 2393 images belonging to 2 classes.
Epoch 1/10
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m873s[0m 2s/step - accuracy: 0.7466 - loss: 0.7860 - val_accuracy: 0.9519 - val_loss: 0.2021
Epoch 2/10
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8750 - loss: 0.4610 - val_accuracy: 0.9200 - val_loss: 0.2269
Epoch 3/10
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m725s[0m 2s/step - accuracy: 0.8366 - loss: 0.3839 - val_accuracy: 0.9611 - val_loss: 0.1969
Epoch 4/10
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8750 - loss: 0.4179 - val_accuracy: 0.8800 - val_loss: 0.2009
Epoch 5/10
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m841s[0m 2s/step - accuracy: 0.8513 - loss: 0.3614 - val_accuracy: 0.9721 - val_loss: 0.1620
Epoch 6/10
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.90

In [37]:
model.save('largerepochlargerdata.keras')

In [41]:
import librosa
import matplotlib.pyplot as plt

# Load the audio file
audio_path = r'C:\Users\alisa\Downloads\FAKE\target generated\MULTIBANDMELGANLJ043-0145_gen.wav'
y, sr = librosa.load(audio_path, sr=None)

# Plot and save the waveform image
plt.figure(figsize=(14, 5))
plt.plot(y)
plt.title('Waveform')
waveform_image_path = 'workplz.png'
plt.savefig(waveform_image_path)
plt.close()


import tensorflow as tf
from tensorflow.keras.preprocessing import image

# Load the image
img = image.load_img(waveform_image_path, target_size=(224,224))

# Convert the image to a numpy array
img_array = image.img_to_array(img)

# Normalize the image
img_array /= 255.0

# Expand dimensions to match the input shape of the model
img_array = np.expand_dims(img_array, axis=0)



model.predict(img_array)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step


array([[0.13265048]], dtype=float32)