In [None]:
!pip install tensorflow-gpu==2.8.0
import tensorflow as tf

!pip install tensorflow-io==0.25.0

In [None]:
import os
from matplotlib import pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio
import librosa
import librosa.display
import IPython.display as ipd
import numpy as np

In [None]:
"OBTAINING A LIST OF AUDIO FILES"
# Build the paths of the three recording folders that live under ./voice_recods.
personal_files, personal_files_2, other_files = (
    os.path.join('.', 'voice_recods', folder_name)
    for folder_name in ('me_wav', 'new_me_wav', 'new_others_wav')
)

In [None]:
ls

[0m[01;34msample_data[0m/  [01;34mvoice_recods[0m/


In [None]:
"""CREATING TENSORFLOW DATASET"""
# One dataset of file paths per recording folder (every *.wav file in it).
pos = tf.data.Dataset.list_files(personal_files + '/*.wav')
pos_2 = tf.data.Dataset.list_files(personal_files_2 + '/*.wav')
neg = tf.data.Dataset.list_files(other_files + '/*.wav')

# Attach the class label to every path: 1.0 for the two personal folders,
# 0.0 for the other-speaker folder.
# Dataset.zip stops at its shortest input, so each label vector has to be
# sized from the very file dataset it is zipped with; sizing the pos_2 labels
# from len(pos) would silently drop files whenever the folders differ in size.
positives = tf.data.Dataset.zip((pos, tf.data.Dataset.from_tensor_slices(tf.ones(len(pos)))))
positives_2 = tf.data.Dataset.zip((pos_2, tf.data.Dataset.from_tensor_slices(tf.ones(len(pos_2)))))
negatives = tf.data.Dataset.zip((neg, tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg)))))

# Single (path, label) dataset in the order: positives, negatives, positives_2.
data = positives.concatenate(negatives)
data = data.concatenate(positives_2)

In [None]:
# Data loading helper
def load_wav(filename):
    """Read a WAV file and return it as a mono waveform resampled to 16 kHz.

    Args:
        filename: Path of the WAV file (Python string or scalar string tensor).

    Returns:
        A 1-D tensor containing the resampled audio samples.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for a single (mono) channel.
    audio, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Drop the trailing channel axis so the waveform is one-dimensional.
    mono = tf.squeeze(audio, axis=-1)
    # Resample from whatever rate the file was recorded at down/up to 16000 Hz;
    # the source rate is cast to int64 before being handed to the resampler.
    return tfio.audio.resample(
        mono,
        rate_in=tf.cast(source_rate, dtype=tf.int64),
        rate_out=16000,
    )

In [None]:
"""Pre processing"""
def preprocess(file_path, label=None):
    """Turn one WAV file into a fixed-size magnitude spectrogram.

    The waveform is loaded with `load_wav`, truncated to at most 25000
    samples, left-padded with zeros up to exactly 25000 samples, and converted
    to the magnitude of its short-time Fourier transform.

    Args:
        file_path: Path of the WAV file (string or scalar string tensor).
        label: Optional class label. `tf.data.Dataset.map` unpacks
            `(path, label)` elements into two positional arguments, so the
            label must be accepted here and handed back for the mapped
            dataset to keep its targets.

    Returns:
        The spectrogram with a trailing channel axis when `label` is None
        (direct single-file use), otherwise the pair `(spectrogram, label)`.
    """
    wav = load_wav(file_path)
    # Keep at most the first 25000 samples.
    wav = wav[:25000]
    # Shorter clips are padded at the front so every example has equal length.
    zero_padding = tf.zeros([25000] - tf.shape(wav), dtype=tf.float32)
    wav = tf.concat([zero_padding, wav], 0)
    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    # The STFT is complex-valued; keep only the magnitude.
    spectrogram = tf.abs(spectrogram)
    # Add a channel axis so the result fits the Conv2D input layout.
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    if label is None:
        return spectrogram
    return spectrogram, label

In [None]:
# Sanity check: push one recording through preprocess and look at the
# resulting spectrogram shape (this is what the model's input_shape must match).
wav = preprocess('./voice_recods/me_wav/Recording (77).wav')
wav.shape

TensorShape([772, 257, 1])

In [None]:
# Build the input pipeline: spectrograms -> cache -> shuffle -> batches of 8.
# The shuffle uses a fixed seed and reshuffle_each_iteration=False on purpose:
# the train/test split is made later with take()/skip() on this same dataset,
# and with the default (reshuffle on every iteration) each pass would see a
# different order, so the "test" batches could contain training examples.
# Trade-off: the batch order is identical in every epoch.
data_op = (
    data.map(preprocess)
    .cache()
    .shuffle(buffer_size=1000, seed=42, reshuffle_each_iteration=False)
    .batch(8)
    .prefetch(8)  # overlap preprocessing with training to avoid bottlenecking
)



In [None]:
# Split the batched dataset into training and testing portions.
# NOTE(review): the split is a fixed 16 training batches followed by 8 test
# batches; any batches beyond the first 24 are not used — confirm this is
# intended. The two portions are only disjoint if data_op yields the same
# order on every iteration.
print(len(data_op))
train, test = data_op.take(16), data_op.skip(16).take(8)

45


In [None]:
#importing packages for building the AI model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D

In [None]:
"""Designing the network with convolutional and dense layers"""
model = Sequential([
    # Two 3x3 convolutions over the (772, 257, 1) spectrogram input.
    Conv2D(16, (3, 3), activation='relu', input_shape=(772, 257, 1)),
    Conv2D(16, (3, 3), activation='relu'),
    # A single 2x2 max-pool (stride defaults to the pool size) before flattening.
    MaxPooling2D(pool_size=(2, 2), padding="valid"),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    # One sigmoid unit: the output is a score in [0, 1] for the positive class.
    Dense(1, activation='sigmoid'),
])
# Optimizer instance that is passed to model.compile in a later cell.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

In [None]:
# Configure training: binary cross-entropy on the single sigmoid output,
# tracked with accuracy. `metrics` is documented as a list of metrics, so the
# metric name is wrapped in a list instead of being passed as a bare string.
model.compile(
    optimizer=opt,
    loss='BinaryCrossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 770, 255, 16)      160       
                                                                 
 conv2d_13 (Conv2D)          (None, 768, 253, 16)      2320      
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 384, 126, 16)     0         
 2D)                                                             
                                                                 
 flatten_6 (Flatten)         (None, 774144)            0         
                                                                 
 dense_17 (Dense)            (None, 64)                49545280  
                                                                 
 dense_18 (Dense)            (None, 64)                4160      
                                                      

In [None]:
"""Training the model"""
# Fit for a single epoch on the training batches, validating on the test
# batches; the returned History object is kept in `hist`.
hist = model.fit(
    train,
    validation_data=test,
    epochs=1,
)



In [None]:
from google.colab import files

In [None]:
# Persist the trained model under saved_model/my_model_angel.
# The path is built with os.path.join: the previous literal used a backslash,
# which is an invalid escape sequence in Python and, on Linux/Colab, is not a
# path separator — the model ended up in a directory literally named
# "saved_model\my_model_angel" instead of inside saved_model/.
os.makedirs('saved_model', exist_ok=True)
model.save(os.path.join('saved_model', 'my_model_angel'))

In [None]:
# Run the trained model over the test batches; each row of the result is the
# sigmoid output of the final Dense(1) layer for one example.
# NOTE(review): if `test` yields a different order on each iteration, these
# rows will not line up with labels read from a separate pass — confirm
# before comparing predictions against targets.
predictions = model.predict(test)

In [None]:
# NOTE(review): this statement looks unfinished — `audio_slices` is bound to
# an attribute lookup rather than the result of a call. tf.keras.utils
# documents `timeseries_dataset_from_array`; a bare `timeseries` attribute
# presumably does not exist and would raise AttributeError — confirm the
# intended call and its arguments, or remove this cell.
audio_slices = tf.keras.utils.timeseries

In [None]:
predictions

array([[1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [2.2193979e-14],
       [9.9998736e-01],
       [1.7726925e-01],
       [1.0000000e+00],
       [1.4451084e-01],
       [9.9999845e-01],
       [2.4208400e-01],
       [9.9980259e-01],
       [9.9999917e-01],
       [1.3975117e-07],
       [8.2715858e-17],
       [1.0900059e-12],
       [1.0000000e+00],
       [9.9997419e-01],
       [5.4610912e-12],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [6.1410666e-04],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [9.9999958e-01],
       [6.6128560e-14],
       [1.5283927e-16],
       [1.0000000e+00],
       [2.0408321e-27],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [9.9999988e-01],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [3.2843333e-14],
       [1.000000