## Voice Authentication and Face Recognition

In [None]:
import tensorflow as tf
import numpy as np
import os
import glob
import pickle
import cv2
import time
from numpy import genfromtxt

from keras import backend as K
from keras.models import load_model
K.set_image_data_format('channels_first')
np.set_printoptions(threshold=np.inf)


import pyaudio
from IPython.display import Audio, display, clear_output
import wave
from scipy.io.wavfile import read
from sklearn.mixture import GaussianMixture 
import warnings
warnings.filterwarnings("ignore")

from sklearn import preprocessing
import python_speech_features as mfcc

: 

## Audio processing

In [None]:
# Calculate and return the deltas of the given feature-vector matrix
def calculate_delta(array):
    """Compute delta (first-order difference) features for a feature matrix.

    For every row ``i`` the delta is the weighted difference of the rows
    ``i + j`` and ``i - j`` for ``j = 1..N`` (``N = 2``), with row indices
    clamped to the first/last row at the edges:

        delta[i] = sum_j j * (array[i + j] - array[i - j]) / (2 * sum_j j**2)

    Parameters
    ----------
    array : numpy.ndarray
        2-D matrix of shape ``(rows, cols)``, one feature vector per row.

    Returns
    -------
    numpy.ndarray
        Float matrix with the same ``(rows, cols)`` shape as ``array``.
    """
    rows, cols = array.shape
    N = 2

    # Size the result from the input instead of assuming 20 coefficients,
    # so feature matrices of any width are supported.
    deltas = np.zeros((rows, cols))
    row_ids = np.arange(rows)

    for j in range(1, N + 1):
        # Clamp the look-ahead / look-behind rows to the matrix boundaries.
        ahead = np.minimum(row_ids + j, rows - 1)
        behind = np.maximum(row_ids - j, 0)
        deltas += j * (array[ahead] - array[behind])

    # 2 * (1**2 + 2**2) == 10, the normalisation used for N == 2.
    return deltas / (2 * sum(j * j for j in range(1, N + 1)))

#convert audio to mfcc features
def extract_features(audio,rate):
    """Convert an audio signal into MFCC + delta-MFCC feature vectors.

    Parameters
    ----------
    audio : array-like
        Audio samples as returned by ``scipy.io.wavfile.read``.
    rate : int
        Sample rate of ``audio`` in Hz.

    Returns
    -------
    numpy.ndarray
        One row per analysis frame: the scaled MFCCs (20 coefficients
        requested) followed by their deltas, stacked horizontally.
    """
    win_len = 0.025   # analysis window length in seconds
    win_step = 0.01   # hop between windows in seconds

    # The FFT must be at least as long as one analysis window, otherwise
    # python_speech_features truncates every frame. 1103 covers a 25 ms
    # window at 44.1 kHz (the previous fixed value); higher rates such as
    # 48 kHz need more, so grow the FFT size with the sample rate.
    nfft = max(1103, int(np.ceil(win_len * rate)))

    mfcc_feat = mfcc.mfcc(audio, rate, win_len, win_step, 20, appendEnergy=True, nfft=nfft)
    mfcc_feat = preprocessing.scale(mfcc_feat)
    delta = calculate_delta(mfcc_feat)

    # combining both mfcc features and delta
    return np.hstack((mfcc_feat, delta))

In [35]:
def play_back(path):
    """Play the WAV file at ``path`` through a PyAudio output stream.

    Blocks until the whole file has been written to the stream. The stream
    and the PyAudio instance are released even if opening the file or the
    playback itself raises.

    Parameters
    ----------
    path : str
        Path of the WAV file to play.
    """
    audio = pyaudio.PyAudio()
    try:
        with wave.open(path, 'rb') as wf:
            stream = audio.open(format=audio.get_format_from_width(wf.getsampwidth()),
                                channels=wf.getnchannels(),
                                rate=wf.getframerate(),
                                output=True)
            try:
                # Feed the file to the device in 1024-frame chunks.
                data = wf.readframes(1024)
                while data:
                    stream.write(data)
                    data = wf.readframes(1024)
            finally:
                stream.stop_stream()
                stream.close()
    finally:
        audio.terminate()

## Add a New User

In [83]:
def _record_wav(path, fmt, channels, rate, chunk, seconds):
    """Record ``seconds`` seconds of audio via PyAudio and save it to ``path`` as WAV."""
    recorder = pyaudio.PyAudio()
    frames = []
    try:
        sample_width = recorder.get_sample_size(fmt)
        stream = recorder.open(format=fmt, channels=channels,
                               rate=rate, input=True,
                               frames_per_buffer=chunk)
        try:
            print("Processing, please wait...")
            for _ in range(int(rate / chunk * seconds)):
                data = stream.read(chunk, exception_on_overflow=False)
                if not data:
                    print("Buffer overflow, restart recording")
                    break
                frames.append(data)
        finally:
            # Always release the stream, even when nothing was captured
            # or the read raised.
            stream.stop_stream()
            stream.close()
    finally:
        recorder.terminate()

    # saving wav file of speaker
    with wave.open(path, 'wb') as wave_file:
        wave_file.setnchannels(channels)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(rate)
        wave_file.writeframes(b''.join(frames))


def add_user():
    """Interactively enroll a new speaker.

    Prompts for a user name, records three voice samples into
    ``./voice_database/<name>/`` (each one is played back and must be
    confirmed with ``yes``), then fits a Gaussian mixture model on the
    MFCC features of all three recordings and pickles it to
    ``./gmm_models/<name>.gmm``.

    Nothing is recorded if the name is empty, contains a path separator,
    or already has a directory in the voice database.
    """
    name = input("Enter Name:")

    # The name becomes a directory and a file name, so refuse anything that
    # is empty or would resolve outside the database folders.
    if not name or name in (".", "..") or os.path.basename(name) != name:
        print("Invalid user name!")
        return

    # check for existing database
    user_directory = f"./voice_database/{name}"
    if os.path.exists(user_directory):
        print("User already exists!")
        return

    # if database not exists than creating new database
    os.makedirs(user_directory)

    # Voice authentication
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    # Enrollment must use the same sample rate as recognize() (44100 Hz);
    # features extracted at a different rate do not match at recognition
    # time.
    RATE = 44100
    CHUNK = 1024
    RECORD_SECONDS = 5
    NUM_RECORDINGS = 3

    wav_paths = []
    for i in range(NUM_RECORDINGS):
        tmp_file = f"{user_directory}/{i+1}.wav"
        is_usable = False
        while not is_usable:
            print(f"State your name to the microphone (recording #{i})")
            _record_wav(tmp_file, FORMAT, CHANNELS, RATE, CHUNK, RECORD_SECONDS)
            print("Recording processed. Playing back the recording.")
            play_back(tmp_file)

            res = input("Is the recording fine (yes or no): ")
            if res == "yes":
                is_usable = True
            else:
                print("Restarting the recording process")
        wav_paths.append(tmp_file)

    # extract 40 dimensional MFCC & delta MFCC features from every accepted
    # recording and train on all of them together
    vectors = []
    for path in wav_paths:
        (sr, samples) = read(path)
        vectors.append(extract_features(samples, sr))
    features = np.vstack(vectors)

    gmm = GaussianMixture(n_components = 16, max_iter=200, covariance_type='diag',n_init = 3)
    gmm.fit(features)

    gmm_dir = "./gmm_models/"
    os.makedirs(gmm_dir, exist_ok=True)
    model_path = os.path.join(gmm_dir, f"{name}.gmm")
    with open(model_path, "wb") as mf:
        # saving the trained gaussian model
        pickle.dump(gmm, mf)
    print(name + ' added successfully')

# Start the interactive enrollment when this cell is run as the main module.
if __name__ == '__main__':
    add_user()

State your name to the microphone (recording #0)
Processing, please wait...
Recording processed. Playing back the recording.
State your name to the microphone (recording #1)
Processing, please wait...
Recording processed. Playing back the recording.
State your name to the microphone (recording #2)
Processing, please wait...
Recording processed. Playing back the recording.




Kandahar added successfully


In [84]:
import pyaudio
import wave

# Standalone microphone check: record a few seconds of mono audio and save
# it to OUTPUT_FILE.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 48000
CHUNK = 1024
RECORD_SECONDS = 5
OUTPUT_FILE = "test_recording.wav"

audio = pyaudio.PyAudio()
frames = []
try:
    # Query the sample width while the PyAudio instance is still alive.
    sample_width = audio.get_sample_size(FORMAT)

    # Start recording
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    try:
        print("Recording...")
        for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            data = stream.read(CHUNK, exception_on_overflow=False)
            frames.append(data)
    finally:
        # Stop recording (also when a read fails part-way through)
        stream.stop_stream()
        stream.close()
finally:
    audio.terminate()

# Save the recording to a file
with wave.open(OUTPUT_FILE, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

print(f"Recording complete. File saved as {OUTPUT_FILE}")

Recording...
Recording complete. File saved as test_recording.wav


## Delete User

In [None]:
# deletes a registered user from database
def delete_user():
    """Interactively delete a registered user.

    Prompts for a user name and removes everything stored for it:

    * the entry in ``./face_database/embeddings.pickle`` (if that file
      exists and contains the name),
    * the recordings directory ``./voice_database/<name>``,
    * the speaker model ``./gmm_models/<name>.gmm``.

    Each of the three is optional, so users that were enrolled with voice
    only can be deleted as well. Prints ``No such user !!`` when nothing was
    found for the name.
    """
    import shutil  # local import: only needed here

    name = input("Enter name of the user:")

    # The name is spliced into file-system paths; refuse anything that is
    # empty or would resolve outside the database folders.
    if not name or name in (".", "..") or os.path.basename(name) != name:
        print('No such user !!')
        return

    face_db_path = "./face_database/embeddings.pickle"
    voice_dir = os.path.join("./voice_database", name)
    gmm_path = os.path.join("./gmm_models", name + ".gmm")

    found = False

    if os.path.isfile(face_db_path):
        # NOTE(review): pickle.load runs arbitrary code from the file; only
        # use this with a database file you created yourself.
        with open(face_db_path, "rb") as database:
            db = pickle.load(database)
        # The read handle is closed before the file is rewritten.
        if db.pop(name, None) is not None:
            found = True
            # save the database
            with open(face_db_path, "wb") as database:
                pickle.dump(db, database, protocol=pickle.HIGHEST_PROTOCOL)

    # remove the speaker wav files and gmm model
    if os.path.isdir(voice_dir):
        # rmtree removes only this user's folder; os.removedirs would also
        # prune ./voice_database itself once it becomes empty.
        shutil.rmtree(voice_dir)
        found = True

    if os.path.isfile(gmm_path):
        os.remove(gmm_path)
        found = True

    if found:
        print('User ' + name + ' deleted successfully')
    else:
        print('No such user !!')

# Prompt for a user name and delete that user; this runs every time the cell executes.
delete_user()

## Voice Authentication and Face Recognition

In [85]:
def recognize():
    """Record a voice sample and report which enrolled speaker it matches best.

    Loads every ``*.gmm`` model from ``./gmm_models/``, records five seconds
    of audio to ``./test.wav``, extracts MFCC features and prints the name of
    the model with the highest score. Returns ``None`` in every case; the
    result is only printed.

    If no model is available the function prints ``No Users in the Database!``
    and returns before any audio is recorded.
    """
    modelpath = "./gmm_models/"

    # Look for enrolled speakers first so nothing is recorded when there is
    # nobody to compare against. A missing model directory counts as empty.
    gmm_files = []
    if os.path.isdir(modelpath):
        gmm_files = sorted(os.path.join(modelpath, fname)
                           for fname in os.listdir(modelpath)
                           if fname.endswith('.gmm'))

    if len(gmm_files) == 0:
        print("No Users in the Database!")
        return

    models = []
    for fname in gmm_files:
        # NOTE(review): pickle.load runs arbitrary code from the file; only
        # keep model files you created yourself in this directory.
        with open(fname, 'rb') as f:
            models.append(pickle.load(f))

    # Speaker name = file name without directory and without the ".gmm" suffix.
    speakers = [os.path.basename(fname)[:-len('.gmm')] for fname in gmm_files]

    # Voice Authentication
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    CHUNK = 1024
    RECORD_SECONDS = 5
    FILENAME = "./test.wav"

    recorder = pyaudio.PyAudio()
    try:
        sample_width = recorder.get_sample_size(FORMAT)

        # start Recording
        stream = recorder.open(format=FORMAT, channels=CHANNELS,
                               rate=RATE, input=True,
                               frames_per_buffer=CHUNK)
        try:
            print("recording...")
            frames = []
            for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
                # Same overflow handling as enrollment: do not abort the
                # whole recording on an input overflow.
                frames.append(stream.read(CHUNK, exception_on_overflow=False))
            print("finished recording")
        finally:
            # stop Recording
            stream.stop_stream()
            stream.close()
    finally:
        recorder.terminate()

    # saving wav file
    with wave.open(FILENAME, 'wb') as waveFile:
        waveFile.setnchannels(CHANNELS)
        waveFile.setsampwidth(sample_width)
        waveFile.setframerate(RATE)
        waveFile.writeframes(b''.join(frames))

    # read test file
    sr, samples = read(FILENAME)

    # extract mfcc features
    vector = extract_features(samples, sr)

    # checking with each model one by one
    log_likelihood = np.zeros(len(models))
    for i, gmm in enumerate(models):
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()

    pred = np.argmax(log_likelihood)
    identity = speakers[pred]

    # NOTE(review): argmax always selects one of the enrolled speakers, so
    # this branch only triggers when a model file is literally named
    # "unknown.gmm". There is no score threshold that rejects strangers.
    # if voice not recognized than terminate the process
    if identity == 'unknown':
        print("Not Recognized! Try again...")
        return

    print( "Recognized as - ", identity)

       
# Run one recognition attempt when this cell is run as the main module.
if __name__ == '__main__':
    recognize()

recording...
finished recording
Recognized as -  Kandahar


In [None]:
def gfcc_extract(audio, sr):
    """Return one averaged cepstral feature vector for an audio signal.

    Computes 13 coefficients per frame with ``librosa.feature.mfcc`` and
    averages them over axis 1 of the result.

    NOTE(review): despite the name, this calls librosa's MFCC routine, not a
    gammatone (GFCC) front end — confirm that is intended.
    NOTE(review): ``librosa`` is not imported in any visible cell; it must be
    imported before this function is called.

    Parameters
    ----------
    audio : numpy.ndarray
        Audio samples, passed to librosa as ``y``.
    sr : int
        Sample rate of ``audio`` in Hz.

    Returns
    -------
    numpy.ndarray
        Mean of each coefficient row of the librosa result.
    """
    gfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    return np.mean(gfcc, axis=1)

: 

In [None]:
# Load the pre-trained classifier.
# NOTE(review): pickle.load runs arbitrary code from the file; only load a
# model file you trust.
with open("voicepop_model.pkl", "rb") as f:
    svm = pickle.load(f)

# NOTE(review): `file_path` is not defined in any visible cell; set it to the
# WAV file to analyse before running this cell.
audio, sr = librosa.load(file_path, sr=16000)

# gfcc_extract needs both the samples and their sample rate.
gfcc = gfcc_extract(audio, sr)

## Another version of user recognition that keeps running until the user raises a KeyboardInterrupt

In [21]:
# The import statement was left unfinished (a syntax error); GaussianMixture
# is the sklearn.mixture class this notebook uses.
from sklearn.mixture import GaussianMixture

In [23]:
# Display the installed scikit-learn version (the saved output of this cell is '0.19.2').
import sklearn
sklearn.__version__

'0.19.2'