## Steps:
    1. Record and Train methods
    
        i. This method records the user's voice five times by calling record_audio() method
        ii. Cleaning noise from the collected voice data by calling the clean_noise() method
        iii. Training the sklearn "GaussianMixture" model on the voice data by calling the train_model() method
        
    2. Test the model by calling the test_model() method

In [3]:
import os
import wave
import time
import pickle
import pyaudio
import warnings
import glob
from scipy.io import wavfile
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
import noisereduce as nr 
from sklearn import preprocessing
from scipy.io.wavfile import read
import python_speech_features as mfcc
from sklearn.mixture import GaussianMixture
warnings.filterwarnings("ignore")


# Feature and label array

def calculate_delta(array):
    """Return first-order delta features for a 2-D feature matrix.

    For every row ``i`` the delta is

        (a[i+1] - a[i-1] + 2 * (a[i+2] - a[i-2])) / 10

    where row indices that fall outside the matrix are clamped to the
    first / last row.

    Args:
        array: 2-D NumPy array of shape ``(rows, cols)``, one feature
            vector per row.

    Returns:
        A float array of shape ``(rows, cols)`` holding the deltas. The
        width follows the input instead of being fixed at 20 columns.
    """
    rows, cols = array.shape
    positions = np.arange(rows)
    last_row = rows - 1
    deltas = np.zeros((rows, cols))
    # Lags 1 and 2 are weighted by the lag itself, matching the formula above.
    for lag in (1, 2):
        ahead = np.minimum(positions + lag, last_row)   # clamp at the last row
        behind = np.maximum(positions - lag, 0)         # clamp at the first row
        deltas += lag * (array[ahead] - array[behind])
    return deltas / 10
def extract_features(audio,rate):
    """Build the per-frame feature matrix used for speaker modelling.

    Computes 20 MFCCs per frame (25 ms window, 10 ms step, 1200-point FFT),
    standardises them with ``preprocessing.scale`` and appends the delta
    features from ``calculate_delta`` column-wise.

    Args:
        audio: the audio samples as read from a wav file.
        rate: sample rate of ``audio`` in Hz.

    Returns:
        Array with the scaled MFCCs followed by their deltas in each row.
    """
    cepstra = mfcc.mfcc(
        audio,
        rate,
        winlen=0.025,
        winstep=0.01,
        numcep=20,
        nfft=1200,
        appendEnergy=True,
    )
    scaled = preprocessing.scale(cepstra)
    return np.hstack((scaled, calculate_delta(scaled)))
def record_audio():
    """Record five training clips for a speaker, denoise them and train a model.

    Prompts for the speaker's name, records five 10-second mono clips at
    44.1 kHz from the default input device, and stores them as
    ``training_set/<Name>/<Name><index>.wav``. Afterwards the clips are
    denoised with ``clean_noise`` and a model is fitted with ``train_model``.

    The speaker folder is created if needed; recording again for an existing
    speaker overwrites the previous clips instead of raising.
    """
    Name = input("Please Enter Your Name:")
    speaker_dir = os.path.join("training_set", Name)
    # makedirs also creates "training_set" and tolerates an existing folder.
    os.makedirs(speaker_dir, exist_ok=True)

    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100
    CHUNK = 512
    RECORD_SECONDS = 10
    chunks_per_clip = int(RATE / CHUNK * RECORD_SECONDS)

    for count in range(5):
        audio = pyaudio.PyAudio()
        try:
            # Query the sample width while the PyAudio instance is still alive.
            sample_width = audio.get_sample_size(FORMAT)
            stream = audio.open(format=FORMAT, channels=CHANNELS,
                                rate=RATE, input=True, frames_per_buffer=CHUNK)
            try:
                print("recording started")
                Recordframes = [stream.read(CHUNK) for _ in range(chunks_per_clip)]
                print("recording stopped")
            finally:
                # Release the input stream even if a read fails.
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

        wave_path = os.path.join(speaker_dir, Name + str(count) + ".wav")
        with wave.open(wave_path, 'wb') as waveFile:
            waveFile.setnchannels(CHANNELS)
            waveFile.setsampwidth(sample_width)
            waveFile.setframerate(RATE)
            waveFile.writeframes(b''.join(Recordframes))

    # The denoising helper in this file is clean_noise (clean_audio is not
    # defined in the visible source and would raise NameError).
    clean_noise(Name)
    train_model(Name)
    

def clean_noise(Name):
    """Denoise every training clip of a speaker in place.

    Each ``training_set/<Name>/*.wav`` file is read, passed through
    ``noisereduce.reduce_noise`` with ``prop_decrease=0.8`` and written back
    to the same path.

    Args:
        Name: speaker folder name inside ``training_set``.
    """
    for wav_path in glob.glob('./training_set/'+Name+'/*.wav'):
        rate, data = wavfile.read(wav_path)
        reduced_noise = nr.reduce_noise(y=data, sr=rate,prop_decrease = 0.8)
        # Write back to the globbed path itself; splitting on a backslash
        # to recover the file name only works with Windows separators.
        wavfile.write(wav_path, rate, reduced_noise)
    


def train_model(name):
    """Fit a Gaussian mixture model on a speaker's clips and save it.

    Features are extracted from every ``training_set/<name>/*.wav`` file
    with ``extract_features``, stacked, and used to fit a 6-component
    diagonal-covariance ``GaussianMixture``. The fitted model is pickled to
    ``trained_models/<name>.gmm``.

    All clips found for the speaker are used, whatever their number; if no
    clip exists a message is printed and nothing is written.

    Args:
        name: speaker folder name inside ``training_set``; also used as the
            model file name.
    """
    dest = "trained_models"
    # Sorted so the training data order does not depend on directory order.
    file_paths = sorted(glob.glob(os.path.join("training_set", name, "*.wav")))
    if not file_paths:
        print("No training audio found for speaker:", name)
        return

    per_file = []
    for wav_path in file_paths:
        sr, audio = read(wav_path)
        per_file.append(extract_features(audio, sr))
    features = np.vstack(per_file)

    gmm = GaussianMixture(n_components = 6, max_iter = 200, covariance_type='diag',n_init = 3)
    gmm.fit(features)

    # Dump the trained gaussian model, closing the file handle afterwards.
    os.makedirs(dest, exist_ok=True)
    picklefile = name + ".gmm"
    with open(os.path.join(dest, picklefile), 'wb') as model_file:
        pickle.dump(gmm, model_file)
    print('+ modeling completed for speaker:',picklefile," with data point = ",features.shape)
def test_model():
    """Record one clip and print the best-matching trained speaker.

    Records a 10-second mono clip at 44.1 kHz to ``testing_set/sample.wav``,
    denoises it in place, extracts features with ``extract_features`` and
    scores them against every ``*.gmm`` model in ``trained_models``. The
    index and name of the highest-scoring model are printed.

    If no trained model is available a message is printed instead.
    """
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100
    CHUNK = 512
    RECORD_SECONDS = 10

    audio = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(FORMAT)
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True, frames_per_buffer=CHUNK)
        try:
            print("recording started")
            Recordframes = [stream.read(CHUNK)
                            for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
            print("recording stopped")
        finally:
            # Release the input stream even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    os.makedirs("testing_set", exist_ok=True)
    source = os.path.join("testing_set", "sample.wav")
    with wave.open(source, 'wb') as waveFile:
        waveFile.setnchannels(CHANNELS)
        waveFile.setsampwidth(sample_width)
        waveFile.setframerate(RATE)
        waveFile.writeframes(b''.join(Recordframes))

    # Denoise only the fresh sample; looping over every wav in the folder
    # would overwrite sample.wav with another file's audio.
    rate, data = wavfile.read(source)
    reduced_noise = nr.reduce_noise(y=data, sr=rate)
    wavfile.write(source, rate, reduced_noise)

    modelpath = "./trained_models/"
    gmm_files = [os.path.join(modelpath,fname) for fname in os.listdir(modelpath) if fname.endswith('.gmm')]
    if not gmm_files:
        print("No trained models found in", modelpath)
        return

    # NOTE: pickle.load executes arbitrary code from the file; only load
    # models that were produced locally by train_model.
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as model_file:
            models.append(pickle.load(model_file))

    # Derive the names from the same filtered list so that speakers[i]
    # always belongs to models[i].
    speakers = [os.path.basename(fname).split('.gmm')[0] for fname in gmm_files]

    sr,audio = read(source)
    vector   = extract_features(audio,sr)
    log_likelihood = np.zeros(len(models))
    for i, gmm in enumerate(models):
        # Check the sample against each model one by one.
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()

    winner = np.argmax(log_likelihood)
    print(winner)
    print("\tdetected as - ", speakers[winner])
    

# Interactive menu: 1 records training audio, 2 runs a test, 3 or more exits.
while True:
    try:
        choice = int(input("\n 1.Record audio for training \n 2.Test Model \n 3.Exit \n"))
    except ValueError:
        # Non-numeric input: ask again instead of crashing the session.
        print("Please enter 1, 2 or 3.")
        continue
    if choice == 1:
        record_audio()
    elif choice == 2:
        test_model()
    elif choice >= 3:
        break
        


 1.Record audio for training 
 2.Test Model 
 3.Exit 
2
recording started
recording stopped
10
	detected as -  test

 1.Record audio for training 
 2.Test Model 
 3.Exit 
3
