In [4]:
import numpy as np
import matplotlib.pyplot as plt
import cmath
import math
import time
import sounddevice as sd
from scipy.io.wavfile import write

In [8]:
#Provided Code
class recordsound():
    """
    recordsound Record your voice for T time sampled at a frequency fs
    solve() generates a sampled signal of your voice, saves it into a wav file
    and returns the recorded samples.
    """
    def __init__(self, T, fs, filename='myvoice1.wav'):
        """
        :param T: the duration time (int(T * fs) frames are recorded)
        :param fs: the sampling frequency
        :param filename: path of the WAV file written by solve(). Defaults to
        'myvoice1.wav'; recordings made with the same filename overwrite each
        other, so pass a distinct name to keep more than one recording on disk.
        """
        self.T = T
        self.fs = fs
        self.filename = filename

    def solve(self):
        """
        Record int(T * fs) frames on one channel, save them to self.filename
        as float32 samples and return them.

        :return: the array returned by sd.rec for this recording (callers in
        this notebook reshape it to a vector of int(T * fs) samples)
        """
        num_frames = int(self.T * self.fs)
        print('start recording \n')
        voicerecording = sd.rec(num_frames, samplerate=self.fs, channels=1)
        sd.wait()  # Wait until recording is finished
        print('end recording \n')
        write(self.filename, self.fs, voicerecording.astype(np.float32))  # Save as WAV file

        return voicerecording
    
import numpy as np
import cmath

class dft():
    """
    Discrete Fourier transform of a sampled signal, computed with np.fft.fft.

    Attributes:
      x   -- the input signal (1-D numpy array)
      fs  -- the sampling frequency
      N   -- len(x)
      K   -- number of DFT coefficients
      f   -- frequency grid k*fs/K for k = 0, ..., K-1
      f_c -- frequency grid centered at zero, k*fs/K for
             k = -ceil(K/2)+1, ..., floor(K/2)
    """
    def __init__(self, x, fs, K=None):
        """
        :param x: Input vector x contains the discrete signal
        :param fs: Input integer fs contains the sample frequency
        :param K: Input positive integer that determines the number of coeffients
        used to calculate the DFT. If K is not provided, K=length(x).
        :raises NameError: if fs or K is not a positive integer, or if x is not
        a one-dimensional numpy array.
        """
        # START: SANITY CHECK OF INPUTS.
        if not self._is_positive_integer(fs):
            raise NameError('The frequency fs should be a positive integer.')
        if not isinstance(x, np.ndarray):
            raise NameError('The input signal x must be a numpy array.')
        if x.ndim != 1:
            raise NameError('The input signal x must be a numpy vector array.')
        # END: SANITY CHECK OF INPUTS.
        self.x = x
        self.fs = fs
        self.N = len(x)
        if K is None:
            K = self.N
        self._set_K(K)

    @staticmethod
    def _is_positive_integer(value):
        """Return True for a strictly positive Python or numpy integer (bools are rejected)."""
        if isinstance(value, bool) or not isinstance(value, (int, np.integer)):
            return False
        return bool(value > 0)

    def _set_K(self, K):
        """Validate K, store it as a Python int and rebuild the frequency grids f and f_c."""
        if not self._is_positive_integer(K):
            raise NameError('K should be a positive integer.')
        self.K = int(K)
        # Frequencies k*fs/K for k = 0, 1, ..., K-1.
        self.f = np.arange(self.K) * self.fs / self.K
        # This accounts for the frequencies centered at zero. k=0 is guaranteed
        # to always be a possible k, for both even and odd choices of K; that is
        # why ceil() and floor() appear in the limits.
        self.f_c = np.arange(-np.ceil(self.K / 2) + 1, np.floor(self.K / 2) + 1) * self.fs / self.K

    def changeK(self, K):
        """
        :param K: Input positive integer that determines the number of coeffients
        used to calculate the DFT. This function changes the attribute K of the class
        and recomputes the frequency grids f and f_c accordingly.
        :raises NameError: if K is not a positive integer (K is left unchanged).
        """
        old_K = self.K
        self._set_K(K)
        print('The value of K was succefully change from %d to %d'%(old_K,self.K))

    def solve3(self):
        """
        Compute the K-point DFT of x with the FFT, scaled by 1/sqrt(N).

        :return: [f, X, f_c, X_c]
        :f: frequencies k*fs/K for k = 0, ..., K-1
        :X: DFT coefficients associated with f
        :f_c: frequencies centered at zero
        :X_c: DFT coefficients circularly shifted to line up with f_c
        """
        X = np.fft.fft(self.x, self.K) / np.sqrt(self.N)
        # \\\\\ CENTER FFT.
        # Circularly shift X so that the k=0 coefficient sits where f_c == 0.
        shift = int(np.ceil(self.K / 2 - 1))
        X_c = np.roll(X, shift)
        return [self.f, X, self.f_c, X_c]
    

In [5]:
#Problem 1
# ---- Part 1: record num_recs utterances of each digit and store them on disk ----
fs = 8000        # sampling frequency handed to recordsound and dft
T = 1            # duration handed to recordsound
num_recs = 10    # recordings per digit
digits = [1, 2]
num_samples = int(T*fs)  # samples in one recording
digit_recs = []

for digit in digits:
    # One row per recording of this digit.
    partial_recs = np.zeros((num_recs, num_samples))
    print('When prompted to speak, say ' + str(digit) + '. \n')
    for i in range(num_recs):
        time.sleep(2)  # pause before each recording starts
        digit_recorder = recordsound(T, fs)
        spoken_digit = digit_recorder.solve().reshape(num_samples)
        partial_recs[i, :] = spoken_digit
    digit_recs.append(partial_recs)

np.save("recorded_digits.npy", digit_recs)

# ---- Part 2: reload the recordings and compute the DFT of each normalized one ----
digit_recs = np.load("recorded_digits.npy")
num_digits = len(digit_recs)
num_recs, N = digit_recs[0].shape
DFTs = []
DFTs_c = []

for digit_rec in digit_recs:
    # np.complex128 replaces the np.complex_ alias, which was removed in NumPy 2.0.
    DFTs_aux = np.zeros((num_recs, N), dtype=np.complex128)
    DFTs_c_aux = np.zeros((num_recs, N), dtype=np.complex128)
    for i in range(num_recs):
        rec_i = digit_rec[i, :]
        energy_rec_i = np.linalg.norm(rec_i)
        # Normalize to unit energy (in place, on the row of the loaded array).
        # A silent recording has zero norm; leave it as zeros rather than
        # dividing 0/0 and filling its spectrum with NaN.
        if energy_rec_i > 0:
            rec_i /= energy_rec_i
        DFT_rec_i = dft(rec_i, fs)
        [_, X, _, X_c] = DFT_rec_i.solve3()
        DFTs_aux[i, :] = X
        DFTs_c_aux[i, :] = X_c
    DFTs.append(DFTs_aux)
    DFTs_c.append(DFTs_c_aux)

np.save("spoken_digits_DFTs.npy", DFTs)
np.save("spoken_digits_DFTs_c.npy", DFTs_c)

When prompted to speak, say 1. 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

When prompted to speak, say 2. 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 



Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X_c=np.roll(X,np.int(np.ceil(self.K/2-1))) # Circularly shift X to get it centered in f_c==0


In [6]:
# Record the test set: num_recs utterances, each one either a 1 or a 2.
fs = 8000        # sampling frequency handed to recordsound
T = 1            # duration handed to recordsound
num_recs = 10    # number of test recordings
num_samples = int(T*fs)  # samples in one recording
digit_recs = []

# One row per test recording.
partial_recs = np.zeros((num_recs, num_samples))
print('When prompted to speak, say 1 or 2' + '. \n')
for i in range(num_recs):
    time.sleep(2)  # pause before each recording starts
    digit_recorder = recordsound(T, fs)
    spoken_digit = digit_recorder.solve().reshape(num_samples)
    partial_recs[i, :] = spoken_digit
digit_recs.append(partial_recs)

np.save("test_set.npy", partial_recs)
# Concatenate all recordings into one long signal. reshape(-1) flattens whatever
# shape partial_recs has; the previous explicit length T*fs*num_recs was
# inconsistent with int(T*fs) above and fails whenever T*fs is not an int.
test_set_audio = partial_recs.reshape(-1)
file_name = 'test_set_audio_rec.wav'
write(file_name, fs, test_set_audio.astype(np.float32))

When prompted to speak, say 1 or 2. 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 

start recording 

end recording 



In [10]:
#Problem 2
def print_matrix(A, nr_decimals = 2):
    """
    Print the entries of a 2-D array in fixed-width, fixed-point columns.

    One line is printed per second-axis index l of A; that line lists the
    entries A[k, l] for k = 0, ..., len(A)-1, so the output is laid out as the
    transpose of A.

    :param A: 2-D array indexable as A[k, l]
    :param nr_decimals: number of digits printed after the decimal point
    """
    # Field width: integer digits of the largest magnitude (at least one),
    # plus the decimals, plus 3 extra characters.
    peak = np.amax(np.abs(A))
    integer_digits = np.maximum(np.floor(np.log10(peak)), 0) + 1
    field_width = "{0:1.0f}".format(integer_digits + nr_decimals + 3)
    cell_format = "{0: " + field_width + "." + str(nr_decimals) + "f}"

    row_count = len(A)
    col_count = len(A[0])

    for col in range(col_count):
        cells = [cell_format.format(A[row, col]) for row in range(row_count)]
        print(" " + "".join(" " + cell for cell in cells))
        
# Classify each test recording by comparing its magnitude spectrum with the
# average magnitude spectrum of each digit in the training set.
fs = 8000
T = 1
test_set = np.load("test_set.npy")
# Only the magnitudes of the training DFTs are used below.
training_set_DFTs = np.abs(np.load("spoken_digits_DFTs.npy"))

num_digits = len(training_set_DFTs)
_, N = training_set_DFTs[0].shape
# The averaged magnitudes are real, so a float array is enough.
average_spectra = np.zeros((num_digits, N))

for i in range(num_digits):
    average_spectra[i, :] = np.mean(training_set_DFTs[i], axis=0)

num_recs, N = test_set.shape
predicted_labels = np.zeros(num_recs)

# np.complex128 replaces the np.complex_ alias, which was removed in NumPy 2.0.
DFTs_aux = np.zeros((num_recs, N), dtype=np.complex128)
DFTs_c_aux = np.zeros((num_recs, N), dtype=np.complex128)

for i in range(num_recs):
    rec_i = test_set[i, :]
    energy_rec_i = np.linalg.norm(rec_i)
    # Normalize to unit energy (in place, on the row of test_set). A silent
    # recording has zero norm; leave it as zeros instead of producing NaN.
    if energy_rec_i > 0:
        rec_i /= energy_rec_i
    DFT_rec_i = dft(rec_i, fs)
    [_, X, _, X_c] = DFT_rec_i.solve3()
    DFTs_aux[i, :] = X
    DFTs_c_aux[i, :] = X_c

    # Score each digit by the inner product between the test magnitude
    # spectrum and that digit's average spectrum.
    X_magnitude = np.abs(X)
    inner_prods = np.zeros(num_digits)
    for j in range(num_digits):
        inner_prods[j] = np.inner(X_magnitude, average_spectra[j, :])
    # Row j of average_spectra is reported as label j + 1.
    predicted_labels[i] = np.argmax(inner_prods) + 1

print("Average spectrum comparison predicted labels: \n")
print_matrix(predicted_labels[:, None], nr_decimals=0)

Average spectrum comparison predicted labels: 

     1    1    2    2    1    1    2    2    1    1


Omg it works. I said the numbers in this exact order, and I really didn't think it would catch all of them, but it got every one of them, for 100% classification accuracy. If I had done 100 samples, perhaps it would have gotten a few wrong, but my gaming mic was worth the investment.

In [13]:
#Problem 3
def print_matrix(A, nr_decimals = 2):
    """
    Print a 2-D array with every entry in the same fixed-point format.

    Each printed line corresponds to one index l along the second axis of A
    and contains A[k, l] for every k along the first axis, i.e. A is printed
    transposed.

    :param A: 2-D array indexable as A[k, l]
    :param nr_decimals: number of digits printed after the decimal point
    """
    # Width of one entry: integer digits needed by the largest magnitude in A
    # (never fewer than one), the requested decimals, and 3 extra characters.
    largest_magnitude = np.amax(np.abs(A))
    width = np.maximum(np.floor(np.log10(largest_magnitude)), 0) + 1
    width = width + nr_decimals + 3
    template = "{0: " + "{0:1.0f}".format(width) + "." + str(nr_decimals) + "f}"

    num_rows = len(A)
    num_cols = len(A[0])

    for col in range(num_cols):
        line = " "
        for row in range(num_rows):
            line += " " + template.format(A[row, col])
        print(line)

# Classify each test recording with the label of the single training recording
# whose magnitude spectrum has the largest inner product with it.
fs = 8000
T = 1
training_set_DFTs = np.load("spoken_digits_DFTs.npy")
test_set = np.load("test_set.npy")
num_digits = len(training_set_DFTs)

num_recs, N = test_set.shape
predicted_labels = np.zeros(num_recs)

# np.complex128 replaces the np.complex_ alias, which was removed in NumPy 2.0.
DFTs_aux = np.zeros((num_recs, N), dtype=np.complex128)
DFTs_c_aux = np.zeros((num_recs, N), dtype=np.complex128)

training_set_size, _ = training_set_DFTs[0].shape
# The training magnitudes do not depend on the test recording: compute them once.
training_magnitudes = np.abs(training_set_DFTs)

for i in range(num_recs):
    rec_i = test_set[i, :]
    energy_rec_i = np.linalg.norm(rec_i)
    # Normalize to unit energy (in place, on the row of test_set). A silent
    # recording has zero norm; leave it as zeros instead of producing NaN.
    if energy_rec_i > 0:
        rec_i /= energy_rec_i
    DFT_rec_i = dft(rec_i, fs)
    [_, X, _, X_c] = DFT_rec_i.solve3()
    DFTs_aux[i, :] = X
    DFTs_c_aux[i, :] = X_c

    # inner_prods[j, k]: similarity between this test recording and the k-th
    # training recording of the j-th digit.
    X_magnitude = np.abs(X)
    inner_prods = np.zeros((num_digits, training_set_size))
    for j in range(num_digits):
        for k in range(training_set_size):
            inner_prods[j, k] = np.inner(X_magnitude, training_magnitudes[j][k, :])
    # The row index of the best match identifies the digit; row j is label j + 1.
    max_position = np.unravel_index(np.argmax(inner_prods), inner_prods.shape)
    predicted_labels[i] = max_position[0] + 1

print("Nearest neighbor comparison predicted labels: \n")
print_matrix(predicted_labels[:, None], nr_decimals=0)

Nearest neighbor comparison predicted labels: 

     1    1    2    2    1    1    2    2    1    1
