In [1]:
import numpy as np
# read data saved from mfcc
with open('guitar.npy', 'rb') as f:
    guitar_samples = np.load(f)
print(guitar_samples.shape) # ensure the data doesn't change

(38439, 13)


In [2]:
# reload the train data
with open('cello.npy', 'rb') as f:
    cello_samples = np.load(f)
with open('violin.npy', 'rb') as f:
    violin_samples = np.load(f)
with open('flute.npy', 'rb') as f:
    flute_samples = np.load(f)
with open('saxophone.npy', 'rb') as f:
    saxophone_samples = np.load(f)
with open('clarinet.npy', 'rb') as f:
    clarinet_samples = np.load(f)
with open('trumpet.npy', 'rb') as f:
    trumpet_samples = np.load(f)

In [3]:
# import packages for GMM
import os
from scipy.io.wavfile import read
from sklearn.mixture import GaussianMixture
import python_speech_features as mfcc
from sklearn import preprocessing

In [5]:
# generate models
from datetime import datetime 
start = datetime.now()
guitar_gmm = GaussianMixture(n_components=7, max_iter=200, covariance_type='diag', n_init=3)
guitar_gmm.fit(guitar_samples)

cello_gmm = GaussianMixture(n_components=7, max_iter=200, covariance_type='diag', n_init=3)
cello_gmm.fit(cello_samples)

violin_gmm = GaussianMixture(n_components=7, max_iter=200, covariance_type='diag', n_init=3)
violin_gmm.fit(violin_samples)

flute_gmm = GaussianMixture(n_components=7, max_iter=200, covariance_type='diag', n_init=3)
flute_gmm.fit(flute_samples)

clarinet_gmm = GaussianMixture(n_components=7, max_iter=200, covariance_type='diag', n_init=3)
clarinet_gmm.fit(clarinet_samples)

saxophone_gmm = GaussianMixture(n_components=7, max_iter=200, covariance_type='diag', n_init=3)
saxophone_gmm.fit(saxophone_samples)

trumpet_gmm = GaussianMixture(n_components=7, max_iter=200, covariance_type='diag', n_init=3)
trumpet_gmm.fit(trumpet_samples)
duration = datetime.now() - start
print("Training completed in time: ", duration)

Training completed in time:  0:00:33.428034


In [6]:
models = [guitar_gmm, cello_gmm, violin_gmm, flute_gmm, clarinet_gmm, saxophone_gmm, trumpet_gmm]
classes = ['guitar', 'cello', 'violin', 'flute', 'clarinet', 'saxophone', 'trumpet']

In [9]:
def get_MFCC(sr, audio):
    features = mfcc.mfcc(audio, sr, 0.025, 0.01, 13, appendEnergy=False)
    features = preprocessing.scale(features)
    return features

In [10]:
# accuracy of guitar_gmm
source = "/Users/Kaede/Desktop/final_project/wav_files/guitar/"
files = [os.path.join(source, f) for f in os.listdir(source) if f.endswith('.wav')]
count = 0.
for f in files[74:]:
    sr, audio = read(f)
    features = get_MFCC(sr, audio)
    scores = None
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  # checking with each model one by one
        scores = np.array(gmm.score(features))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    if classes[winner] == 'guitar':
        count += 1
print("guitar accuracy is ", count / len(files[74:]))

guitar accuracy is  0.96875


In [11]:
# accuracy of cello_gmm
source = "/Users/Kaede/Desktop/final_project/wav_files/cello/"
files = [os.path.join(source, f) for f in os.listdir(source) if f.endswith('.wav')]
count = 0.
for f in files[622:]:
    sr, audio = read(f)
    features = get_MFCC(sr, audio)
    scores = None
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  # checking with each model one by one
        scores = np.array(gmm.score(features))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    if classes[winner] == 'cello':
        count += 1
print("cello accuracy is ", count / len(files[622:]))

cello accuracy is  0.6217228464419475


In [12]:
# accuracy of violin_gmm
source = "/Users/Kaede/Desktop/final_project/wav_files/violin/"
files = [os.path.join(source, f) for f in os.listdir(source) if f.endswith('.wav')]
count = 0.
for f in files[1051:]:
    sr, audio = read(f)
    features = get_MFCC(sr, audio)
    scores = None
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  # checking with each model one by one
        scores = np.array(gmm.score(features))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    if classes[winner] == 'violin':
        count += 1
print("violin accuracy is ", count / len(files[1051:]))

violin accuracy is  0.6341463414634146


In [13]:
# accuracy of flute_gmm
source = "/Users/Kaede/Desktop/final_project/wav_files/flute/"
files = [os.path.join(source, f) for f in os.listdir(source) if f.endswith('.wav')]
count = 0.
for f in files[618:]:
    sr, audio = read(f)
    features = get_MFCC(sr, audio)
    scores = None
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  # checking with each model one by one
        scores = np.array(gmm.score(features))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    if classes[winner] == 'flute':
        count += 1
print("flute accuracy is ", count / len(files[618:]))

flute accuracy is  0.7894736842105263


In [14]:
# accuracy of clarinet_gmm
source = "/Users/Kaede/Desktop/final_project/wav_files/clarinet/"
files = [os.path.join(source, f) for f in os.listdir(source) if f.endswith('.wav')]
count = 0.
for f in files[592:]:
    sr, audio = read(f)
    features = get_MFCC(sr, audio)
    scores = None
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  # checking with each model one by one
        scores = np.array(gmm.score(features))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    if classes[winner] == 'clarinet':
        count += 1
print("clarinet accuracy is ", count / len(files[592:]))

clarinet accuracy is  0.65748031496063


In [15]:
# accuracy of saxophone_gmm
source = "/Users/Kaede/Desktop/final_project/wav_files/saxophone/"
files = [os.path.join(source, f) for f in os.listdir(source) if f.endswith('.wav')]
count = 0.
cla_count = 0.
flu_count = 0.
tru_count = 0.
str_count = 0.
for f in files[512:]:
    sr, audio = read(f)
    features = get_MFCC(sr, audio)
    scores = None
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  # checking with each model one by one
        scores = np.array(gmm.score(features))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    if classes[winner] == 'saxophone':
        count += 1
    if classes[winner] == 'clarinet':
        cla_count += 1
    if classes[winner] == 'flute':
        flu_count += 1
    if classes[winner] == 'trumpet':
        tru_count += 1
    if classes[winner] == 'cello' or classes[winner] == 'violin':
        str_count += 1
        
    
print("saxophone accuracy is ", count / len(files[512:]))
print(cla_count/len(files[512:]))
print(flu_count/len(files[512:]))
print(tru_count/len(files[512:]))
print(str_count/len(files[512:]))

saxophone accuracy is  0.36818181818181817
0.19545454545454546
0.1590909090909091
0.08181818181818182
0.19090909090909092


In [16]:
# accuracy of trumpet_gmm
source = "/Users/Kaede/Desktop/final_project/wav_files/trumpet/"
files = [os.path.join(source, f) for f in os.listdir(source) if f.endswith('.wav')]
count = 0.
for f in files[339:]:
    sr, audio = read(f)
    features = get_MFCC(sr, audio)
    scores = None
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  # checking with each model one by one
        scores = np.array(gmm.score(features))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    if classes[winner] == 'trumpet':
        count += 1
print("trumpet accuracy is ", count / len(files[339:]))

trumpet accuracy is  0.5


In [17]:
(0.96875+0.6217228464419475+0.6341463414634146+0.7894736842105263+0.65748031496063+0.36818181818181817+0.5)/7

0.6485364293226196