## Traversing a directory for data

In [None]:
import tensorflow as tf

filenames = tf.train.match_filenames_once('/usr/local/lib/python2.7/dist-packages/bregman/audio/*.wav')
count_num_files = tf.size(filenames)
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    num_files = sess.run(count_num_files)
    
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    
    for i in range(num_files):
        audio_file = sess.run(filename)
        print(audio_file)
    coord.request_stop()
    coord.join(threads)

## Representing audio in Python

In [1]:
from bregman.suite import *

def get_chromogram(audio_file):
    F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return F.X

get_chromogram('/usr/local/lib/python2.7/dist-packages/bregman/audio/amen.wav')

array([[ 0.00481223,  0.00400106,  0.00375707, ...,  0.00066583,
         0.        ,  0.        ],
       [ 0.00412802,  0.00448563,  0.00632198, ...,  0.00069418,
         0.        ,  0.        ],
       [ 0.00361466,  0.00637525,  0.00960479, ...,  0.00075464,
         0.        ,  0.        ],
       ..., 
       [ 0.0040263 ,  0.00391508,  0.00314862, ...,  0.0008951 ,
         0.        ,  0.        ],
       [ 0.00414494,  0.00316777,  0.00275692, ...,  0.00081087,
         0.        ,  0.        ],
       [ 0.00237285,  0.00151895,  0.00155102, ...,  0.00052872,
         0.        ,  0.        ]])

## Obtaining a dataset for k-means

In [3]:
import tensorflow as tf
from bregman.suite import *

filenames = tf.train.match_filenames_once('/usr/local/lib/python2.7/dist-packages/bregman/audio/*.wav')
count_num_files = tf.size(filenames)
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)

chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)

def get_next_chromogram(sess):
    audio_file = sess.run(filename)
    F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return F.X

def extract_feature_vector(sess, chromo_data):
    num_features, num_samples = np.shape(chromo_data)
    freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
    hist, bins = np.histogram(freq_vals, bins=range(num_features + 1))
    return hist.astype(float) /num_samples

def get_dataset(sess):
    num_files = sess.run(count_num_files)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    xs = []
    for i in range(num_files):
        chromo_data = get_next_chromogram(sess)
        x = [extract_feature_vector(sess, chromo_data)]
        x = np.matrix(x)
        if len(xs) == 0:
            xs = x
        else:
            xs = np.vstack((xs, x))
    coord.request_stop()
    coord.join(threads)
    return xs

## Implementing k-means

In [17]:
k = 2
max_iterations = 100

def initial_cluster_centroids(x, k): 
    return x[0:k, :] # x의 앞의 벡터 k개만 가져와서 임의의 centroid로 잡음.

def assign_cluster(X, centroids):
    expanded_vectors = tf.expand_dims(X, 0)
    expanded_centroids = tf.expand_dims(centroids, 1)
    distances = tf.reduce_sum(tf.square(tf.sub(expanded_vectors, expanded_centroids)), 2)
    mins = tf.argmin(distances, 0)
    return mins

def recompute_centroids(X, Y):
    sums = tf.unsorted_segment_sum(X, Y, k)
    counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
    return sums / counts

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X = get_dataset(sess)
    centroids = initial_cluster_centroids(X, k)
    i, converged = 0, False
    while not converged and i < max_iterations:
        i += 1
        Y = assign_cluster(X, centroids)
        print(sess.run(Y))
        centroids = sess.run(recompute_centroids(X,Y))
    print(centroids)

[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1 1 0 0]
[0 1