## Traversing a directory for data

In [1]:
import tensorflow as tf

filenames = tf.train.match_filenames_once('/usr/local/lib/python2.7/dist-packages/bregman/audio/*.wav')
count_num_files = tf.size(filenames)
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    num_files = sess.run(count_num_files)
    
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    
    for i in range(num_files):
        audio_file = sess.run(filename)
        print(audio_file)
    coord.request_stop()
    coord.join(threads)

/usr/local/lib/python2.7/dist-packages/bregman/audio/bigben.wav
/usr/local/lib/python2.7/dist-packages/bregman/audio/ladokhope.wav
/usr/local/lib/python2.7/dist-packages/bregman/audio/amen.wav
/usr/local/lib/python2.7/dist-packages/bregman/audio/gmin.wav
/usr/local/lib/python2.7/dist-packages/bregman/audio/fastcar.wav


## Representing audio in Python

In [2]:
from bregman.suite import *

def get_chromogram(audio_file):
    F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return F.X

get_chromogram('/usr/local/lib/python2.7/dist-packages/bregman/audio/amen.wav')

array([[ 0.00481223,  0.00400106,  0.00375707, ...,  0.00066583,
         0.        ,  0.        ],
       [ 0.00412802,  0.00448563,  0.00632198, ...,  0.00069418,
         0.        ,  0.        ],
       [ 0.00361466,  0.00637525,  0.00960479, ...,  0.00075464,
         0.        ,  0.        ],
       ..., 
       [ 0.0040263 ,  0.00391508,  0.00314862, ...,  0.0008951 ,
         0.        ,  0.        ],
       [ 0.00414494,  0.00316777,  0.00275692, ...,  0.00081087,
         0.        ,  0.        ],
       [ 0.00237285,  0.00151895,  0.00155102, ...,  0.00052872,
         0.        ,  0.        ]])

## Obtaining a dataset for k-means

In [23]:
import tensorflow as tf
from bregman.suite import *

filenames = tf.train.match_filenames_once('/usr/local/lib/python2.7/dist-packages/bregman/audio/*.wav')
count_num_files = tf.size(filenames)
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)

chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)

def get_next_chromogram(sess):
    audio_file = sess.run(filename)
    F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return F.X

def extract_feature_vector(sess, chromo_data):
    num_features, num_samples = np.shape(chromo_data)
    freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
    hist, bins = np.histogram(freq_vals, bins=range(num_features + 1))
    return hist.astype(float) /num_samples

def get_dataset(sess):
    num_files = sess.run(count_num_files)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    xs = []
    for i in range(num_files):
        chromo_data = get_next_chromogram(sess)
        x = [extract_feature_vector(sess, chromo_data)]
        x = np.matrix(x)
        if len(xs) == 0:
            xs = x
        else:
            xs = np.vstack((xs, x))
    coord.request_stop()
    coord.join(threads)
    return xs

## Implementing k-means

In [24]:
k = 2
max_iterations = 100

def initial_cluster_centroids(x, k): 
    return x[0:k, :] # x의 앞의 벡터 k개만 가져와서 임의의 centroid로 잡음.

def assign_cluster(X, centroids):
    expanded_vectors = tf.expand_dims(X, 0)
    expanded_centroids = tf.expand_dims(centroids, 1)
    distances = tf.reduce_sum(tf.square(tf.sub(expanded_vectors, expanded_centroids)), 2)
    mins = tf.argmin(distances, 0)
    return mins

def recompute_centroids(X, Y):
    sums = tf.unsorted_segment_sum(X, Y, k)
    counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
    return sums / counts

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X = get_dataset(sess)
    centroids = initial_cluster_centroids(X, k)
    i, converged = 0, False
    while not converged and i < max_iterations:
        i += 1
        Y = assign_cluster(X, centroids)
        centroids = sess.run(recompute_centroids(X,Y))
    print(centroids)

[[ 0.08977582  0.02911966  0.03448276  0.1239824   0.04211028  0.0651085
   0.02921997  0.25663169  0.16929682  0.01060826  0.09619049  0.05347335]
 [ 0.16007127  0.04976793  0.07495436  0.09422362  0.04749126  0.03667979
   0.0556081   0.06573767  0.05154968  0.16371175  0.15931238  0.04089219]]


## Organizing data for segmentation

In [26]:
import tensorflow as tf
import numpy as np
from bregman.suite import *

k = 4
segment_size = 50
max_iterations = 100

chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)

def get_chromogram(audio_file):
    F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return F.X

def get_dataset(sess, audio_file):
    chromo_data = get_chromogram(audio_file)
    print('chromo_data', np.shape(chromo_data))
    chromo_length = np.shape(chromo_data)[1]
    xs = []
    for i in range(chromo_length / segment_size):
        chromo_segment = chromo_data[:, i*segment_size:(i+1)*segment_size]
        x = extract_feature_vector(sess, chromo_segment)
        if len(xs) == 0:
            xs = x
        else:
            xs = np.vstack((xs, x))
    return xs

## Segmenting an audio clip

In [27]:
with tf.Session() as sess:
    X = get_dataset(sess, '/usr/local/lib/python2.7/dist-packages/bregman/audio/bigben.wav')
    print(np.shape(X))
    centroids = initial_cluster_centroids(X, k)
    i, converged = 0, False
    while not converged and i < max_iterations:
        i += 1
        Y = assign_cluster(X, centroids)
        centroids = sess.run(recompute_centroids(X,Y))
        if i % 50 == 0:
            print('iteration', i)
    segments = sess.run(Y)
    print(segments)
    for i in range(len(segments)):
        seconds = (i * segment_size) / float(10)
        min, sec = divmod(seconds, 60)
        time_str = '{}m {}s'.format(min, sec)
        print(time_str, segments[i])

('chromo_data', (12, 278))
(5, 12)
('iteration', 50)
('iteration', 100)
[0 1 2 3 1]
('0.0m 0.0s', 0)
('0.0m 5.0s', 1)
('0.0m 10.0s', 2)
('0.0m 15.0s', 3)
('0.0m 20.0s', 1)


## Setting up the SOM algorithm

In [75]:
import tensorflow as tf
import numpy as np

class SOM:
    def __init__(self, width, height, dim):
        self.num_iters = 5
        self.width = width
        self.height = height
        self.dim = dim
        self.node_locs = self.get_locs()
        
        nodes = tf.Variable(tf.random_normal([width*height, dim]))
        self.nodes = nodes
        
        x = tf.placeholder(tf.float32, [dim])
        iter = tf.placeholder(tf.float32)
        
        self.x = x
        self.iter = iter
        
        self.bmu_loc = self.get_bmu_loc(x)
        
        self.propagate_nodes = self.get_propagation(x, iter)
    
    def get_propagation(self, x, iter):
        num_nodes = self.width * self.height
        rate = 1.0 - tf.div(iter, self.num_iters)
        alpha = rate * 0.5
        sigma = rate * tf.to_float(tf.maximum(self.width, self.height)) / 2.
        expanded_bmu_loc = tf.expand_dims(tf.to_float(self.bmu_loc), 0)
        sqr_dists_from_bmu = tf.reduce_sum(tf.square(tf.sub(expanded_bmu_loc, self.node_locs)), 1)
        neigh_factor = tf.exp(-tf.div(sqr_dists_from_bmu, 2 * tf.square(sigma)))
        rate = tf.mul(alpha, neigh_factor)
        rate_factor = tf.pack([tf.tile(tf.slice(rate, [i], [1]), [self.dim]) for i in range(num_nodes)])
        nodes_diff = tf.mul(rate_factor, tf.sub(tf.pack([x for i in range(num_nodes)]), self.nodes))
        update_nodes = tf.add(self.nodes, nodes_diff)
        return tf.assign(self.nodes, update_nodes)
    
    def get_bmu_loc(self, x):
        expanded_x = tf.expand_dims(x, 0)
        sqr_diff = tf.square(tf.sub(expanded_x, self.nodes))
        dists = tf.reduce_sum(sqr_diff, 1)
        bmu_idx = tf.argmin(dists, 0)
        bmu_loc = tf.pack([tf.mod(bmu_idx, self.width), tf.div(bmu_idx, self.width)])
        return bmu_loc
    
    def get_locs(self):
        locs = [[x, y] for y in range(self.height)
            for x in range(self.width)]
        return tf.to_float(locs)
    
    def train(self, data):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.num_iters):
                for data_x in data:
                    print(data_x)
                    sess.run(self.bmu_loc, feed_dict={self.x: data_x})
                    print(sess.run(self.propagate_nodes, feed_dict={self.x: data_x, self.iter: i}))
     #       centroid_grid = [[] for i in range(self.width)]
     #       self.nodes_val = list(sess.run(self.nodes))
     #       self.locs_val = list(sess.run(self.node_locs))
     #       for i, l in enumerate(self.locs_val):
     #           centroid_grid[int(l[0])].append(self.nodes_val[i])
     #       self.centroid_grid = centroid_grid

## Test out and visualize results

In [76]:
from matplotlib import pyplot as plt
import numpy as np
%matplotlib inline 

colors = np.array([[0., 0., 1.],
                  [0., 0., 0.95],
                  [0., 0.05, 1.],
                  [0., 1., 0.],
                  [0., 0.95, 0.],
                  [0., 1., 0.05],
                  [1., 0., 0.],                
                  [1., 0.05, 0.],
                   [1., 0., 0.05],
                   [1., 1., 0.]])

som = SOM(4,4,3)
som.train(colors)

#plt.imshow(som.centroid_grid)
#plt.show()

[ 0.  0.  1.]
1.0
[ 0.    0.    0.95]
1.0
[ 0.    0.05  1.  ]
1.0
[ 0.  1.  0.]
1.0
[ 0.    0.95  0.  ]
1.0
[ 0.    1.    0.05]
1.0
[ 1.  0.  0.]
1.0
[ 1.    0.05  0.  ]
1.0
[ 1.    0.    0.05]
1.0
[ 1.  1.  0.]
1.0
[ 0.  0.  1.]
0.8
[ 0.    0.    0.95]
0.8
[ 0.    0.05  1.  ]
0.8
[ 0.  1.  0.]
0.8
[ 0.    0.95  0.  ]
0.8
[ 0.    1.    0.05]
0.8
[ 1.  0.  0.]
0.8
[ 1.    0.05  0.  ]
0.8
[ 1.    0.    0.05]
0.8
[ 1.  1.  0.]
0.8
[ 0.  0.  1.]
0.6
[ 0.    0.    0.95]
0.6
[ 0.    0.05  1.  ]
0.6
[ 0.  1.  0.]
0.6
[ 0.    0.95  0.  ]
0.6
[ 0.    1.    0.05]
0.6
[ 1.  0.  0.]
0.6
[ 1.    0.05  0.  ]
0.6
[ 1.    0.    0.05]
0.6
[ 1.  1.  0.]
0.6
[ 0.  0.  1.]
0.4
[ 0.    0.    0.95]
0.4
[ 0.    0.05  1.  ]
0.4
[ 0.  1.  0.]
0.4
[ 0.    0.95  0.  ]
0.4
[ 0.    1.    0.05]
0.4
[ 1.  0.  0.]
0.4
[ 1.    0.05  0.  ]
0.4
[ 1.    0.    0.05]
0.4
[ 1.  1.  0.]
0.4
[ 0.  0.  1.]
0.2
[ 0.    0.    0.95]
0.2
[ 0.    0.05  1.  ]
0.2
[ 0.  1.  0.]
0.2
[ 0.    0.95  0.  ]
0.2
[ 0.    1.    0.05]
0.2
[ 1.

In [39]:
x = [[[1, 2, 3], [4,5,6] , [7,8,9]], [[1,2,3], [4,5,6], [7,8,9]]]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(tf.reduce_sum(x,2)))

[[ 6 15 24]
 [ 6 15 24]]
