In [None]:
import os, platform

def _detect_linux_distribution():
    """Return the name of the running Linux distribution, or '' if unknown.

    ``platform.dist()`` was deprecated in Python 3.5 and removed in Python 3.8,
    so it is only called when the interpreter still provides it.  Otherwise the
    ``NAME`` field of ``/etc/os-release`` is used.
    """
    if hasattr(platform, 'dist'):
        return platform.dist()[0]
    try:
        with open('/etc/os-release') as os_release:
            for line in os_release:
                key, _, value = line.strip().partition('=')
                if key == 'NAME':
                    return value.strip('"\'')
    except OSError:
        # No os-release file (e.g. not a Linux host): distribution stays unknown.
        pass
    return ''


distr = _detect_linux_distribution()

# Root directory of the dataset: a home-relative location on Ubuntu, a fixed
# mount point on any other system.
PATH = os.path.expanduser('~/datasets/letsdance') if distr == 'Ubuntu' else '/run/media/nast/DATA/letsdance'
# Split directories, relative to PATH.
TRAIN_PATH = "letsdance_split/train"
VALID_PATH = "letsdance_split/validation"
TEST_PATH = "letsdance_split/test"

print("dataset path:", PATH)

In [None]:
from scipy import misc
import matplotlib.pyplot as plt
%matplotlib inline

# Show one training frame as a quick check that the dataset can be read.
# scipy.misc.imread was deprecated in SciPy 1.0 and removed in SciPy 1.2, so the
# file is decoded with matplotlib's imread (pyplot is already imported in this cell).
path = "letsdance_split/train/ballet/Et31LySAxf0_020_0266.jpg"
image = plt.imread(os.path.join(PATH, path))
plt.imshow(image)
plt.show()

In [None]:
import numpy as np
# Report the decoded frame's array shape and its smallest / largest pixel value.
print(image.shape, np.min(image), np.max(image))

In [None]:
from collections import Counter


# A vector of filenames.


def get_file_names_in_dataset(dataset_path):
    """Map every class directory of a dataset split to its sorted entry names.

    ``dataset_path`` is resolved relative to the global ``PATH``.  Each entry
    found directly below that directory is treated as a class directory.

    Returns:
        dict ``{class_name: sorted list of names inside that class directory}``.
    """
    split_root = os.path.join(PATH, dataset_path)
    return {
        class_name: sorted(os.listdir(os.path.join(split_root, class_name)))
        for class_name in os.listdir(split_root)
    }


def video_name_from_file_name(file_name):
    """Return everything before the last underscore of ``file_name``.

    The part after the last underscore is dropped.  A name without any
    underscore yields the empty string.
    """
    head, _separator, _tail = file_name.rpartition('_')
    return head


def get_num_of_frames_in_videos(list_of_file_names):
    """Count how many file names belong to each video.

    Returns:
        ``collections.Counter`` mapping video name (as produced by
        ``video_name_from_file_name``) to the number of file names with it.
    """
    return Counter(video_name_from_file_name(file_name) for file_name in list_of_file_names)
 
    
def select_videos_with_N_frames(list_of_file_names, N):
    """Return the names of the videos that have exactly ``N`` frame files.

    Args:
        list_of_file_names: iterable of frame file names.
        N: required number of frames per video.

    Returns:
        Tuple of video names, in the order the videos first appear in
        ``list_of_file_names``.  The tuple is empty when no video has exactly
        ``N`` frames (unpacking ``zip(*[])`` used to raise ValueError here).
    """
    nfr = get_num_of_frames_in_videos(list_of_file_names)
    return tuple(video_name for video_name, num_frames in nfr.items() if num_frames == N)


def select_video_names_for_dances(file_names_in_dataset, N):
    """Pick, for every dance, the same number of videos having exactly N frames.

    The number of videos kept per dance is the smallest count of N-frame
    videos found among all dances; within a dance the alphabetically first
    video names are kept.

    Returns:
        dict ``{dance_name: sorted list of selected video names}``.
    """
    candidates = {
        dance_name: select_videos_with_N_frames(list_of_file_names, N)
        for dance_name, list_of_file_names in file_names_in_dataset.items()
    }
    quota = min(len(video_names) for video_names in candidates.values())
    return {
        dance_name: sorted(video_names)[:quota]
        for dance_name, video_names in candidates.items()
    }


def select_file_names_for_work(file_names_in_dataset, N):
    """Keep only the frame files that belong to the selected N-frame videos.

    Args:
        file_names_in_dataset: dict ``{dance: list of frame file names}``.
        N: number of frames a video must have to be selected.

    Returns:
        dict ``{dance: list of file names}`` restricted to the videos chosen by
        ``select_video_names_for_dances``; the order of the file names within
        each dance is preserved.
    """
    video_names = select_video_names_for_dances(file_names_in_dataset, N)
    selected_file_names = {}
    for dance, list_of_file_names in file_names_in_dataset.items():
        # A set makes each membership test O(1); scanning the list of video
        # names for every frame file was quadratic in practice.
        wanted_videos = set(video_names[dance])
        selected_file_names[dance] = [fn for fn in list_of_file_names
                                      if video_name_from_file_name(fn) in wanted_videos]
    return selected_file_names


def prepend_path(file_names_in_dataset, path):
    """Turn per-dance file names into paths ``path/<dance>/<file name>``.

    Args:
        file_names_in_dataset: dict ``{dance: iterable of file names}``.
        path: directory that contains one sub-directory per dance.

    Returns:
        A new dict ``{dance: list of joined paths}`` with the file names of each
        dance sorted.  The input dict is left untouched, so calling this
        function again on the same input does not prepend the path twice.
    """
    return {
        dance: [os.path.join(path, dance, file_name) for file_name in sorted(loffn)]
        for dance, loffn in file_names_in_dataset.items()
    }
        
    
# Frame file names of the training split, grouped by dance.
file_names_in_dataset = get_file_names_in_dataset(TRAIN_PATH)

print("beforer filtering")
for dance, loffn in file_names_in_dataset.items():
    frames_line = 'total number of frames: {}'.format(len(loffn))
    videos_line = 'number of videos: {}'.format(len(get_num_of_frames_in_videos(loffn)))
    print(dance, frames_line, videos_line, sep='\n', end='\n\n')
print('*********\n\nAfter filtering')

# Keep only videos with exactly 300 frames; statistics are printed for the
# first dance of the filtered dict only.
file_names_for_train = select_file_names_for_work(file_names_in_dataset, 300)
dance, loffn = list(file_names_for_train.items())[0]
frames_line = 'total number of frames: {}'.format(len(loffn))
videos_line = 'number of videos: {}'.format(len(get_num_of_frames_in_videos(loffn)))
print(frames_line, videos_line, sep='\n', end='\n\n')

# Turn the bare file names into paths below the training split directory.
file_names_for_train = prepend_path(file_names_for_train, os.path.join(PATH, TRAIN_PATH))

In [None]:
# Frame file names of the validation split, grouped by dance.
valid_file_names_in_dataset = get_file_names_in_dataset(VALID_PATH)
print("beforer filtering")
for dance, loffn in valid_file_names_in_dataset.items():
    frames_line = 'total number of frames: {}'.format(len(loffn))
    videos_line = 'number of videos: {}'.format(len(get_num_of_frames_in_videos(loffn)))
    print(dance, frames_line, videos_line, sep='\n', end='\n\n')
print('*********\n\nAfter filtering')

# Keep only videos with exactly 300 frames; statistics are printed for the
# first dance of the filtered dict only.
valid_file_names_for_work = select_file_names_for_work(valid_file_names_in_dataset, 300)
dance, loffn = list(valid_file_names_for_work.items())[0]
frames_line = 'total number of frames: {}'.format(len(loffn))
videos_line = 'number of videos: {}'.format(len(get_num_of_frames_in_videos(loffn)))
print(frames_line, videos_line, sep='\n', end='\n\n')
# Turn the bare file names into paths below the validation split directory.
valid_file_names_for_work = prepend_path(valid_file_names_for_work, os.path.join(PATH, VALID_PATH))

In [None]:
# Frame file names of the test split, grouped by dance.
test_file_names_in_dataset = get_file_names_in_dataset(TEST_PATH)
print("beforer filtering")
for dance, loffn in test_file_names_in_dataset.items():
    frames_line = 'total number of frames: {}'.format(len(loffn))
    videos_line = 'number of videos: {}'.format(len(get_num_of_frames_in_videos(loffn)))
    print(dance, frames_line, videos_line, sep='\n', end='\n\n')
print('*********\n\nAfter filtering')

# Keep only videos with exactly 300 frames; statistics are printed for the
# first dance of the filtered dict only.
test_file_names_for_work = select_file_names_for_work(test_file_names_in_dataset, 300)
dance, loffn = list(test_file_names_for_work.items())[0]
frames_line = 'total number of frames: {}'.format(len(loffn))
videos_line = 'number of videos: {}'.format(len(get_num_of_frames_in_videos(loffn)))
print(frames_line, videos_line, sep='\n', end='\n\n')
# Turn the bare file names into paths below the test split directory.
test_file_names_for_work = prepend_path(test_file_names_for_work, os.path.join(PATH, TEST_PATH))

In [None]:
import tensorflow as tf

# Target number of images per step; build_dataset batches
# BATCH_SIZE // NUM_DANCES zipped elements (one image per dance each).
BATCH_SIZE = 30
# Number of dances (classes) = number of keys in the training file-name dict.
NUM_DANCES = len(file_names_for_train)
print(NUM_DANCES)
# Number of selected frame files of the first dance in the training dict.
NUM_FRAMES_PER_DANCE_TRAIN = len(list(file_names_for_train.values())[0])

def _parse_function(filename, label):
    """Read one JPEG file and resize it to 224x224.

    Args:
        filename: scalar string tensor holding the path of a JPEG file.
        label: label tensor; returned unchanged.

    Returns:
        Tuple ``(image, label)`` where ``image`` is the decoded picture
        resized to 224x224 with three channels.
    """
    image_string = tf.read_file(filename)
    # Force three channels: the graph later reshapes the batch to a last
    # dimension of 3, which would fail for a grayscale JPEG decoded with the
    # default channels=0 (channel count taken from the file).
    image_decoded = tf.image.decode_jpeg(image_string, channels=3)
    return tf.image.resize_images(image_decoded, (224, 224)), label

def build_dataset(file_names_for_dataset):
    """Build a dataset whose elements hold images of every dance at once.

    For each dance (taken in sorted order of the dict items, the position in
    that order being the integer label) a shuffled dataset of
    ``(image, label)`` pairs is created.  The per-dance datasets are zipped
    together and the result is batched by ``BATCH_SIZE // NUM_DANCES``.

    Args:
        file_names_for_dataset: dict ``{dance: list of image file paths}``.

    Returns:
        The zipped and batched ``tf.data.Dataset``.
    """
    per_dance_datasets = []
    for label_id, (_dance, paths) in enumerate(sorted(file_names_for_dataset.items())):
        num_files = len(paths)
        label_tensor = tf.constant([label_id] * num_files)
        path_tensor = tf.constant(paths)
        pairs = tf.data.Dataset.from_tensor_slices((path_tensor, label_tensor))
        # Shuffle over the whole dance before decoding the images.
        per_dance_datasets.append(pairs.shuffle(num_files).map(_parse_function))
    zipped = tf.data.Dataset.zip(tuple(per_dance_datasets))
    return zipped.batch(BATCH_SIZE // NUM_DANCES)

# One zipped, batched dataset per split, built from the filtered file paths.
train_dataset = build_dataset(file_names_for_train)
valid_dataset = build_dataset(valid_file_names_for_work)
test_dataset = build_dataset(test_file_names_for_work)

In [None]:
# Coefficient of the L2 penalty (l2_loss) added to the training objective.
REG_RATE = 5e-4
# Standard deviation of the truncated-normal weight initialisers used below.
STDDEV = 0.005


def tf_accuracy(preds, labels):
    """Fraction of examples whose highest-scoring class equals the label.

    Args:
        preds: tensor of per-class scores with the classes on the last axis
            (logits or probabilities; only the arg-max is used).
        labels: int32 tensor of class indices, one per example.

    Returns:
        Scalar float32 tensor: number of correct predictions divided by the
        size of the first dimension of ``labels``.
    """
    # The arg-max must be taken over the predictions; taking it over `labels`
    # (as before) compared the labels with themselves' arg-max index.
    predicted_classes = tf.argmax(preds, axis=-1, output_type=tf.int32)
    hits = tf.to_float(tf.equal(predicted_classes, labels))
    return tf.reduce_sum(hits) / tf.to_float(tf.shape(labels)[0])


def tf_perplexity(preds):
    """Mean perplexity of the predicted distributions.

    For every example the entropy ``-sum(p * log p)`` over the last axis is
    exponentiated; the result is averaged over all examples.

    Args:
        preds: tensor of probabilities with the classes on the last axis.

    Returns:
        Scalar tensor with the mean perplexity.
    """
    # A probability that underflows to exactly 0 would give 0 * log(0) = NaN;
    # bounding the argument of the log keeps that term at 0 instead.
    log_preds = tf.log(tf.maximum(preds, 1e-12))
    entropy = tf.reduce_sum(-preds * log_preds, axis=-1)
    return tf.reduce_mean(tf.exp(entropy))


# Iterator defined only by the element types and shapes of the training
# dataset; it is bound to a concrete dataset later through make_initializer.
iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                           train_dataset.output_shapes)

next_element = iterator.get_next()

# next_element is a tuple of (images, labels) pairs (see build_dataset, which
# zips one dataset per dance); regroup it into all images and all labels.
inputs, labels = zip(*next_element)

# Join the per-dance pieces along the first axis into a single batch.
inputs = tf.concat(inputs, 0)
# Reshape so that the last dimension is 3, then cast to float32.
inputs = tf.to_float(tf.reshape(inputs, tf.concat([tf.shape(inputs)[:-1], [3]], 0)))
labels = tf.concat(labels, 0)
print(labels)
# One-hot targets with NUM_DANCES classes for the cross-entropy loss.
labels_oh = tf.one_hot(labels, NUM_DANCES, dtype=tf.float32)

# Convolution layer objects conv1..conv5.  They are only constructed here;
# the code that applies them to the inputs is commented out further below.
conv1 = tf.layers.Conv2D(
    filters=96,
    kernel_size=11,
    strides=(4, 4),
    activation=tf.nn.relu,
    kernel_initializer=tf.truncated_normal_initializer(stddev=STDDEV),
    name='conv1',
)

conv2 = tf.layers.Conv2D(
    filters=256,
    kernel_size=5,
    strides=(1, 1),
    padding='same',
    activation=tf.nn.relu,
    kernel_initializer=tf.truncated_normal_initializer(stddev=STDDEV),
    name='conv2',
)

conv3 = tf.layers.Conv2D(
    filters=384,
    kernel_size=3,
    strides=(1, 1),
    padding='same',
    activation=tf.nn.relu,
    kernel_initializer=tf.truncated_normal_initializer(stddev=STDDEV),
    name='conv3',
)

conv4 = tf.layers.Conv2D(
    filters=384,
    kernel_size=3,
    strides=(1, 1),
    padding='same',
    activation=tf.nn.relu,
    kernel_initializer=tf.truncated_normal_initializer(stddev=STDDEV),
    name='conv4',
)

conv5 = tf.layers.Conv2D(
    filters=256,
    kernel_size=3,
    strides=(1, 1),
    padding='same',
    activation=tf.nn.relu,
    kernel_initializer=tf.truncated_normal_initializer(stddev=STDDEV),
    name='conv5',
)

# Values fed at run time.  dropout_rate is fed by the training/evaluation code
# but is not used by the layers that are currently active.
dropout_rate = tf.placeholder(tf.float32)
learning_rate = tf.placeholder(tf.float32)

# Flatten every 224x224x3 image into one vector (224 * 224 * 3 = 150528).
h = tf.reshape(inputs, [-1, 224 * 224 * 3])
# The layer must output raw logits (activation_fn=None): softmax is applied
# afterwards both inside the cross-entropy loss and when computing `preds`, so
# a softmax here would be applied twice.  The output width follows NUM_DANCES
# so that it always matches the depth of the one-hot labels.
logits = tf.contrib.layers.fully_connected(
    h, NUM_DANCES, activation_fn=None,
    weights_initializer=tf.truncated_normal_initializer(stddev=STDDEV)
)

# h = conv1(inputs)
# h = tf.layers.max_pooling2d(h, 3, 2)
# h = tf.nn.local_response_normalization(h)

# h = conv2(h)
# h = tf.layers.max_pooling2d(h, 3, 2)
# h = tf.nn.local_response_normalization(h)

# h = conv3(h)

# h = conv4(h)

# h = conv5(h)
# h = tf.layers.max_pooling2d(h, 3, 2)

# h = tf.nn.dropout(h, 1. - dropout_rate)

# h = tf.reshape(h, [-1, 9216])

# h = tf.contrib.layers.fully_connected(
#     h, 4096, weights_initializer=tf.truncated_normal_initializer(stddev=STDDEV)
# )

# h = tf.nn.dropout(h, 1. - dropout_rate)

# h = tf.contrib.layers.fully_connected(
#     h, 4096, weights_initializer=tf.truncated_normal_initializer(stddev=STDDEV)
# )

# logits = tf.contrib.layers.fully_connected(
#     h, 10, activation_fn=tf.nn.softmax, weights_initializer=tf.truncated_normal_initializer(stddev=STDDEV)
# )

# Mean cross-entropy over the batch.  The loss has to be a scalar: the
# training code formats it with '{:.4}' and compares it with best_loss, both
# of which fail for a per-example vector.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels_oh)
)

preds = tf.nn.softmax(logits)

accuracy = tf_accuracy(logits, labels)

perplexity = tf_perplexity(preds)

# Sum of L2 penalties over everything registered in the WEIGHTS collection.
# NOTE(review): confirm that the layers above actually add their kernels to
# tf.GraphKeys.WEIGHTS; if the collection is empty this term is simply 0.
l2_loss = sum(map(tf.nn.l2_loss, tf.get_collection(tf.GraphKeys.WEIGHTS)))

opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
train_op = opt.minimize(loss + REG_RATE * l2_loss)

saver = tf.train.Saver(max_to_keep=None)

# Ops that point the shared iterator at the training / validation / test data.
training_init_op = iterator.make_initializer(train_dataset)
validation_init_op = iterator.make_initializer(valid_dataset)
test_init_op = iterator.make_initializer(test_dataset)

In [None]:
# Number of epochs without a better validation loss after which the learning
# rate is multiplied by DECAY.
LEARNING_RATE_PATIENCE = 10
# Number of epochs without a better validation loss after which training stops.
STOP_PATIENCE = 20
# Training metrics are logged and printed every STEP_PERIOD steps (None disables it).
STEP_PERIOD = 100
INIT_LEARNING_RATE = 0.01
DECAY = 0.1

# Output directories for the metric logs of each split and for the checkpoints.
train_results_path = 'results/train'
valid_results_path = 'results/valid'
test_results_path = 'results/test'
checkpoint_path = 'checkpoints'

# Make sure every output directory exists; a regular file that occupies one of
# the directory names is deleted first.
for p in (train_results_path, valid_results_path, test_results_path, checkpoint_path):
    if os.path.isdir(p):
        continue
    if os.path.isfile(p):
        os.remove(p)
    os.makedirs(p)
        
def log(dataset='train', step=None, epoch=None, **kwargs):
    """Write metric values to text files under ``results/<dataset>/``.

    Every keyword argument ``name=value`` goes to its own file:
    ``<name>_step.txt`` when ``step`` is given, ``<name>.txt`` otherwise.

    Args:
        dataset: sub-directory of ``results`` to write to ('train', 'valid'
            or 'test').  The directory must already exist.
        step: training step of the record; takes precedence over ``epoch``.
        epoch: epoch of the record, used when ``step`` is None.
        **kwargs: metric name -> value.

    For 'test' a file holds just the value and is overwritten on each call.
    For the other datasets one line ``"<step or epoch> <value>"`` is appended
    per call, so the file accumulates the history of the metric (opening
    with 'w' used to keep only the most recent record).
    """
    appendix = '.txt' if step is None else '_step.txt'
    first_value = epoch if step is None else step
    mode = 'w' if dataset == 'test' else 'a'
    for k, v in kwargs.items():
        with open(os.path.join('results', dataset, k + appendix), mode) as f:
            if dataset == 'test':
                f.write('{}\n'.format(v))
            else:
                f.write('{} {}\n'.format(first_value, v))
            
            
def test(dataset):
    """Evaluate the model on the validation or the test data.

    Uses the global session ``sess``.  The shared iterator is pointed at the
    validation data when ``dataset == 'valid'`` and at the test data for any
    other value; batches are then consumed until the data is exhausted, with
    ``dropout_rate`` fed as 0.

    Returns:
        Tuple ``(loss, accuracy, perplexity)``, each averaged over the batches.
    """
    sess.run(validation_init_op if dataset == 'valid' else test_init_op)
    fetches = [loss, accuracy, perplexity]
    num_batches = 0
    loss_total, acc_total, perpl_total = 0, 0, 0
    while True:
        try:
            batch_loss, batch_acc, batch_perpl = sess.run(fetches, feed_dict={dropout_rate: 0.})
        except tf.errors.OutOfRangeError:
            # Raised by the iterator once the data is exhausted.
            break
        loss_total += batch_loss
        acc_total += batch_acc
        perpl_total += batch_perpl
        num_batches += 1
    loss_total /= num_batches
    acc_total /= num_batches
    perpl_total /= num_batches
    return loss_total, acc_total, perpl_total

step = 0
epoch = 0
lr_impatience = 0
stop_impatience = 0
lr = INIT_LEARNING_RATE
# Checkpoint prefix of the model with the lowest validation loss so far.
best_checkpoint = os.path.join(checkpoint_path, 'best')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Baseline: validation metrics of the freshly initialised model.
    l, acc, perpl = test('valid')
    print('EPOCH {} | step {} | loss {:.4} | accuracy {:.4} | perplexity {:.4}'.format(epoch, step, l, acc, perpl))
    log(epoch=epoch, loss=l, accuracy=acc, perplexity=perpl, dataset='valid')
    best_loss = l
    # Saver.save needs the session as its first argument.
    saver.save(sess, best_checkpoint)
    while stop_impatience < STOP_PATIENCE:
        # One pass over the training data.
        sess.run(training_init_op)
        while True:
            try:
                _, l, acc, perpl = sess.run(
                    [train_op, loss, accuracy, perplexity],
                    feed_dict={learning_rate: lr, dropout_rate: 0.5}
                )
            except tf.errors.OutOfRangeError:
                break
            step += 1
            if STEP_PERIOD is not None and step % STEP_PERIOD == 0:
                log(step=step, loss=l, accuracy=acc, perplexity=perpl)
                print('step {} | loss {:.4} | accuracy {:.4}'.format(step, l, acc))
        epoch += 1
        l, acc, perpl = test('valid')
        print('EPOCH {} | step {} | loss {:.4} | accuracy {:.4} | perplexity {:.4}'.format(epoch, step, l, acc, perpl))
        log(epoch=epoch, loss=l, accuracy=acc, perplexity=perpl, dataset='valid')
        if l < best_loss:
            # Remember the new best value; otherwise every later epoch would
            # still be compared with the loss of the untrained model.
            best_loss = l
            lr_impatience, stop_impatience = 0, 0
            saver.save(sess, best_checkpoint)
        else:
            lr_impatience += 1
            stop_impatience += 1
        if lr_impatience >= LEARNING_RATE_PATIENCE:
            lr *= DECAY
            # Start a new patience window; without the reset the rate would be
            # decayed again after every following epoch without improvement.
            lr_impatience = 0
    # Evaluate the best model on the test data, not the weights of the last epoch.
    saver.restore(sess, best_checkpoint)
    l, acc, perpl = test('test')
    log(loss=l, accuracy=acc, perplexity=perpl, dataset='test')
    print('Testing! EPOCH {} | step {} | loss {:.4} | accuracy {:.4} | perplexity {:.4}'.format(epoch, step, l, acc, perpl))

In [None]:
# Visual check of the input pipeline: run through the training data 10 times
# and show the first image of each of the first five batches of every pass.
# `dance_zip` only exists inside build_dataset, so the batched training dataset
# is used instead.  Separate names keep the `iterator` / `next_element` of the
# model graph from being overwritten.
preview_iterator = train_dataset.make_initializable_iterator()
preview_element = preview_iterator.get_next()
with tf.Session() as sess:
    for _ in range(10):
        sess.run(preview_iterator.initializer)
        i = 0
        while True:
            try:
                res = sess.run(preview_element)
            except tf.errors.OutOfRangeError:
                break
            if i < 5:
                print(i)
                # res[0] is the (images, labels) pair of the first dance;
                # take the first image of that batch.
                array = res[0][0][0]
                # The resized images are floats in the 0..255 range; imshow
                # expects floats in 0..1, so convert to 8-bit integers.
                plt.imshow(array.astype('uint8'))
                plt.show()
            i += 1
        print('*' * 10)
