# In this notebook, we'll show how to explore data in the TFRecord format on a small scale. To reach the scale that TensorFlow/Google Cloud can handle, please refer to the code in their GitHub repository: https://github.com/google/youtube-8m

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output
#print(check_output(["ls", "../input"]).decode("utf8"))

# Any results you write to the current directory are saved as output.

In [2]:
import tensorflow as tf
import numpy as np
from IPython.display import YouTubeVideo


# Paths of the TFRecord files explored in this notebook.
# Adjust both to wherever the files live in your environment.
video_lvl_record = "traina1.tfrecord"
# The frame-level cell further down iterates over `frame_lvl_record`,
# so the name has to be bound here or that cell fails with a NameError.
frame_lvl_record = "../input/frame_level/train-1.tfrecord"

In [3]:
# One entry per record in the video-level file, kept in file order.
vid_ids, labels, mean_rgb, mean_audio = [], [], [], []

for serialized in tf.python_io.tf_record_iterator(video_lvl_record):
    # Parse the serialized tf.train.Example and grab its feature map once.
    feats = tf.train.Example.FromString(serialized).features.feature

    # The id is stored as bytes; keep it as a str.
    raw_id = feats['video_id'].bytes_list.value[0]
    vid_ids.append(raw_id.decode(encoding='UTF-8'))

    labels.append(feats['labels'].int64_list.value)
    mean_rgb.append(feats['mean_rgb'].float_list.value)
    mean_audio.append(feats['mean_audio'].float_list.value)

In [4]:
# Quick sanity check of what was loaded from the video-level record.
print('Number of videos in this tfrecord: ', len(mean_rgb))

first_rgb = mean_rgb[0]
print('First video feature length', len(first_rgb))
print('First 20 features of the first youtube video (', vid_ids[0], ')')
print(first_rgb[:20])

Number of videos in this tfrecord:  1218
First video feature length 1024
First 20 features of the first youtube video ( a1lWP30U9EU )
[0.820950984954834, -0.08375737816095352, -0.3228481709957123, 0.5311046242713928, -0.37270453572273254, 0.3170918822288513, -0.5742282271385193, -0.6536586880683899, -0.3352373242378235, -0.8848564028739929, 0.12176278978586197, -0.7401829957962036, 0.4177038371562958, -0.36411207914352417, 0.700456440448761, 0.06471271067857742, 0.928856611251831, -0.2995685338973999, -0.08016052097082138, -0.2395210862159729]


In [9]:
# Display the complete mean-audio feature vector of the video at index 7.
mean_audio[7]

[0.9900146126747131, -0.8069257140159607, -0.5610304474830627, -0.5356910824775696, 0.3517039120197296, 0.8091915249824524, 0.07848692685365677, 1.1496354341506958, -0.31772080063819885, 0.07434988766908646, -0.348662406206131, 1.6425464153289795, -0.6701448559761047, -0.11466110497713089, -0.1431894451379776, -0.04665851965546608, -0.02192247100174427, -0.24342645704746246, -0.3828791677951813, -0.21386386454105377, -0.4283865988254547, -0.7872747778892517, 0.03677177429199219, 1.011734127998352, -0.31591084599494934, -0.065361388027668, -0.994816243648529, 0.02617061324417591, -0.181457057595253, -1.0243788957595825, -0.20326270163059235, -0.33444133400917053, 0.6745653748512268, 0.2861146032810211, 0.7539448142051697, -1.3617199659347534, -1.0062793493270874, 0.8640934824943542, -0.6804874539375305, -0.7529718279838562, -0.42407718300819397, -0.286089688539505, 0.6047528386116028, 0.3712686598300934, 0.21793963015079498, -0.1704249531030655, 0.6775819659233093, 0.028583886101841927,

In [8]:
def play_one_vid(record_name, video_index):
    """Return the YouTube id of the video at position ``video_index``.

    The id is looked up in the notebook-level ``vid_ids`` list.
    ``record_name`` is accepted for the caller's convenience but is not
    used by the lookup.
    """
    video_id = vid_ids[video_index]
    return video_id
    
# Show the video at index 7 as the cell's output.
# This worked in my local Jupyter notebook, but the player does not show up on Kaggle kernels.
YouTubeVideo(play_one_vid(video_lvl_record, 7))

In [5]:
# Per-video feature dimensionality; only the first five videos are shown.
audio_lengths = [len(vec) for vec in mean_audio]
rgb_lengths = [len(vec) for vec in mean_rgb]

print('mean_audio has length of: ')
print(audio_lengths[:5])
print('mean_rgb has length of: ')
print(rgb_lengths[:5])

mean_audio has length of: 
[128, 128, 128, 128, 128]
mean_rgb has length of: 
[1024, 1024, 1024, 1024, 1024]


In [None]:
# Now, let's read the frame-level data.
# Due to execution time, only the first video of the record is decoded.
# `frame_lvl_record` must point at a frame-level TFRecord file
# (set it in the configuration cell next to `video_lvl_record`).


def _decode_frame(feature):
    """Decode one frame's raw byte string into a float32 vector.

    Each byte becomes one float32 value, which is what
    tf.cast(tf.decode_raw(..., tf.uint8), tf.float32).eval() produced.
    Doing it in NumPy avoids adding new graph ops and running a session
    for every single frame.
    """
    raw_bytes = feature.bytes_list.value[0]
    return np.frombuffer(raw_bytes, dtype=np.uint8).astype(np.float32)


feat_rgb = []
feat_audio = []

for example in tf.python_io.tf_record_iterator(frame_lvl_record):
    tf_seq_example = tf.train.SequenceExample.FromString(example)
    feature_list = tf_seq_example.feature_lists.feature_list

    # The audio list defines the frame count; rgb is indexed in lockstep with it.
    n_frames = len(feature_list['audio'].feature)

    rgb_frame = [_decode_frame(feature_list['rgb'].feature[i])
                 for i in range(n_frames)]
    audio_frame = [_decode_frame(feature_list['audio'].feature[i])
                   for i in range(n_frames)]

    feat_rgb.append(rgb_frame)
    feat_audio.append(audio_frame)
    # Stop after the first video.
    break

In [None]:
# Frame count of the first decoded video.
n_first_video_frames = len(feat_rgb[0])
print('The first video has %d frames' % n_first_video_frames)

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.manifold import TSNE
import numpy as np

# Now let's explore the labels a little
Find the most common labels in this record:

In [None]:
import networkx as nx
from itertools import combinations

# Build a label co-occurrence graph: two of the top-n labels are connected
# whenever they appear together on at least one video.
# NOTE(review): `top_n_labels` and `label_mapping` are not defined in any cell
# visible in this notebook; they have to exist before this cell is run.
G = nx.Graph()

# The top-n membership set is the same for every video, so build it once.
top_n_label_set = set(top_n_labels)

for list_of_nodes in labels:
    filtered_nodes = set(list_of_nodes).intersection(top_n_label_set)
    for node1, node2 in combinations(filtered_nodes, 2):
        # add_edge creates any endpoint that is not in the graph yet,
        # so separate add_node calls are not needed.
        G.add_edge(label_mapping[node1], label_mapping[node2])

nx.draw_networkx(G)

In [None]:
# Embed the mean-RGB vectors of all videos that carry a top-n label into 2-D
# with t-SNE and draw one colour per label.
# NOTE(review): `top_n_labels` is not defined in any cell visible in this
# notebook; it has to exist before this cell is run.

# One colour per label; derived from the label list itself so the two can
# never get out of step.
colors = plt.cm.rainbow(np.linspace(0, 1, len(top_n_labels)))

top_n_label_set = set(top_n_labels)
mean_rgb_top_n = []
labels_for_tsne = []
# Keep one (feature vector, label) pair for every top-n label on every video;
# a video with several top-n labels therefore appears several times.
for idx, list_of_nodes in enumerate(labels):
    for node in list_of_nodes:
        if node in top_n_label_set:
            mean_rgb_top_n.append(mean_rgb[idx])
            labels_for_tsne.append(node)


X_embedded = TSNE(n_components=2, random_state=0).fit_transform(mean_rgb_top_n)


fig = plt.figure()
ax = fig.add_subplot(111)

label_array = np.array(labels_for_tsne)
handles = []
for this_label, color in zip(top_n_labels, colors):
    X_embedded_filtered = X_embedded[label_array == this_label]
    # A bare RGBA row passed as `c` is ambiguous (it can be read as four
    # values to colour-map); a 2-D array with a single row means
    # "this one colour for all points".
    handles.append(ax.scatter(X_embedded_filtered[:, 0], X_embedded_filtered[:, 1],
                              c=color.reshape(1, -1), marker="o", edgecolor='none'))

ax.legend(handles, top_n_labels)

plt.show()

In [15]:
# Scratch array of zeros with shape (30, 10, 3); only the first triple is filled in.
a = np.zeros((30, 10, 3))
a[0, 0] = (1, 2, 3)

In [2]:
from tensorflow.contrib.slim.nets import vgg

In [4]:
import tensorflow.contrib.slim as slim
import tensorflow as tf
import numpy as  np

In [12]:
# Dummy input: a float32 tensor of zeros with shape (30, 300, 1024) plus a new
# axis inserted at position 3.
# (Earlier experiments used np.zeros([30, 224, 224]) and a reshape to [32, 32].)
image = tf.expand_dims(
    tf.convert_to_tensor(np.zeros((30, 300, 1024)), dtype=tf.float32),
    3)
print(image)

Tensor("ExpandDims_2:0", shape=(30, 300, 1024, 1), dtype=float32)


In [6]:
# Build slim's VGG-16 graph on `image` and display what the call returns
# (in the recorded output: an output tensor plus an OrderedDict of layer tensors).
vgg.vgg_16(image)

(<tf.Tensor 'vgg_16/fc8/squeezed:0' shape=(30, 1000) dtype=float32>,
 OrderedDict([('vgg_16/conv1/conv1_1',
               <tf.Tensor 'vgg_16/conv1/conv1_1/Relu:0' shape=(30, 224, 224, 64) dtype=float32>),
              ('vgg_16/conv1/conv1_2',
               <tf.Tensor 'vgg_16/conv1/conv1_2/Relu:0' shape=(30, 224, 224, 64) dtype=float32>),
              ('vgg_16/pool1',
               <tf.Tensor 'vgg_16/pool1/MaxPool:0' shape=(30, 112, 112, 64) dtype=float32>),
              ('vgg_16/conv2/conv2_1',
               <tf.Tensor 'vgg_16/conv2/conv2_1/Relu:0' shape=(30, 112, 112, 128) dtype=float32>),
              ('vgg_16/conv2/conv2_2',
               <tf.Tensor 'vgg_16/conv2/conv2_2/Relu:0' shape=(30, 112, 112, 128) dtype=float32>),
              ('vgg_16/pool2',
               <tf.Tensor 'vgg_16/pool2/MaxPool:0' shape=(30, 56, 56, 128) dtype=float32>),
              ('vgg_16/conv3/conv3_1',
               <tf.Tensor 'vgg_16/conv3/conv3_1/Relu:0' shape=(30, 56, 56, 256) dtype=float32>)

In [11]:
# Hand-built convolutional stack on `image`, printing the symbolic tensor after
# each stage. Shapes quoted below are taken from this cell's recorded output.

# Stage 1: two 3x3 convs with 64 filters and batch norm, stride 2 along the
# last spatial axis only -> (30, 300, 256, 64).
net = slim.conv2d(image, 64, [3, 3], stride=[1, 2], normalizer_fn=slim.batch_norm)
net = slim.conv2d(net, 64, [3, 3], stride=[1, 2], normalizer_fn=slim.batch_norm)
print(net)
net = slim.max_pool2d(net, [2, 2])
print(net)
# Stage 2: 128 filters; the first conv strides by 2 on both spatial axes.
net = slim.conv2d(net, 128, [3, 3], stride=[2, 2])
net = slim.conv2d(net, 128, [3, 3])
print(net)
net = slim.max_pool2d(net, [2, 2])
print(net)
# Stage 3: 256 filters, same pattern.
net = slim.conv2d(net, 256, [3, 3], stride=[2, 2])
net = slim.conv2d(net, 256, [3, 3])
print(net)
net = slim.max_pool2d(net, [2, 2])
print(net)
# Stage 4: 512 filters, same pattern.
net = slim.conv2d(net, 512, [3, 3], stride=[2, 2])
net = slim.conv2d(net, 512, [3, 3])
print(net)
# NOTE: the recorded output names this op "fully_connected_*/Relu", i.e.
# slim.relu adds a 512-unit fully connected layer, not a bare activation.
net = slim.relu(net, 512)
print(net)
net = slim.max_pool2d(net, [2, 2])
print(net)
# Stage 5: two more 512-filter convs with the default stride.
net = slim.conv2d(net, 512, [3, 3])
net = slim.conv2d(net, 512, [3, 3])
print(net)
net = slim.relu(net, 512)
print(net)
# After this pool the recorded spatial size is 1 x 1.
net = slim.max_pool2d(net, [2, 2])
print(net)
net = slim.fully_connected(net, 2048)
print(net)
# Drop axes 1 and 2 -> (30, 2048) in the recorded output.
net = tf.squeeze(net, [1, 2])
print(net)
# Earlier experiments, kept for reference:
# net = tf.reshape(net, [-1])
# print(net)
# net = slim.avg_pool2d(net, [2, 2])
# print(net)
# net = tf.reshape(net, [-1, 512])
# print(net)
# net = tf.reduce_sum(net, axis=[0]) / tf.convert_to_tensor(100, dtype = tf.float32)
# print(net.get_shape())

Tensor("Conv_31/Relu:0", shape=(30, 300, 256, 64), dtype=float32)
Tensor("MaxPool2D_15/MaxPool:0", shape=(30, 150, 128, 64), dtype=float32)
Tensor("Conv_33/Relu:0", shape=(30, 75, 64, 128), dtype=float32)
Tensor("MaxPool2D_16/MaxPool:0", shape=(30, 37, 32, 128), dtype=float32)
Tensor("Conv_35/Relu:0", shape=(30, 19, 16, 256), dtype=float32)
Tensor("MaxPool2D_17/MaxPool:0", shape=(30, 9, 8, 256), dtype=float32)
Tensor("Conv_37/Relu:0", shape=(30, 5, 4, 512), dtype=float32)
Tensor("fully_connected_9/Relu:0", shape=(30, 5, 4, 512), dtype=float32)
Tensor("MaxPool2D_18/MaxPool:0", shape=(30, 2, 2, 512), dtype=float32)
Tensor("Conv_39/Relu:0", shape=(30, 2, 2, 512), dtype=float32)
Tensor("fully_connected_10/Relu:0", shape=(30, 2, 2, 512), dtype=float32)
Tensor("MaxPool2D_19/MaxPool:0", shape=(30, 1, 1, 512), dtype=float32)
Tensor("fully_connected_11/Relu:0", shape=(30, 1, 1, 2048), dtype=float32)
Tensor("Squeeze_2:0", shape=(30, 2048), dtype=float32)


In [69]:
# Scratch: converting a Python list that wraps a tensor packs it under a new
# leading axis (the recorded output shows shape (1, 512)).

# np.ndarray(shape) hands back uninitialised memory, so the placeholder used to
# hold arbitrary values; np.zeros makes it deterministic.
a = np.zeros([2, 512])
a = tf.convert_to_tensor(a, dtype = tf.float32)

b = tf.convert_to_tensor([net], dtype = tf.float32)
b

<tf.Tensor 'packed_1:0' shape=(1, 512) dtype=float32>

In [79]:
# Apply one more 2x2 max-pool to `net`, under the explicit scope name 'pool'.
# NOTE(review): this rebinds `net` to its own pooled result, so each re-run of
# the cell pools the previous result again.
net = slim.max_pool2d(net, [2, 2], scope='pool')

In [40]:
# Average-pool dummy frame features over axis 1, dividing the per-example sum
# by a per-example frame count.
batch_size = 50
num_frames = [100] * batch_size
image = np.zeros((batch_size, 100, 1024))
model_input = tf.convert_to_tensor(image, dtype=tf.float32)

# Frame counts as a float column: one row per example.
num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
feature_size = model_input.get_shape().as_list()[2]
print(feature_size)

# Repeat every count across the feature axis so it lines up with the summed features.
tiled_counts = tf.tile(num_frames, [1, feature_size])
denominators = tf.reshape(tiled_counts, [-1, feature_size])
print("denominators: ", denominators)

avg_pooled = tf.reduce_sum(model_input, axis=[1]) / denominators
print(model_input)
print(tf.reduce_sum(model_input, axis=[1]))
print(avg_pooled)

1024
denominators:  Tensor("Reshape_9:0", shape=(50, 1024), dtype=float32)
Tensor("Const_14:0", shape=(50, 100, 1024), dtype=float32)
Tensor("Sum_12:0", shape=(50, 1024), dtype=float32)
Tensor("truediv_6:0", shape=(50, 1024), dtype=float32)


In [35]:
# Scratch check: convert a length-2 NumPy zeros array to a float32 tensor.
tf.convert_to_tensor(np.zeros([2]), dtype = tf.float32)

<tf.Tensor 'Const_12:0' shape=(2,) dtype=float32>

In [37]:
# Scratch check: a one-element array of zeros.
np.zeros([1])

array([ 0.])

In [42]:
# Scratch check: convert a one-element Python list to a float32 tensor.
tf.convert_to_tensor([1], dtype = tf.float32)

<tf.Tensor 'Const_16:0' shape=(1,) dtype=float32>

In [6]:
# Show the symbolic tensor currently bound to `net`.
# NOTE(review): the recorded output ("pool8/MaxPool", 1024 channels) does not
# match any cell visible in this notebook, so it presumably came from a cell
# that was edited or removed - confirm before relying on it.
print(net)

Tensor("pool8/MaxPool:0", shape=(30, 1, 1, 1024), dtype=float32)


In [6]:
# A stack of identical BasicLSTMCell layers wrapped in a single MultiRNNCell.
number_of_layers = 2
lstm_size = 1024

lstm_layers = [
    tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)
    for _ in range(number_of_layers)
]
stacked_lstm = tf.contrib.rnn.MultiRNNCell(lstm_layers)
print(stacked_lstm)

<tensorflow.contrib.rnn.python.ops.core_rnn_cell_impl.MultiRNNCell object at 0x10e368ba8>


In [9]:
# Dummy 5-D float32 input of zeros, plus a list holding the value 300 once per
# entry along the first axis (128 entries).
minput = tf.convert_to_tensor(np.zeros((128, 300, 1, 1, 1024)), dtype=tf.float32)
print(minput)
num_frames = [300] * 128

Tensor("Const_2:0", shape=(128, 300, 1, 1, 1024), dtype=float32)


In [21]:
# Inspect the RNN results.
# NOTE(review): `outputs` and `state` are not assigned in any cell visible in
# this notebook; the recorded op names ("rnn/...") suggest they came from an
# RNN call in a cell that is no longer present - confirm and restore it.
print(outputs)
print(state)
print('------------------')
# Hidden state `h` of the last entry in `state`
# (the recorded output shows `state` as a tuple of LSTMStateTuple).
print(state[-1].h)

Tensor("rnn/transpose:0", shape=(128, 300, 1024), dtype=float32)
(LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(?, 1024) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 1024) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 1024) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_5:0' shape=(?, 1024) dtype=float32>))
------------------
Tensor("rnn/while/Exit_5:0", shape=(?, 1024), dtype=float32)


In [27]:
# Display the entry at index 1 of `state` (an LSTMStateTuple in the recorded output).
state[1]

LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 1024) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_5:0' shape=(?, 1024) dtype=float32>)

In [28]:
# Reshape each 1024-long vector into a 32 x 32 grid, append a trailing axis,
# then split along axis 1 into a list of 300 tensors.
frames = tf.convert_to_tensor(np.zeros((128, 300, 1024)), dtype=tf.float32)
frames = tf.reshape(frames, [-1, 300, 32, 32])
frames = tf.expand_dims(frames, 4)
image = tf.unstack(frames, num=300, axis=1)
print(image)

[<tf.Tensor 'unstack_5:0' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:1' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:2' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:3' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:4' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:5' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:6' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:7' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:8' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:9' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:10' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:11' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:12' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:13' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:14' shape=(128, 32, 32, 1) dtype=float32>, <tf.Tensor 'unstack_5:15' shape=(1

In [29]:
# Experiment: feed the list of per-frame tensors straight into slim layers.
# In the recorded output the conv result is 5-D, (300, 128, 32, 32, 64), i.e.
# the list is treated as one stacked tensor with the list index as axis 0.
net = slim.conv2d(image, 64, [3, 3, 3])
print(net)
# The recorded op name is "fully_connected_*/Relu": slim.relu adds a 64-unit
# fully connected layer here.
net = slim.relu(net, 64)
print(net)
# NOTE(review): the recorded run of this cell ends in a ValueError raised by the
# pooling call (the message quotes [2, 2, 2], so the saved output presumably
# predates the current [2, 2] argument). Whether a 2-D pool can work on this
# 5-D tensor at all needs to be confirmed before the cell is kept.
net = slim.max_pool2d(net, [2, 2])
print(net)

Tensor("Conv_44/Relu:0", shape=(300, 128, 32, 32, 64), dtype=float32)
Tensor("fully_connected_13/Relu:0", shape=(300, 128, 32, 32, 64), dtype=float32)


ValueError: The `pool_size` argument must be a tuple of 2 integers. Received: [2, 2, 2]