In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split


from input_generator import load_dataset
from data_parser import get_tags_and_labels, get_vocab, get_tags

In [None]:
# Load the dataframe holding the raw "tags" string and the "category_id" label.
# NOTE(review): the extension is '.cvs', not '.csv' — confirm it matches the file on disk.
csvpath = 'data/CAvideos.cvs'
tab_dataframe = get_tags_and_labels(csvpath)

# Number of per-tag feature columns (tags0 .. tags123) that the later cells expect.
NUM_TAG_COLUMNS = 124

# Split every tags string on "." into one column per tag.
# NOTE(review): presumably get_tags_and_labels joins the tags with "." — verify the delimiter.
new = tab_dataframe["tags"].str.split(".", expand=True)

# Keep exactly NUM_TAG_COLUMNS columns: extra splits are dropped and missing ones
# are created empty, so rows with few tags no longer cause a KeyError.
tag_columns = new.reindex(columns=range(NUM_TAG_COLUMNS)).add_prefix("tags")

# Replace the raw tags column with the per-tag columns in one concat (inserting
# 124 columns one by one fragments the frame), then mark empty slots as "NA".
tab_dataframe = pd.concat(
    [tab_dataframe.drop(columns=["tags"]), tag_columns], axis=1
).fillna("NA")

# Show only a preview instead of dumping the whole frame.
tab_dataframe.head()

In [None]:
# Build the tag -> integer index lookup used to encode the tag columns.
vocab = get_vocab(get_tags(csvpath))

# Index 0 is reserved for the "NA" padding value, so real tags are numbered
# from 1. Numbering from 0 made the first vocab entry share index 0 with "NA",
# which left that tag indistinguishable from padding after encoding.
voc_di = {vo: i for i, vo in enumerate(vocab, start=1)}
voc_di['NA'] = 0

In [None]:
# Encode each of the 124 tag columns: every tag string is replaced by the
# integer stored for it in voc_di. Values that are not keys of voc_di come
# out of Series.map as NaN.
tag_names = ['tags{}'.format(position) for position in range(0, 124)]
for tag_name in tag_names:
    encoded = tab_dataframe[tag_name].map(voc_di)
    tab_dataframe[tag_name] = encoded

tab_dataframe

In [None]:
# Scale every feature column by its own maximum; the label column
# 'category_id' is left untouched.
feature_names = [name for name in tab_dataframe.columns if name != 'category_id']
for name in feature_names:
    # A column whose maximum is 0 is kept as-is (dividing would be 0 / 0).
    if tab_dataframe[name].max() == 0:
        continue
    tab_dataframe[name] = tab_dataframe[name] / tab_dataframe[name].max()

tab_dataframe

In [None]:
# Create train, validation and test examples from the dataframe.
# load_dataset is a project helper imported from input_generator; the unpacking
# below relies on it returning exactly three objects.
# NOTE(review): presumably these are batched tf.data datasets of
# (features dict, label) pairs, given how train_ds is iterated in later cells —
# confirm against input_generator.
train_ds, val_ds, test_ds = load_dataset(tab_dataframe)

In [None]:
# Peek at the first training batch to sanity-check the input pipeline.
for feature_batch, label_batch in train_ds.take(1):
    # The message used to say "ages" (a leftover from a different dataset);
    # the values printed are the 'tags0' feature, so the label now says so.
    print('A batch of tags0:', feature_batch['tags0'])
    print('A batch of targets:', label_batch)


In [None]:
# Keep the first element (index 0) of the first item yielded by train_ds as a
# sample input for demo().
batch_iterator = iter(train_ds)
example_batch = next(batch_iterator)[0]


def demo(feature_column):
    """Apply a feature column to the sample batch and print the result.

    Args:
        feature_column: passed unchanged to ``layers.DenseFeatures``. Note that
            inside this function the name shadows the ``feature_column``
            module imported at the top of the notebook.

    Prints the output of the resulting layer on the global ``example_batch``
    after converting it with ``.numpy()``; nothing is returned.
    """
    dense_features = layers.DenseFeatures(feature_column)
    transformed = dense_features(example_batch)
    print(transformed.numpy())


In [None]:
# Names of the 124 tag columns: tags0 .. tags123.
heads = ['tags{}'.format(i) for i in range(0, 124)]

# One numeric feature column per tag column, in the same order as heads.
feature_columns = [feature_column.numeric_column(header) for header in heads]


In [None]:
# Input layer that turns the dict of tag features into one dense tensor.
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

# Feature layer -> two 128-unit ReLU layers -> a single sigmoid output unit.
# NOTE(review): one sigmoid unit with binary_crossentropy assumes 0/1 targets;
# the label column is named category_id, so confirm that load_dataset really
# produces binary labels.
model = tf.keras.Sequential()
model.add(feature_layer)
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# run_eagerly=True makes Keras execute the training step eagerly.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
    run_eagerly=True,
)

# Train for 5 epochs, validating on val_ds after each one.
model.fit(train_ds, epochs=5, validation_data=val_ds)
