In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
from tensorflow import keras
import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split


from input_generator import load_dataset, load_dataset_with_lables
from data_parser import get_tags_and_labels, get_vocab, get_tags

In [None]:
# Load a dataframe with the raw `tags` string column and the `category_id` label column.
# NOTE(review): the extension is '.cvs' rather than '.csv' — confirm it matches the file on disk.
csvpath = 'data/CAvideos.cvs'
tab_dataframe = get_tags_and_labels(csvpath)

# Number of tag columns kept per row.
max_of_tags = 15

# Split the tag string on "." into one column per tag position.
new = tab_dataframe["tags"].str.split(".", expand=True)

# Force exactly `max_of_tags` positional columns: positions that no row reaches
# are created as NaN (indexing them directly would raise KeyError), and any
# positions beyond the limit are dropped.
padded_tags = new.reindex(columns=range(max_of_tags))

for i in range(max_of_tags):
    tab_dataframe["tags" + str(i)] = padded_tags[i]

# Drop the original combined column and mark every missing value as "notags".
tab_dataframe = tab_dataframe.drop(columns=["tags"]).fillna("notags")

tab_dataframe.head()

In [None]:
# Build the tag -> index lookup from the vocabulary of the CSV's tags.
# Slot 0 is overwritten with the 'notags' placeholder, so whatever vocabulary
# entry was enumerated first no longer has an index of its own.
index_to_tag = dict(enumerate(get_vocab(get_tags(csvpath))))
index_to_tag[0] = 'notags'

# Invert index -> tag into tag -> index.
voc_di = {tag: position for position, tag in index_to_tag.items()}

In [None]:
# Replace each tag string with its vocabulary index, one tag column at a time.
tag_columns = ['tags{}'.format(position) for position in range(0, max_of_tags)]
for column in tag_columns:
    tab_dataframe[column] = tab_dataframe[column].map(voc_di)

# Tags absent from the lookup come back as NaN from .map(); encode them as 0.0.
tab_dataframe.fillna(0.0, inplace=True)
tab_dataframe.head()

In [None]:
from sklearn import preprocessing

# Standardize the feature columns only. The label column is excluded because
# scaling it would turn the class ids into arbitrary floats.
label_column = 'category_id'
feature_columns = [col for col in tab_dataframe.columns if col != label_column]

df = tab_dataframe.copy()
x = df[feature_columns].values
standard_scaler = preprocessing.StandardScaler()
x_scaled = standard_scaler.fit_transform(x)
df[feature_columns] = x_scaled

# NOTE(review): the cells below still build the datasets from `tab_dataframe`,
# not from this scaled copy — confirm which frame is meant to be trained on.
# Show the scaled frame (the unscaled one is unchanged by this cell).
df.head()

In [None]:
# Split the dataframe into train / validation / test parts, each returned as a
# (features, labels) pair with 'category_id' passed as the label column name.
# train_ds, val_ds, test_ds = load_dataset(tab_dataframe)
dataset_splits = load_dataset_with_lables(tab_dataframe, 'category_id')
(train_ds, train_lb), (val_ds, val_lb), (test_ds, test_lb) = dataset_splits


In [None]:
# First 5 training examples and their labels. Both slices cover rows 0-4 so
# each printed feature row lines up with the label displayed below it.
print(train_ds.values[0:5])
train_lb.values[0:5]

In [None]:
# Width of the first hidden layer; the second hidden layer is twice as wide.
hidden = 32

# Number of units in the softmax output layer.
# NOTE(review): presumably sized so every category_id value is a valid class
# index for the sparse loss — confirm against the label range.
num_classes = 44

# Take the input width from the training features themselves, so the model
# always matches the data it is fitted on (the name previously used here was
# never defined in the notebook).
input_dim = train_ds.values.shape[1]

model = tf.keras.Sequential([
    layers.Flatten(input_shape=(input_dim,)),
    layers.Dense(hidden, activation='relu'),
    layers.Dropout(0.7),
    layers.Dense(2 * hidden, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.7),
    layers.Dense(num_classes, activation='softmax',
                 kernel_regularizer=keras.regularizers.l2(0.01)),
])

model.compile(optimizer='nadam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy', 'sparse_categorical_crossentropy'],
              )

# Pass plain arrays for validation as well, consistent with the training inputs.
history = model.fit(train_ds.values, train_lb.values,
                    batch_size=64,
                    epochs=100,
                    validation_data=(val_ds.values, val_lb.values))
