In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [47]:
import pathlib
import os

user_file = "\\Data\\Combined Data.csv"


path = pathlib.Path().resolve()
parent_path = path.parent.absolute()

user_dataframe = pd.read_csv(str(parent_path) + user_file)

unique_masalah_user = np.unique(' '.join(user_dataframe['Tipe_Masalah']).split(' '))
unique_masalah_caregiver = np.unique(' '.join(user_dataframe['Caregiver_Tipe_Masalah']).split(' '))

In [48]:
def convert_categorical_data(df, col='Tipe_Masalah'):
    ### Join every string in every row, split the result, pull out the unique values.
    genres = np.unique(' '.join(df[col]).split(' '))
    ### Drop 'NA'
    genres = np.delete(genres, np.where(genres == ''))
    if(col=='Tipe_Masalah'):
        for genre in genres:
            df[genre] = df[col].str.contains(genre).astype('int')
    else:
        for genre in genres:
            df['Caregiver-'+genre] = df[col].str.contains(genre).astype('int')
    df.drop(col, axis=1, inplace=True)

In [49]:
convert_categorical_data(user_dataframe)
convert_categorical_data(user_dataframe, col='Caregiver_Tipe_Masalah')

In [50]:
user_dataframe.head()

Unnamed: 0,USER_ID,Gender,Age,CAREGIVER_ID,Caregiver_gender,Caregiver_Age,ADHD-Hiperaktif-dan-kurang-fokus,Depresi,Gangguan-kecemasan,Gangguan-makan,Gangguan-stres-pascatrauma,Skizofrenia,Caregiver-ADHD-Hiperaktif-dan-kurang-fokus,Caregiver-Depresi,Caregiver-Gangguan-kecemasan,Caregiver-Gangguan-makan,Caregiver-Gangguan-stres-pascatrauma,Caregiver-Skizofrenia
0,100,Perempuan,34,8,Pria,28,0,0,0,1,0,1,0,0,0,1,1,1
1,99,Pria,21,27,Pria,31,0,1,0,1,0,0,1,1,0,1,0,0
2,98,Perempuan,31,27,Pria,31,0,1,0,1,0,0,1,1,0,1,0,0
3,97,Perempuan,21,26,Perempuan,24,0,1,0,0,0,1,0,1,0,0,1,1
4,96,Perempuan,36,17,Perempuan,20,1,0,1,0,0,0,1,0,0,0,0,0


In [6]:
caregiver_dataframe.head()

Unnamed: 0,CAREGIVEN_ID,Gender,Age,Unnamed: 4,ADHD-(Hiperaktif-dan-kurang-fokus),Depresi,Gangguan-kecemasan,Gangguan-makan,Gangguan-stres-pascatrauma,Skizofrenia
0,1,Perempuan,28,1,0,0,0,1,1,0
1,2,Pria,33,1,0,1,0,0,0,1
2,3,Pria,19,1,0,0,0,0,0,0
3,4,Perempuan,18,1,0,0,0,1,0,1
4,5,Perempuan,36,1,0,0,1,0,0,1


In [7]:
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  ds = tf.data.Dataset.from_tensor_slices(dict(dataframe))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

In [8]:
batch_size = 5# A small batch sized is used for demonstration purposes
caregiver_ds = df_to_dataset(caregiver_dataframe, batch_size=batch_size)
user_ds = df_to_dataset(user_dataframe, batch_size=batch_size)

In [9]:
for feature_batch in user_ds.take(1):
  print('Every feature:', list(feature_batch))
  print('A batch of ages:', feature_batch)

Every feature: ['USER_ID', 'Gender', 'Age', 'ADHD-Hiperaktif-dan-kurang-fokus', 'Depresi', 'Gangguan-kecemasan', 'Gangguan-maka', 'Gangguan-makan', 'Gangguan-stres-pascatrauma', 'Skizofrenia']
A batch of ages: {'USER_ID': <tf.Tensor: shape=(5,), dtype=int64, numpy=array([91, 40, 60, 51, 42], dtype=int64)>, 'Gender': <tf.Tensor: shape=(5,), dtype=string, numpy=array([b'Pria', b'Pria', b'Pria', b'Pria', b'Pria'], dtype=object)>, 'Age': <tf.Tensor: shape=(5,), dtype=int64, numpy=array([35, 44, 28, 19, 21], dtype=int64)>, 'ADHD-Hiperaktif-dan-kurang-fokus': <tf.Tensor: shape=(5,), dtype=int32, numpy=array([0, 0, 0, 0, 1])>, 'Depresi': <tf.Tensor: shape=(5,), dtype=int32, numpy=array([1, 0, 0, 1, 1])>, 'Gangguan-kecemasan': <tf.Tensor: shape=(5,), dtype=int32, numpy=array([0, 0, 1, 0, 0])>, 'Gangguan-maka': <tf.Tensor: shape=(5,), dtype=int32, numpy=array([0, 0, 0, 0, 0])>, 'Gangguan-makan': <tf.Tensor: shape=(5,), dtype=int32, numpy=array([0, 0, 0, 0, 0])>, 'Gangguan-stres-pascatrauma': <t

In [10]:
# We will use this batch to demonstrate several types of feature columns
example_batch = next(iter(user_ds))

In [11]:
# A utility method to create a feature column
# and to transform a batch of data
def demo(feature_column):
  feature_layer = layers.DenseFeatures(feature_column)
  print(feature_layer(example_batch).numpy())

In [12]:
age = feature_column.numeric_column('Age')
age_buckets = feature_column.bucketized_column(age, boundaries=[17, 24, 30, 44])
demo(age_buckets)

[[0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]]


KeyError: 'ADHD-(Hiperaktif-dan-kurang-fokus)'

In [14]:
 user_dataframe['ADHD-Hiperaktif-dan-kurang-fokus'].unique()

array([0, 1])

In [58]:
feature_columns = []

number_feature = ["Age"]
number_feature += unique_masalah_user.tolist()
number_feature += unique_masalah_caregiver.tolist()
print(number_feature)
# numeric cols
for header in number_feature:
    if(header == ""):
        pass
    feature_columns.append(feature_column.numeric_column(header))

age = feature_column.numeric_column('Age')
age_buckets = feature_column.bucketized_column(age, boundaries=[17, 21, 25, 29, 33, 37, 41, 46])
feature_columns.append(age_buckets)

['Age', 'ADHD-Hiperaktif-dan-kurang-fokus', 'Depresi', 'Gangguan-kecemasan', 'Gangguan-makan', 'Gangguan-stres-pascatrauma', 'Skizofrenia', '', 'ADHD-Hiperaktif-dan-kurang-fokus', 'Depresi', 'Gangguan-kecemasan', 'Gangguan-makan', 'Gangguan-stres-pascatrauma', 'Skizofrenia']


In [16]:
# indicator_columns
indicator_column_names = ['Gender','Caregiver_Gender']
for col_name in indicator_column_names:
  categorical_column = feature_column.categorical_column_with_vocabulary_list(
      col_name, user_dataframe[col_name].unique())
  indicator_column = feature_column.indicator_column(categorical_column)
  feature_columns.append(indicator_column)

In [17]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [18]:
def create_feature_layer(df):
    feature_columns = []

    for header in ['ADHD-(Hiperaktif-dan-kurang-fokus)','Depresi','Gangguan-kecemasan','Gangguan-makan','Gangguan-stres-pascatrauma', 'Skizofrenia','Age']:
        feature_columns.append(feature_column.numeric_column(header))
    
    # numeric cols

    age = feature_column.numeric_column('Age')
    age_buckets = feature_column.bucketized_column(age, boundaries=[17, 21, 25, 29, 33, 37, 41, 46])
    feature_columns.append(age_buckets)

    # indicator_columns
    indicator_column_names = ['Gender']
    for col_name in indicator_column_names:
      categorical_column = feature_column.categorical_column_with_vocabulary_list(
          col_name, user_dataframe[col_name].unique())
      indicator_column = feature_column.indicator_column(categorical_column)
      feature_columns.append(indicator_column)

In [19]:
model = tf.keras.Sequential([
    feature_layer,
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(user_ds,
          epochs=10)

Epoch 1/10


ValueError: in user code:

    File "C:\Users\ASUS\anaconda3\envs\notebook\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\ASUS\anaconda3\envs\notebook\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\ASUS\anaconda3\envs\notebook\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\ASUS\anaconda3\envs\notebook\lib\site-packages\keras\engine\training.py", line 861, in train_step
        self._validate_target_and_loss(y, loss)
    File "C:\Users\ASUS\anaconda3\envs\notebook\lib\site-packages\keras\engine\training.py", line 818, in _validate_target_and_loss
        raise ValueError(

    ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.BinaryCrossentropy object at 0x000001E9FBC049A0>, and therefore expects target data to be provided in `fit()`.
