In [1]:
import sys
sys.path.append('../../..')

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from sklearn import preprocessing

from utils.preprocessing import *
import core.config as conf

## Load data 

In [2]:
# Read a single shard of the raw dataset; the shard file name is appended
# directly to the configured raw-data path prefix.
shard_name = 'part-00000'
data_path = conf.raw_lzo_path + shard_name
data = read_data(data_path)

In [3]:
# The `engager_*` columns that are fed to the model as input features.
engager_columns = [
    'engager_follower_count',
    'engager_following_count',
    'engager_is_verified',
    'engager_account_creation',
]
engager_df = data[engager_columns]

In [4]:
# Binary target: 1 where `like_timestamp` is present and positive, 0 otherwise
# (missing timestamps are filled with 0 first, so they map to 0).
# Uses a vectorised comparison instead of a per-row Python lambda, which is
# far cheaper on a multi-million-row column and yields the same int64 values.
label = data['like_timestamp'].fillna(0).gt(0).astype('int64')

## Parameters

In [5]:
# Run configuration / hyperparameters.
# NOTE(review): in the cells visible in this notebook none of these constants
# are read -- the `model.fit` call passes its own literal `epochs=30` and
# `batch_size=32`, and the train/validation split is a fixed row offset.
# Confirm whether later cells rely on them before changing or removing any.
MODEL_NAME = 'autoencoder_test'  # presumably an identifier for saved artifacts -- TODO confirm
BATCH_SIZE = 2  # differs from the batch_size=32 literal used when fitting
MAX_SEQUENCE = 25  # not referenced in the visible cells
EPOCH = 30  # same value as the epochs literal used when fitting
UNITS = 1024  # not referenced in the visible cells
EMBEDDING_DIM = 256  # not referenced in the visible cells
VALIDATION_SPLIT = 0.1  # not referenced in the visible cells

## Preprocessing

In [6]:
def scaling(df_x):
    """Standardize every column of ``df_x`` with a freshly fitted StandardScaler.

    The scaler is fitted on the rows of ``df_x`` itself, so each column of the
    result is centred and scaled using statistics of the data passed in.
    Callers that split the data afterwards should be aware that those
    statistics then include the held-out rows.

    Args:
        df_x: DataFrame of numeric feature columns.

    Returns:
        A new DataFrame of the scaled values with the same column labels and
        the same row index as ``df_x``. ``df_x`` itself is not modified.
    """
    scaled_values = preprocessing.StandardScaler().fit_transform(df_x.values)
    # Keep the caller's index: rebuilding the frame without it would silently
    # renumber rows from 0 and break alignment with other Series/DataFrames
    # derived from the same source whenever that source has a non-default index.
    return pd.DataFrame(scaled_values, columns=df_x.columns, index=df_x.index)

In [7]:
# Replace the raw engager features with their standardized version.
# NOTE(review): this rebinds `engager_df`, so the unscaled values are only
# recoverable by re-running the selection cell above.
engager_df = scaling(engager_df)

In [8]:
# Show the standardized features (the notebook repr truncates to head/tail rows).
engager_df

Unnamed: 0,engager_follower_count,engager_following_count,engager_is_verified,engager_account_creation
0,-0.064095,-0.219199,-0.040756,0.367779
1,-0.041385,0.289623,-0.040756,-2.145428
2,-0.017458,0.458831,-0.040756,-1.918288
3,-0.068758,-0.302309,-0.040756,-1.873046
4,-0.067643,-0.195283,-0.040756,0.039344
...,...,...,...,...
3033897,-0.055477,-0.217405,-0.040756,0.228995
3033898,-0.057099,-0.116358,-0.040756,0.955743
3033899,0.001298,0.058231,-0.040756,-1.265901
3033900,-0.078491,-0.372264,-0.040756,0.543163


## Model

### Encoder Layer

In [50]:
class Encoder(tf.keras.models.Model):
    """Feed-forward network that maps a feature vector to a single probability.

    The stack is Dense(16, relu) -> Dense(8) -> Dense(4) -> Dense(1, sigmoid).
    The sigmoid on the last layer keeps the output in [0, 1], which is what a
    'binary_crossentropy' loss with its default ``from_logits=False`` expects;
    a linear output would be interpreted as (clipped) probabilities and give a
    misleading loss and accuracy.

    Args:
        encoder_dim: Number of input features per example. Only used to build
            the symbolic input in ``summary``.
    """

    def __init__(self, encoder_dim):
        super(Encoder, self).__init__()
        self.encoder_dim = encoder_dim
        # No `input_dim` kwarg: layers of a subclassed model are built from the
        # tensor they are first called on, so the kwarg had no effect here.
        self.dense1 = tf.keras.layers.Dense(16, activation='relu')
        # NOTE(review): dense2 and dense3 have no activation, so together with
        # the final projection they compose into one linear map. Left as-is to
        # keep the architecture unchanged; consider adding non-linearities.
        self.dense2 = tf.keras.layers.Dense(8)
        self.dense3 = tf.keras.layers.Dense(4)
        self.dense4 = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, x):
        """Run the four dense layers in order and return the (batch, 1) output."""
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        return x

    def summary(self, *args, **kwargs):
        """Print a layer-by-layer summary with concrete output shapes.

        A subclassed model has no static graph, so a functional model is wrapped
        around ``call`` using a symbolic input of width ``encoder_dim``. Any
        arguments are forwarded to ``tf.keras.Model.summary`` so this override
        stays compatible with the base-class signature.
        """
        # `shape` must be a tuple; `(self.encoder_dim)` is just a bare int.
        inputs = tf.keras.layers.Input(shape=(self.encoder_dim,))
        tf.keras.models.Model(inputs, self.call(inputs)).summary(*args, **kwargs)
    

In [51]:
# One input unit per feature column of the standardized frame.
model = Encoder(encoder_dim=engager_df.shape[1])


In [52]:
# Print the architecture before configuring training.
model.summary()

# Binary target -> binary cross-entropy (the sigmoid counterpart of
# sparse_categorical_crossentropy, which is used with softmax outputs).
# NOTE: with the string alias the loss uses from_logits=False, i.e. it expects
# the model's predictions to already be probabilities in [0, 1].
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['binary_crossentropy', 'accuracy'],
)

Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 4)]               0         
_________________________________________________________________
dense_16 (Dense)             (None, 16)                80        
_________________________________________________________________
dense_17 (Dense)             (None, 8)                 136       
_________________________________________________________________
dense_18 (Dense)             (None, 4)                 36        
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 5         
Total params: 257
Trainable params: 257
Non-trainable params: 0
_________________________________________________________________


In [53]:
# Positional hold-out: the first 2,000,000 rows train the model and every
# remaining row is used for validation. The split is by row order, with no
# shuffling before the cut.
train_rows = 2000000
train_x, valid_x = engager_df.iloc[:train_rows], engager_df.iloc[train_rows:]
train_y, valid_y = label.iloc[:train_rows], label.iloc[train_rows:]

# epochs / batch_size are literals here, independent of the EPOCH and
# BATCH_SIZE constants defined in the parameters cell.
result = model.fit(
    x=train_x,
    y=train_y,
    validation_data=(valid_x, valid_y),
    epochs=30,
    batch_size=32,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
