In [1]:
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras import metrics
import os


In [2]:
class DeepModel(tf.keras.models.Sequential):
    """Sequential regression network whose hidden layers halve in width.

    Hidden layer ``i`` (0-based) gets ``neurons_hidden_layer / 2 ** i`` units,
    truncated to an integer and never fewer than one. Each hidden ``Dense``
    layer uses ReLU, is optionally preceded by ``BatchNormalization`` and
    optionally followed by ``Dropout``. A linear ``Dense`` output layer is
    appended and the model is compiled with the Adam optimizer.
    """

    # Loss identifiers the constructor accepts for ``loss_func``.
    _SUPPORTED_LOSSES = (
        'mean_absolute_error',
        'mean_squared_error',
        'mean_squared_logarithmic_error',
    )

    def __init__(self, model_file=None,
                 loss_func='mean_absolute_error',
                 n_input_layer=305,
                 number_of_hidden_layers=5,
                 neurons_hidden_layer=305,
                 neurons_output_layer=4,
                 dropout=True,
                 dropout_rate=0.2,
                 batch_norm=True,
                 metrics=None,
                 *args, **kwargs):
        """Build the layer stack and compile the model.

        Args:
            model_file: Not used by this constructor; kept so existing
                callers keep working.
            loss_func: Loss identifier; must be one of ``_SUPPORTED_LOSSES``.
            n_input_layer: Not used by this constructor; the input width is
                inferred by Keras on first use. Kept for compatibility.
            number_of_hidden_layers: Number of hidden ``Dense`` layers.
            neurons_hidden_layer: Width of the first hidden layer; each
                following layer is half as wide as the previous one.
            neurons_output_layer: Number of units in the linear output layer.
            dropout: Whether to add a ``Dropout`` layer after each hidden
                ``Dense`` layer.
            dropout_rate: Rate passed to every ``Dropout`` layer.
            batch_norm: Whether to add a ``BatchNormalization`` layer before
                each hidden ``Dense`` layer.
            metrics: Metrics passed to ``compile``. ``None`` (the default)
                means fresh RMSE, MAE and MAPE metric objects; pass ``[]``
                to compile without metrics.
            *args, **kwargs: Forwarded to ``Sequential.__init__``.

        Raises:
            ValueError: If ``loss_func`` is not a supported loss identifier.
        """
        super().__init__(*args, **kwargs)

        # Fail loudly instead of silently returning an uncompiled model.
        if loss_func not in self._SUPPORTED_LOSSES:
            raise ValueError(
                'Unsupported loss_func {!r}; expected one of {}'.format(
                    loss_func, ', '.join(self._SUPPORTED_LOSSES)))

        # Metric objects are stateful, so build new ones per model rather
        # than sharing a single default list across every instance. The
        # ``metrics`` parameter shadows the imported keras ``metrics`` module,
        # hence the fully qualified ``tf.keras.metrics`` path.
        if metrics is None:
            metrics = [
                tf.keras.metrics.RootMeanSquaredError(),
                tf.keras.metrics.MeanAbsoluteError(),
                tf.keras.metrics.MeanAbsolutePercentageError(),
            ]

        for i in range(number_of_hidden_layers):
            if batch_norm:
                self.add(BatchNormalization())

            # ``units`` must be a positive integer; plain division would hand
            # Dense a float and could reach zero for deep stacks.
            units = max(1, int(neurons_hidden_layer / 2 ** i))
            self.add(Dense(units, activation='relu'))

            if dropout:
                self.add(Dropout(dropout_rate))

        self.add(Dense(neurons_output_layer, activation='linear'))

        self.compile(optimizer='Adam', loss=loss_func, metrics=metrics)






In [3]:
def get_data(path: str):
    """Load the CSV at ``path`` and separate model features from targets.

    Rows containing any missing value are dropped, the ``platform``,
    ``media_source`` and ``country_code`` columns are one-hot encoded (the
    first level of each is dropped) and ``install_date`` is removed.

    Args:
        path: Location of the CSV file, passed to ``pandas.read_csv``.

    Returns:
        A ``(features, targets)`` tuple of DataFrames. ``targets`` holds
        ``target_sub_ltv_day30``, ``target_iap_ltv_day30`` and
        ``target_ad_ltv_day30``. ``features`` holds every other remaining
        column except ``target_full_ltv_day30``, which is excluded from both.

    Raises:
        KeyError: If any of the columns named above is missing from the file.
    """
    categorical_columns = ('platform', 'media_source', 'country_code')
    target_columns = ['target_sub_ltv_day30', 'target_iap_ltv_day30',
                      'target_ad_ltv_day30']

    df = pd.read_csv(path).dropna()

    for column in categorical_columns:
        # Pin the dtype: pandas >= 2.0 defaults to bool dummies, and a frame
        # mixing bool with numeric columns turns into an object array under
        # ``.values``. uint8 was the default before 2.0.
        dummies = pd.get_dummies(df[column], drop_first=True, dtype='uint8')
        df = pd.concat([df.drop(columns=[column]), dummies], axis=1)

    df = df.drop(columns=['install_date'])

    Y = df[target_columns]
    X = df.drop(columns=target_columns + ['target_full_ltv_day30'])

    return X, Y


def split(X, y, test_size=0.001, random_state=None):
    """Split features and targets into train and test subsets.

    Args:
        X: Feature data.
        y: Target data with the same number of rows as ``X``.
        test_size: Passed to ``train_test_split``; the default keeps the
            0.001 hold-out this notebook has always used.
        random_state: Passed to ``train_test_split``; set an integer to make
            the shuffle reproducible across kernel restarts.

    Returns:
        The ``train_test_split`` result: ``X_train, X_test, y_train, y_test``.
    """
    return train_test_split(
        X, y, test_size=test_size, random_state=random_state)


def preprocess(X_train, X_test):
    """Scale both feature sets with a ``MinMaxScaler`` fitted on the training set.

    The scaler is fitted on ``X_train`` only and then applied to ``X_test``,
    so the test set does not influence the scaling.

    Args:
        X_train: Training features used to fit the scaler.
        X_test: Test features transformed with the fitted scaler.

    Returns:
        A ``(scaled_train, scaled_test)`` tuple.
    """
    min_max = MinMaxScaler()
    scaled_train = min_max.fit_transform(X_train)
    return scaled_train, min_max.transform(X_test)



In [None]:
# Load features and targets, then hold out a small test split.
features, targets = get_data('data.csv')
pd_X_train, pd_X_test, pd_y_train, pd_y_test = split(features, targets)

# The unsplit frames are not needed after the split; release them.
del features, targets

# Training inputs as NumPy arrays; the pandas training frames are then dropped.
X_train, y_train = pd_X_train.values, pd_y_train.values
del pd_X_train, pd_y_train
# Some comments, never mind

In [None]:
# Three output units, matching the three target columns returned by get_data;
# dropout is switched off for this run.
model = DeepModel(
    number_of_hidden_layers=5,
    neurons_hidden_layer=305,
    neurons_output_layer=3,
    dropout=False,
)

In [None]:
# Upper bound on training epochs. EPOCHS was referenced but never defined,
# so this cell raised NameError on a fresh kernel.
# NOTE(review): 100 is a placeholder ceiling — confirm the intended value.
EPOCHS = 100

# Stop once validation loss has not improved for 4 consecutive epochs.
stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=4)

history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    verbose=1,
    # Pass arrays so validation inputs have the same type as the training inputs.
    validation_data=(pd_X_test.values, pd_y_test.values),
    callbacks=[stopping],
)