In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import metrics

In [None]:
PATH = ""

In [2]:
class DeepModel(tf.keras.models.Sequential):

    def __init__(self, model_file=None,
                 loss_func='mean_absolute_error',
                 n_input_layer=305,
                 number_of_hidden_layers=5,
                 neurons_hidden_layer=305,
                 neurons_output_layer=4,
                 metrics=[
                     metrics.RootMeanSquaredError(),
                     metrics.MeanAbsoluteError(),
                     metrics.MeanAbsolutePercentageError()],
                 * args, **kwargs):

        super(DeepModel, self).__init__(*args, **kwargs)

        self.add(tf.keras.layers.Dense(n_input_layer, activation='relu'))
        
        for i in range(number_of_hidden_layers):
            self.add(tf.keras.layers.Dense(
                neurons_hidden_layer / 2**i, activation='relu'))

            self.add(tf.keras.layers.Dense(
                neurons_output_layer, activation='linear'))

            if (loss_func == 'mean_absolute_error'):
                self.compile(optimizer='Adam',
                             loss='mean_absolute_error', metrics=metrics)

            elif (loss_func == 'categorical_crossentropy'):
                self.compile(optimizer='Adam',
                             loss='mean_squared_error', metrics=metrics)

            elif (loss_func == 'mean_squared_logarithmic_error'):
                self.compile(
                    optimizer='Adam', loss='mean_squared_logarithmic_error', metrics=metrics)

In [3]:
def get_data(path: str):
    df = pd.read_csv(path)
    
    df = df.dropna()
    df.describe()
    
    platform_dummies = pd.get_dummies(df['platform'], drop_first=True)
    df = df.drop(['platform'],axis=1)
    df = pd.concat([df, platform_dummies],axis=1)
    
    media_source_dummies = pd.get_dummies(df['media_source'], drop_first=True)
    df = df.drop(['media_source'], axis=1)
    df = pd.concat([df, media_source_dummies], axis=1)
   
    country_code_dummies = pd.get_dummies(df['country_code'], drop_first=True)
    df = df.drop(['country_code'], axis=1)
    df = pd.concat([df, country_code_dummies], axis=1)
    
    df = df.drop(['install_date'], axis=1)
    
    Y = df[['target_sub_ltv_day30', 'target_iap_ltv_day30', 'target_ad_ltv_day30', 'target_full_ltv_day30']]
    
    df.drop(['target_sub_ltv_day30', 'target_iap_ltv_day30', 'target_ad_ltv_day30', 'target_full_ltv_day30'], axis = 1, inplace = True)
    
    return df, Y

In [4]:
from sklearn.model_selection import train_test_split

def split(X, y):
    return train_test_split(X, y, test_size=0.001, random_state=69)

In [5]:
from sklearn.preprocessing import MinMaxScaler
def preprocess(X_train, X_test):

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test


In [6]:
X, y = get_data(PATH + 'data.csv')

In [7]:
model = DeepModel()

In [8]:
pd_X_train, pd_X_test, pd_y_train, pd_y_test = split(X, y)
del X, y

In [9]:
pd_X_train.shape

(1464991, 302)

In [10]:
pd_y_train.shape

(1464991, 4)

In [11]:

X_train = pd_X_train.values
y_train = pd_y_train.values

In [12]:
del pd_X_train, pd_y_train

In [13]:
model.fit(X_train, y_train, verbose = 1, epochs = 1)



<keras.callbacks.History at 0x7f9404cb2af0>

In [15]:
model.save(PATH + "model2")

INFO:tensorflow:Assets written to: model2/assets


In [16]:
model.evaluate(pd_X_test, pd_y_test)



[0.07437730580568314, 0.8714499473571777, 0.07437730580568314, 1385131.75]