In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import metrics

In [4]:
def get_model(model_file = None, loss_func = 'mean_absolute_error', output_layer = 4, 
              number_of_hidden_layers = 7, 
              input_layer = 305,
              n_hidden_layer = 305,
              metrics = None):
    """Build and compile a fully connected Keras model, or load a saved one.

    Args:
        model_file: Path handed to ``tf.keras.models.load_model``. When truthy,
            every other argument is ignored and the loaded model is returned.
        loss_func: Loss used at compile time. ``'categorical_crossentropy'``
            is mapped to ``tf.keras.losses.CategoricalCrossentropy()``; any
            other value is passed to ``Model.compile`` unchanged.
        output_layer: Number of units in the final (linear) Dense layer.
        number_of_hidden_layers: How many shrinking hidden layers to stack.
        input_layer: Width of the first Dense layer. Despite the name it is an
            ordinary ReLU Dense layer; no explicit input shape is declared.
        n_hidden_layer: Base width; hidden layer ``i`` (1-based) gets
            ``n_hidden_layer // i`` units, with a floor of one unit.
        metrics: Metrics passed to ``Model.compile``. ``None`` (the default)
            creates a fresh RMSE / MAE / MAPE trio for this model.

    Returns:
        A compiled ``tf.keras`` Sequential model, or the loaded model.
    """
    if model_file:
        return tf.keras.models.load_model(model_file)

    # Metric objects are stateful, so each model gets its own instances rather
    # than sharing one list created when the function was defined.
    # The `metrics` parameter shadows the imported module of the same name,
    # hence the fully qualified tf.keras.metrics path.
    if metrics is None:
        metrics = [
            tf.keras.metrics.RootMeanSquaredError(),
            tf.keras.metrics.MeanAbsoluteError(),
            tf.keras.metrics.MeanAbsolutePercentageError(),
        ]

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(input_layer, activation='relu'))
    for i in range(1, number_of_hidden_layers + 1):
        # Dense expects a positive integer unit count; true division would
        # hand it floats such as 152.5.
        units = max(1, int(n_hidden_layer // i))
        model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.Dense(output_layer))

    print(loss_func, output_layer, model_file)
    if loss_func == 'categorical_crossentropy':
        loss = tf.keras.losses.CategoricalCrossentropy()
    else:
        # Covers 'mean_absolute_error' and any other loss Keras understands,
        # so the function never returns an uncompiled model silently.
        loss = loss_func
    model.compile(optimizer='Adam', loss=loss, metrics=metrics)
    return model

class DeepModel:
    """Thin wrapper around the Keras model produced by ``get_model``.

    Attributes are limited to ``self.model``, the underlying Keras model; every
    method forwards its arguments to the Keras method it wraps and returns
    that method's result.
    """

    def __init__(self, model_file = None, loss_func = 'mean_absolute_error'):
        """Load the model from ``model_file`` if given, otherwise build a new one.

        Args:
            model_file: Optional path forwarded to ``get_model``.
            loss_func: Loss identifier forwarded to ``get_model``.
        """
        self.model = get_model(model_file= model_file, loss_func= loss_func)

    def train(self, X, Y, *args, **kargs):
        """Fit the model on ``X``/``Y``; extra arguments go to ``model.fit``.

        Returns:
            Whatever ``model.fit`` returns (the Keras training history).
        """
        return self.model.fit(X, Y, *args, **kargs)

    def predict(self, *args, **kargs):
        """Forward to ``model.predict`` and return its predictions."""
        return self.model.predict(*args, **kargs)

    def evaluate(self, *args, **kargs):
        """Forward to ``model.evaluate`` and return its loss/metric values."""
        return self.model.evaluate(*args, **kargs)

    def summary(self):
        """Forward to ``model.summary`` on the wrapped Keras model."""
        return self.model.summary()

    def summury(self):
        """Misspelled alias of ``summary``, kept so existing callers still work."""
        return self.summary()

In [5]:
def get_data(path: str,
             categorical_columns=('platform', 'media_source', 'country_code'),
             drop_columns=('install_date',),
             target_columns=('target_sub_ltv_day30', 'target_iap_ltv_day30',
                             'target_ad_ltv_day30', 'target_full_ltv_day30')):
    """Read a CSV and return one-hot encoded features plus the target columns.

    Rows containing any missing value are dropped first. Each categorical
    column is then replaced by its dummy columns (first level dropped), which
    are appended on the right in the order the columns are listed.

    Args:
        path: Location of the CSV file.
        categorical_columns: Columns to one-hot encode.
        drop_columns: Columns removed outright without encoding.
        target_columns: Columns returned as the targets and removed from the
            features.

    Returns:
        ``(X, Y)``: the feature DataFrame and the target DataFrame.
    """
    # NOTE(review): read_csv's default NA markers include the literal "NA",
    # so a country code of "NA" would be read as missing and its row dropped
    # below -- confirm whether that matters for this dataset.
    df = pd.read_csv(path).dropna()

    for column in categorical_columns:
        dummies = pd.get_dummies(df[column], drop_first=True)
        df = pd.concat([df.drop(columns=[column]), dummies], axis=1)

    df = df.drop(columns=list(drop_columns))

    target_columns = list(target_columns)
    Y = df[target_columns]
    X = df.drop(columns=target_columns)
    return X, Y

In [6]:
from sklearn.model_selection import train_test_split

def split(X, y, test_size=0.001, random_state=69):
    """Split features and targets into train and test parts.

    Args:
        X: Feature data.
        y: Target data aligned with ``X``.
        test_size: Forwarded to ``train_test_split``; the default of 0.001
            holds out 0.1% of the rows.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible.

    Returns:
        The result of ``train_test_split``, unpacked by callers as
        ``X_train, X_test, y_train, y_test``.
    """
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

In [7]:
from sklearn.preprocessing import MinMaxScaler
def preprocess(X_train, X_test, return_scaler=False):
    """Min-max scale the features, fitting on the training data only.

    The scaler is fitted on ``X_train`` and the same fitted transform is then
    applied to ``X_test``.

    Args:
        X_train: Training features; used to fit the scaler.
        X_test: Test features; only transformed.
        return_scaler: When True, also return the fitted scaler so the same
            scaling can be applied to other data later.

    Returns:
        ``(X_train, X_test)`` scaled, or ``(X_train, X_test, scaler)`` when
        ``return_scaler`` is True.
    """
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    if return_scaler:
        return X_train, X_test, scaler
    return X_train, X_test


In [8]:
# Load the feature frame (X) and the target frame (y) from data.csv via get_data.
X, y = get_data('data.csv')

In [9]:
# No model_file is passed, so DeepModel builds a new network through get_model
# with its default arguments instead of loading a saved one.
model = DeepModel()

mean_absolute_error 4 None


In [10]:
# Partition the data with split(), then remove the names X and y --
# presumably so the unsplit frames can be garbage-collected.
X_train, X_test, y_train, y_test = split(X, y)
del X, y

In [11]:
# Show the dimensions of the training features.
X_train.shape

(1464991, 302)

In [12]:
# Show the dimensions of the training targets.
y_train.shape

(1464991, 4)

In [13]:
# Fit for 10 epochs; verbose and epochs are forwarded to the Keras fit call.
# NOTE(review): preprocess() is defined above but is not called in the visible
# cells, so the features reach the network unscaled -- confirm this is intended.
model.train(X_train, y_train, verbose = 1, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
# Save the wrapped Keras model (not the DeepModel wrapper) under the path "model1".
model.model.save("model1")

INFO:tensorflow:Assets written to: model1/assets
