In [1]:
import numpy as np
import pandas as pd

from warnings import filterwarnings

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import RobustScaler, normalize, LabelEncoder, StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold

# Silence every Python warning for the rest of the notebook.
# NOTE(review): this also hides deprecation warnings from pandas / scikit-learn.
filterwarnings('ignore')

# Allow up to 500 columns and 500 rows when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [2]:
# Input file locations (Kaggle dataset paths).
CONFIG = {
    'TRAIN_PATH': '/kaggle/input/datathon-entel-2022-reto2/train.csv',
    'TEST_PATH': '/kaggle/input/datathon-entel-2022-reto2/test.csv',
    'SAMPLE_SUBMISSION': '/kaggle/input/datathon-entel-2022-reto2/test_sample.csv'
}

# Load the three CSV files.
# NOTE(review): df_test and df_sub are not referenced again in the visible
# notebook; every feature window and the submission are built from df_train.
df_train = pd.read_csv(CONFIG['TRAIN_PATH'])
df_test = pd.read_csv(CONFIG['TEST_PATH'])
df_sub = pd.read_csv(CONFIG['SAMPLE_SUBMISSION'])

In [3]:
# Number of weekly columns that featuring() and the training helpers read from
# each window. This is a notebook-level global that later cells reassign.
n = 40
# Cap applied to the smoothed weekly features and to the targets.
x_base = 100

In [4]:
def vol_col(x):
    """Return a per-column volatility score of ``x`` as a ``{column: score}`` dict.

    The score of a column is the sum over its rows of ``sqrt(log(value))``:

    * ``log(0) == -inf`` is replaced by 0 before the square root;
    * values whose logarithm is negative (strictly between 0 and 1) become NaN
      under the square root and are skipped by ``sum``;
    * a column made only of NaN is kept in the result with a score of 0.

    Parameters
    ----------
    x : pandas.DataFrame
        Numeric frame; one score is produced per column.

    Returns
    -------
    dict
        Mapping of column label to score.
    """
    # log(0) and sqrt(negative) are expected here and handled explicitly, so
    # the corresponding numpy warnings are suppressed locally.
    with np.errstate(divide='ignore', invalid='ignore'):
        log_x = np.log(x).replace(-np.inf, 0)
        return np.sqrt(log_x).sum().to_dict()

def vol(x):
    """Return a per-column volatility score of ``x`` as a Series.

    The score of a column is the sum over its rows of ``sqrt(log(value))``.
    ``log(0) == -inf`` is replaced by 0 before the square root; entries whose
    logarithm is negative become NaN under the square root and are skipped by
    ``sum``. The result is indexed by the columns of ``x`` in their original
    order, and any remaining NaN is filled with 0.

    Parameters
    ----------
    x : pandas.DataFrame
        Numeric frame; one score is produced per column.

    Returns
    -------
    pandas.Series
        Score per column of ``x``.
    """
    # log(0) and sqrt(negative) are expected here and handled explicitly, so
    # the corresponding numpy warnings are suppressed locally.
    with np.errstate(divide='ignore', invalid='ignore'):
        log_x = np.log(x).replace(-np.inf, 0)
        return np.sqrt(log_x).sum().fillna(0)


def get_value(df, col, exp, exp2, n_weeks=None):
    """Encode a categorical column with an aggregate of its group's weekly values.

    For every distinct value of ``df[col]`` the weekly columns (positions
    ``5 .. 5 + n_weeks``) are aggregated per week with ``exp`` and the
    resulting per-week values are then reduced across weeks with ``exp2``.
    Each row receives the number computed for its own group.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose weekly columns start at position 5.
    col : str
        Name of the categorical column to encode.
    exp : str or callable
        Aggregation applied within each group, per weekly column (e.g. 'max').
    exp2 : str or callable
        Aggregation applied across the weekly columns (e.g. 'mean').
    n_weeks : int, optional
        Number of weekly columns to use. Defaults to the notebook global ``n``.

    Returns
    -------
    pandas.Series
        Numeric series aligned with ``df`` and named ``col``.
    """
    if n_weeks is None:
        n_weeks = n

    week_cols = list(df.columns[5: 5 + n_weeks])
    per_group = df.groupby(col)[week_cols].agg(exp).agg(exp2, axis=1)

    # map() is a direct lookup; replace() with a large dict is much slower and
    # may leave the result as object dtype on recent pandas versions.
    return df[col].map(per_group)
    
    
def replace_value_count(df, col):
    """Frequency-encode ``df[col]``.

    Each value is replaced by the fraction of rows that carry that value
    (``value_counts(normalize=True)``). Missing values stay missing.

    Parameters
    ----------
    df : pandas.DataFrame
    col : str
        Name of the categorical column to encode.

    Returns
    -------
    pandas.Series
        Relative frequency per row, aligned with ``df`` and named ``col``.
    """
    # map() is a direct lookup; replace() with a dict is slower and may keep
    # object dtype on recent pandas versions.
    return df[col].map(df[col].value_counts(normalize=True))


def count_0_values(df, n_weeks=None):
    """Count, per row, how many weekly columns hold a value greater than 0.

    Despite the function name, this counts the strictly positive entries (not
    the zeros) among the weekly columns at positions ``5 .. 5 + n_weeks``.
    NaN entries are not counted.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose weekly columns start at position 5.
    n_weeks : int, optional
        Number of weekly columns to inspect. Defaults to the notebook global
        ``n``.

    Returns
    -------
    pandas.Series
        Integer count per row, aligned with ``df``.
    """
    if n_weeks is None:
        n_weeks = n
    # Vectorised comparison instead of a per-element Python lambda.
    return (df.iloc[:, 5: n_weeks + 5] > 0).sum(axis=1)


def best_department(df):
    """Return 1 for rows whose Z_DEPARTAMENTO is the hard-coded department, else 0."""
    flagged_departments = ['d6c21b948958417ca98b682a573eb8aa1084b292d32f760f253ef53da13e5589']
    return df.Z_DEPARTAMENTO.isin(flagged_departments).astype(int)

def best_sell_point(df):
    """Return 1 for rows whose Z_PUNTO_VENTA is one of the two hard-coded points of sale, else 0."""
    flagged_points = [
        'da45328ba820604eb99694768f2a430cd933d161601dcb8491b4a9b555232c59',
        'e1f2d2708f545ddc1d7266ba0cc5ccc88147b77fdf3450e68a974e93018ecf60',
    ]
    return df.Z_PUNTO_VENTA.isin(flagged_points).astype(int)

# Feature engineering

In [5]:
def featuring(df):
    """Build the model input matrix for one window of weekly columns.

    ``df`` is a slice of weekly columns taken from ``df_train``. The first five
    columns of ``df_train`` are prepended so that the window starts at column
    position 5, and only the first ``n`` window columns are used (``n`` and
    ``x_base`` are notebook-level globals).

    Output columns, in order:

    1. the ``n`` weekly values, exponentially smoothed (``ewm(1)``) along time
       and capped at ``x_base``;
    2. per-row statistics: volatility (``vol`` of the ``ewm(5)``-smoothed
       series), sum, mean, standard deviation, max, "any sale" flag, sum of the
       last five weeks and its "any sale" flag;
    3. per-category volatility (``vol_col``) for model, brand, range,
       department and point of sale;
    4. the number of weeks with positive values, and the hard-coded point of
       sale / department flags;
    5. per-category max and mean-of-sums encodings (``get_value``);
    6. per-category relative frequencies (``replace_value_count``).

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df``; column labels are reset to ``0 .. k-1``.
    """
    df = pd.concat([df_train.iloc[:, :5], df], axis=1)

    week_cols = df.columns[5: n + 5]
    weeks = df.iloc[:, 5: n + 5]

    # --- per-row statistics over the window --------------------------------
    sum1 = weeks.sum(axis=1)
    med1 = weeks.mean(axis=1)
    # Fixed: this used to be a second ``.mean()``, i.e. a duplicate of med1.
    std1 = weeks.std(axis=1)
    max1 = weeks.max(axis=1)

    vol1 = vol(weeks.T.ewm(5).mean())

    # Fixed: the previous slice ``5:-5`` summed everything *except* the last
    # five weeks; the feature is meant to describe the most recent five weeks.
    sum_last_5 = weeks.iloc[:, -5:].sum(axis=1)
    sum_last_5_r = (sum_last_5 > 0).astype(int)

    sumr = (sum1 > 0).astype(int)

    # --- per-category volatility -------------------------------------------
    def _group_volatility(col):
        # Mean weekly series per category, smoothed along time, then scored.
        # Transposing first avoids the deprecated ``ewm(axis=1)`` argument.
        smoothed = df.groupby([col])[week_cols].mean().T.ewm(1).mean()
        return df[col].map(vol_col(smoothed))

    vol_features = [
        _group_volatility(col)
        for col in ('Z_MODELO', 'Z_MARCA', 'Z_GAMA', 'Z_DEPARTAMENTO', 'Z_PUNTO_VENTA')
    ]

    # --- per-category aggregate encodings -----------------------------------
    agg_order = ('Z_PUNTO_VENTA', 'Z_MODELO', 'Z_GAMA', 'Z_MARCA', 'Z_DEPARTAMENTO')
    max_features = [get_value(df, col, 'max', 'max') for col in agg_order]
    sum_features = [get_value(df, col, 'sum', 'mean') for col in agg_order]

    # --- per-category relative frequencies ----------------------------------
    freq_features = [
        replace_value_count(df, col)
        for col in ('Z_MARCA', 'Z_GAMA', 'Z_MODELO', 'Z_DEPARTAMENTO', 'Z_PUNTO_VENTA')
    ]

    count_t = count_0_values(df)

    b_punto_venta = best_sell_point(df)
    b_departameto = best_department(df)

    # Smoothed weekly values, capped at x_base. clip() keeps the original
    # column (time) order.
    features = weeks.T.ewm(1).mean().T.clip(upper=x_base)

    blocks = [
        features,
        vol1,
        sum1,
        med1,
        std1,
        max1,
        sumr,
        sum_last_5,
        sum_last_5_r,
        *vol_features,
        count_t,
        b_punto_venta,
        b_departameto,
        *max_features,
        *sum_features,
        *freq_features,
    ]

    # The double transpose resets the column labels to 0..k-1.
    return pd.concat(blocks, axis=1).T.reset_index(drop=True).T

In [6]:
# 40-week setting: features from column positions 5..44, targets are the
# following ten columns (45..54).
x_train = featuring(df_train.iloc[:, 5:45])
y_train = df_train.iloc[:, 45:55]

# "Test" features use the most recent 40 columns (15..54) of the same frame.
x_test = featuring(df_train.iloc[:, 15:55])

In [7]:
# Sanity check: shapes of the feature and target frames built above.
print(f'X TRAIN SHAPE: {x_train.shape}')
print(f'Y TRAIN SHAPE: {y_train.shape}')
print(f'X TEST SHAPE: {x_test.shape}')

In [8]:
def data_sequence_to_models(c):
    """Drop highly correlated extra features and robust-scale train/test.

    Reads the notebook globals ``x_train`` and ``x_test``. Correlations are
    computed on the columns of ``x_train`` from position ``c`` onwards; a
    column is dropped when its absolute correlation with any earlier column of
    that range exceeds 0.95. The same columns are removed from ``x_test``.
    A ``RobustScaler`` is fitted on the reduced training frame and applied to
    both frames.

    Returns
    -------
    tuple of numpy.ndarray
        ``(scaled_train, scaled_test)``.
    """
    corr = x_train.iloc[:, c:].corr()

    # Strictly lower triangle: entry (i, j) with j < i flags column i as
    # redundant with respect to an earlier column j.
    too_similar = np.tril(corr.abs().values > 0.95, k=-1)
    redundant = corr.columns[too_similar.any(axis=1)]

    train_kept = x_train.drop(labels=redundant, axis=1)
    test_kept = x_test.drop(labels=redundant, axis=1)

    print(f'TRAIN SHAPE: {train_kept.shape}')

    scaler = RobustScaler()
    scaled_train = scaler.fit_transform(train_kept)
    scaled_test = scaler.transform(test_kept)

    return scaled_train, scaled_test

In [9]:
# Keep the first n (sequence) columns untouched by the correlation filter.
x_train_data, x_test_data = data_sequence_to_models(n)

# Cap the targets at x_base. np.minimum works on both a DataFrame and an
# ndarray, so re-running this cell is harmless.
y_train = np.minimum(np.asarray(y_train), x_base)

In [10]:
def LSTM(l_features, l_extras_features):
    """Build and compile the two-branch (sequence + tabular) Keras model.

    Parameters
    ----------
    l_features : tuple
        Shape of one sequence sample, as passed to ``keras.layers.Input``.
    l_extras_features : int or tuple
        Number (or shape) of the extra tabular features per sample.

    Returns
    -------
    keras.Model
        Model taking ``[sequence, tabular]`` inputs and producing 10 outputs,
        compiled with Adam (lr 0.0004), MSE loss and an RMSE metric.
    """
    # Accept a plain int for the tabular width; Input() expects a shape tuple.
    if isinstance(l_extras_features, int):
        l_extras_features = (l_extras_features,)

    features = keras.layers.Input(shape=l_features)
    tabular = keras.layers.Input(shape=l_extras_features)

    # Sequence branch: three stacked LSTM layers followed by two dense layers.
    out_features = keras.layers.LSTM(250, return_sequences=True)(features)
    out_features = keras.layers.Dropout(0.2)(out_features)
    out_features = keras.layers.LSTM(150, return_sequences=True)(out_features)
    out_features = keras.layers.Dropout(0.2)(out_features)
    out_features = keras.layers.LSTM(100)(out_features)
    out_features = keras.layers.Flatten()(out_features)

    out_features = keras.layers.Dense(50, activation='linear')(out_features)
    out_features = keras.layers.Dropout(0.2)(out_features)
    out_features = keras.layers.Dense(32, activation='linear')(out_features)

    # Tabular branch. Fixed: every Dense layer used to be applied to the raw
    # ``tabular`` input, so only the last (32-unit) layer was part of the
    # model. The layers are now chained.
    out_tabular = tabular
    for n_hidden in [1024, 512, 256, 128, 64, 32]:
        out_tabular = keras.layers.Dense(n_hidden, activation='linear')(out_tabular)
        out_tabular = keras.layers.BatchNormalization()(out_tabular)
        out_tabular = keras.layers.Dropout(0.2)(out_tabular)

    # Both branches end with 32 units and are combined element-wise.
    out = keras.layers.Multiply()([out_features, out_tabular])
    out = keras.layers.Dense(10, activation='relu')(out)

    model = keras.Model(inputs=[features, tabular], outputs=out)

    mse = tf.keras.losses.MeanSquaredError()
    rmse = tf.keras.metrics.RootMeanSquaredError()
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0004), loss=mse, metrics=[rmse])

    return model

In [11]:
def training_LSTM():
    """Train one two-branch LSTM model per fold and return the fitted models.

    Reads the notebook globals ``x_train_data``, ``y_train`` and ``n``. The
    first ``n`` columns of ``x_train_data`` feed the sequence branch (reshaped
    to ``(samples, n, 1)``); the remaining columns feed the tabular branch.
    Folds come from a 5-split shuffled ``StratifiedKFold`` stratified on the
    row-wise target sum capped at 15.

    Returns
    -------
    list
        The fitted Keras models, one per fold.
    """
    n_epochs = 1000
    batch = 512
    monitored = 'val_root_mean_squared_error'

    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=2022)

    # Stratification label: total target per row, capped at 15.
    totals = y_train.sum(axis=1)
    y_group = np.where(totals < 15, totals, 15)

    fitted = []

    for fold_no, (fit_idx, val_idx) in enumerate(splitter.split(x_train_data, y_group), start=1):

        print('-'*15, '>', f'Fold {fold_no}', '<', '-'*15)

        x_fit, x_val = x_train_data[fit_idx], x_train_data[val_idx]
        y_fit, y_val = y_train[fit_idx], y_train[val_idx]

        # Sequence branch input: add a trailing feature axis of length 1.
        seq_fit = x_fit[:, :n, np.newaxis]
        seq_val = x_val[:, :n, np.newaxis]

        # Tabular branch input: everything after the first n columns.
        tab_fit = x_fit[:, n:]
        tab_val = x_val[:, n:]

        model = LSTM(seq_fit.shape[-2:], tab_fit.shape[-1])

        stopper = keras.callbacks.EarlyStopping(
            monitor=monitored,
            min_delta=1e-05,
            patience=50,
            verbose=1,
            mode='min',
            restore_best_weights=True,
        )
        lr_reducer = keras.callbacks.ReduceLROnPlateau(
            monitor=monitored,
            factor=0.1,
            patience=10,
            verbose=1,
            min_lr=5e-7,
            mode='min',
        )

        model.fit(
            [seq_fit, tab_fit], y_fit,
            validation_data=([seq_val, tab_val], y_val),
            epochs=n_epochs,
            batch_size=batch,
            callbacks=[stopper, lr_reducer],
            verbose=1,
        )

        eval_model(model, [seq_val, tab_val], y_val)

        fitted.append(model)

    return fitted

def eval_model(model, x_valid, y_valid):
    """Print and return the RMSE of the rounded predictions on a validation set.

    Predictions are rounded to the nearest integer before scoring, and the
    error is computed over all (row, week) cells at once.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(x_valid)`` that returns an array.
    x_valid : object
        Passed unchanged to ``model.predict``.
    y_valid : array-like
        True values with the same shape as the predictions.

    Returns
    -------
    float
        The root mean squared error that was printed.
    """
    preds = np.round(model.predict(x_valid)).astype('int32').ravel()
    truth = np.asarray(y_valid).ravel()

    # Computed directly with numpy: ``mean_squared_error(squared=False)`` is
    # deprecated/removed in recent scikit-learn releases, and flattening with
    # ravel() keeps predictions and targets aligned cell by cell.
    rmse = np.sqrt(np.mean((truth - preds) ** 2))
    print(f' RMSE --> {rmse}')
    return rmse

In [12]:
# Train the per-fold LSTM models on the 40-week data prepared above.
models = training_LSTM()

In [13]:
# Split the scaled test matrix the same way as during training: the first n
# columns form the sequence input (with a trailing axis of length 1), the rest
# the tabular input.
x_test_features = x_test_data[:, :n]
x_test_features = np.reshape(x_test_features, (x_test_features.shape[0], x_test_features.shape[1], 1))

x_test_extras = x_test_data[:, n:]

preds_LSTM_40 = [model.predict([x_test_features, x_test_extras]) for model in models]

# Average over however many fold models were trained (no hard-coded count).
pred_sub_LSTM_40 = np.mean(preds_LSTM_40, axis=0)

# MODELO EN 20 SEMANAS

In [14]:
# Switch the global window length to 20 weeks; featuring() and training_LSTM() read it.
n = 20

In [15]:
# Three 20-column windows starting at column positions 5, 15 and 25; each
# window's target is the ten columns that follow it.
window_starts = (5, 15, 25)

x_train = pd.concat(
    [featuring(df_train.iloc[:, start:start + 20]) for start in window_starts],
    axis=0,
).reset_index(drop=True)

y_train = pd.concat(
    [pd.DataFrame(df_train.iloc[:, start + 20:start + 30].values) for start in window_starts],
    axis=0,
).reset_index(drop=True)

# Most recent 20 columns are used to predict the next ten weeks.
x_test = featuring(df_train.iloc[:, 35:55])

In [16]:
# Sanity check: shapes of the stacked 20-week frames.
print(f'X TRAIN SHAPE: {x_train.shape}')
print(f'Y TRAIN SHAPE: {y_train.shape}')
print(f'X TEST SHAPE: {x_test.shape}')

In [17]:
# Keep the first n (sequence) columns untouched by the correlation filter.
x_train_data, x_test_data = data_sequence_to_models(n)

# Cap the targets at x_base. np.minimum works on both a DataFrame and an
# ndarray, so re-running this cell is harmless.
y_train = np.minimum(np.asarray(y_train), x_base)

In [18]:
# Sanity check: shapes after correlation filtering, scaling and target capping.
print(f'X TRAIN SHAPE: {x_train_data.shape}')
print(f'Y TRAIN SHAPE: {y_train.shape}')
print(f'X TEST SHAPE: {x_test_data.shape}')

In [19]:
# Train the per-fold LSTM models on the 20-week data prepared above.
models_LSTM_20 = training_LSTM()

In [20]:
# First n columns form the sequence input (with a trailing axis of length 1),
# the remaining columns the tabular input.
x_test_features = x_test_data[:, :n]
x_test_features = np.reshape(x_test_features, (x_test_features.shape[0], x_test_features.shape[1], 1))

x_test_extras = x_test_data[:, n:]

preds_LSTM_20 = [model.predict([x_test_features, x_test_extras]) for model in models_LSTM_20]

# Average over however many fold models were trained (no hard-coded count).
pred_sub_LSTM_20 = np.mean(preds_LSTM_20, axis=0)

# MODELO BASADO EN 10 SEMANAS

In [21]:
# Switch the global window length to 10 weeks; featuring() and training_LSTM() read it.
n = 10

In [22]:
# Four 10-column windows starting at column positions 5, 15, 25 and 35; each
# window's target is the ten columns that follow it.
window_starts = (5, 15, 25, 35)

x_train = pd.concat(
    [featuring(df_train.iloc[:, start:start + 10]) for start in window_starts],
    axis=0,
).reset_index(drop=True)

y_train = pd.concat(
    [pd.DataFrame(df_train.iloc[:, start + 10:start + 20].values) for start in window_starts],
    axis=0,
).reset_index(drop=True)

# Most recent 10 columns are used to predict the next ten weeks.
x_test = featuring(df_train.iloc[:, 45:55])

In [23]:
# Sanity check: shapes of the stacked 10-week frames.
print(f'X TRAIN SHAPE: {x_train.shape}')
print(f'Y TRAIN SHAPE: {y_train.shape}')
print(f'X TEST SHAPE: {x_test.shape}')

In [24]:
# Keep the first n (sequence) columns untouched by the correlation filter.
x_train_data, x_test_data = data_sequence_to_models(n)

# Cap the targets at x_base. np.minimum works on both a DataFrame and an
# ndarray, so re-running this cell is harmless.
y_train = np.minimum(np.asarray(y_train), x_base)

In [25]:
# Train the per-fold LSTM models on the 10-week data prepared above.
models_LSTM_10 = training_LSTM()

In [26]:
# First n columns form the sequence input (with a trailing axis of length 1),
# the remaining columns the tabular input.
x_test_features = x_test_data[:, :n]
x_test_features = np.reshape(x_test_features, (x_test_features.shape[0], x_test_features.shape[1], 1))

x_test_extras = x_test_data[:, n:]

preds_LSTM_10 = [model.predict([x_test_features, x_test_extras]) for model in models_LSTM_10]

# Average over however many fold models were trained (no hard-coded count).
pred_sub_LSTM_10 = np.mean(preds_LSTM_10, axis=0)

In [27]:
# Weighted blend of the three LSTM ensembles: 0.2 (40 weeks), 0.5 (20 weeks), 0.3 (10 weeks).
# NOTE(review): the cell that assigns pred_sub uses pred_sub_MLP only, so this
# blend does not reach the submission file as the notebook stands.
pred_sub_LSTM = pred_sub_LSTM_40 * 0.2 + pred_sub_LSTM_20 * 0.5  + pred_sub_LSTM_10 * 0.3

# MODELOS MLP

In [28]:
# Fixed: the 10-week LSTM section leaves the global ``n`` at 10, and
# featuring() only reads the first ``n`` columns of the window it is given.
# Without resetting it here, the "40-week" MLP features were built from just
# the first 10 weeks of each 40-week window.
n = 40

# 40-week setting: features from column positions 5..44, targets 45..54.
x_train = featuring(df_train.iloc[:, 5:45])
y_train = df_train.iloc[:, 45:55]

# "Test" features use the most recent 40 columns (15..54).
x_test = featuring(df_train.iloc[:, 15:55])

In [29]:
# Sanity check: shapes of the MLP 40-week frames.
print(f'X TRAIN SHAPE: {x_train.shape}')
print(f'Y TRAIN SHAPE: {y_train.shape}')
print(f'X TEST SHAPE: {x_test.shape}')

In [30]:
# c=0: every column (including the weekly ones) takes part in the correlation filter.
x_train_data, x_test_data = data_sequence_to_models(0)

# Cap the targets at x_base. np.minimum works on both a DataFrame and an
# ndarray, so re-running this cell is harmless.
y_train = np.minimum(np.asarray(y_train), x_base)

In [31]:
def MLP(l_extras_features):
    """Build and compile the tabular-only Keras model.

    Parameters
    ----------
    l_extras_features : int or tuple
        Number (or shape) of input features per sample.

    Returns
    -------
    keras.Model
        Model with a single input and 10 outputs, compiled with Adam
        (lr 0.0004), MSE loss and an RMSE metric.
    """
    # Accept a plain int for the input width; Input() expects a shape tuple.
    if isinstance(l_extras_features, int):
        l_extras_features = (l_extras_features,)

    tabular = keras.layers.Input(shape=l_extras_features)

    # Fixed: every Dense layer used to be applied to the raw ``tabular`` input,
    # so only the last (32-unit) layer was part of the model. The layers are
    # now chained.
    out_tabular = tabular
    for n_hidden in [1024, 512, 256, 128, 64, 32]:
        out_tabular = keras.layers.Dense(n_hidden, activation='linear')(out_tabular)
        out_tabular = keras.layers.BatchNormalization()(out_tabular)
        out_tabular = keras.layers.Dropout(0.2)(out_tabular)

    out = keras.layers.Dense(10, activation='relu')(out_tabular)

    model = keras.Model(inputs=[tabular], outputs=out)

    mse = tf.keras.losses.MeanSquaredError()
    rmse = tf.keras.metrics.RootMeanSquaredError()
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0004), loss=mse, metrics=[rmse])

    return model

In [32]:
def training_MLP():
    """Train one MLP per fold and return the fitted models.

    Reads the notebook globals ``x_train_data`` and ``y_train``. Folds come
    from a 5-split shuffled ``StratifiedKFold`` stratified on the row-wise
    target sum capped at 15.

    Returns
    -------
    list
        The fitted Keras models, one per fold.
    """
    n_epochs = 1000
    batch = 512
    monitored = 'val_root_mean_squared_error'

    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=2022)

    # Stratification label: total target per row, capped at 15.
    totals = y_train.sum(axis=1)
    y_group = np.where(totals < 15, totals, 15)

    fitted = []

    for fold_no, (fit_idx, val_idx) in enumerate(splitter.split(x_train_data, y_group), start=1):

        print('-'*15, '>', f'Fold {fold_no}', '<', '-'*15)

        x_fit, x_val = x_train_data[fit_idx], x_train_data[val_idx]
        y_fit, y_val = y_train[fit_idx], y_train[val_idx]

        model = MLP(x_fit.shape[-1])

        stopper = keras.callbacks.EarlyStopping(
            monitor=monitored,
            min_delta=1e-05,
            patience=30,
            verbose=1,
            mode='min',
            restore_best_weights=True,
        )
        lr_reducer = keras.callbacks.ReduceLROnPlateau(
            monitor=monitored,
            factor=0.1,
            patience=10,
            verbose=1,
            min_lr=5e-7,
            mode='min',
        )

        model.fit(
            [x_fit], y_fit,
            validation_data=([x_val], y_val),
            epochs=n_epochs,
            batch_size=batch,
            callbacks=[stopper, lr_reducer],
            verbose=1,
        )

        eval_model(model, [x_val], y_val)

        fitted.append(model)

    return fitted

In [33]:
# Train the per-fold MLP models on the 40-week data prepared above.
models_MLP_40 = training_MLP()

In [34]:
preds_MLP_40 = [model.predict([x_test_data]) for model in models_MLP_40]

# Average over however many fold models were trained (no hard-coded count).
pred_sub_MLP_40 = np.mean(preds_MLP_40, axis=0)

# MODELO EN 20 SEMANAS

In [35]:
# Switch the global window length to 20 weeks; featuring() reads it.
n = 20

In [36]:
# Three 20-column windows starting at column positions 5, 15 and 25; each
# window's target is the ten columns that follow it.
window_starts = (5, 15, 25)

x_train = pd.concat(
    [featuring(df_train.iloc[:, start:start + 20]) for start in window_starts],
    axis=0,
).reset_index(drop=True)

y_train = pd.concat(
    [pd.DataFrame(df_train.iloc[:, start + 20:start + 30].values) for start in window_starts],
    axis=0,
).reset_index(drop=True)

# Most recent 20 columns are used to predict the next ten weeks.
x_test = featuring(df_train.iloc[:, 35:55])

In [37]:
# Sanity check: shapes of the stacked 20-week frames.
print(f'X TRAIN SHAPE: {x_train.shape}')
print(f'Y TRAIN SHAPE: {y_train.shape}')
print(f'X TEST SHAPE: {x_test.shape}')

In [38]:
# c=0: every column (including the weekly ones) takes part in the correlation filter.
x_train_data, x_test_data = data_sequence_to_models(0)

# Cap the targets at x_base. np.minimum works on both a DataFrame and an
# ndarray, so re-running this cell is harmless.
y_train = np.minimum(np.asarray(y_train), x_base)

In [39]:
# Sanity check: shapes after correlation filtering, scaling and target capping.
print(f'X TRAIN SHAPE: {x_train_data.shape}')
print(f'Y TRAIN SHAPE: {y_train.shape}')
print(f'X TEST SHAPE: {x_test_data.shape}')

In [40]:
# Train the per-fold MLP models on the 20-week data prepared above.
models_MLP_20 = training_MLP()

In [41]:
preds_MLP_20 = [model.predict([x_test_data]) for model in models_MLP_20]

# Average over however many fold models were trained (no hard-coded count).
pred_sub_MLP_20 = np.mean(preds_MLP_20, axis=0)

# MODELO BASADO EN 10 SEMANAS

In [42]:
# Switch the global window length to 10 weeks; featuring() reads it.
n = 10

In [43]:
# Four 10-column windows starting at column positions 5, 15, 25 and 35; each
# window's target is the ten columns that follow it.
window_starts = (5, 15, 25, 35)

x_train = pd.concat(
    [featuring(df_train.iloc[:, start:start + 10]) for start in window_starts],
    axis=0,
).reset_index(drop=True)

y_train = pd.concat(
    [pd.DataFrame(df_train.iloc[:, start + 10:start + 20].values) for start in window_starts],
    axis=0,
).reset_index(drop=True)

# Most recent 10 columns are used to predict the next ten weeks.
x_test = featuring(df_train.iloc[:, 45:55])

In [44]:
# Sanity check: shapes of the stacked 10-week frames.
print(f'X TRAIN SHAPE: {x_train.shape}')
print(f'Y TRAIN SHAPE: {y_train.shape}')
print(f'X TEST SHAPE: {x_test.shape}')

In [45]:
# c=0: every column (including the weekly ones) takes part in the correlation filter.
x_train_data, x_test_data = data_sequence_to_models(0)

# Cap the targets at x_base. np.minimum works on both a DataFrame and an
# ndarray, so re-running this cell is harmless.
y_train = np.minimum(np.asarray(y_train), x_base)

In [50]:
# Train the per-fold MLP models on the 10-week data prepared above.
models_MLP_10 = training_MLP()

In [51]:
preds_MLP_10 = [model.predict([x_test_data]) for model in models_MLP_10]

# Average over however many fold models were trained (no hard-coded count).
pred_sub_MLP_10 = np.mean(preds_MLP_10, axis=0)

In [52]:
# Weighted blend of the three MLP ensembles: 0.2 (40 weeks), 0.5 (20 weeks), 0.3 (10 weeks).
pred_sub_MLP = pred_sub_MLP_40 * 0.2 + pred_sub_MLP_20 * 0.5  + pred_sub_MLP_10 * 0.3

# INTEGRACIÓN DE MODELOS

In [None]:
# Naive baseline: the last ten observed weekly columns (positions 45..54).
# NOTE(review): only referenced in the commented-out part of the pred_sub cell.
pred_sub_last = df_train.iloc[:, 45:55] # .stack().reset_index(drop=True)

In [62]:
# Final prediction matrix: currently the MLP blend only.
# NOTE(review): the trailing comment looks like the remainder of a wider blend
# with the LSTM and last-weeks predictions -- confirm which blend is intended.
pred_sub = pred_sub_MLP # * 0.5 pred_sub_MLP * 0.3 + pred_sub_last * 0.2

In [63]:
# Attach the ten predicted columns to the first five (identifier) columns of
# df_train, joining on the row index.
df_submission = pd.merge(df_train.iloc[:, :5], pd.DataFrame(pred_sub), how='inner', left_index=True, right_index=True)
# The prediction columns are labelled 0..9; rename them to the forecast weeks.
df_submission = df_submission.rename(columns={
    0: 'SEMANA_51', 
    1: 'SEMANA_52',
    2: 'SEMANA_53',
    3: 'SEMANA_54',
    4: 'SEMANA_55',
    5: 'SEMANA_56',
    6: 'SEMANA_57',
    7: 'SEMANA_58',
    8: 'SEMANA_59',
    9: 'SEMANA_60'
})

# Row key: model | point of sale | range.
df_submission['BASE_ID'] = df_submission['Z_MODELO'].astype(str) + '|' + df_submission['Z_PUNTO_VENTA'].astype(str) + '|' + df_submission['Z_GAMA'].astype(str)
# Drop the five identifier columns; the week columns and BASE_ID remain.
df_submission = df_submission.iloc[:, 5:]
# Wide -> long: one row per (BASE_ID, week); the week label lands in 'level_1'.
df_submission = df_submission.set_index('BASE_ID').stack().to_frame().reset_index()
# Final ID: BASE_ID | week label.
df_submission['BASE_ID'] = df_submission['BASE_ID'].astype(str) + '|' + df_submission['level_1'].astype(str)
df_submission = df_submission.drop(['level_1'], axis=1)
df_submission.columns = ['ID', 'Demanda']

In [64]:
# Write the long-format submission (ID, Demanda) without the index column.
# NOTE(review): the file name mentions "lstm", but pred_sub is built from the MLP blend.
df_submission.to_csv('entel_lstm_1_0.csv', index=False)

In [61]:
# Compare the 99.9th percentile of the submitted predictions with that of the
# current (capped) training targets.
p = 0.999
print(df_submission.Demanda.quantile(p))
print(pd.DataFrame(y_train).stack().quantile(p))
# 76.832  (presumably a value noted from an earlier run -- TODO confirm)

<a href='./entel_lstm_1_0.csv'>download</a>

In [None]:
# Share of each rounded predicted demand value in the submission.
np.round(df_submission.Demanda).value_counts(normalize=True)