In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import os,gc
import datetime


from keras.models import Model, load_model
from keras.layers import Input, Dropout, Dense, Embedding, SpatialDropout1D, concatenate, BatchNormalization, Flatten
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing import text, sequence
from keras.callbacks import Callback
from keras import backend as K
from keras.models import Model
from keras.losses import mean_squared_error as mse_loss

from keras import optimizers
from keras.optimizers import *
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import warnings
warnings.simplefilter('ignore')

Using TensorFlow backend.


In [2]:
class RAdam(Optimizer):
    """RAdam optimizer.
    # Arguments
        learning_rate: float >= 0. Learning rate.
        beta_1: float, 0 < beta < 1. Generally close to 1.
        beta_2: float, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
        decay: float >= 0. Learning rate decay over each update.
        weight_decay: float >= 0. Weight decay for each param.
        amsgrad: boolean. Whether to apply the AMSGrad variant of this
            algorithm from the paper "On the Convergence of Adam and
            Beyond".
        total_steps: int >= 0. Total number of training steps. Enable warmup by setting a positive value.
        warmup_proportion: 0 < warmup_proportion < 1. The proportion of increasing steps.
        min_lr: float >= 0. Minimum learning rate after warmup.
    # References
        - [Adam - A Method for Stochastic Optimization](https://arxiv.org/abs/1412.6980v8)
        - [On the Convergence of Adam and Beyond](https://openreview.net/forum?id=ryQu7f-RZ)
        - [On The Variance Of The Adaptive Learning Rate And Beyond](https://arxiv.org/pdf/1908.03265v1.pdf)
    """

    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=None, decay=0., weight_decay=0., amsgrad=False,
                 total_steps=0, warmup_proportion=0.1, min_lr=0., **kwargs):
        learning_rate = kwargs.pop('lr', learning_rate)
        super(RAdam, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.learning_rate = K.variable(learning_rate, name='learning_rate')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
            self.weight_decay = K.variable(weight_decay, name='weight_decay')
            self.total_steps = K.variable(total_steps, name='total_steps')
            self.warmup_proportion = K.variable(warmup_proportion, name='warmup_proportion')
            self.min_lr = K.variable(min_lr, name='min_lr')
        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        self.initial_decay = decay
        self.initial_weight_decay = weight_decay
        self.initial_total_steps = total_steps
        self.amsgrad = amsgrad

    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr

        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        if self.initial_total_steps > 0:
            warmup_steps = self.total_steps * self.warmup_proportion
            decay_steps = K.maximum(self.total_steps - warmup_steps, 1)
            decay_rate = (self.min_lr - lr) / decay_steps
            lr = K.switch(
                t <= warmup_steps,
                lr * (t / warmup_steps),
                lr + decay_rate * K.minimum(t - warmup_steps, decay_steps),
            )

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i)) for (i, p) in enumerate(params)]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i)) for (i, p) in enumerate(params)]

        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='vhat_' + str(i)) for (i, p) in enumerate(params)]
        else:
            vhats = [K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))]

        self.weights = [self.iterations] + ms + vs + vhats

        beta_1_t = K.pow(self.beta_1, t)
        beta_2_t = K.pow(self.beta_2, t)

        sma_inf = 2.0 / (1.0 - self.beta_2) - 1.0
        sma_t = sma_inf - 2.0 * t * beta_2_t / (1.0 - beta_2_t)

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_corr_t = m_t / (1.0 - beta_1_t)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                v_corr_t = K.sqrt(vhat_t / (1.0 - beta_2_t))
                self.updates.append(K.update(vhat, vhat_t))
            else:
                v_corr_t = K.sqrt(v_t / (1.0 - beta_2_t))

            r_t = K.sqrt((sma_t - 4.0) / (sma_inf - 4.0) *
                         (sma_t - 2.0) / (sma_inf - 2.0) *
                         sma_inf / sma_t)

            p_t = K.switch(sma_t >= 5, r_t * m_corr_t / (v_corr_t + self.epsilon), m_corr_t)

            if self.initial_weight_decay > 0:
                p_t += self.weight_decay * p

            p_t = p - lr * p_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates

    @property
    def lr(self):
        return self.learning_rate

    @lr.setter
    def lr(self, learning_rate):
        self.learning_rate = learning_rate

    def get_config(self):
        config = {
            'learning_rate': float(K.get_value(self.learning_rate)),
            'beta_1': float(K.get_value(self.beta_1)),
            'beta_2': float(K.get_value(self.beta_2)),
            'decay': float(K.get_value(self.decay)),
            'weight_decay': float(K.get_value(self.weight_decay)),
            'epsilon': self.epsilon,
            'amsgrad': self.amsgrad,
            'total_steps': float(K.get_value(self.total_steps)),
            'warmup_proportion': float(K.get_value(self.warmup_proportion)),
            'min_lr': float(K.get_value(self.min_lr)),
        }
        base_config = super(RAdam, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

### feature columns

In [3]:
categoricals = ["site_id", "building_id", "primary_use", "hour", "weekend",  "meter"]
drop_cols = ["sea_level_pressure", "wind_speed", 'wind_direction']
numericals = ["square_feet", "year_built", "air_temperature", "cloud_coverage",
              "dew_temperature", "precip_depth_1_hr", "floor_count"]
feat_cols = categoricals + numericals

In [4]:
directory = '/kaggle/input/ashrae-energy-prediction'

### weather

In [5]:
def fill_weather_dataset(weather_df):
    
    # Find Missing Dates
    time_format = "%Y-%m-%d %H:%M:%S"
    start_date = datetime.datetime.strptime(weather_df['timestamp'].min(), time_format)
    end_date = datetime.datetime.strptime(weather_df['timestamp'].max(), time_format)
    total_hours = int(((end_date - start_date).total_seconds() + 3600) / 3600)
    hours_list = [(end_date - datetime.timedelta(hours=x)).strftime(time_format) for x in range(total_hours)]

    missing_hours = []
    for site_id in range(16):
        site_hours = np.array(weather_df[weather_df['site_id'] == site_id]['timestamp'])
        new_rows = pd.DataFrame(np.setdiff1d(hours_list,site_hours), columns=['timestamp'])
        new_rows['site_id'] = site_id
        weather_df = pd.concat([weather_df,new_rows])

        weather_df = weather_df.reset_index(drop=True)           

    # Add new Features
    weather_df["datetime"] = pd.to_datetime(weather_df["timestamp"])
    weather_df["day"] = weather_df["datetime"].dt.day
    weather_df["week"] = weather_df["datetime"].dt.week
    weather_df["month"] = weather_df["datetime"].dt.month
    
    # Reset Index for Fast Update
    weather_df = weather_df.set_index(['site_id', 'day', 'month'])

    air_temperature_filler = pd.DataFrame(weather_df.groupby(['site_id', 'day', 'month'])['air_temperature'].mean(),columns=["air_temperature"])
    weather_df.update(air_temperature_filler,overwrite=False)

    # Step 1
    cloud_coverage_filler = weather_df.groupby(['site_id', 'day', 'month'])['cloud_coverage'].mean()
    # Step 2
    cloud_coverage_filler = pd.DataFrame(cloud_coverage_filler.fillna(method='ffill'),columns=["cloud_coverage"])
    weather_df.update(cloud_coverage_filler,overwrite=False)

    due_temperature_filler = pd.DataFrame(weather_df.groupby(['site_id', 'day', 'month'])['dew_temperature'].mean(),columns=["dew_temperature"])
    weather_df.update(due_temperature_filler,overwrite=False)

    # Step 1
    precip_depth_filler = weather_df.groupby(['site_id', 'day', 'month'])['precip_depth_1_hr'].mean()
    # Step 2
    precip_depth_filler = pd.DataFrame(precip_depth_filler.fillna(method='ffill'),columns=['precip_depth_1_hr'])
    weather_df.update(precip_depth_filler,overwrite=False)

    weather_df = weather_df.reset_index()
    weather_df = weather_df.drop(['datetime', 'day', 'week', 'month'],axis=1)
        
    return weather_df


In [6]:
weather_train = pd.read_csv(directory+'/weather_train.csv')
weather_test = pd.read_csv(directory+'/weather_test.csv')
weather = pd.concat([weather_train, weather_test], ignore_index=True)
del weather_train, weather_test
gc.collect()
weather = fill_weather_dataset(weather)
weather['timestamp'] = pd.to_datetime(weather['timestamp'])
weather = weather.sort_values(['site_id', 'timestamp']).reset_index(drop=True)

In [7]:
site_ids_offsets = pd.DataFrame({'site_id': {0: 5,
                                              1: 0,
                                              2: 9,
                                              3: 6,
                                              4: 8,
                                              5: 0,
                                              6: 6,
                                              7: 6,
                                              8: 5,
                                              9: 7,
                                              10: 8,
                                              11: 6,
                                              12: 0,
                                              13: 7,
                                              14: 6,
                                              15: 6}})
weather['offset'] = weather['site_id'].map(site_ids_offsets['site_id'])
# add offset
weather['timestamp'] = (weather['timestamp'] - pd.to_timedelta(weather['offset'], unit='H'))
del weather['offset']

### buildings

In [8]:
building_metadata = pd.read_csv(directory+'/building_metadata.csv')

### open data

In [9]:
sample_submission = pd.read_csv(directory+'/sample_submission.csv')
df_train = pd.read_csv(directory+'/train.csv', parse_dates=['timestamp'])
df_train = df_train.query('not (building_id==1099)')

In [10]:
#Based on this great kernel https://www.kaggle.com/arjanso/reducing-dataframe-memory-size-by-65
def reduce_mem_usage(df):
    start_mem_usg = df.memory_usage().sum() / 1024**2 
    print("Memory usage of properties dataframe is :",start_mem_usg," MB")
    NAlist = [] # Keeps track of columns that have missing values filled in. 
    for col in df.columns:
        if df[col].dtype != object:  # Exclude strings            
            # Print current column type
            #print("******************************")
            #print("Column: ",col)
            #print("dtype before: ",df[col].dtype)            
            # make variables for Int, max and min
            IsInt = False
            mx = df[col].max()
            mn = df[col].min()
            #print("min for this col: ",mn)
            #print("max for this col: ",mx)
            # Integer does not support NA, therefore, NA needs to be filled
            if not np.isfinite(df[col]).all(): 
                NAlist.append(col)
                df[col].fillna(mn-1,inplace=True)  
                   
            # test if column can be converted to an integer
            asint = df[col].fillna(0).astype(np.int64)
            result = (df[col] - asint)
            result = result.sum()
            if result > -0.01 and result < 0.01:
                IsInt = True            
            # Make Integer/unsigned Integer datatypes
            if IsInt:
                if mn >= 0:
                    if mx < 255:
                        df[col] = df[col].astype(np.uint8)
                    elif mx < 65535:
                        df[col] = df[col].astype(np.uint16)
                    elif mx < 4294967295:
                        df[col] = df[col].astype(np.uint32)
                    else:
                        df[col] = df[col].astype(np.uint64)
                else:
                    if mn > np.iinfo(np.int8).min and mx < np.iinfo(np.int8).max:
                        df[col] = df[col].astype(np.int8)
                    elif mn > np.iinfo(np.int16).min and mx < np.iinfo(np.int16).max:
                        df[col] = df[col].astype(np.int16)
                    elif mn > np.iinfo(np.int32).min and mx < np.iinfo(np.int32).max:
                        df[col] = df[col].astype(np.int32)
                    elif mn > np.iinfo(np.int64).min and mx < np.iinfo(np.int64).max:
                        df[col] = df[col].astype(np.int64)    
            # Make float datatypes 32 bit
            else:
                df[col] = df[col].astype(np.float32)
            
            # Print new column type
            #print("dtype after: ",df[col].dtype)
            #print("******************************")
    # Print final result
    print("___MEMORY USAGE AFTER COMPLETION:___")
    mem_usg = df.memory_usage().sum() / 1024**2 
    print("Memory usage is: ",mem_usg," MB")
    print("This is ",100*mem_usg/start_mem_usg,"% of the initial size")
    return df, NAlist

In [11]:
building_metadata, _ = reduce_mem_usage(building_metadata)

df_train = df_train.merge(building_metadata, on='building_id', how='left')
df_train = df_train.merge(weather, on=['site_id', 'timestamp'], how='left')
df_train = df_train.query('not (building_id <= 104 & meter == 0 & timestamp <= "2016-05-20")')
df_train = df_train[df_train['air_temperature'].notnull()|df_train['cloud_coverage'].notnull()|df_train['dew_temperature'].notnull()|df_train['precip_depth_1_hr'].notnull()].reset_index(drop=True)

Memory usage of properties dataframe is : 0.0664520263671875  MB
___MEMORY USAGE AFTER COMPLETION:___
Memory usage is:  0.024995803833007812  MB
This is  37.614810562571755 % of the initial size


In [12]:
df_train["hour"] = df_train["timestamp"].dt.hour
df_train["weekend"] = df_train["timestamp"].dt.weekday
df_train['year_built'] = df_train['year_built']-1900
df_train['square_feet'] = np.log1p(df_train['square_feet'])
df_train['meter_reading'] = np.log1p(df_train['meter_reading'])

dates_range = pd.date_range(start='2015-12-31', end='2019-01-01')
us_holidays = calendar().holidays(start=dates_range.min(), end=dates_range.max())
df_train['is_holiday'] = (df_train['timestamp'].dt.date.astype('datetime64').isin(us_holidays)).astype(np.int8)

del df_train["timestamp"]
df_train = df_train[~df_train['meter_reading'].isnull()].reset_index(drop=True)
df_train['meter_reading'].isnull().sum()

0

In [13]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

le = LabelEncoder()
df_train["primary_use"] = le.fit_transform(df_train["primary_use"])

target = df_train['meter_reading']
del df_train["meter_reading"]

df_train = df_train.drop(drop_cols+['is_holiday'], axis = 1)

In [14]:
scalers = {}
for col in numericals:
    ss = StandardScaler()
    df_train[col] = ss.fit_transform(df_train[col].values.reshape((-1, 1)))
    scalers[col] = ss
    
df_train, NAlist = reduce_mem_usage(df_train)

Memory usage of properties dataframe is : 1836.4669494628906  MB
___MEMORY USAGE AFTER COMPLETION:___
Memory usage is:  662.6427917480469  MB
This is  36.0824784754144 % of the initial size


In [15]:
def DenseNN(dense_dim_1=64, dense_dim_2=32, dense_dim_3=32, dense_dim_4=32, 
dropout1=0.2, dropout2=0.12, dropout3=0.12, dropout4=0.12, lr=0.001):

    #Inputs
    site_id = Input(shape=[1], name="site_id")
    building_id = Input(shape=[1], name="building_id")
    meter = Input(shape=[1], name="meter")
    primary_use = Input(shape=[1], name="primary_use")
    hour = Input(shape=[1], name="hour")
    weekend = Input(shape=[1], name="weekend")
    
    square_feet = Input(shape=[1], name="square_feet")
    year_built = Input(shape=[1], name="year_built")
    air_temperature = Input(shape=[1], name="air_temperature")
    cloud_coverage = Input(shape=[1], name="cloud_coverage")
    dew_temperature = Input(shape=[1], name="dew_temperature")
    precip = Input(shape=[1], name="precip_depth_1_hr")
    floor_count = Input(shape=[1], name="floor_count")
    
   
    #Embeddings layers
    emb_site_id = Embedding(16, 2)(site_id)
    emb_building_id = Embedding(1449, 6)(building_id)
    emb_meter = Embedding(4, 2)(meter)
    emb_primary_use = Embedding(16, 3)(primary_use)
    emb_hour = Embedding(24, 3)(hour)
    emb_weekend = Embedding(7, 2)(weekend)

    concat_emb = concatenate([Flatten() (emb_site_id), 
                              Flatten() (emb_building_id), 
                              Flatten() (emb_meter), 
                              Flatten() (emb_primary_use), 
                              Flatten() (emb_hour), 
                              Flatten() (emb_weekend)
    ])
    
    categ = Dropout(dropout1)(Dense(dense_dim_1,activation='relu') (concat_emb))
    categ = BatchNormalization()(categ)
    categ = Dropout(dropout2)(Dense(dense_dim_2,activation='relu') (categ))
    
    #main layer
    main_l = concatenate([categ, 
                          square_feet, 
                          air_temperature, 
                          cloud_coverage, 
                          dew_temperature, 
                          precip, 
                          year_built, 
                          floor_count
                         ])
    
    main_l = Dropout(dropout3)(Dense(dense_dim_3,activation='relu') (main_l))
    main_l = BatchNormalization()(main_l)
    main_l = Dropout(dropout4)(Dense(dense_dim_4,activation='relu') (main_l))
    #main_l = BatchNormalization()(main_l)
    #output
    output = Dense(1, activation='relu') (main_l)

    model = Model([ site_id,
                    building_id, 
                    meter, 
                    primary_use, 
                    square_feet, 
                    air_temperature,
                    cloud_coverage,
                    dew_temperature, 
                    floor_count,
                    year_built,
                    hour,
                    weekend, 
                    precip,
                  ], output)

    model.compile(optimizer = 'adam', #RAdam(learning_rate=0.001),
                  loss= 'mse',
                  metrics=[rmse])
    return model

def rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=0))

In [16]:
def get_keras_data(df, num_cols, cat_cols):
    cols = num_cols + cat_cols
    X = {col: np.array(df[col]) for col in cols}
    return X

def train_model(model, X_t, y_train, batch_size, epochs, X_v, y_valid, fold):
    early_stopping = EarlyStopping(patience=7, verbose=0, monitor='val_rmse')
    model_checkpoint = ModelCheckpoint("model_" + str(fold) + ".hdf5",
                                       save_best_only=True, verbose=0, monitor='val_rmse', mode='min')
    reduce_lr = ReduceLROnPlateau(monitor='val_rmse', factor=0.5, patience=3, min_lr=1e-6, verbose=2, mode='min')
    hist = model.fit(X_t, y_train, batch_size=batch_size, epochs=epochs,
                            validation_data=(X_v, y_valid), verbose=2,
                            callbacks=[early_stopping, model_checkpoint, reduce_lr])

    model = load_model("model_" + str(fold) + ".hdf5", custom_objects={'rmse': rmse, 'RAdam':RAdam})
    
    return model

In [17]:
from sklearn.model_selection import KFold, StratifiedKFold

oof = np.zeros(len(df_train))
batch_size = 2048
epochs = 10
models = []
debug = False
folds = 4
seed = 1024

kf = KFold(n_splits=folds, shuffle=False, random_state=seed)

for fold_n, (train_index, valid_index) in enumerate(kf.split(df_train)):
    print('Fold:', fold_n)
    X_train, X_valid = df_train.iloc[train_index], df_train.iloc[valid_index]
    y_train, y_valid = target.iloc[train_index], target.iloc[valid_index]
    X_t = get_keras_data(X_train, numericals, categoricals)
    X_v = get_keras_data(X_valid, numericals, categoricals)
    
    model = DenseNN(dense_dim_1=256, dense_dim_2=64, dense_dim_3=32, dense_dim_4=32, 
                        dropout1=0.4, dropout2=0.2, dropout3=0.1, dropout4=0.1, lr=0.001)
    model = train_model(model, X_t, y_train, batch_size, epochs, X_v, y_valid, fold_n)
    oof[valid_index] = np.squeeze(model.predict(X_v))
    models.append(model)
    print('*'* 50)
    if debug:break
    

Fold: 0
Train on 14889240 samples, validate on 4963080 samples
Epoch 1/10
 - 271s - loss: 1.2610 - rmse: 1.1025 - val_loss: 1.2211 - val_rmse: 1.0909
Epoch 2/10
 - 265s - loss: 0.9665 - rmse: 0.9821 - val_loss: 1.2041 - val_rmse: 1.0841
Epoch 3/10
 - 263s - loss: 0.9338 - rmse: 0.9653 - val_loss: 1.2071 - val_rmse: 1.0853
Epoch 4/10
 - 263s - loss: 0.9206 - rmse: 0.9585 - val_loss: 1.2059 - val_rmse: 1.0840
Epoch 5/10
 - 264s - loss: 0.9135 - rmse: 0.9547 - val_loss: 1.1989 - val_rmse: 1.0806
Epoch 6/10
 - 261s - loss: 0.9085 - rmse: 0.9521 - val_loss: 1.1922 - val_rmse: 1.0781
Epoch 7/10
 - 261s - loss: 0.9049 - rmse: 0.9502 - val_loss: 1.2004 - val_rmse: 1.0816
Epoch 8/10
 - 262s - loss: 0.9012 - rmse: 0.9483 - val_loss: 1.2078 - val_rmse: 1.0855
Epoch 9/10
 - 261s - loss: 0.8988 - rmse: 0.9470 - val_loss: 1.2018 - val_rmse: 1.0822

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/10
 - 264s - loss: 0.8876 - rmse: 0.9410 - val_loss: 1.2030 - va

In [18]:
df_train['oof'] = oof
df_train[['oof']].to_csv('oof_n1.csv', index=False)

In [19]:
del df_train, target, X_train, X_valid, y_train, y_valid, X_t, X_v, kf
gc.collect()

test = pd.read_csv("../input/ashrae-energy-prediction/test.csv", parse_dates=['timestamp'])
test = test.merge(building_metadata, left_on = "building_id", right_on = "building_id", how = "left")

In [20]:
test = test.merge(weather, left_on = ["site_id", "timestamp"], right_on = ["site_id", "timestamp"], how = "left")
del weather, building_metadata
gc.collect()

test["hour"] = test["timestamp"].dt.hour
test["weekend"] = test["timestamp"].dt.weekday
test['year_built'] = test['year_built']-1900
test['square_feet'] = np.log1p(test['square_feet'])
test['is_holiday'] = (test['timestamp'].dt.date.astype('datetime64').isin(us_holidays)).astype(np.int8)
del test["timestamp"]

test["primary_use"] = le.transform(test["primary_use"])

In [21]:
for col in numericals:
    if col not in test:continue
    test[col] = scalers[col].transform(test[col].values.reshape((-1, 1)))

test = test[feat_cols]
test, NAlist = reduce_mem_usage(test)

from tqdm import tqdm
i=0
res=[]
step_size = 50000
for j in tqdm(range(int(np.ceil(test.shape[0]/50000)))):
    for_prediction = get_keras_data(test.iloc[i:i+step_size], numericals, categoricals)
    res.append(np.expm1(sum([model.predict(for_prediction) for model in models])/folds))
    i+=step_size

res = np.concatenate(res)

submission = pd.read_csv('/kaggle/input/ashrae-energy-prediction/sample_submission.csv')
submission['meter_reading'] = res
submission.loc[submission['meter_reading']<0, 'meter_reading'] = 0
submission.to_csv('submission.csv', index=False)
submission

Memory usage of properties dataframe is : 4175.422668457031  MB


  0%|          | 0/834 [00:00<?, ?it/s]

___MEMORY USAGE AFTER COMPLETION:___
Memory usage is:  1709.9349975585938  MB
This is  40.95238095238095 % of the initial size


100%|██████████| 834/834 [2:42:05<00:00, 11.66s/it]


Unnamed: 0,row_id,meter_reading
0,0,175.013351
1,1,96.971977
2,2,10.244420
3,3,269.377258
4,4,801.902161
...,...,...
41697595,41697595,8.442062
41697596,41697596,6.668690
41697597,41697597,4.882587
41697598,41697598,161.953339
