# Short 2

## Ensamble model of AutoGluon and Catboost

Name: Erlend Lokna, Student ID: 528564

Name: Johan Vik Mathisen, Student ID: 508258


Team name: Shaky Warriors

In [28]:
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
import catboost as cb
%matplotlib inline

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)



In [29]:
         
class DataSet:
    def __init__(self):
        """
        kind: observerd, estimated, train
        """

        train_a = pd.read_parquet('data/A/train_targets.parquet')
        train_b = pd.read_parquet('data/B/train_targets.parquet')
        train_c = pd.read_parquet('data/C/train_targets.parquet')

        # Estimated training data for each location
        X_train_estimated_a = pd.read_parquet('data/A/X_train_estimated.parquet')
        X_train_estimated_b = pd.read_parquet('data/B/X_train_estimated.parquet')
        X_train_estimated_c = pd.read_parquet('data/C/X_train_estimated.parquet')

        # Observed training data for each location
        X_train_observed_b = pd.read_parquet('data/B/X_train_observed.parquet')
        X_train_observed_a = pd.read_parquet('data/A/X_train_observed.parquet')
        X_train_observed_c = pd.read_parquet('data/C/X_train_observed.parquet')

        # Estimated test data for each location
        X_test_estimated_b = pd.read_parquet('data/B/X_test_estimated.parquet')
        X_test_estimated_a = pd.read_parquet('data/A/X_test_estimated.parquet')
        X_test_estimated_c = pd.read_parquet('data/C/X_test_estimated.parquet')

        Y_train = {
            'a': train_a, 
            'b':train_b, 
            'c':train_c
        }
        X_train_estimated = {
            'a':X_train_estimated_a,
            'b':X_train_estimated_b,
            'c':X_train_estimated_c
        }
        X_train_observed = {
            'a':X_train_observed_a,
            'b':X_train_observed_b,
            'c':X_train_observed_c
        }
        X_test_estimated = {
            'a':X_test_estimated_a,
            'b':X_test_estimated_b,
            'c':X_test_estimated_c
        }
        self.X_train_observed =  X_train_observed
        self.X_train_estimated = X_train_estimated
        self.X_test_estimated = X_test_estimated
        self.Y_train = Y_train

    def resample_to_hourly(self):
        for loc in ['a','b','c']:
            self.X_train_observed[loc] = to_hourly(self.X_train_observed[loc])
            self.X_train_estimated[loc] = to_hourly(self.X_train_estimated[loc])
            self.X_test_estimated[loc] = to_hourly(self.X_test_estimated[loc])


    def select_features(self, features):
        """ 
        Reduces dim by selecting only features from "features"
        This will remove "date_calc" from est.
        """
        for loc in ['a','b','c']:
            self.X_train_observed[loc] = self.X_train_observed[loc][features]
            self.X_train_estimated[loc] = self.X_train_estimated[loc][features]
            self.X_test_estimated[loc] = self.X_test_estimated[loc][features]

    def add_type(self):
        """
        0: Estimated data
        1: Observed data
        """
        for loc in ['a','b','c']:
            type_vec_X_tr = [1] * len(self.X_train_observed[loc])
            self.X_train_observed[loc]['type'] = type_vec_X_tr

            type_vec_X_tr_e = [0] * len(self.X_train_estimated[loc])
            self.X_train_estimated[loc]['type'] = type_vec_X_tr_e

            type_vec_X_te = [0] * len(self.X_test_estimated[loc])
            self.X_test_estimated[loc]['type'] = type_vec_X_te


    def add_location(self):
        """
        Adds a categorical feature "location" equal to the input string location.
        """
        for loc in ['a','b','c']:
            loc_vec_X_tr = [loc] * len(self.X_train_observed[loc])
            self.X_train_observed[loc]['location'] = loc_vec_X_tr

            loc_vec_X_tr_e = [loc] * len(self.X_train_estimated[loc])
            self.X_train_estimated[loc]['location'] = loc_vec_X_tr_e

            loc_vec_X_te = [loc] * len(self.X_test_estimated[loc])
            self.X_test_estimated[loc]['location'] = loc_vec_X_te

    def remove_nans(self, feature):
        for loc in ['a','b','c']:
            cols = self.X_train_observed['a'].columns
            if feature in cols:
                self.X_train_observed[loc] = self.X_train_observed[loc].dropna(subset = [feature], how = 'all')
                self.X_train_estimated[loc] = self.X_train_estimated[loc].dropna(subset = [feature], how = 'all')
                self.X_test_estimated[loc] = self.X_test_estimated[loc].dropna(subset = [feature], how = 'all')
            else:
                print("Feature not in data frame.")

    def combine_obs_est(self):
        """
        Concatinates the estimated and observed data. 
        Removes data_calc from est.
        """

        obs_a = self.X_train_observed['a']
        est_a = self.X_train_estimated['a']

        obs_b = self.X_train_observed['b']
        est_b = self.X_train_estimated['b']

        obs_c = self.X_train_observed['c']
        est_c = self.X_train_estimated['c']

        self.X_train = {
        'a':pd.concat([obs_a, est_a]),
        'b':pd.concat([obs_b, est_b]),
        'c':pd.concat([obs_c, est_c])
        }

        self.X_train['a'] = self.X_train['a'].reset_index(drop=True)
        self.X_train['b'] = self.X_train['b'].reset_index(drop=True)
        self.X_train['c'] = self.X_train['c'].reset_index(drop=True)

        self.X_train['a'], self.Y_train['a'] = match_X_Y(self.X_train['a'], self.Y_train['a'])
        self.X_train['b'], self.Y_train['b'] = match_X_Y(self.X_train['b'], self.Y_train['b'])
        self.X_train['c'], self.Y_train['c'] = match_X_Y(self.X_train['c'], self.Y_train['c'])
    
    def train_test(self):
        """
        Vanilla split. 
        """
        X_a = self.X_train['a']
        X_b = self.X_train['b']
        X_c = self.X_train['c']

        y_a = self.Y_train['a']
        y_b = self.Y_train['b']
        y_c = self.Y_train['c']

        y_train = pd.concat([y_a, y_b, y_c])
        y_train = y_train.reset_index(drop=True)

        X_train = pd.concat([X_a, X_b, X_c])
        X_test = pd.concat([self.X_test_estimated['a'], self.X_test_estimated['b'],self.X_test_estimated['c']])
        
        return X_train, X_test, y_train

    def scale_y_train(self, k_b = 5, k_c = 6):

        self.Y_train['b'] = self.Y_train['b'] * k_b 
        self.Y_train['c'] = self.Y_train['c']* k_c

    def drop_bad_data(self):
        for loc in ['a', 'b', 'c']:
            y_ind = get_constant_indices(self.Y_train[loc])
            self.Y_train[loc].drop(y_ind, errors='ignore')
            self.X_train[loc].drop(y_ind, errors='ignore')


    def cyclic_time_encoding(self):
        for loc in ['a', 'b', 'c']:
            for time_feature in ["time", "date_forecast"]:
                if time_feature in self.X_train[loc].columns:
                    self.X_train[loc]['sin_hour'] = np.sin(2*np.pi*self.X_train[loc][time_feature].dt.hour/24)
                    self.X_train[loc]['sin_month'] = np.sin(2*np.pi*self.X_train[loc][time_feature].dt.month/12)

                    self.X_train[loc]['cos_hour'] = np.cos(2*np.pi*self.X_train[loc][time_feature].dt.hour/24)
                    self.X_train[loc]['cos_month'] = np.cos(2*np.pi*self.X_train[loc][time_feature].dt.month/12)
                if time_feature in self.X_test_estimated[loc].columns:    
                    self.X_test_estimated[loc]['sin_hour'] = np.sin(2*np.pi*self.X_test_estimated[loc][time_feature].dt.hour/24)
                    self.X_test_estimated[loc]['sin_month'] = np.sin(2*np.pi*self.X_test_estimated[loc][time_feature].dt.month/12)

                    self.X_test_estimated[loc]['cos_hour'] = np.cos(2*np.pi*self.X_test_estimated[loc][time_feature].dt.hour/24)
                    self.X_test_estimated[loc]['cos_month'] = np.cos(2*np.pi*self.X_test_estimated[loc][time_feature].dt.month/12)

#Helper functions

def match_X_Y(X,Y):
    """ 
    date_forecast and time must be unique!
    Matches the timestamps of X to the timestamps of Y. 
    Makes sure that the length of X and Y are equal.
    """
    Y = Y.dropna()
    X = X.rename(columns={'date_forecast': 'time'})
    merge_df = Y.merge(X, on="time", how='inner')
    Y = merge_df['pv_measurement']
    X = merge_df.drop(columns = ['pv_measurement'])
    return X,Y

def to_hourly(df):
    df['date_forecast']
    df.set_index('date_forecast', inplace=True)
    df = df.resample('H').mean()
    df.reset_index(inplace=True)
    return df

def make_categorical(data, feature_list):
    for feature in feature_list:
        data[feature] = data[feature].astype('category')


def ReLU(x):
    return x * (x > 0)

def remap(x):
    if x<0.5:
        return 0
    else:
        return 1


def get_constant_indices(ser):
    mask = (ser != 0)
    constant_periods = ser[mask].groupby((ser[mask] != ser[mask].shift()).cumsum()).cumcount().add(1)
    
    drop_mask = constant_periods >= 12
    return constant_periods[drop_mask].index

In [30]:
selected_features = ['date_forecast', 'absolute_humidity_2m:gm3',
       'clear_sky_energy_1h:J', 'clear_sky_rad:W',
       'cloud_base_agl:m', 'dew_or_rime:idx', 'dew_point_2m:K',
       'diffuse_rad:W', 'diffuse_rad_1h:J', 'direct_rad:W', 'direct_rad_1h:J',
       'effective_cloud_cover:p', 'elevation:m', 'fresh_snow_12h:cm',
       'fresh_snow_1h:cm', 'fresh_snow_24h:cm', 'fresh_snow_3h:cm',
       'fresh_snow_6h:cm', 'is_in_shadow:idx', 'is_day:idx', 
       'msl_pressure:hPa', 'precip_5min:mm', 'precip_type_5min:idx',
       'pressure_100m:hPa', 'pressure_50m:hPa', 'prob_rime:p',
       'rain_water:kgm2', 'relative_humidity_1000hPa:p', 'sfc_pressure:hPa',
       'snow_depth:cm', 'snow_drift:idx',
       'snow_melt_10min:mm', 'snow_water:kgm2', 'sun_azimuth:d',
       'sun_elevation:d', 'super_cooled_liquid_water:kgm2', 't_1000hPa:K',
       'total_cloud_cover:p', 'visibility:m', 'wind_speed_10m:ms',
       'wind_speed_u_10m:ms', 'wind_speed_v_10m:ms', 'wind_speed_w_1000hPa:ms']

made_features = ['location', 'type', 'is_day:idx', 'is_in_shadow:idx', 'dew_or_rime:idx']

drop_feature = 'diffuse_rad:W'

In [31]:
data_collection = DataSet()
data_collection.select_features(selected_features)
data_collection.resample_to_hourly()
data_collection.remove_nans(drop_feature)
data_collection.add_location()
data_collection.add_type()

data_collection.combine_obs_est()
data_collection.drop_bad_data()
data_collection.cyclic_time_encoding()

In [32]:
X_a = data_collection.X_train['a']
X_b = data_collection.X_train['b']
X_c = data_collection.X_train['c']

y_a = data_collection.Y_train['a']
y_b = data_collection.Y_train['b']
y_c = data_collection.Y_train['c']

for f in made_features:
    if f not in ['location', 'type']:
        X_a[f] = X_a[f].map(remap)
        X_b[f] = X_b[f].map(remap)
        X_c[f] = X_c[f].map(remap)

make_categorical(X_a,made_features)
make_categorical(X_b,made_features)
make_categorical(X_c,made_features)


In [33]:

drop_cols = ['location', 'time']

df_a = pd.concat([X_a, y_a], axis=1).drop(columns=drop_cols)
df_b = pd.concat([X_b, y_b], axis=1).drop(columns=drop_cols)
df_c = pd.concat([X_c, y_c], axis=1).drop(columns=drop_cols)


In [34]:
seed = 246

data = dict()

# sample 50% of the data for each building with type = 0
df_a_tune = df_a[df_a['type'] == 0].sample(frac=0.5, random_state=seed)
df_b_tune = df_b[df_b['type'] == 0].sample(frac=0.5, random_state=seed)   
df_c_tune = df_c[df_c['type'] == 0].sample(frac=0.5, random_state=seed)

# drop these rows from the original data
df_a_train = df_a.drop(df_a_tune.index)
df_b_train = df_b.drop(df_b_tune.index)
df_c_train = df_c.drop(df_c_tune.index)

data['a'] = [df_a_train, df_a_tune]
data['b'] = [df_b_train, df_b_tune]
data['c'] = [df_c_train, df_c_tune]

# Model

In [35]:
#3 hours (per model)
time_in_sek = 60*60*2



In [36]:
label = 'pv_measurement'
predictor_a = TabularPredictor(label=label, eval_metric='mae').fit(
    train_data = data['a'][0], 
    time_limit = time_in_sek,
    presets='best_quality',
    num_bag_folds=8,
    num_stack_levels=0,
    tuning_data = data['a'][1],
    use_bag_holdout= True
)

No path specified. Models will be saved in: "AutogluonModels/ag-20231109_184819/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=20
Beginning AutoGluon training ... Time limit = 6s
AutoGluon will save models to "AutogluonModels/ag-20231109_184819/"
AutoGluon Version:  0.8.2
Python Version:     3.10.7
Operating System:   Darwin
Platform Machine:   x86_64
Platform Version:   Darwin Kernel Version 19.6.0: Thu Jan 13 01:26:33 PST 2022; root:xnu-6153.141.51~3/RELEASE_X86_64
Disk Space Avail:   245.31 GB / 500.07 GB (49.1%)
Train Data Rows:    31864
Train Data Columns: 47
Tuning Data Rows:    2197
Tuning Data Columns: 47
Label Column: pv_measurement
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (5733.42, 0.0, 650.65332, 1179.83452)
	If 'regression' is not the corre

In [37]:
predictor_b = TabularPredictor(label=label, eval_metric='mae').fit(
    train_data = data['b'][0], 
    time_limit = time_in_sek,
    presets='best_quality',
    num_bag_folds=8,
    num_stack_levels=0,
    tuning_data = data['b'][1],
    use_bag_holdout=True
)

No path specified. Models will be saved in: "AutogluonModels/ag-20231109_184826/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=20
Beginning AutoGluon training ... Time limit = 6s
AutoGluon will save models to "AutogluonModels/ag-20231109_184826/"
AutoGluon Version:  0.8.2
Python Version:     3.10.7
Operating System:   Darwin
Platform Machine:   x86_64
Platform Version:   Darwin Kernel Version 19.6.0: Thu Jan 13 01:26:33 PST 2022; root:xnu-6153.141.51~3/RELEASE_X86_64
Disk Space Avail:   245.29 GB / 500.07 GB (49.1%)
Train Data Rows:    31019
Train Data Columns: 47
Tuning Data Rows:    1800
Tuning Data Columns: 47
Label Column: pv_measurement
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (1152.3, -0.0, 99.69624, 196.54802)
	If 'regression' is not the correct

In [38]:
predictor_c = TabularPredictor(label=label, eval_metric='mae').fit(
    train_data = data['c'][0], 
    time_limit = time_in_sek,
    presets='best_quality',
    num_bag_folds=8,
    num_stack_levels=0,
    tuning_data = data['c'][1],
    use_bag_holdout=True
)

No path specified. Models will be saved in: "AutogluonModels/ag-20231109_184834/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=20
Beginning AutoGluon training ... Time limit = 6s
AutoGluon will save models to "AutogluonModels/ag-20231109_184834/"
AutoGluon Version:  0.8.2
Python Version:     3.10.7
Operating System:   Darwin
Platform Machine:   x86_64
Platform Version:   Darwin Kernel Version 19.6.0: Thu Jan 13 01:26:33 PST 2022; root:xnu-6153.141.51~3/RELEASE_X86_64
Disk Space Avail:   245.27 GB / 500.07 GB (49.0%)
Train Data Rows:    24606
Train Data Columns: 47
Tuning Data Rows:    1465
Tuning Data Columns: 47
Label Column: pv_measurement
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and label-values can't be converted to int).
	Label info (max, min, mean, stddev): (999.6, 0.0, 79.70535, 168.37633)
	If 'regression' is not the corr

In [39]:
predictor_a.refit_full()
predictor_b.refit_full()
predictor_c.refit_full()

Refitting models via `predictor.refit_full` using all of the data (combined train and validation)...
	Models trained in this way will have the suffix "_FULL" and have NaN validation score.
	This process is not bound by time_limit, but should take less time than the original `predictor.fit` call.
	To learn more, refer to the `.refit_full` method docstring which explains how "_FULL" models differ from normal models.
Fitting 1 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1_FULL ...
	0.06s	 = Training   runtime
Fitting 1 L1 models ...
Fitting model: KNeighborsDist_BAG_L1_FULL ...
	0.05s	 = Training   runtime
Fitting model: WeightedEnsemble_L2_FULL | Skipping fit via cloning parent ...
	0.22s	 = Training   runtime
Updated best model to "WeightedEnsemble_L2_FULL" (Previously "WeightedEnsemble_L2"). AutoGluon will default to using "WeightedEnsemble_L2_FULL" for predict() and predict_proba().
Refit complete, total runtime = 0.31s
Refitting models via `predictor.refit_full` using all of the

{'KNeighborsUnif_BAG_L1': 'KNeighborsUnif_BAG_L1_FULL',
 'KNeighborsDist_BAG_L1': 'KNeighborsDist_BAG_L1_FULL',
 'WeightedEnsemble_L2': 'WeightedEnsemble_L2_FULL'}

In [40]:
predictor_a.leaderboard(silent=True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,-170.212313,5.421933,0.382748,0.000973,0.223835,2,True,3
1,KNeighborsDist_BAG_L1,-170.236486,2.651033,0.0752,2.651033,0.0752,1,True,2
2,KNeighborsUnif_BAG_L1,-170.539814,2.769927,0.083713,2.769927,0.083713,1,True,1
3,WeightedEnsemble_L2_FULL,,,0.333289,,0.223835,2,True,6
4,KNeighborsUnif_BAG_L1_FULL,,,0.059974,,0.059974,1,True,4
5,KNeighborsDist_BAG_L1_FULL,,,0.04948,,0.04948,1,True,5


In [41]:
predictor_b.leaderboard(silent=True)

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,-30.552871,6.074384,0.349036,0.00065,0.181784,2,True,3
1,KNeighborsUnif_BAG_L1,-30.576173,3.2188,0.076689,3.2188,0.076689,1,True,1
2,KNeighborsDist_BAG_L1,-30.624493,2.854934,0.090563,2.854934,0.090563,1,True,2
3,WeightedEnsemble_L2_FULL,,,0.269425,,0.181784,2,True,6
4,KNeighborsUnif_BAG_L1_FULL,,,0.042561,,0.042561,1,True,4
5,KNeighborsDist_BAG_L1_FULL,,,0.04508,,0.04508,1,True,5


In [42]:
predictor_c.leaderboard(silent=True)


Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,KNeighborsDist_BAG_L1,-20.778725,1.742053,0.064517,1.742053,0.064517,1,True,2
1,WeightedEnsemble_L2,-20.778725,1.742646,0.178675,0.000593,0.114158,2,True,3
2,KNeighborsUnif_BAG_L1,-20.877947,1.831164,0.162366,1.831164,0.162366,1,True,1
3,WeightedEnsemble_L2_FULL,,,0.154097,,0.114158,2,True,6
4,KNeighborsUnif_BAG_L1_FULL,,,0.043888,,0.043888,1,True,4
5,KNeighborsDist_BAG_L1_FULL,,,0.039939,,0.039939,1,True,5


# Predictions

In [43]:
test_a = data_collection.X_test_estimated['a'].drop(columns=['location', 'date_forecast'])
test_b = data_collection.X_test_estimated['b'].drop(columns=['location', 'date_forecast'])
test_c = data_collection.X_test_estimated['c'].drop(columns=['location', 'date_forecast'])


In [44]:
y_pred_a = predictor_a.predict(test_a)
y_pred_b = predictor_b.predict(test_b)
y_pred_c = predictor_c.predict(test_c)

In [45]:
final_pred = pd.concat([y_pred_a, y_pred_b, y_pred_c]).reset_index(drop=True)
final_pred_AutoGluon = ReLU(final_pred)

### CatBoost

In [46]:
selected_features = ['date_forecast', 'absolute_humidity_2m:gm3',
       'air_density_2m:kgm3', 'clear_sky_energy_1h:J',
       'clear_sky_rad:W', 'dew_or_rime:idx',
       'dew_point_2m:K', 'diffuse_rad:W', 'diffuse_rad_1h:J', 'direct_rad:W',
       'direct_rad_1h:J', 'effective_cloud_cover:p', 'elevation:m',
       'fresh_snow_6h:cm', 'is_day:idx',
       'is_in_shadow:idx', 'msl_pressure:hPa', 'precip_5min:mm',
       'pressure_100m:hPa', 'pressure_50m:hPa',
       'prob_rime:p', 'rain_water:kgm2', 'relative_humidity_1000hPa:p',
       'sfc_pressure:hPa', 'snow_depth:cm',
       'sun_azimuth:d', 'sun_elevation:d', 'super_cooled_liquid_water:kgm2',
       't_1000hPa:K', 'total_cloud_cover:p', 'visibility:m',
       'wind_speed_10m:ms', 'wind_speed_u_10m:ms', 'wind_speed_v_10m:ms',
       'wind_speed_w_1000hPa:ms']

made_features = ['location', 'type', 'is_day:idx', 'is_in_shadow:idx', 'dew_or_rime:idx']

drop_feature = 'diffuse_rad:W'


#Loading all data
data_collection = DataSet()
#Preprocessing
data_collection.select_features(selected_features)
data_collection.resample_to_hourly()
data_collection.remove_nans(drop_feature)
data_collection.add_location()
data_collection.add_type()
data_collection.combine_obs_est()
data_collection.drop_bad_data()
data_collection.cyclic_time_encoding()

k_b = 5
k_c = 6
data_collection.scale_y_train(k_b = k_b, k_c = k_c)

X_train, X_test, y_train = data_collection.train_test()

for f in made_features:
    if f not in ['location', 'type']:
        X_train[f] = X_train[f].map(remap)
        X_test[f] = X_test[f].map(remap)

make_categorical(X_train,made_features)
X_train = X_train.drop('time', axis=1)

make_categorical(X_test,made_features)
X_test = X_test.drop('date_forecast', axis=1)

train_pool = cb.Pool(
    X_train,
    y_train,
    cat_features = made_features
)
test_pool = cb.Pool(
    X_test,
    cat_features = made_features
)

model = cb.CatBoostRegressor(
    iterations = 10000,
    depth = 9,
    learning_rate =0.005,
    loss_function ='MAE',
    cat_features = made_features
)

In [47]:
#train the model
model.fit(train_pool, silent=True)
# make the prediction using the resulting model
preds = model.predict(test_pool)

In [48]:
#scale back
length = int((X_test.shape[0]/3))
pred_a = preds[:length]
pred_b = preds[length:2*length] / k_b
pred_c = preds[2*length:3*length] / k_c
preds = np.concatenate([pred_a,pred_b, pred_c])
#Drop negative values
final_pred_cb = ReLU(preds)

### Combining for final result

In [49]:
final_pred_AutoGluon = pd.DataFrame({'predictions':final_pred_AutoGluon})
final_pred_AutoGluon['predictions'] = final_pred_AutoGluon['predictions'].apply(lambda x: 0 if x < 5 else x)

final_pred_AutoGluon.reset_index()
final_pred_AutoGluon = final_pred_AutoGluon.rename(columns={'index': 'id'})

final_pred_cb = pd.DataFrame({'predictions':final_pred_cb})
final_pred_cb['predictions'] = final_pred_cb['predictions'].apply(lambda x: 0 if x < 5 else x)

final_pred_cb.reset_index()
final_pred_cb = final_pred_cb.rename(columns={'index': 'id'})

In [50]:
final_pred = 0.5*(final_pred_AutoGluon + final_pred_cb)

final_pred.to_csv('Delivered_preds/final_cb_autoG.csv', index=True)
# 

Unnamed: 0,predictions
0,0.000000
1,0.000000
2,0.000000
3,24.018281
4,100.109775
...,...
2155,34.383669
2156,3.974511
2157,4.771526
2158,0.000000
