In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import sklearn
from sklearn import svm, preprocessing
from sklearn.impute import SimpleImputer
import h2o
from h2o.automl import H2OAutoML

### Objective 

Because we consider the revenue a continuous variable, we call this a _regression_ problem. It is common to evaluate regression problems according to some deviation measure of the error (difference) between the predictions and the ground truth values. Typical choices are Mean Squared Error (MSE) and its square root, the Root Mean Squared Error (RMSE).

However, both of these measures are quite sensitive to extreme values and work best if the typical scale of prediction errors are consistent across the dataset. This is not likely to be the case here because the revenue variable varies a lot. This means that a, say 10%, prediction error would matter a lot more if it is for one of the higher-earning stores than for one of the lower-earning ones. Consequently, we will use a variation that takes a log transform of the target variable before computing prediction errors.

**TL;DR**: submissions for this problem will be evaluated according to the `Root Mean Squared Log Error` (RMSLE):

- $\text{RMSLE}(y, \hat{y}) = \sqrt{\frac{1}{n} \sum_{i=1}^{n} (\log(1 + \hat{y}_i) - \log(1 + y_i))^2}$

In the equation above, $y_i$ corresponds to the ground truth value for datapoint $i$, $\hat{y}_i$ corresponds to the predicted value for datapoint $i$, and $n$ denotes the total number of datapoints (dimensionality of $y$, $\hat{y}$). See the cell below for an implementation.



In [23]:
def rmsle(y_true, y_pred):
    """
    Computes the Root Mean Squared Logarithmic Error 
    
    Args:
        y_true (np.array): n-dimensional vector of ground-truth values 
        y_pred (np.array): n-dimensional vecotr of predicted values 
    
    Returns:
        A scalar float with the rmsle value 
    
    Note: You can alternatively use sklearn and just do: 
        `sklearn.metrics.mean_squared_log_error(y_true, y_pred) ** 0.5`
    """
    assert (y_true >= 0).all(), 'Received negative y_true values'
    assert (y_pred >= 0).all(), 'Received negative y_pred values'
    assert y_true.shape == y_pred.shape, 'y_true and y_pred have different shapes'
    y_true_log1p = np.log1p(y_true)  # log(1 + y_true)
    y_pred_log1p = np.log1p(y_pred)  # log(1 + y_pred)
    return np.sqrt(np.mean(np.square(y_pred_log1p - y_true_log1p)))

In [77]:
df1= pd.read_csv('C:/Users/aminp/OneDrive/Dokumenter/NTNU/4år/Machinelearning/machine_learning/data/train_data/dataset_train_1.csv')
df2= pd.read_csv('C:/Users/aminp/OneDrive/Dokumenter/NTNU/4år/Machinelearning/machine_learning/data/train_data/dataset_train_2.csv')


df_train = pd.read_csv('C:/Users/aminp/OneDrive/Dokumenter/NTNU/4år/Machinelearning/machine_learning/data/train_data/dataset_train_without_rev_features.csv')
df_test = pd.read_csv('C:/Users/aminp/OneDrive/Dokumenter/NTNU/4år/Machinelearning/machine_learning/data/train_data/dataset_test_without_rev_features.csv')





In [3]:
h2o.init(ip="127.0.0.1",port="54321")



Checking whether there is an H2O instance running at http://127.0.0.1:54321 . connected.


0,1
H2O_cluster_uptime:,42 secs
H2O_cluster_timezone:,Europe/Berlin
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.38.0.2
H2O_cluster_version_age:,4 days
H2O_cluster_name:,H2O_from_python_aminp_hqvdpk
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,1.753 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


In [69]:
train_df = h2o.import_file("C:/Users/aminp/OneDrive/Dokumenter/NTNU/4år/Machinelearning/machine_learning/data/train_data/dataset_train_without_rev_features.csv")
test_df = h2o.import_file("C:/Users/aminp/OneDrive/Dokumenter/NTNU/4år/Machinelearning/machine_learning/data/train_data/dataset_test_without_rev_features.csv")





Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [72]:
train_df2 = train_df.drop(["store_id", "C1"])
train_df3 = train_df2.drop([0], axis = 0)
test_df2 = test_df.drop("C1")
test_df3 = test_df2.drop([0], axis = 0)
test_df3

store_id,grunnkrets_id_population_count,district_name_population_count,municipality_name_population_count,grunnkrets_name_num_kids,grunnkrets_name_num_kids+,grunnkrets_name_num_youths,grunnkrets_name_num_youthAdult,grunnkrets_name_num_adult,grunnkrets_name_num_adults+,grunnkrets_name_num_pensinors,grunnkrets_name_kids_%,grunnkrets_name_kids+_%,grunnkrets_name_youths_%,grunnkrets_name_youthAdult_%,grunnkrets_name_adult_%,grunnkrets_name_adults+_%,grunnkrets_name_pensinors_%,district_name_num_kids,district_name_num_kids+,district_name_num_youths,district_name_num_youthAdult,district_name_num_adult,district_name_num_adults+,district_name_num_pensinors,district_name_kids_%,district_name_kids+_%,district_name_youths_%,district_name_youthAdult_%,district_name_adult_%,district_name_adults+_%,district_name_pensinors_%,municipality_name_num_kids,municipality_name_num_kids+,municipality_name_num_youths,municipality_name_num_youthAdult,municipality_name_num_adult,municipality_name_num_adults+,municipality_name_num_pensinors,municipality_name_kids_%,municipality_name_kids+_%,municipality_name_youths_%,municipality_name_youthAdult_%,municipality_name_adult_%,municipality_name_adults+_%,municipality_name_pensinors_%,grunnkrets_name_couple_children_0_to_5_years,grunnkrets_name_couple_children_18_or_above,grunnkrets_name_couple_children_6_to_17_years,grunnkrets_name_couple_without_children,grunnkrets_name_single_parent_children_0_to_5_years,grunnkrets_name_single_parent_children_18_or_above,grunnkrets_name_single_parent_children_6_to_17_years,grunnkrets_name_singles,grunnkrets_name_%_dist_of_couple_children_0_to_5_years,grunnkrets_name_%_dist_of_couple_children_18_or_above,grunnkrets_name_%_dist_of_couple_children_6_to_17_years,grunnkrets_name_%_dist_of_couple_without_children,grunnkrets_name_%_dist_of_single_parent_children_0_to_5_years,grunnkrets_name_%_dist_of_single_parent_children_18_or_above,grunnkrets_name_%_dist_of_single_parent_children_6_to_17_years,grunnkrets_name_%_dist_of_singles,district_name_couple_children_0_to_5_years,district_name_couple_children_18_or_above,district_name_couple_children_6_to_17_years,district_name_couple_without_children,district_name_single_parent_children_0_to_5_years,district_name_single_parent_children_18_or_above,district_name_single_parent_children_6_to_17_years,district_name_singles,district_name_%_dist_of_couple_children_0_to_5_years,district_name_%_dist_of_couple_children_18_or_above,district_name_%_dist_of_couple_children_6_to_17_years,district_name_%_dist_of_couple_without_children,district_name_%_dist_of_single_parent_children_0_to_5_years,district_name_%_dist_of_single_parent_children_18_or_above,district_name_%_dist_of_single_parent_children_6_to_17_years,district_name_%_dist_of_singles,municipality_name_couple_children_0_to_5_years,municipality_name_couple_children_18_or_above,municipality_name_couple_children_6_to_17_years,municipality_name_couple_without_children,municipality_name_single_parent_children_0_to_5_years,municipality_name_single_parent_children_18_or_above,municipality_name_single_parent_children_6_to_17_years,municipality_name_singles,municipality_name_%_dist_of_couple_children_0_to_5_years,municipality_name_%_dist_of_couple_children_18_or_above,municipality_name_%_dist_of_couple_children_6_to_17_years,municipality_name_%_dist_of_couple_without_children,municipality_name_%_dist_of_single_parent_children_0_to_5_years,municipality_name_%_dist_of_single_parent_children_18_or_above,municipality_name_%_dist_of_single_parent_children_6_to_17_years,municipality_name_%_dist_of_singles,grunnkrets_id_lv1_desc_pop_per_num_stores,grunnkrets_id_lv2_desc_pop_per_num_stores,grunnkrets_id_lv3_desc_pop_per_num_stores,grunnkrets_id_lv4_desc_pop_per_num_stores,district_name_lv1_desc_pop_per_num_stores,district_name_lv2_desc_pop_per_num_stores,district_name_lv3_desc_pop_per_num_stores,district_name_lv4_desc_pop_per_num_stores,municipality_name_lv1_desc_pop_per_num_stores,municipality_name_lv2_desc_pop_per_num_stores,municipality_name_lv3_desc_pop_per_num_stores,municipality_name_lv4_desc_pop_per_num_stores,grunnkrets_name_density,district_name_density,municipality_name_density,is_mall,is_chain,grunnkrets_id_lv1_desc,grunnkrets_id_lv2_desc,grunnkrets_id_lv3_desc,grunnkrets_id_lv4_desc,district_name_lv1_desc,district_name_lv2_desc,district_name_lv3_desc,district_name_lv4_desc,municipality_name_lv1_desc,municipality_name_lv2_desc,municipality_name_lv3_desc,municipality_name_lv4_desc,revenue,number_of_all_stores,number_of_lv1_desc,number_of_lv2_desc,number_of_lv3_desc,number_of_lv4_desc,distance_to_mangler_viktighetsnivÃƒÆ’Ã†â€™Ãƒâ€šÃ‚Â¥,distance_to_standard_holdeplass,distance_to_lokalt_knutepunkt,distance_to_regionalt_knutepunkt,distance_to_annen_viktig_holdeplass,distance_to_nasjonalt_knutepunkt,number_of_all_stop_types,number_of_mangler_viktighetsnivÃƒÆ’Ã†â€™Ãƒâ€šÃ‚Â¥,number_of_standard_holdeplass,number_of_lokalt_knutepunkt,number_of_regionalt_knutepunkt,number_of_annen_viktig_holdeplass,number_of_nasjonalt_knutepunkt
914206820-914239427-717245,970.0,5836,665566,111.0,75.0,38.0,97.0,171.0,387.0,91.0,0.114433,0.0773196,0.0391753,0.1,0.176289,0.398969,0.0938144,564,357,211,512,1403,2212,577,0.0966415,0.061172,0.0361549,0.0877313,0.240404,0.379027,0.0988691,59059,41460,29076,71760,145667,214521,104023,0.088735,0.0622928,0.0436861,0.107818,0.218862,0.322314,0.156293,251,47,140,166,26,20,68,293,0.248269,0.0464886,0.138477,0.164194,0.0257171,0.0197824,0.0672601,0.289812,1184,182,684,1104,167,106,312,2062,0.204103,0.0313739,0.117911,0.190312,0.0287881,0.0182727,0.0537838,0.355456,131236,32765,101503,139078,13225,15179,26785,191856,0.201397,0.0502818,0.155769,0.213432,0.0202954,0.023294,0.0411048,0.294426,970.0,970.0,970.0,970.0,1167.2,1167.2,5836,5836,1770.12,2429.07,95080.9,95080.9,17009.4,11025.2,5228.49,False,False,1,1,1,1,5,5,1,1,376,274,7,7,,1064,364,265,5,5,0.00023121,0.0301654,0.0102815,0.0138859,0.159301,0.0264716,1168,1054,27,64,13,0,10
916789157-916823770-824309,,6668,52837,,,,,,,,,,,,,,,546,469,453,638,772,2133,1657,0.0818836,0.0703359,0.0679364,0.0956809,0.115777,0.319886,0.2485,4109,3656,3333,5432,6570,17469,12268,0.0777675,0.0691939,0.0630808,0.102807,0.124345,0.330621,0.232186,0,0,4,4,0,0,0,4,0.0,0.0,0.333333,0.333333,0.0,0.0,0.0,0.333333,1137,520,1229,1570,211,207,394,1150,0.177158,0.0810221,0.191493,0.244624,0.0328763,0.032253,0.0613898,0.179184,8681,4021,10454,12704,1206,1420,2643,10704,0.16748,0.0775761,0.201686,0.245095,0.023267,0.0273957,0.0509907,0.206509,,,,,3334.0,3334.0,6668,6668,4403.08,6604.62,52837.0,52837.0,,2151.98,687.982,False,True,1,1,1,1,2,2,1,1,12,8,1,1,,114,26,17,0,0,0.0224221,0.00101368,0.0193961,0.0192575,0.135651,0.489463,380,83,287,3,7,0,0
913341082-977479363-2948,2421.0,32673,665566,219.0,199.0,155.0,217.0,381.0,840.0,410.0,0.0904585,0.0821974,0.0640231,0.0896324,0.157373,0.346964,0.169352,2902,2657,2383,3329,4267,10770,6365,0.0888195,0.081321,0.0729348,0.101888,0.130597,0.32963,0.194809,59059,41460,29076,71760,145667,214521,104023,0.088735,0.0622928,0.0436861,0.107818,0.218862,0.322314,0.156293,403,111,412,366,141,59,155,788,0.165503,0.0455852,0.169199,0.150308,0.0579055,0.02423,0.063655,0.323614,6726,2770,7474,5654,716,949,1491,6630,0.207529,0.0854674,0.230608,0.174452,0.0220919,0.0292811,0.0460043,0.204566,131236,32765,101503,139078,13225,15179,26785,191856,0.201397,0.0502818,0.155769,0.213432,0.0202954,0.023294,0.0411048,0.294426,2421.0,2421.0,2421.0,2421.0,16336.5,16336.5,32673,32673,1770.12,2429.07,95080.9,95080.9,10231.3,3015.19,5228.49,True,True,1,1,1,1,2,2,1,1,376,274,7,7,,166,35,30,0,0,0.0036903,0.0174625,0.0126024,0.0783459,0.024432,0.171475,564,522,11,20,2,9,0
889682582-889697172-28720,149.0,9002,186644,15.0,18.0,7.0,16.0,20.0,57.0,16.0,0.100671,0.120805,0.0469799,0.107383,0.134228,0.38255,0.107383,754,570,464,1135,1342,2818,1919,0.0837592,0.0633193,0.0515441,0.126083,0.149078,0.313042,0.213175,15603,12596,9798,24475,32303,58362,33507,0.0835977,0.0674868,0.0524957,0.131132,0.173073,0.312692,0.179524,19,11,56,30,6,9,6,15,0.125,0.0723684,0.368421,0.197368,0.0394737,0.0592105,0.0394737,0.0986842,1758,569,1741,2406,96,206,303,2215,0.189154,0.0612223,0.187325,0.258877,0.0103292,0.0221648,0.0326017,0.238326,36426,12442,36372,46664,1792,4369,7101,46358,0.19019,0.0649631,0.189908,0.243646,0.00935653,0.0228118,0.0370763,0.242048,149.0,149.0,149.0,149.0,1800.4,1800.4,9002,9002,2170.28,2522.22,93322.0,93322.0,151.51,2857.58,1498.72,False,True,1,1,1,1,5,5,1,1,86,74,2,2,,250,73,63,1,1,0.00288747,0.00334835,0.0191456,0.746042,0.0336374,0.409014,651,237,395,15,0,4,0
997991699-998006945-417222,4921.0,13712,665566,693.0,379.0,206.0,338.0,1056.0,1696.0,553.0,0.140825,0.0770169,0.0418614,0.0686852,0.214591,0.344645,0.112376,1785,937,463,1208,3363,4589,1367,0.130178,0.0683343,0.033766,0.088098,0.24526,0.33467,0.0996937,59059,41460,29076,71760,145667,214521,104023,0.088735,0.0622928,0.0436861,0.107818,0.218862,0.322314,0.156293,1567,155,821,878,111,101,213,1041,0.320647,0.0317168,0.167997,0.17966,0.0227133,0.0206671,0.043585,0.213014,3971,410,1864,2772,364,240,536,3515,0.290448,0.0299883,0.136337,0.20275,0.0266238,0.0175541,0.0392042,0.257095,131236,32765,101503,139078,13225,15179,26785,191856,0.201397,0.0502818,0.155769,0.213432,0.0202954,0.023294,0.0411048,0.294426,1230.25,1640.33,4921.0,4921.0,1714.0,1958.86,13712,13712,1770.12,2429.07,95080.9,95080.9,10947.7,6126.05,5228.49,False,True,4,3,1,1,8,7,1,1,376,274,7,7,,1052,361,262,5,5,0.000919385,0.0229259,0.00798231,0.0173485,0.155829,0.0262471,1156,1042,27,64,13,0,10
914931487-815162862-756427,1030.0,11663,665566,353.0,303.0,280.0,379.0,409.0,1465.0,832.0,0.0877891,0.0753544,0.0696344,0.0942552,0.101716,0.364337,0.206914,1047,948,747,985,1231,4021,2684,0.0897711,0.0812827,0.0640487,0.0844551,0.105547,0.344765,0.230129,59059,41460,29076,71760,145667,214521,104023,0.088735,0.0622928,0.0436861,0.107818,0.218862,0.322314,0.156293,828,308,961,856,50,124,176,644,0.20978,0.0780339,0.243476,0.216874,0.0126678,0.0314163,0.0445908,0.163162,2473,918,2759,2536,117,266,475,1990,0.21441,0.0795908,0.239206,0.219872,0.0101439,0.0230623,0.0411826,0.172533,131236,32765,101503,139078,13225,15179,26785,191856,0.201397,0.0502818,0.155769,0.213432,0.0202954,0.023294,0.0411048,0.294426,515.0,1030.0,1030.0,1030.0,2332.6,2915.75,11663,11663,1770.12,2429.07,95080.9,95080.9,1493.66,3485.39,5228.49,False,True,2,1,1,1,5,4,1,1,376,274,7,7,,878,321,240,5,5,0.000333413,0.014301,0.0151867,0.0157964,0.142597,0.0637972,1059,951,26,59,13,0,10
967062979-972338656-6209,389.0,9591,186644,14.0,9.0,1.0,75.0,109.0,52.0,129.0,0.0359897,0.0231362,0.00257069,0.192802,0.280206,0.133676,0.33162,344,260,168,1723,2287,2368,2441,0.035867,0.0271087,0.0175164,0.179648,0.238453,0.246898,0.254509,15603,12596,9798,24475,32303,58362,33507,0.0835977,0.0674868,0.0524957,0.131132,0.173073,0.312692,0.179524,24,0,14,132,7,7,5,215,0.0594059,0.0,0.0346535,0.326733,0.0173267,0.0173267,0.0123762,0.532178,705,251,634,3154,96,173,229,4285,0.0740002,0.0263462,0.0665477,0.331059,0.0100766,0.0181589,0.0240369,0.449774,36426,12442,36372,46664,1792,4369,7101,46358,0.19019,0.0649631,0.189908,0.243646,0.00935653,0.0228118,0.0370763,0.242048,97.25,97.25,389.0,389.0,162.559,217.977,9591,9591,2170.28,2522.22,93322.0,93322.0,2702.41,3687.89,1498.72,True,True,4,4,1,1,59,44,1,1,86,74,2,2,,293,84,72,1,1,0.00171796,0.00175988,0.00406824,0.705444,0.00175988,0.393912,791,253,508,23,0,7,0
914631734-914748119-740036,1618.0,14219,665566,68.0,33.0,14.0,238.0,517.0,387.0,361.0,0.0420272,0.0203956,0.00865266,0.147095,0.31953,0.239184,0.223115,789,394,176,1972,4525,3961,2402,0.0554891,0.0277094,0.0123778,0.138688,0.318236,0.278571,0.168929,59059,41460,29076,71760,145667,214521,104023,0.088735,0.0622928,0.0436861,0.107818,0.218862,0.322314,0.156293,139,31,51,424,24,38,34,688,0.0972708,0.0216935,0.0356893,0.296711,0.016795,0.026592,0.0237929,0.481456,1712,314,788,3646,151,288,394,6127,0.127571,0.0233979,0.0587183,0.271684,0.0112519,0.0214605,0.0293592,0.456557,131236,32765,101503,139078,13225,15179,26785,191856,0.201397,0.0502818,0.155769,0.213432,0.0202954,0.023294,0.0411048,0.294426,323.6,809.0,1618.0,1618.0,1093.77,2031.29,14219,14219,1770.12,2429.07,95080.9,95080.9,8444.12,18412.5,5228.49,False,False,5,2,1,1,13,7,1,1,376,274,7,7,,1036,350,255,4,4,0.000673881,0.0410906,0.000396971,0.0226233,0.100008,0.0424961,934,827,19,66,11,0,11
970976361-973961837-23171,413.0,6302,27412,35.0,26.0,37.0,56.0,50.0,133.0,76.0,0.0847458,0.062954,0.0895884,0.135593,0.121065,0.322034,0.184019,418,324,292,808,927,1863,1670,0.0663281,0.0514123,0.0463345,0.128213,0.147096,0.29562,0.264995,1915,1809,1617,3256,3371,8806,6638,0.0698599,0.065993,0.0589888,0.11878,0.122975,0.321246,0.242157,79,25,80,100,12,6,18,102,0.187204,0.0592417,0.189573,0.236967,0.028436,0.014218,0.042654,0.241706,871,314,786,1578,153,176,259,1994,0.142065,0.0512151,0.128201,0.257381,0.0249551,0.0287066,0.0422443,0.325232,4319,1950,5574,7272,223,736,1138,6222,0.157432,0.0710797,0.203179,0.265073,0.0081286,0.026828,0.0414814,0.226799,413.0,413.0,413.0,413.0,350.111,525.167,6302,6302,1612.47,2492.0,27412.0,27412.0,1043.54,1823.41,441.039,True,True,1,1,1,1,18,12,1,1,17,11,1,1,,61,16,10,0,0,0.00234319,0.0549357,0.018311,0.0139766,0.238108,0.0135532,446,401,1,31,4,0,9
979425031-979584385-54031,711.0,6078,46394,21.0,13.0,10.0,131.0,260.0,184.0,92.0,0.0295359,0.0182841,0.0140647,0.184248,0.365682,0.25879,0.129395,283,210,178,858,1402,1827,1320,0.0465614,0.0345508,0.0292859,0.141165,0.230668,0.300592,0.217177,3818,3387,2863,5118,6818,15025,9365,0.0822951,0.0730051,0.0617106,0.110316,0.146959,0.323857,0.201858,60,17,30,182,4,16,14,353,0.0887574,0.0251479,0.0443787,0.269231,0.00591716,0.0236686,0.0207101,0.522189,657,241,573,1516,70,181,187,2457,0.111697,0.0409725,0.0974158,0.257735,0.0119007,0.0307718,0.0317919,0.417715,8992,3283,9786,10222,563,1169,1910,9540,0.197779,0.0722094,0.215242,0.224832,0.0123832,0.0257121,0.0420103,0.209832,355.5,355.5,711.0,711.0,405.2,467.538,6078,6078,2017.13,2319.7,46394.0,46394.0,5890.9,3095.01,1586.97,True,True,2,2,1,1,15,13,1,1,23,20,1,1,,67,20,17,0,0,0.032654,0.00147076,0.0399071,0.00629131,0.00349376,0.0968406,117,12,100,2,1,1,1


In [75]:
x = train_df3.columns 
y = "revenue"
x.remove(y)

aml = H2OAutoML(max_models=5, seed=1)
aml.train(x=x, y=y, training_frame= train_df3)

AutoML progress: |
18:40:05.712: AutoML: XGBoost is not available; skipping it.

███████████████████████████████████████████████████████████████| (done) 100%


Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
mae,6.39328,0.2645161,6.7912216,6.140243,6.4968615,6.358165,6.179908
mean_residual_deviance,159.75868,19.25999,179.18979,151.48695,177.15561,158.27531,132.68571
mse,159.75868,19.25999,179.18979,151.48695,177.15561,158.27531,132.68571
null_deviance,608161.3,76780.42,668847.06,560902.75,677428.5,635544.25,498083.9
r2,0.3234132,0.0272486,0.2971203,0.3012385,0.3143674,0.357229,0.3471108
residual_deviance,411872.34,59336.723,469835.66,391745.28,464324.88,408508.6,324947.3
rmse,12.620769,0.7704452,13.386179,12.308003,13.309981,12.580752,11.518929
rmsle,0.9209656,0.0084209,0.9345847,0.9149978,0.9235773,0.9170715,0.9145967


In [35]:

lb = h2o.automl.get_leaderboard(aml, extra_columns = "ALL")
best_model = aml.get_best_model()

lb

model_id,rmse,mse,mae,rmsle,mean_residual_deviance,algo
StackedEnsemble_AllModels_1_AutoML_3_20221101_174740,12.6435,159.858,6.40098,0.920381,159.858,StackedEnsemble
StackedEnsemble_BestOfFamily_1_AutoML_3_20221101_174740,12.6474,159.956,6.44071,0.930628,159.956,StackedEnsemble
GBM_1_AutoML_3_20221101_174740,12.6581,160.227,6.46545,0.938588,160.227,GBM
GBM_2_AutoML_3_20221101_174740,12.7978,163.784,6.4559,0.926581,163.784,GBM
GBM_3_AutoML_3_20221101_174740,12.8612,165.41,6.48997,0.933386,165.41,GBM
DRF_1_AutoML_3_20221101_174740,13.2599,175.825,6.78495,0.95253,175.825,DRF
GLM_1_AutoML_3_20221101_174740,15.3728,236.322,8.33833,1.20441,236.322,GLM


In [36]:
m = h2o.get_model(lb[4,"model_id"])
m.varimp(use_pandas=True)

Unnamed: 0,variable,relative_importance,scaled_importance,percentage
0,is_chain,2.515641e+06,1.000000,0.272489
1,municipality_name_lv3_desc_pop_per_num_stores,3.968751e+05,0.157763,0.042989
2,municipality_name_lv1_desc_pop_per_num_stores,3.172685e+05,0.126118,0.034366
3,number_of_lv4_desc,2.941659e+05,0.116935,0.031863
4,municipality_name_lv4_desc_pop_per_num_stores,2.667421e+05,0.106033,0.028893
...,...,...,...,...
135,municipality_name_singles,5.822728e+02,0.000231,0.000063
136,municipality_name_single_parent_children_0_to_...,1.542499e+02,0.000061,0.000017
137,municipality_name_num_kids+,0.000000e+00,0.000000,0.000000
138,municipality_name_num_adult,0.000000e+00,0.000000,0.000000


In [74]:
prediction = aml.leader.predict(test_df3)
df = test_df3.cbind(prediction)
df = df.as_data_frame(use_pandas=True)
result = df.loc[:,("store_id", 'predict')]
submission = result.rename(columns = {"store_id": "id",  "predict" : "predicted"})
submission.to_csv("StackedEnsemble_AllModels_1_AutoML_3_20221101_174740.csv", index = False)

submission

stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%


Unnamed: 0,id,predicted
0,914206820-914239427-717245,5.152239
1,916789157-916823770-824309,10.660138
2,913341082-977479363-2948,3.633863
3,889682582-889697172-28720,12.778327
4,997991699-998006945-417222,7.958262
...,...,...
8572,917323003-917383529-844309,17.766906
8573,917353379-917411824-845904,3.734782
8574,917072302-917089248-833647,12.619422
8575,916960557-916993161-829908,6.314860
