In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os


In [24]:
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 200)

In [12]:
#REDUCE MEMORY USAGE
## Function to reduce the DF size
def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [4]:
train_df = pd.read_csv(r"E:\Data Science\Kaggle Competitions dataset/train.csv")
test_df = pd.read_csv("E:\Data Science\Kaggle Competitions dataset/test.csv")
building_metadata_df = pd.read_csv("E:\Data Science\Kaggle Competitions dataset/building_metadata.csv")
weather_test_df = pd.read_csv("E:\Data Science\Kaggle Competitions dataset/weather_test.csv")
weather_train_df = pd.read_csv("E:\Data Science\Kaggle Competitions dataset/weather_train.csv")

In [5]:
train_df = reduce_mem_usage(train_df)
test_df = reduce_mem_usage(test_df)
building_metadata_df = reduce_mem_usage(building_metadata_df)
weather_test_df = reduce_mem_usage(weather_test_df)
weather_train_df = reduce_mem_usage(weather_train_df)

Mem. usage decreased to 289.19 Mb (53.1% reduction)
Mem. usage decreased to 596.49 Mb (53.1% reduction)
Mem. usage decreased to  0.03 Mb (60.3% reduction)
Mem. usage decreased to  6.08 Mb (68.1% reduction)
Mem. usage decreased to  3.07 Mb (68.1% reduction)


In [6]:
train_df.head()

Unnamed: 0,building_id,meter,timestamp,meter_reading
0,0,0,2016-01-01 00:00:00,0.0
1,1,0,2016-01-01 00:00:00,0.0
2,2,0,2016-01-01 00:00:00,0.0
3,3,0,2016-01-01 00:00:00,0.0
4,4,0,2016-01-01 00:00:00,0.0


In [7]:
type(train_df["timestamp"][0])

str

In [8]:
weather_train_df.head()

Unnamed: 0,site_id,timestamp,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed
0,0,2016-01-01 00:00:00,25.0,6.0,20.0,,1019.5,0.0,0.0
1,0,2016-01-01 01:00:00,24.40625,,21.09375,-1.0,1020.0,70.0,1.5
2,0,2016-01-01 02:00:00,22.796875,2.0,21.09375,0.0,1020.0,0.0,0.0
3,0,2016-01-01 03:00:00,21.09375,2.0,20.59375,0.0,1020.0,0.0,0.0
4,0,2016-01-01 04:00:00,20.0,2.0,20.0,-1.0,1020.0,250.0,2.599609


In [9]:
building_metadata_df.head()

Unnamed: 0,site_id,building_id,primary_use,square_feet,year_built,floor_count
0,0,0,Education,7432,2008.0,
1,0,1,Education,2720,2004.0,
2,0,2,Education,5376,1991.0,
3,0,3,Education,23685,2002.0,
4,0,4,Education,116607,1975.0,


In [10]:
max(train_df["meter_reading"])

21904700.0

In [11]:
train_combined = pd.merge(train_df,building_metadata_df,on='building_id',how='left')
test_combined  = pd.merge(test_df,building_metadata_df,on='building_id',how='left')


In [12]:
train_combined = pd.merge(train_combined,weather_train_df,on=['site_id','timestamp'],how='left')
test_combined  = pd.merge(test_combined,weather_test_df,on=['site_id','timestamp'],how='left')

In [13]:
temp1 = pd.to_datetime(test_combined["timestamp"])
day = temp1.dt.day
day_of_week = temp1.dt.dayofweek
month = temp1.dt.month
year = temp1.dt.year
hour = temp1.dt.hour
minute = temp1.dt.minute
second = temp1.dt.second

In [14]:
test_combined["day"] = day
test_combined["day_of_week"] = day_of_week
test_combined["month"] = month
test_combined["year"] = year
test_combined["hour"] = hour
#test_combined["minute"] = minute
#test_combined["second"] = second

In [15]:
test_combined.head()

Unnamed: 0,row_id,building_id,meter,timestamp,site_id,primary_use,square_feet,year_built,floor_count,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed,day,day_of_week,month,year,hour
0,0,0,0,2017-01-01 00:00:00,0,Education,7432,2008.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0
1,1,1,0,2017-01-01 00:00:00,0,Education,2720,2004.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0
2,2,2,0,2017-01-01 00:00:00,0,Education,5376,1991.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0
3,3,3,0,2017-01-01 00:00:00,0,Education,23685,2002.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0
4,4,4,0,2017-01-01 00:00:00,0,Education,116607,1975.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0


In [16]:
submission = pd.DataFrame(test_combined["row_id"])

In [17]:
train_combined.head()

Unnamed: 0,building_id,meter,timestamp,meter_reading,site_id,primary_use,square_feet,year_built,floor_count,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed
0,0,0,2016-01-01 00:00:00,0.0,0,Education,7432,2008.0,,25.0,6.0,20.0,,1019.5,0.0,0.0
1,1,0,2016-01-01 00:00:00,0.0,0,Education,2720,2004.0,,25.0,6.0,20.0,,1019.5,0.0,0.0
2,2,0,2016-01-01 00:00:00,0.0,0,Education,5376,1991.0,,25.0,6.0,20.0,,1019.5,0.0,0.0
3,3,0,2016-01-01 00:00:00,0.0,0,Education,23685,2002.0,,25.0,6.0,20.0,,1019.5,0.0,0.0
4,4,0,2016-01-01 00:00:00,0.0,0,Education,116607,1975.0,,25.0,6.0,20.0,,1019.5,0.0,0.0


In [18]:
temp = pd.to_datetime(train_combined['timestamp'])

In [19]:
temp_test = pd.to_datetime(test_combined['timestamp'])

In [20]:
#temp1 = []
#for i in temp:
#    ts = pd.Timestamp(i)
#    ts.to_pydatetime()
#    temp1.append(ts)
#
#
#train_combined['timestamp'] = temp1

In [21]:
train_combined.shape

(20216100, 16)

In [22]:
del train_df
del test_df 
del building_metadata_df
del weather_test_df
del weather_train_df 


In [23]:
import gc 
gc.collect()

0

In [24]:
day = temp.dt.day
day_of_week = temp.dt.dayofweek
month = temp.dt.month
year = temp.dt.year
hour = temp.dt.hour
#minute = temp.dt.minute
#second = temp.dt.second


In [25]:
day_test = temp_test.dt.day
day_of_week_test = temp_test.dt.dayofweek
month_test = temp_test.dt.month
year_test = temp_test.dt.year
hour_test = temp_test.dt.hour

In [26]:
train_combined["day"] = day
train_combined["day_of_week"] = day_of_week
train_combined["month"] = month
train_combined["year"] = year
train_combined["hour"] = hour
#train_combined["minute"] = minute
#train_combined["second"] = second

In [27]:
test_combined["day"] = day_test
test_combined["day_of_week"] = day_of_week_test
test_combined["month"] = month_test
test_combined["year"] = year_test
test_combined["hour"] = hour_test

In [28]:
del day
del day_of_week
del month
del year
del hour
del minute
del second
del day_test
del day_of_week_test
del month_test
del year_test
del hour_test
gc.collect()

0

In [29]:
train_combined.shape

(20216100, 21)

In [30]:
train_combined = train_combined.drop("timestamp",axis = 1)

In [31]:
test_combined = test_combined.drop(["timestamp", "row_id"],axis = 1)

In [32]:
test_combined.head()

Unnamed: 0,building_id,meter,site_id,primary_use,square_feet,year_built,floor_count,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed,day,day_of_week,month,year,hour
0,0,0,0,Education,7432,2008.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0
1,1,0,0,Education,2720,2004.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0
2,2,0,0,Education,5376,1991.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0
3,3,0,0,Education,23685,2002.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0
4,4,0,0,Education,116607,1975.0,,17.796875,4.0,11.703125,,1021.5,100.0,3.599609,1,6,1,2017,0


In [33]:
temp = train_combined.isnull().any()
for count,i in enumerate(temp):
    if i== True:
        print(train_combined.columns.values[count])
        print((train_combined[train_combined.columns.values[count]].isnull().sum())/train_combined.shape[0])

year_built
0.5999003269671203
floor_count
0.826527718006935
air_temperature
0.004781238715677109
cloud_coverage
0.43655131306236117
dew_temperature
0.00495347767373529
precip_depth_1_hr
0.18544739094088375
sea_level_pressure
0.06092515371411895
wind_direction
0.0716779200736047
wind_speed
0.007107008770237583


In [34]:
temp_test = test_combined.isnull().any()
for count,i in enumerate(temp_test):
    if i == True:
        print(test_combined.columns.values[count])
        print((test_combined[test_combined.columns.values[count]].isnull().sum())/test_combined.shape[0])

year_built
0.5899159663865546
floor_count
0.8260504201680672
air_temperature
0.005321673189823874
cloud_coverage
0.46866438356164386
dew_temperature
0.006254532634971797
precip_depth_1_hr
0.18709860999194197
sea_level_pressure
0.06035901346840106
wind_direction
0.07143487874601896
wind_speed
0.007244757492037911


In [35]:
test_combined.shape

(41697600, 19)

In [36]:
train_combined.head()

Unnamed: 0,building_id,meter,meter_reading,site_id,primary_use,square_feet,year_built,floor_count,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed,day,day_of_week,month,year,hour
0,0,0,0.0,0,Education,7432,2008.0,,25.0,6.0,20.0,,1019.5,0.0,0.0,1,4,1,2016,0
1,1,0,0.0,0,Education,2720,2004.0,,25.0,6.0,20.0,,1019.5,0.0,0.0,1,4,1,2016,0
2,2,0,0.0,0,Education,5376,1991.0,,25.0,6.0,20.0,,1019.5,0.0,0.0,1,4,1,2016,0
3,3,0,0.0,0,Education,23685,2002.0,,25.0,6.0,20.0,,1019.5,0.0,0.0,1,4,1,2016,0
4,4,0,0.0,0,Education,116607,1975.0,,25.0,6.0,20.0,,1019.5,0.0,0.0,1,4,1,2016,0


In [37]:
train_combined.shape

(20216100, 20)

In [38]:
train_combined = train_combined.drop("floor_count",axis = 1)
test_combined = test_combined.drop("floor_count",axis = 1)


In [39]:
# NOT SURE IF WE SHOULD DROP THIS AS IT CAN BE USEFUL
test_combined = test_combined.drop("building_id",axis = 1)
train_combined = train_combined.drop("building_id",axis = 1)

In [40]:
y = train_combined["meter_reading"]
train_combined = train_combined.drop("meter_reading",axis = 1)

In [41]:
 temp = train_combined["year_built"].mode()

In [42]:
#Filling up the null values of train

train_combined["year_built"] = train_combined["year_built"].fillna(int(temp))
train_combined["air_temperature"] = train_combined["air_temperature"].astype(float)
train_combined["air_temperature"] = train_combined["air_temperature"].fillna(int(train_combined["air_temperature"].mean()))
train_combined["cloud_coverage"] = train_combined["cloud_coverage"].fillna(int(train_combined["air_temperature"].mode()))
train_combined["dew_temperature"] = train_combined["dew_temperature"].astype(float)
train_combined["dew_temperature"] = train_combined["dew_temperature"].fillna(int(train_combined["dew_temperature"].mean()))
train_combined["precip_depth_1_hr"] = train_combined["precip_depth_1_hr"].astype(float)
train_combined["precip_depth_1_hr"] = train_combined["precip_depth_1_hr"].fillna(int(train_combined["precip_depth_1_hr"].mode()))
train_combined["sea_level_pressure"] = train_combined["sea_level_pressure"].astype(float)
train_combined["sea_level_pressure"] = train_combined["sea_level_pressure"].fillna(int(train_combined["sea_level_pressure"].mean()))
train_combined["wind_direction"] = train_combined["wind_direction"].astype(float)
train_combined["wind_direction"] = train_combined["wind_direction"].fillna(int(train_combined["wind_direction"].mean()))
train_combined["wind_speed"] = train_combined["wind_speed"].astype(float)
train_combined["wind_speed"] = train_combined["wind_speed"].fillna(int(train_combined["wind_speed"].mean()))


In [43]:
test_combined["year_built"] = test_combined["year_built"].fillna(int(temp))
test_combined["air_temperature"] = test_combined["air_temperature"].astype(float)
test_combined["air_temperature"] = test_combined["air_temperature"].fillna(int(train_combined["air_temperature"].mean()))
test_combined["cloud_coverage"] = test_combined["cloud_coverage"].fillna(int(train_combined["air_temperature"].mode()))
test_combined["dew_temperature"] = test_combined["dew_temperature"].astype(float)
test_combined["dew_temperature"] = test_combined["dew_temperature"].fillna(int(train_combined["dew_temperature"].mean()))
test_combined["precip_depth_1_hr"] = test_combined["precip_depth_1_hr"].astype(float)
test_combined["precip_depth_1_hr"] = test_combined["precip_depth_1_hr"].fillna(int(train_combined["precip_depth_1_hr"].mode()))
test_combined["sea_level_pressure"] = test_combined["sea_level_pressure"].astype(float)
test_combined["sea_level_pressure"] = test_combined["sea_level_pressure"].fillna(int(train_combined["sea_level_pressure"].mean()))
test_combined["wind_direction"] = test_combined["wind_direction"].astype(float)
test_combined["wind_direction"] = test_combined["wind_direction"].fillna(int(train_combined["wind_direction"].mean()))
test_combined["wind_speed"] = test_combined["wind_speed"].astype(float)
test_combined["wind_speed"] = test_combined["wind_speed"].fillna(int(train_combined["wind_speed"].mean()))


In [44]:
print(train_combined.isnull().any().any())
print(test_combined.isnull().any().any())

False
False


In [45]:
train_combined.head()

Unnamed: 0,meter,site_id,primary_use,square_feet,year_built,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed,day,day_of_week,month,year,hour
0,0,0,Education,7432,2008.0,25.0,6.0,20.0,0.0,1019.5,0.0,0.0,1,4,1,2016,0
1,0,0,Education,2720,2004.0,25.0,6.0,20.0,0.0,1019.5,0.0,0.0,1,4,1,2016,0
2,0,0,Education,5376,1991.0,25.0,6.0,20.0,0.0,1019.5,0.0,0.0,1,4,1,2016,0
3,0,0,Education,23685,2002.0,25.0,6.0,20.0,0.0,1019.5,0.0,0.0,1,4,1,2016,0
4,0,0,Education,116607,1975.0,25.0,6.0,20.0,0.0,1019.5,0.0,0.0,1,4,1,2016,0


In [46]:
for i in train_combined.columns:
    print(i)
    print(train_combined[i].nunique())

meter
4
site_id
16
primary_use
16
square_feet
1397
year_built
116
air_temperature
619
cloud_coverage
11
dew_temperature
522
precip_depth_1_hr
128
sea_level_pressure
133
wind_direction
44
wind_speed
58
day
31
day_of_week
7
month
12
year
1
hour
24


In [47]:
train_combined["cloud_coverage"].unique()

array([ 6., 15.,  8.,  4.,  0.,  2.,  7.,  5.,  3.,  9.,  1.])

In [48]:
# Here we are treating time as a category as the values themselves do not have any meaning
# although we can also treat them as a numeric variable
# I should try that too and maybe compare the performance of both in both tree and non tree based models.


# THIS IS VERRRRY MEMORY INTENSIVE
train_combined["meter"] = train_combined["meter"].astype("category")
train_combined["site_id"] = train_combined["site_id"].astype("category")
train_combined["primary_use"] = train_combined["primary_use"].astype("category")
train_combined["day_of_week"] = train_combined["day_of_week"].astype("category")
train_combined["month"] = train_combined["month"].astype("category")
train_combined["hour"] = train_combined["hour"].astype("category")
train_combined["day"] = train_combined["day"].astype("category")



In [49]:
#This is for the test dataset
test_combined["meter"] = test_combined["meter"].astype("category")
test_combined["site_id"] = test_combined["site_id"].astype("category")
test_combined["primary_use"] = test_combined["primary_use"].astype("category")
test_combined["day_of_week"] = test_combined["day_of_week"].astype("category")
test_combined["month"] = test_combined["month"].astype("category")
test_combined["hour"] = test_combined["hour"].astype("category")
test_combined["day"] = test_combined["day"].astype("category")


In [50]:
train_combined = reduce_mem_usage(train_combined)

Mem. usage decreased to 713.35 Mb (53.2% reduction)


In [51]:
test_combined = reduce_mem_usage(test_combined)

Mem. usage decreased to 1471.34 Mb (53.2% reduction)


In [52]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

cat_cols = ["meter","site_id","primary_use","day_of_week","month","hour","day"]
label_encoders = {}
train_combined_cat = pd.DataFrame()
for col in cat_cols:
    print("Encoding {}".format(col))
    new_le = LabelEncoder()
    temp = list(new_le.fit_transform(train_combined[col]))
    train_combined_cat[col] = temp
    label_encoders[col] = new_le

Encoding meter
Encoding site_id
Encoding primary_use
Encoding day_of_week
Encoding month
Encoding hour
Encoding day


In [53]:
train_combined = train_combined.drop(cat_cols,axis = 1)

In [54]:
cat_columns_idx = [train_combined_cat.columns.get_loc(col) for col in cat_cols]

ohe = OneHotEncoder(categorical_features=cat_columns_idx, sparse=False, handle_unknown="ignore")

train_combined_cat_final = ohe.fit_transform(train_combined_cat)




In [55]:
del train_combined_cat
gc.collect()

20

In [56]:
temp = pd.DataFrame(train_combined_cat_final)

In [57]:
print(temp.shape)
print(train_combined.shape)

(20216100, 110)
(20216100, 10)


In [58]:
final_train = pd.concat([train_combined,temp],axis = 1) # too expensive find alternative

In [59]:
del train_combined
del temp
gc.collect()

0

In [60]:
final_train.shape

(20216100, 120)

In [61]:
#final_train = reduce_mem_usage(final_train) # too expensive dont run

In [62]:
#final_train.to_csv(r"E:\Data Science\Kaggle Competitions dataset\Ashrae\final_train_NN.csv")

In [65]:
del final_train
del train_combined_cat_final
gc.collect()


8

In [80]:
temp_test = test_combined.drop(cat_cols, axis = 1)

In [81]:
test_enc = pd.DataFrame()

In [None]:
#del test_combined
#gc.collect()

In [82]:
for col in cat_cols:
    print("Encoding {}".format(col))
    label_map = {val: label for label, val in enumerate(label_encoders[col].classes_)}
    print(label_map)
    test_enc[col] = test_combined[col].map(label_map)
    # fillna and convert to int
    #temp_test[col] = temp_test[col].fillna(9999).astype("category")

Encoding meter
{0: 0, 1: 1, 2: 2, 3: 3}
Encoding site_id
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15}
Encoding primary_use
{'Education': 0, 'Entertainment/public assembly': 1, 'Food sales and service': 2, 'Healthcare': 3, 'Lodging/residential': 4, 'Manufacturing/industrial': 5, 'Office': 6, 'Other': 7, 'Parking': 8, 'Public services': 9, 'Religious worship': 10, 'Retail': 11, 'Services': 12, 'Technology/science': 13, 'Utility': 14, 'Warehouse/storage': 15}
Encoding day_of_week
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6}
Encoding month
{1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11}
Encoding hour
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23}
Encoding day
{1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 

In [83]:
dummy_test = ohe.transform(test_enc)

In [86]:
temp = pd.DataFrame(dummy_test)

In [87]:
final_test = pd.concat([temp_test,temp],axis = 1)

In [88]:
final_test.shape

(41697600, 120)

In [89]:
#final_test.to_csv(r"E:\Data Science\Kaggle Competitions dataset\Ashrae\final_test_NN1.csv")

In [None]:
del dummy_test
del temp_test
gc.collect()

In [2]:
# for the submission file
test_df = pd.read_csv("E:\Data Science\Kaggle Competitions dataset/test.csv")
submission = pd.DataFrame()
submission["row_id"] = test_df["row_id"]

In [3]:
import gc
del test_df
gc.collect()

0

In [4]:
# Reading the saved pre processed test file
final_test = pd.read_csv("E:\Data Science\Kaggle Competitions dataset\Ashrae/final_test_NN1.csv",index_col=False)

In [5]:
final_test.head()

Unnamed: 0.1,Unnamed: 0,square_feet,year_built,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed,...,100,101,102,103,104,105,106,107,108,109
0,0,7432,2008.0,17.8,4.0,11.7,0.0,1021.5,100.0,3.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2720,2004.0,17.8,4.0,11.7,0.0,1021.5,100.0,3.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,5376,1991.0,17.8,4.0,11.7,0.0,1021.5,100.0,3.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,23685,2002.0,17.8,4.0,11.7,0.0,1021.5,100.0,3.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,116607,1975.0,17.8,4.0,11.7,0.0,1021.5,100.0,3.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
import tensorflow as tf
from tensorflow import keras

In [7]:
from tensorflow.keras.regularizers import l2 as reg2,l1 as reg1
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Activation,BatchNormalization,Dropout
from tensorflow.keras.models import model_from_json

In [8]:
json_file = open('modelNN_1.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("modelNN_1.h5")
print("Loaded model from disk")

Loaded model from disk


In [9]:
pred = loaded_model.predict(final_test.values)

In [10]:
submission["meter_reading"] = pred

In [13]:
submission = reduce_mem_usage(submission)
submission.to_csv(r"E:\Data Science\Kaggle Competitions dataset\Ashrae\submission.csv",index = False)

Mem. usage decreased to 318.13 Mb (33.3% reduction)
