# this notebook was initially run on Google Colab

In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Tue Mar 16 06:33:28 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.56       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    25W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# import libraries

In [3]:
'''
import libraries
'''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from tensorflow.keras.optimizers import Adam
import os
import random
# One shared seed for every random number generator seeded in this notebook.
SEED = 0
# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so setting it
# here presumably only affects child processes, not this kernel - confirm.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# define variables

In [5]:
# Feature columns, assembled group by group; the resulting order is the column
# order of every array fed to the models.
lucc_cols = [f'lucc_{i}' for i in range(6)]   # lucc_0 ... lucc_5
b_cols = [f'b{i}' for i in range(1, 8)]       # b1 ... b7
other_cols = ['lst', 'ndvi', 'ntl', 'elevation', 'entropy']
features = lucc_cols + b_cols + other_cols

# Years covered: 2001 through 2017 inclusive.
years = [*range(2001, 2018)]

# Directory that holds one CSV table per year.
dir_csv = '/city_scale_features-one_year_one_table/'

# load city scale feature dataset

In [6]:
# load csv files: one table per year, reduced to the 'name' column plus the feature columns
selected_columns = ['name'] + features

dict_df = {}
for year in range(2001,2018):
  cur_df = pd.read_csv(f'{dir_csv}{year}.csv')[selected_columns]
  dict_df[year] = cur_df

In [7]:
# convert into numpy: per year, the feature columns only (no 'name' column)
dict_arr = {
  year: dict_df[year][features].to_numpy()
  for year in range(2001,2018)
}

# predict city scale energy intensity

In [8]:
''' customizing metric R2 '''
def R2(y_true, y_pred):
  """Coefficient of determination over all elements of the given tensors.

  Both tensors are flattened to 1-D. The result is 1 - RSS / TSS, where RSS
  is the sum of squared differences between y_true and y_pred and TSS is the
  sum of squared deviations of y_true from its own mean. There is no guard
  for TSS == 0.
  """
  truth = tf.reshape(y_true, [-1])
  preds = tf.reshape(y_pred, [-1])
  # residual sum of squares
  residual_ss = tf.math.reduce_sum(tf.math.square(truth - preds), axis=-1)
  # total sum of squares around the mean of the true values
  truth_mean = tf.reduce_mean(truth, axis=-1)
  total_ss = tf.math.reduce_sum(tf.math.square(truth - truth_mean))
  return 1 - residual_ss / total_ss


def singleModelInference(url_model,url_normalizer,dict_arr_):
  """Run one saved model over every feature array in `dict_arr_`.

  Args:
    url_model: path of the saved Keras model, passed to `keras.models.load_model`.
    url_normalizer: path of the fitted normalizer, passed to `joblib.load`.
    dict_arr_: dict mapping a key (the year, in this notebook) to the feature
      array for that key.

  Returns:
    dict with the same keys as `dict_arr_`, in the same order; each value is
    the output of `best_model.predict` on the normalized array.
  """
  # joblib is only needed by this function, so it is imported locally
  import joblib

  # load model (R2 must be supplied because it is a custom object)
  best_model = keras.models.load_model(url_model,custom_objects={'R2':R2},compile=False)
  # `learning_rate` is the supported argument name; the old `lr` alias is
  # deprecated and rejected by newer Keras releases
  opt = tf.keras.optimizers.Adam(learning_rate=0.001,clipvalue=0.5)
  best_model.compile(optimizer=opt, loss='mean_squared_error', metrics=[tf.keras.metrics.RootMeanSquaredError(),R2])

  # load normalizer
  # NOTE: joblib.load unpickles the file, which can execute arbitrary code -
  # only load scaler files from a trusted source
  normalizer = joblib.load(filename=url_normalizer)

  dict_preds = {}
  # iterate over the keys actually present instead of a hard-coded year range
  for year, cur_arr in dict_arr_.items():
    # normalize
    inputs = normalizer.transform(cur_arr)
    # inference
    dict_preds[year] = best_model.predict(inputs)

  return dict_preds

In [9]:
def ensemblePredict(dict_arr_, dir_root='/training and testing', n_models=4):
  """Average the predictions of the DNN0..DNN{n_models-1} models.

  For each model index i, the model is read from
  `<dir_root>/DNN<i>/bestModel.h5` and its normalizer from
  `<dir_root>/DNN<i>/scaler.joblib`; both are handed to
  `singleModelInference` together with `dict_arr_`.

  Args:
    dict_arr_: dict mapping a key (the year, in this notebook) to a feature array.
    dir_root: directory containing the `DNN<i>` sub-directories.
    n_models: number of ensemble members (must be at least 1).

  Returns:
    dict mapping each key returned by the first model to the element-wise
    mean of all models' predictions for that key.

  Raises:
    ValueError: if `n_models` is smaller than 1.
  """
  if n_models < 1:
    raise ValueError('n_models must be at least 1')

  # inference of each model
  all_preds = []
  for i in range(n_models):
    dir_model = f'{dir_root}/DNN{i}/bestModel.h5'
    dir_normalizer = f'{dir_root}/DNN{i}/scaler.joblib'
    all_preds.append(singleModelInference(dir_model,dir_normalizer,dict_arr_))

  # average output; keys come from the predictions themselves, so the result
  # does not depend on a module-level list of years
  dict_predsFinal = {}
  for y in all_preds[0]:
    # sum in model order (0, 1, 2, ...) so the floating-point result matches
    # a plain left-to-right addition
    total = all_preds[0][y]
    for preds in all_preds[1:]:
      total = total + preds[y]
    dict_predsFinal[y] = total / float(n_models)

  return dict_predsFinal

In [19]:
# Run the ensemble on the per-year feature arrays; the result is a dict of averaged predictions per year.
dict_preds = ensemblePredict(dict_arr)

# output

In [22]:
# convert numpy to pandas: keep only the first output column of each prediction array.
# A new dict is built, and arrays that are already 1-D are passed through, so
# re-running this cell no longer fails with an IndexError on flattened arrays.
dict_preds = {
  year: (pred[:, 0] if pred.ndim > 1 else pred)
  for year, pred in dict_preds.items()
}

In [23]:
# one column per year, one row per city
df_preds = pd.DataFrame(dict_preds)
# Take the city names from the last year's loaded table explicitly, rather than
# from a loop variable left behind by the loading cell.
# NOTE(review): this assumes every yearly table lists the cities in the same
# row order - confirm against the CSV files.
df_preds['name'] = dict_df[years[-1]]['name']
# put 'name' first, followed by the year columns in chronological order
df_preds = df_preds[['name'] + list(range(2001,2018))]
df_preds

Unnamed: 0,name,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,122city,4641.377441,4715.981445,4676.191895,4435.705078,4085.680664,4473.245605,4524.135254,4248.649902,4226.234863,4323.723633,4281.814453,3967.821289,4165.224609,4121.303223,4125.276367,4037.898926,3940.208984
1,089city,4108.035645,4293.618164,4316.521484,4194.353027,3938.815430,4204.531738,4183.413086,3994.414307,3884.289062,4191.432617,4048.480957,3881.214844,3986.867188,3929.059082,4162.958496,4125.549805,3884.514648
2,092city,3591.889160,3216.222168,3233.633545,3031.963379,2887.667480,3022.610596,2976.979248,2948.052490,2907.799805,2538.403320,2656.169922,2727.394043,2789.165771,2498.836426,2521.938477,2681.987793,2540.376953
3,093city,5119.806152,4929.093750,4822.670898,4732.750000,4512.948730,4696.941406,4304.455078,4097.383301,3778.682129,3380.828613,3476.225586,3291.948730,3450.280762,3143.866699,3385.822998,3237.119141,3067.582275
4,095city,3830.513428,3532.358887,3733.147461,3544.839844,3430.377930,3744.206299,3554.330078,3450.403076,3419.483887,3347.188232,3339.176025,2925.209717,3277.431641,3326.217285,3415.988770,3307.195557,3129.931152
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,103city,2346.158691,1603.075073,1666.799805,1621.692139,2206.930176,1759.777588,1213.828125,1235.842285,1199.266479,1083.387695,953.910339,1391.345215,1083.267334,831.005981,951.608398,972.187561,929.560791
390,106city,1208.289062,1023.191711,851.433777,872.455750,1093.376221,842.870605,741.678589,787.947510,809.385071,674.455688,684.029419,775.971191,622.421204,565.555542,489.194824,522.902954,487.471161
391,291city,907.683105,878.957031,837.123230,919.134277,774.843628,745.713806,742.649536,709.769226,663.787109,606.382385,659.468079,650.920532,563.377319,512.389587,536.430969,554.214294,508.509003
392,338city,1106.755249,1163.765381,1088.090942,1231.385010,1289.726562,1055.654541,941.722168,834.930969,784.730286,654.507935,650.150269,713.436584,545.314819,431.016541,455.120544,432.165405,345.874847


In [24]:
# Save the prediction table as CSV; the relative path resolves against the current working directory.
# pandas writes the DataFrame index as the first, unnamed column by default (index=True).
df_preds.to_csv('city scale energy intensity.csv')