## Create Subset

In [34]:
import pandas as pd
import numpy as np
import os
import pickle as pk
from collections import defaultdict

In [2]:
# Training table: keep columns 1-6 of the CSV (column 0 is skipped).
training_data = pd.read_csv(
    './Data/cali_example/training_data_2010.csv',
    usecols=[1, 2, 3, 4, 5, 6],
)

# AV / GM / GS tables: identical read for each file, keeping columns 1-3.
av, gm, gs = (
    pd.read_csv(csv_path, usecols=[1, 2, 3])
    for csv_path in (
        './Data/cali_example/AV_2010_align.csv',
        './Data/cali_example/GM_2010_align.csv',
        './Data/cali_example/GS_2010_align.csv',
    )
)

In [3]:
# Display (rows, columns) for the training table and the AV/GM/GS tables.
training_data.shape, av.shape, gm.shape, gs.shape

((83, 6), (974544, 3), (974544, 3), (974544, 3))

In [4]:
# Keep only the training rows whose 'pred_GS' value is present.
training_ = training_data.loc[training_data['pred_GS'].notna()]

In [5]:
# Continue with the filtered table under the original name.
training_data = training_

In [6]:
# Display (rows, columns) of the training table after dropping null 'pred_GS' rows.
training_data.shape

(80, 6)

In [None]:
# Count missing values per column in av.
av.isna().sum()

In [None]:
# Count missing values per column in gm.
gm.isna().sum()

In [None]:
# Count missing values per column in gs.
gs.isna().sum()

In [None]:
# Preview the first rows of av.
av.head()

### Getting all indices with no null predictions

In [7]:
# Rows with a non-null 'pm25' value in av and in gs (gm is not filtered here).
av_ = av.loc[av['pm25'].notna()]
gs_ = gs.loc[gs['pm25'].notna()]

In [None]:
# Display (rows, columns) of the non-null subsets of av and gs.
av_.shape, gs_.shape

In [8]:
# Row labels that have a non-null 'pm25' in both av_ and gs_.
# Index.intersection yields the shared labels in a deterministic order
# (following av_.index), whereas iterating a Python set intersection has no
# guaranteed order and would make the row order of the later `.loc[indices]`
# selections an implementation detail. `.tolist()` keeps `indices` a plain list.
indices = av_.index.intersection(gs_.index).tolist()

In [9]:
# Number of row labels with a non-null 'pm25' in both av and gs.
len(indices)

723716

In [10]:
# Restrict all three tables to the same set of row labels.
av, gm, gs = (table.loc[indices] for table in (av, gm, gs))

In [None]:
# Display (rows, columns) of the three tables after selecting the shared rows.
av.shape, gm.shape, gs.shape

In [None]:
# Preview the first rows of the training table.
training_data.head()

In [None]:
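# Reference coordinate ranges (pasted output, kept as a comment so the cell
# is valid Python; x is presumably longitude and y latitude — confirm):
#         min       max
# x -124.48200 -114.1312
# y   32.52883   42.0095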

In [11]:
# Bounding box used by the following cells to subset every table
# (both ends are included by the comparisons there).
min_lon, max_lon = -118.0, -117.0
min_lat, max_lat = 33.7, 35.0

In [12]:
# Keep the training rows inside the bounding box (bounds inclusive).
training_data = training_data.loc[
    training_data.lon.between(min_lon, max_lon)
    & training_data.lat.between(min_lat, max_lat)
]

In [13]:
# Keep the av rows inside the bounding box (bounds inclusive).
av = av.loc[
    av.lon.between(min_lon, max_lon)
    & av.lat.between(min_lat, max_lat)
]

In [14]:
# Keep the gm rows inside the bounding box (bounds inclusive).
gm = gm.loc[
    gm.lon.between(min_lon, max_lon)
    & gm.lat.between(min_lat, max_lat)
]

In [15]:
# Keep the gs rows inside the bounding box (bounds inclusive).
gs = gs.loc[
    gs.lon.between(min_lon, max_lon)
    & gs.lat.between(min_lat, max_lat)
]

In [16]:
# Display (rows, columns) of the three tables after the bounding-box subset.
av.shape, gm.shape, gs.shape

((13000, 3), (13000, 3), (13000, 3))

In [None]:
# Count missing values per column in the subsetted av.
av.isna().sum()

In [None]:
# Count missing values per column in the subsetted gm.
gm.isna().sum()

In [None]:
# Count missing values per column in the subsetted gs.
gs.isna().sum()

In [None]:
# Back-fill any remaining gaps in av and gs: each missing value takes the next
# valid value further down the same column. DataFrame.bfill() is used instead
# of fillna(method='bfill'), whose `method` argument is deprecated in
# pandas >= 2.1. gm is left as it is, exactly as before.
av = av.bfill()
gs = gs.bfill()

In [17]:
# The training table is written without its index column.
training_data.to_csv(
    './Cali_Example/example/data/training_data_2010.csv',
    index=False,
)

# The three model tables keep the default index column in their CSVs.
for table, out_path in (
    (av, './Cali_Example/example/data/AV_2010_align.csv'),
    (gm, './Cali_Example/example/data/GM_2010_align.csv'),
    (gs, './Cali_Example/example/data/GS_2010_align.csv'),
):
    table.to_csv(out_path)

## Create file to visualize BNE predictions

In [18]:
# Number of rows in the subsetted av table.
num_coords = len(av)
# NOTE(review): presumably the number of MCMC samples; not referenced in any
# visible cell — confirm it is still needed.
num_mcmc = 5000

In [24]:
# Directory that the result pickles are read from and written to.
_SAVE_ADDR_PREFIX = "./Cali_Example/result_ca_2010/calibre_2d_annual_pm25_example_ca_2010"
# Sub-directory of _SAVE_ADDR_PREFIX that holds the pickles.
family_name = 'hmc'

In [36]:
# One entry per sub-segment, in segment order.
ensemble_mean_val, ensemble_uncn_val = [], []

num_subsegs = 2

# NOTE: pickle can execute arbitrary code on load; only read files produced by
# your own runs.
for seg in range(num_subsegs):
    print(seg)

    mean_path = os.path.join(
        _SAVE_ADDR_PREFIX,
        '{}/ensemble_mean_dict_{}.pkl'.format(family_name, seg))
    uncn_path = os.path.join(
        _SAVE_ADDR_PREFIX,
        '{}/ensemble_uncn_dict_{}.pkl'.format(family_name, seg))

    with open(mean_path, 'rb') as handle:
        ensemble_mean_val.append(pk.load(handle))

    with open(uncn_path, 'rb') as handle:
        ensemble_uncn_val.append(pk.load(handle))

0
1


In [37]:
# Shape of the 'overall' array stored for the second sub-segment.
ensemble_uncn_val[1]['overall'].shape

(6500,)

In [38]:
# Total length of the 'overall' arrays over all sub-segments.
num_coords = sum(
    ensemble_mean_val[seg]['overall'].shape[0] for seg in range(num_subsegs)
)

# NOTE(review): these two containers are not referenced in any visible cell.
mean_dict = defaultdict()
unc_dict = defaultdict()

# For every key, flatten and join the per-segment arrays (axis=None) into one
# 1-D array; the reshape fails if the joined length is not num_coords.
post_mean_dict = {
    key: np.concatenate(
        [ensemble_mean_val[seg][key] for seg in range(num_subsegs)],
        axis=None,
    ).reshape(num_coords)
    for key in ('overall', 'mean', 'resid')
}
post_uncn_dict = {
    key: np.concatenate(
        [ensemble_uncn_val[seg][key] for seg in range(num_subsegs)],
        axis=None,
    ).reshape(num_coords)
    for key in ('overall', 'mean', 'resid', 'noise')
}

# Write the merged dictionaries next to the per-segment pickles.
for merged, name_template in (
    (post_mean_dict, '{}/ensemble_mean_dict.pkl'),
    (post_uncn_dict, '{}/ensemble_uncn_dict.pkl'),
):
    target = os.path.join(_SAVE_ADDR_PREFIX, name_template.format(family_name))
    with open(target, 'wb') as file:
        pk.dump(merged, file, protocol=pk.HIGHEST_PROTOCOL)

In [None]:
# First num_coords rows of av, without the 'pm25' column.
av_sub = av.iloc[:num_coords]
av_sub = av_sub.drop(['pm25'], axis = 1)

# Take an explicit copy before adding columns: assigning onto a column
# selection of another frame triggers pandas' SettingWithCopyWarning.
locations = av_sub[['lat', 'lon']].copy()
# Merged 'overall' and 'mean' arrays from post_mean_dict, one value per row;
# pandas raises if an array's length differs from the number of rows.
locations['mean_overall'] = post_mean_dict['overall']
locations['mean_mean'] = post_mean_dict['mean']

# Written without the index column.
locations.to_csv('./Cali_Example/example/data/model_predictions_sub.csv', index = False)

In [None]:
# First num_coords rows of av with the 'pm25' column removed.
av_sub = av.iloc[:num_coords].drop(columns=['pm25'])

In [None]:
# Row-wise means (axis=1) of sample_val and mean_val, stored as new columns.
# NOTE(review): sample_val and mean_val are not defined in any visible cell —
# this cell cannot run as the notebook stands; confirm where they come from.
av_sub['mean_overall'] = np.mean(sample_val, axis = 1)
av_sub['mean_mean'] = np.mean(mean_val, axis = 1)

In [None]:
# Write av_sub without the index column. This goes to ./Data/cali_example/,
# a different directory from the other model_predictions_sub.csv export in
# this notebook.
av_sub.to_csv('./Data/cali_example/model_predictions_sub.csv', index = False)

In [None]:
# Reload the AV table exported earlier in this notebook. usecols=[1,2,3] skips
# column 0, which holds the index that to_csv wrote by default.
locations = pd.read_csv('./Cali_Example/example/data/AV_2010_align.csv', usecols = [1,2,3])

In [None]:
# Display (rows, columns) of the reloaded table.
locations.shape

In [None]:
# Add the merged 'overall' and 'mean' arrays from post_mean_dict as columns;
# pandas raises if an array's length differs from the number of rows.
locations['mean_overall'] = post_mean_dict['overall']
locations['mean_mean'] = post_mean_dict['mean']

In [None]:
# Preview the first rows of locations with the added columns.
locations.head()

In [None]:
# Write the table with the added columns, without the index column.
locations.to_csv('./Data/cali_example/model_predictions_LA.csv', index = False)