-
Notifications
You must be signed in to change notification settings - Fork 1
/
hdf5_file_analysis.py
206 lines (137 loc) · 5.7 KB
/
hdf5_file_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 6 11:25:54 2020
@author: wxi
"""
# hdf5 file processing imports
import pandas as pd
import numpy as np
import h5py
# Colin mapping file imports
import json
import matplotlib.pyplot as plt
import geopandas as gpd
import requests
import zipfile
from io import BytesIO
from mpl_toolkits.axes_grid1 import make_axes_locatable
import re
import mapclassify
# Configuration file
import tech_opp_config as cfg
def hdf5_to_df(filepath):
data = h5py.File(filepath , "r")
df_list = []
for key in list(data.keys())[:2]:
df_list.append(pd.DataFrame(np.array(data[key]).squeeze()))
df = pd.concat(df_list, axis = 1, sort = False)
df.rename({0: 'industries'}, axis = 'columns', inplace = True)
df_size = len(df)
for op_hour in ['ophours_high', 'ophours_low', 'ophours_mean']:
fuels = []
if "Coal" in data[op_hour].keys():
fuels.append("Coal")
if "Natural_gas" in data[op_hour].keys():
fuels.append("Natural_gas")
total_fuel = np.zeros((df_size,8760))
for fuel in fuels:
fuel_size = len(np.array(data[op_hour][fuel]).T)
if fuel_size != df_size:
print(fuel + " isn't complete for each county")
continue
df[op_hour + "_" + fuel] = list(np.array(data[op_hour][fuel]).T)
total_fuel += np.array(data[op_hour][fuel]).T
df[op_hour + "_total_fuel"] = list(total_fuel)
df[op_hour + "_" + "land_use"] = np.array(data[op_hour]['land_use'])
df[op_hour + "_" + "tech_opp"] = list(np.array(data[op_hour]['tech_opp']).T)
return df
class mapping:
def __init__(self, data, config):
self.config = config
self.data = data
cshp_file = config["cfp"]
sshp_file = config["sfp"]
self.cshp = gpd.read_file(cshp_file)
self.sshp = gpd.read_file(sshp_file)
#Convert to Mercator
self.cshp = self.cshp.to_crs(epsg=3395)
self.sshp = self.sshp.to_crs(epsg=3395)
cb_url = 'http://colorbrewer2.org/export/colorbrewer.json'
# Updated. Getting SSLError
try:
cb_r = requests.get(cb_url)
except requests.exceptions.SSLError as e:
print('Exception: {}.\n Using local json file.'.format(e))
with open(config['cb']) as json_file:
self.colors = json.load(json_file)
else:
self.colors = cb_r.json()
# to convert to HEX
for c in self.colors.keys():
for n in self.colors[c].keys():
rgb_list = self.colors[c][n]
if type(rgb_list) == str:
continue
hex_list = []
for v in rgb_list:
hex = tuple(
int(x) for x in re.search(
'([^a-z,(,](\w+,\w+,\w+)|(\w,\w+,\w+))', v
).group().split(',')
)
hex_list.append(hex)
self.colors[c][n] = hex_list
def make_county_choropleth(self, data_column, palette, filename,
class_scheme,scheme_kwds):
"""
Class_scheme and scheme_kwds is scheme provided by mapclassify
(e.g. ‘box_plot’, ‘equal_interval’, ‘fisher_jenks’, etc.). See
https://pysal.readthedocs.io/en/1.5/library/esda/mapclassify.html for
more details on both schemes and their keywords.
"""
#import energy results if path
def format_county_fips(cf):
cf = str(int(cf))
if len(cf)<=4:
cf = '0'+cf
return cf
self.data['COUNTY_FIPS'] = self.data.COUNTY_FIPS.apply(
lambda x: format_county_fips(x)
)
# match on geo_id
map_data = self.cshp.set_index('GEOID').join(
self.data.set_index('COUNTY_FIPS')[data_column]
)
map_data.dropna(subset=[data_column], inplace=True)
# set the range for the choropleth
#vmin, vmax = map_data.MMBtu_TOTAL.min(), map_data.MMBtu_TOTAL.max()
# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 10))
# divider = make_axes_locatable(ax)
#
# cax = divider.append_axes("right", size="5%", pad=0.1)
if scheme_kwds == None:
map_data.plot(column=data_column, cmap=palette, linewidth=0.8,
ax=ax, edgecolor='0.8', scheme=class_scheme,
legend=True,
legend_kwds={'title':'Tech_Opp',
'loc': 'lower right',
'fontsize': 'small'})
else:
map_data.plot(column=data_column, cmap=palette, linewidth=0.8,
ax=ax, edgecolor='0.8', scheme=class_scheme,
classification_kwds=scheme_kwds, legend=True,
legend_kwds={'title':'Tech_Opp',
'loc': 'lower right',
'fontsize': 'small'})
ax.axis('off')
fig.savefig(filename+'.svg', dpi=500, bbox_inches='tight')
plt.close()
if __name__ == "__main__":
df = hdf5_to_df(cfg.config["filepath"])
# Make maps based on any data column in df file
mm = mapping(df, cfg.config)
mm.make_county_choropleth(
cfg.config["data"], palette='YlOrRd', filename=cfg.config["filename"],
class_scheme='Percentiles', scheme_kwds=None
)