Skip to content

Commit

Permalink
Merge pull request #19 from ClimateImpactLab/cleanup
Browse files Browse the repository at this point in the history
clean up aggregate, utils functions
  • Loading branch information
jgerardsimcock committed Apr 26, 2017
2 parents 729d0f5 + 97e2973 commit 21edf88
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 78 deletions.
112 changes: 43 additions & 69 deletions impactlab_tools/acp/aggregate.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@

import datafs
import xarray as xr
import pandas as pd
import metacsv
from impactlab_tools.utils.cache import DataCache


def population_weighted_mean(
Expand All @@ -12,85 +10,61 @@ def population_weighted_mean(
year=2012,
api=None,
pop=None):
'''
Find the population-weighted mean of a county-level xarray DataArray
if pop is None and api is None:
api = datafs.get_api()

if pop is None:
pop = _prep_pop_data(api)

if dim != 'fips':
pop = pop.rename({'fips': dim})

return (
((ds * pop[str(year)]).groupby(level).sum(dim=dim)) /
((pop[str(year)]).groupby(level).sum(dim=dim)))


def _prep_pop_data(api):
Parameters
----------
ds : array
:py:class:`~xarray.DataArray` to be aggregated. May contain any number
of dimensions >= 1.
pop_arch = api.get_archive(
'ACP/integration/socioeconomics/' +
'population/census/county_census_pop.nc')
level : str (optional)
Level of resolution to aggregate to. May be one of ``'fips'``,
``'state'``, ``'state_names'``, ``'state_abbrevs'``, ``'census'``,
or ``'national'`` (default ``'state'``)
try:
with pop_arch.get_local_path(version='1.0') as f:
with xr.open_dataset(f) as pop:
pop.load()
dim : str (optional)
dimension to aggregate along (default ``'fips'``)
return pop
year : int (optional)
population year (or column in the ``pop`` dataset) to use for the
weights. If not provided, 2012 population is used.
except (KeyError, ValueError):
pass
api : DataAPI (optional)
:py:class:`datafs.DataAPI` object to use. If not provided, creates a
new ``DataAPI`` object
csv_arch = api.get_archive(
'ACP/integration/socioeconomics/' +
'population/census/county_census_pop.csv')
pop : array (optional)
:py:class:`~xarray.DataArray` to use for weights. If not provided,
US Census Bureau 2014 vintage CO-EST2014-alldata.csv estimates from
the ACP are used
with csv_arch.open('rb', version='0.0.1') as f:
pop_data = pd.read_csv(f, index_col=range(7))
Returns
-------
mean : array
weighted average aggregated :py:class:`~xarray.DataArray`
pop_data['fips'] = ((lambda x: x['STATE']*1000 + x['COUNTY'])(
pop_data.reset_index(['STATE', 'COUNTY'], drop=False))).values
'''

pop_data['state'] = pop_data.reset_index(
'STATE', drop=False)['STATE'].values

pop_data['census'] = pop_data.reset_index(
'DIVISION', drop=False)['DIVISION'].values

pop_data['national'] = 1

pop_data = pop_data.set_index(
['national', 'census', 'state', 'fips'],
append=True
).reset_index(
pop_data.index.names, drop=True)

years = range(2010, 2015)
if pop is None and api is None:
api = datafs.get_api()

pop_data = metacsv.DataFrame(
pop_data[list(map('POPESTIMATE{}'.format, years))])
if pop is None:
pop = _prep_pop_data(api)

pop_data.columns = list(map(str, years))
if dim != 'fips':
pop = pop.rename({'fips': dim})

pop_data.coords = {
'fips': None,
'state': 'fips',
'census': 'fips',
'national': 'fips'}
return (
((ds * pop[str(year)]).groupby(level).sum(dim=dim)) /
((pop[str(year)]).groupby(level).sum(dim=dim)))

pop = pop_data.to_xarray()

with pop_arch.get_local_path(
bumpversion='major',
message=(
'2014 vintage CO-EST2014-alldata.csv used in the ACP, ' +
'prepared for use with xarray Datasets'),
dependencies={(
'ACP/integration/socioeconomics/' +
'population/census/county_census_pop.csv'): '0.0.1'}) as f:
def _prep_pop_data(api):

pop.to_netcdf(f)
pop_arch = (
'ACP/integration/socioeconomics/population/' +
'census/county_census_pop.nc')

return pop
return DataCache.retrieve(pop_arch, api=api)
32 changes: 23 additions & 9 deletions impactlab_tools/utils/files.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
"""Utilities for path handling
"""
Utilities for path handling
Provides server-specific paths, configured in a server configuration file.
"""

import sys, os, yaml
import sys
import os
import yaml

default_server_config_path = "../server.yml"
server_config = None

##### Path-handling functions

# Path-handling functions

def sharedpath(subpath):
"""Return a subpath of the configured shareddir."""
if server_config is None:
assert os.path.exists(default_server_config_path), "Cannot find configuration file at %s" % default_server_config_path
msg = "Cannot find configuration file at {}".format(
default_server_config_path)

assert os.path.exists(default_server_config_path), msg

use_config(get_file_config(default_server_config_path))

return os.path.join(server_config['shareddir'], subpath)


def configpath(path):
"""Return a configured absolute path. If the path is absolute, it
will be left alone; otherwise, it is assumed to be a subpath of the
Expand All @@ -29,7 +37,8 @@ def configpath(path):

return sharedpath(path)

### Configuration-file handling functions

# Configuration-file handling functions

def use_config(config):
"""Use the given configuration for path functions."""
Expand All @@ -42,22 +51,27 @@ def use_config(config):

server_config = config


def get_file_config(filepath):
    """Load a configuration file from a given path.

    Parameters
    ----------
    filepath : str
        Path to a YAML configuration file.

    Returns
    -------
    config
        The parsed contents of the file, as produced by the YAML loader.
    """

    # Calling ``yaml.load`` without an explicit Loader is deprecated since
    # PyYAML 5.1 and raises a TypeError in PyYAML 6; on older releases it can
    # construct arbitrary Python objects. ``safe_load`` parses plain YAML
    # only, which is all a server configuration file should contain.
    with open(filepath, 'r') as fp:
        return yaml.safe_load(fp)


def get_argv_config(index=1):
"""Load a configuration file specified as the `index` argv argument.
In the future, this should also load specific configurable options from the command-line.
"""
Load a configuration file specified as the `index` argv argument.
In the future, this should also load specific configurable options from the
command-line.
"""

with open(sys.argv[index], 'r') as fp:
config = yaml.load(fp)
return config

if __name__ == '__main__':
print configpath('testing')

if __name__ == '__main__':
print(configpath('testing'))
3 changes: 3 additions & 0 deletions impactlab_tools/utils/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,18 @@ def check_version(input_list, check_git=False):
dict:
A dictionary of the modules: keys are the module names, each
key maps to another dictionary containing:
- "source": how the module is installed ("pip", "local",
"git", or None):
- source is "pip" if it's an open-sourced python
package installed through pip.
- source is "pip-local" if it's a self-made tool
installed through pip.
- source is "git" if it's a git managed repo of
scripts, not installed through pip.
- source is None if the module cannot be found.
- "version": If it's an open source module (source: pip),
this is its version number.
- "git_hash": If it's a local module (source: local, or
Expand Down

0 comments on commit 21edf88

Please sign in to comment.