Skip to content

Commit

Permalink
Merge 0c59f68 into 3e2b06d
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Sep 17, 2016
2 parents 3e2b06d + 0c59f68 commit ecfd913
Show file tree
Hide file tree
Showing 39 changed files with 3,564 additions and 413 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ before_install:
- if [ "$TRAVIS_OS_NAME" == "osx" ]; then virtualenv -p $PYTHON_EXE venv; fi
- if [ "$TRAVIS_OS_NAME" == "osx" ]; then source venv/bin/activate; fi
- echo $(python --version)
install: "pip install -r requirements-travis.txt"
install: "pip install -r ci/requirements-travis.txt"
script:
- if [ "$TRAVIS_OS_NAME" == "linux" ]; then pwd; ls; py.test --cov=ocelot; python setup.py install &> ignore.this; python tests/manual/run_all_ci.py; fi
- if [ "$TRAVIS_OS_NAME" == "osx" ]; then py.test tests; python3 setup.py install &> ignore.this; python3 tests/manual/run_all_ci.py; fi
Expand Down
26 changes: 17 additions & 9 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,24 @@ build: false

environment:
matrix:
- PYTHON: "C:/Python35"
- PYTHON: "C:\\Python35_64"
PYTHON_VERSION: "3.5"
PYTHON_ARCH: "64"
CONDA_PY: "35"
CONDA_NPY: "110"

- PYTHON: "C:\\Python35_32"
PYTHON_VERSION: "3.5"
PYTHON_ARCH: "32"
CONDA_PY: "35"
CONDA_NPY: "110"

install:
# Note: Getting lxml installed was a bit of a pain
# Final working solution is to download wheel from
# Gohlke's unofficial binaries, then relabel ABI from `cp35m` to `none`
# This seems to work, at least well enough for our tests to pass
- "%PYTHON%/Scripts/pip.exe install -r requirements-appveyor.txt"
- powershell .\\ci\\appveyor-install.ps1
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
- "pip install --no-cache-dir -r requirements.txt"

test_script:
- "%PYTHON%/Scripts/py.test.exe"
- "%PYTHON%/python.exe setup.py -q install"
- "%PYTHON%/python.exe tests/manual/run_all_ci.py"
- "py.test"
- "python setup.py -q install"
- "python.exe tests/manual/run_all_ci.py"
96 changes: 96 additions & 0 deletions ci/appveyor-install.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Sample script to install Miniconda under Windows
# Authors: Olivier Grisel, Jonathan Helmus and Kyle Kastner, Robert McGibbon
# License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/

# Base URL to which DownloadMiniconda appends the installer filename.
# NOTE(review): this is plain HTTP, so the downloaded installer is not
# protected in transit - consider HTTPS if the host and build image support it.
$MINICONDA_URL = "http://repo.continuum.io/miniconda/"


# Download the Miniconda installer for the requested Python version and
# platform into the current directory, reusing an earlier download if present.
#
# Parameters:
#   $python_version  - version string; a value containing "3.5" selects the
#                      "Miniconda3" installer, anything else "Miniconda".
#   $platform_suffix - platform part of the installer filename
#                      (InstallMiniconda passes "x86" or "x86_64").
# Returns: the full path of the installer file.
function DownloadMiniconda ($python_version, $platform_suffix) {
    $webclient = New-Object System.Net.WebClient
    # The dot is escaped so that only a literal "3.5" matches (an unescaped
    # dot is a regex wildcard and would also match e.g. "3x5").
    if ($python_version -match "3\.5") {
        $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe"
    } else {
        $filename = "Miniconda-latest-Windows-" + $platform_suffix + ".exe"
    }
    $url = $MINICONDA_URL + $filename

    $basedir = $pwd.Path + "\"
    $filepath = $basedir + $filename
    # Check the same full path that is written to and returned below.
    if (Test-Path $filepath) {
        Write-Host "Reusing" $filepath
        return $filepath
    }

    # Make up to two attempts whose errors are swallowed (with a one second
    # pause after each failure) to ride out transient network errors.
    Write-Host "Downloading" $filename "from" $url
    $retry_attempts = 2
    for ($i = 0; $i -lt $retry_attempts; $i++) {
        try {
            $webclient.DownloadFile($url, $filepath)
            break
        }
        catch [Exception] {
            Start-Sleep 1
        }
    }
    if (Test-Path $filepath) {
        Write-Host "File saved at" $filepath
    } else {
        # Third and final attempt, deliberately outside try/catch so that any
        # error is reported instead of being swallowed.
        $webclient.DownloadFile($url, $filepath)
    }
    return $filepath
}


# Silently install Miniconda into $python_home unless that path already exists.
#
# Parameters:
#   $python_version - version string, forwarded to DownloadMiniconda.
#   $architecture   - a value matching "32" selects the "x86" installer,
#                     anything else selects "x86_64".
#   $python_home    - target installation directory.
# Returns: $false when $python_home already exists; otherwise emits the
#          process object produced by Start-Process -Passthru.
# Exits the script with code 1 if $python_home does not exist afterwards.
function InstallMiniconda ($python_version, $architecture, $python_home) {
    Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home
    if (Test-Path $python_home) {
        Write-Host $python_home "already exists, skipping."
        return $false
    }
    if ($architecture -match "32") {
        $platform_suffix = "x86"
    } else {
        $platform_suffix = "x86_64"
    }

    $filepath = DownloadMiniconda $python_version $platform_suffix
    Write-Host "Installing" $filepath "to" $python_home
    $install_log = $python_home + ".log"
    # Use a dedicated name: $args is a PowerShell automatic variable and
    # should not be assigned to.
    $install_args = "/S /D=$python_home"
    Write-Host $filepath $install_args
    Start-Process -FilePath $filepath -ArgumentList $install_args -Wait -Passthru
    if (Test-Path $python_home) {
        Write-Host "Python $python_version ($architecture) installation complete"
    } else {
        Write-Host "Failed to install Python in $python_home"
        # Nothing above asks the installer to write this log, so only print
        # it when it actually exists to avoid a misleading secondary error.
        if (Test-Path $install_log) {
            Get-Content -Path $install_log
        }
        Exit 1
    }
}


# Run "conda install --yes <spec>" with the conda.exe found under
# $python_home\Scripts and wait for it to finish.
#
# Parameters:
#   $python_home - Miniconda installation directory.
#   $spec        - space-separated package specification passed to conda.
# Returns: emits the process object produced by Start-Process -Passthru.
function InstallCondaPackages ($python_home, $spec) {
    $conda_path = $python_home + "\Scripts\conda.exe"
    # Use a dedicated name: $args is a PowerShell automatic variable and
    # should not be assigned to.
    $conda_args = "install --yes " + $spec
    Write-Host ("conda " + $conda_args)
    $process = Start-Process -FilePath "$conda_path" -ArgumentList $conda_args -Wait -Passthru
    # Report a failed install in the build log instead of ignoring it; the
    # script still continues, as it did before.
    if ($process.ExitCode -ne 0) {
        Write-Host "conda install exited with code" $process.ExitCode
    }
    $process
}

# Run "conda update --yes conda" with the conda.exe found under
# $python_home\Scripts and wait for it to finish.
#
# Parameters:
#   $python_home - Miniconda installation directory.
# Returns: emits the process object produced by Start-Process -Passthru.
function UpdateConda ($python_home) {
    $conda_path = $python_home + "\Scripts\conda.exe"
    Write-Host "Updating conda..."
    # Use a dedicated name: $args is a PowerShell automatic variable and
    # should not be assigned to.
    $conda_args = "update --yes conda"
    Write-Host $conda_path $conda_args
    $process = Start-Process -FilePath "$conda_path" -ArgumentList $conda_args -Wait -Passthru
    # Report a failed update in the build log instead of ignoring it; the
    # script still continues, as it did before.
    if ($process.ExitCode -ne 0) {
        Write-Host "conda update exited with code" $process.ExitCode
    }
    $process
}


# Entry point: install Miniconda at the location given by the PYTHON
# environment variable, update conda, then install the listed packages.
# Reads PYTHON_VERSION, PYTHON_ARCH and PYTHON from the environment.
function main () {
    $python_home = $env:PYTHON
    $conda_packages = "lxml cytoolz numpy scipy pandas jinja2 psutil pywin32 docopt pytest"

    InstallMiniconda $env:PYTHON_VERSION $env:PYTHON_ARCH $python_home
    UpdateConda $python_home
    InstallCondaPackages $python_home $conda_packages
}

main
1 change: 1 addition & 0 deletions requirements-travis.txt → ci/requirements-travis.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pyrsistent
pytest
pytest-cov
python-coveralls
stats_arrays
toolz
voluptuous
wrapt
50 changes: 1 addition & 49 deletions docs/cutoff.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,55 +19,7 @@ The first step is to do some basic data validation. We will also do validation f
Data cleanup
============

Variables and formulas
----------------------

Ecospold 2 supports parameterized datasets, where numeric values for exchanges and production volumes can be calculated using a chain of formulas and uncertain parameters. Formulas can be present in four different places (see also the :ref:`dataformat`):

* An exchange in the list ``dataset['exchanges']``.
* A property of an exchange in the list ``dataset['exchanges'][some_index]['properties'][another_index]``. Note that not all exchanges have properties.
* A parameter in the list ``dataset['parameters']``. Again, not all datasets have parameters.
* A technosphere exchange production volume ``dataset['exchanges'][some_index]['production volume']``. Only production exchanges have production volumes.

Across a dataset, the following conventions are used:

* The key ``variable`` gives the name of a variable, e.g. ``{'variable': 'some_name'}``. Variable names must be valid python identifiers, so ``some_name`` instead of ``some name``.
* The key ``formula`` gives a mathematical formula, e.g. ``{'formula': 'some_name * 2'}``.

The Ecospold standard places no real limits on which variables can depend on other variables, so arbitrarily complex relationships are possible.

To make things extra spicy, some variables can be implicit, and instead of being given a name, they are referred to by the id of their containing reference element. So, the formula ``Ref('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')`` means get the numeric value (``amount``) of the exchange whose ``id`` is ``aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee``, and substitute in that amount. Datasets with these implicit variables only occur four times in ecoinvent 3.2 and three times in ecoinvent 3.3. Implicit variables can have three forms:

* ``Ref('some id')``: Get ``amount`` value for exchange *or* parameter with id ``some id``.
* ``Ref('some id', 'ProductionVolume')``: Get production volume for exchange with id ``some id``.
* ``Ref('some id', 'some other id')``: Get ``amount`` for property with id ``some other id`` in exchange ``some id``. This isn't used in ecoinvent 3.2 or 3.3, and isn't supported in the current version of Ocelot.

Our first cleanup function will replace these implicit relationships with named variables.

.. autofunction:: ocelot.transformations.parameterization.implicit_references.replace_implicit_references

Next, we manually fix a couple of known problems in certain formula strings, such as numbers with leading zeros that are not understood by Python.

.. autofunction:: ocelot.transformations.parameterization.known_ecoinvent_issues.fix_known_bad_formula_strings

The Ecospold 2 formula syntax is similar to Python in some ways, but we still need to use several functions to get formulas that Python can understand. Ocelot is still not 100% compatible with the entire Ecospold 2 formula spec.

.. autofunction:: ocelot.transformations.parameterization.python_compatibility.lowercase_all_parameters

.. autofunction:: ocelot.transformations.parameterization.python_compatibility.fix_math_formulas

.. autofunction:: ocelot.transformations.parameterization.python_compatibility.replace_reserved_words

Finally, in cases where we can't fix problems with formulas, we remove them from the dataset.

.. autofunction:: ocelot.transformations.parameterization.python_compatibility.delete_unparsable_formulas

Production volumes
------------------

Production volumes are specified for exchanges which produce reference product and allocatable byproduct flows. These volumes are used only to calculate the contribution of different transforming activities to markets. As such, production volumes are fixed during the evaluation of a system model in Ocelot. In order to stop an evaluation of the dataset's formulas and variables from changing the value of the production volume, we move all such parameterization information to a new parameter, outside of the production volume definition.

.. autofunction:: ocelot.transformations.parameterization.production_volumes.create_pv_parameters
The first step in data cleanup is to apply the :ref:`generic functions for parameter and formulas <parameterization_cleanup>`.

Exchange manipulations
----------------------
Expand Down
151 changes: 151 additions & 0 deletions docs/cutoff.rst.orig
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
Cutoff system model
*******************

The distinguishing features of the cutoff system model are that waste producers pay the full cost of waste treatment or disposal, and that economic allocation is used to split most multioutput activities.

The cutoff system model consists of a few major steps, each of which is broken up into multiple transformation functions.

Data validation
===============

The first step is to do some basic data validation. We will also do validation for specific types of datasets or specific stages of the system model later on.

.. autofunction:: ocelot.transformations.parameterization.variable_names_are_unique

.. autofunction:: ocelot.transformations.validation.ensure_markets_only_have_one_reference_product

.. autofunction:: ocelot.transformations.validation.ensure_markets_dont_consume_their_ref_product

Data cleanup
============

<<<<<<< HEAD
The first step in data cleanup is to apply the :ref:`generic functions for parameter and formulas <parameterization_cleanup>`.
=======
Variables and formulas
----------------------

Ecospold 2 supports parameterized datasets, where numeric values for exchanges and production volumes can be calculated using a chain of formulas and uncertain parameters. Formulas can be present in four different places (see also the :ref:`dataformat`):

* An exchange in the list ``dataset['exchanges']``.
* A property of an exchange in the list ``dataset['exchanges'][some_index]['properties'][another_index]``. Note that not all exchanges have properties.
* A parameter in the list ``dataset['parameters']``. Again, not all datasets have parameters.
* A technosphere exchange production volume ``dataset['exchanges'][some_index]['production volume']``. Only production exchanges have production volumes.

Across a dataset, the following conventions are used:

* The key ``variable`` gives the name of a variable, e.g. ``{'variable': 'some_name'}``. Variable names must be valid python identifiers, so ``some_name`` instead of ``some name``.
* The key ``formula`` gives a mathematical formula, e.g. ``{'formula': 'some_name * 2'}``.

The Ecospold standard places no real limits on which variables can depend on other variables, so arbitrarily complex relationships are possible.

To make things extra spicy, some variables can be implicit, and instead of being given a name, they are referred to by the id of their containing reference element. So, the formula ``Ref('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')`` means get the numeric value (``amount``) of the exchange whose ``id`` is ``aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee``, and substitute in that amount. Datasets with these implicit variables only occur four times in ecoinvent 3.2 and three times in ecoinvent 3.3. Implicit variables can have three forms:

* ``Ref('some id')``: Get ``amount`` value for exchange *or* parameter with id ``some id``.
* ``Ref('some id', 'ProductionVolume')``: Get production volume for exchange with id ``some id``.
* ``Ref('some id', 'some other id')``: Get ``amount`` for property with id ``some other id`` in exchange ``some id``. This isn't used in ecoinvent 3.2 or 3.3, and isn't supported in the current version of Ocelot.

Our first cleanup function will replace these implicit relationships with named variables.

.. autofunction:: ocelot.transformations.parameterization.implicit_references.replace_implicit_references

Next, we manually fix a couple of known problems in certain formula strings, such as numbers with leading zeros that are not understood by Python.

.. autofunction:: ocelot.transformations.parameterization.known_ecoinvent_issues.fix_known_bad_formula_strings

The Ecospold 2 formula syntax is similar to Python in some ways, but we still need to use several functions to get formulas that Python can understand. Ocelot is still not 100% compatible with the entire Ecospold 2 formula spec.

.. autofunction:: ocelot.transformations.parameterization.python_compatibility.lowercase_all_parameters

.. autofunction:: ocelot.transformations.parameterization.python_compatibility.fix_math_formulas
>>>>>>> master

Exchange manipulations
----------------------

<<<<<<< HEAD
We can remove some exchange data which is not used in the cutoff system model.
=======
Finally, in cases where we can't fix problems with formulas, we remove them from the dataset.

.. autofunction:: ocelot.transformations.parameterization.python_compatibility.delete_unparsable_formulas
>>>>>>> master

Production volumes
------------------

Production volumes are specified for exchanges which produce reference product and allocatable byproduct flows. These volumes are used only to calculate the contribution of different transforming activities to markets. As such, production volumes are fixed during the evaluation of a system model in Ocelot. In order to stop an evaluation of the dataset's formulas and variables from changing the value of the production volume, we move all such parameterization information to a new parameter, outside of the production volume definition.

.. autofunction:: ocelot.transformations.parameterization.production_volumes.create_pv_parameters

Exchange manipulations
----------------------

We can remove some exchange data which is not used in the cutoff system model.

.. autofunction:: ocelot.transformations.cutoff.cleanup.remove_consequential_exchanges

.. autofunction:: ocelot.transformations.cutoff.cleanup.drop_rp_activity_links

Hard links, or activity links, are links between a consuming and producing activity which exist outside the normal system model linking rules. These links are already included in the undefined datasets, and are specified by dataset authors.

The next function makes sure that we can successfully resolve these hard links in our supply chain graph.

.. autofunction:: ocelot.transformations.activity_links.check_activity_link_validity

We end the manipulation step with two functions related to the treatment of wastes and recyclables.

.. autofunction:: ocelot.transformations.cutoff.wastes.create_recycled_content_datasets

.. autofunction:: ocelot.transformations.cutoff.wastes.flip_non_allocatable_byproducts

Split multioutput activities
============================

Introduction
------------

There are a number of choices to be made when constructing an individual dataset, including what the inputs and outputs of a process are. From a mathematical point of view, the difference is simple: inputs have a negative number in the technosphere matrix, while outputs have a positive number. This corresponds to our physical understanding of the system, where inputs are consumed and outputs are produced. However, things are never that simple - for example, in a waste treatment process, it is common to consider the treated waste as an output (with a negative sign), even though it is consumed.

In Ocelot, at least in the current version, we don't make these choices ourselves - the inputs and outputs are defined in the undefined datasets, and our job is instead to handle multioutput datasets so that our constructed technosphere matrix is not singular.

In the undefined datasets, we distinguish between two types of outputs: reference products and byproducts. In general, reference products are the reason that producers do a transforming activity (which is why each dataset must have at least one reference product), and byproducts are what comes along for the ride. However, as with many of the definitions used in LCA, what was a sharp dividing line from a distance tends to blur a bit at the boundaries.

We further distinguish three types of products (both reference and byproducts): allocatable, recyclable and waste. Formally, in the internal data format, outputs will have a ``type`` of either ``reference product`` or ``byproduct``, and a ``byproduct classification`` of ``allocatable product``, ``recyclable``, or ``waste``.

In the cutoff approach, the difference between reference products and byproducts lies in how we split multiple outputs of each.

* If we have multiple reference products, we assume that these datasets are parameterized, and we can use the formulas in the different exchanges to split the dataset into multiple datasets with one reference product each.
* If we have multiple byproducts, we use economic allocation to split emissions and inputs between the reference product and the allocatable byproducts.

The first step in allocation is to label datasets based on the allocation method that will be used.

.. autofunction:: ocelot.transformations.cutoff.allocation.choose_allocation_method

We then apply the allocation functions in order:

.. autofunction:: ocelot.transformations.cutoff.economic.economic_allocation

.. autofunction:: ocelot.transformations.cutoff.markets.constrained_market_allocation

.. autofunction:: ocelot.transformations.cutoff.wastes.recycling_allocation

.. autofunction:: ocelot.transformations.cutoff.wastes.waste_treatment_allocation

.. autofunction:: ocelot.transformations.cutoff.combined.combined_production

.. autofunction:: ocelot.transformations.cutoff.combined.combined_production_with_byproducts

.. autofunction:: ocelot.transformations.cutoff.combined.combined_production_without_products
<<<<<<< HEAD

After allocation, we can drop a category of hard (activity) links - those from a reference product. These hard links don't have any meaning, as reference products are produced by the activity, and don't need to be linked.

=======

After allocation, we can drop a category of hard (activity) links - those from a reference product. These hard links don't have any meaning, as reference products are produced by the activity, and don't need to be linked.

>>>>>>> master
.. autofunction:: ocelot.transformations.cutoff.cleanup.drop_rp_activity_links

The next set is :ref:`space`.
2 changes: 2 additions & 0 deletions docs/data_format.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ Some fields can only take certain values. The activity dataset, for example, ref
valid_technology_levels = Any("undefined", "new", "modern",
"current", "old", "outdated")
.. _uncertainty_format:

Uncertainty
-----------

Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Contents:
data_format
foundation
filesystem
uncertainty
tests
contributing

Expand Down

0 comments on commit ecfd913

Please sign in to comment.