Skip to content

Commit

Permalink
Merge pull request #32 from CSchoel/v0.6
Browse files Browse the repository at this point in the history
Fix for correlation dimension
  • Loading branch information
CSchoel committed Oct 29, 2023
2 parents e8b2955 + 5962f0e commit 5da1ee7
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 14 deletions.
59 changes: 59 additions & 0 deletions .ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Ruff configuration.
# NOTE: In a standalone `.ruff.toml` (as opposed to `pyproject.toml`), settings
# must live at the top level and in bare `[lint]` / `[format]` sections — the
# `[tool.ruff]` prefix is only valid inside `pyproject.toml`. With the prefixed
# sections, Ruff rejects/ignores this configuration entirely.

# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]

# Same as Black.
line-length = 88
indent-width = 4

# Assume Python 3.8
target-version = "py38"

[lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["E4", "E7", "E9", "F", "N", "D", "S", "PL", "NPY"]
ignore = []

# Allow fix for all enabled rules (when `--fix`) is provided.
fixable = ["ALL"]
unfixable = []

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[format]
# Like Black, use double quotes for strings.
quote-style = "double"

# Like Black, indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"
8 changes: 5 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
NOnLinear measures for Dynamical Systems (nolds)
================================================

.. image:: https://travis-ci.com/CSchoel/nolds.svg?branch=master
:target: https://travis-ci.com/CSchoel/nolds
.. image:: https://github.com/CSchoel/nolds/actions/workflows/ci.yaml/badge.svg
:target: https://github.com/CSchoel/nolds/actions/workflows/ci.yaml

.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.3814723.svg
:target: https://doi.org/10.5281/zenodo.3814723

Expand All @@ -13,9 +14,10 @@ NOnLinear measures for Dynamical Systems (nolds)
.. image:: https://api.codacy.com/project/badge/Grade/42c30d253b384e87a86b70c8fa0da6b2
:target: https://www.codacy.com/app/christopher.schoelzel/nolds?utm_source=github.com&utm_medium=referral&utm_content=CSchoel/nolds&utm_campaign=Badge_Grade

.. image:: https://codecov.io/gh/CSchoel/nolds/branch/master/graph/badge.svg
.. image:: https://codecov.io/gh/CSchoel/nolds/branch/master/graph/badge.svg?token=Xgr82QKhFi
:target: https://codecov.io/gh/CSchoel/nolds


Nolds is a small numpy-based library that provides an implementation and a learning resource for nonlinear measures for dynamical systems based on one-dimensional time series.

Currently the following measures are implemented:
Expand Down
35 changes: 27 additions & 8 deletions nolds/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1825,7 +1825,7 @@ def mfhurst_dm(data, qvals=None, max_dists=range(5, 20), detrend=True,
return [mH, sH]


def corr_dim(data, emb_dim, rvals=None, dist=rowwise_euclidean,
def corr_dim(data, emb_dim, lag=1, rvals=None, dist=rowwise_euclidean,
fit="RANSAC", debug_plot=False, debug_data=False, plot_file=None):
"""
Calculates the correlation dimension with the Grassberger-Procaccia algorithm
Expand Down Expand Up @@ -1857,9 +1857,12 @@ def corr_dim(data, emb_dim, rvals=None, dist=rowwise_euclidean,
This version of the algorithm is created for one-dimensional (scalar) time
series. Therefore, before calculating C(r), a delay embedding of the time
series is performed to yield emb_dim dimensional vectors
Y_i = [X_i, X_(i+1), X_(i+2), ... X_(i+embd_dim-1)]. Choosing a higher
value for emb_dim allows to reconstruct higher dimensional dynamics and
avoids "systematic errors due to corrections to scaling".
Y_i = [X_i, X_(i+1*lag), X_(i+2*lag), ... X_(i+(emb_dim-1)*lag)]. Choosing
a higher value for emb_dim allows the reconstruction of higher-dimensional
dynamics and avoids "systematic errors due to corrections to scaling".
Choosing a higher value for lag helps to avoid overestimating the correlation
caused by X_i ~= X_(i+1). However, lag should also not be set too high, since
this leads to underestimating the correlation due to the exponential
divergence of trajectories in chaotic systems.
References:
.. [cd_1] P. Grassberger and I. Procaccia, “Characterization of strange
Expand Down Expand Up @@ -1911,19 +1914,35 @@ def corr_dim(data, emb_dim, rvals=None, dist=rowwise_euclidean,
``csums`` are the corresponding log(C(r)) and ``poly`` are the line
coefficients (``[slope, intercept]``)
"""
# TODO determine lag in units of time instead of number of datapoints
data = np.asarray(data)

# TODO what are good values for r?
# TODO do this for multiple values of emb_dim?
if rvals is None:
sd = np.std(data, ddof=1)
rvals = logarithmic_r(0.1 * sd, 0.5 * sd, 1.03)
n = len(data)
orbit = delay_embedding(data, emb_dim, lag=1)
dists = np.array([dist(orbit, orbit[i]) for i in range(len(orbit))])
orbit = delay_embedding(data, emb_dim, lag=lag)
n = len(orbit)
dists = np.zeros((len(orbit), len(orbit)), dtype=np.float64)
for i in range(len(orbit)):
# calculate distances between X_i and X_i+1, X_i+2, ... , X_n-1
# NOTE: strictly speaking, [cd_1] does not specify to exclude self-matches
# however, since both [cd_2] and [cd_3] specify to only compare i with j != i
# or j > i respectively, it is safe to assume that this was an oversight in
# [cd_1]
d = dist(orbit[i+1:], orbit[i])
dists[i+1:,i] = d # fill column i
dists[i,i+1:] = d # fill row i
csums = []
for r in rvals:
s = 1.0 / (n * (n - 1)) * np.sum(dists < r)
# NOTE: Both [cd_1] and [cd_2] use the factor 1/N^2 here.
# However, since we only use these values to fit a line in a log-log plot
# any multiplicative constant doesn't change the result since it will
# only result in an offset on the y-axis. Also, [cd_3] has a point here
# in that if we exclude self-matches in the numerator, it makes sense to
# also exclude self-matches from the denominator.
s = 1.0 / (n * (n - 1)) * np.sum(dists <= r)
csums.append(s)
csums = np.array(csums)
# filter zeros from csums
Expand Down
21 changes: 18 additions & 3 deletions nolds/test_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
import numpy as np

# import internal module to test helping functions
from . import measures as nolds
from . import datasets
from nolds import measures as nolds
from nolds import datasets
import unittest
import warnings

Expand Down Expand Up @@ -333,7 +333,22 @@ def test_corr_dim(self):
cd = nolds.corr_dim(data, 4, fit="poly")
self.assertAlmostEqual(cd, 0.5, delta=0.15)
# TODO test example for cd > 1
def test_corr_dim_logistic(self):

def test_lorenz(self):
  """Check corr_dim against the known value for the Lorenz attractor.

  Grassberger and Procaccia (1983) report a correlation dimension of
  about 2.05 for the x-coordinate (here: column 1) of the Lorenz system
  with the classic parameters sigma=10, rho=28, beta=8/3.
  """
  # NOTE: The settings of n, discard, lag, emb_dim, and rvals were determined
  # experimentally to find the smallest dataset that yields the results reported
  # by Grassberger and Procaccia (1983).
  discard = 5000
  n = 5000
  lag = 5
  emb_dim = 5
  data = datasets.lorenz_euler(n + discard, 10, 28, 8/3, start=(1,1,1), dt=0.012)
  # drop the transient before the trajectory settles onto the attractor
  x = data[discard:,1]
  rvals = nolds.logarithmic_r(1, np.e, 1.1)  # determined experimentally
  # pass the lag variable defined above (the original hard-coded the literal 5,
  # leaving the variable unused) and keep debug_plot off so the test suite
  # does not try to open a plot window (e.g. on headless CI machines)
  cd = nolds.corr_dim(x, emb_dim, fit="poly", rvals=rvals, lag=lag, debug_plot=False)
  self.assertAlmostEqual(cd, 2.05, delta=0.1)

def test_logistic(self):
  """Placeholder: corr_dim of the logistic map is not yet tested."""
  # TODO replicate tests with logistic map from grassberger-procaccia
  pass

Expand Down

0 comments on commit 5da1ee7

Please sign in to comment.