Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Wrap fitcircle #1550

Draft
wants to merge 35 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
7c1ae68
add fitcircle.py
willschlitzer Sep 26, 2021
41a3ded
add fitcircle imports
willschlitzer Sep 28, 2021
7e2dfee
add test_fitcircle
willschlitzer Sep 28, 2021
a239841
add if statements and df column names
willschlitzer Sep 28, 2021
d526eab
update formatting in fixture_data
willschlitzer Sep 28, 2021
f02466e
formatting
willschlitzer Sep 29, 2021
0355a7d
add functions
willschlitzer Sep 29, 2021
9bf8331
add test info to test_fitcircle_no_outfile
willschlitzer Sep 29, 2021
0360abc
add if statement for normalize
willschlitzer Sep 29, 2021
1e86759
add tests
willschlitzer Sep 29, 2021
e5b7a91
add fitcircle to index.rst
willschlitzer Nov 6, 2021
3cde6d8
run make format
willschlitzer Nov 6, 2021
c199d09
remove unused imports
willschlitzer Nov 6, 2021
899a93f
Merge branch 'main' into wrap/fitcircle
willschlitzer Nov 6, 2021
d131d2f
change top docstring
willschlitzer Nov 6, 2021
3e57da6
Merge remote-tracking branch 'origin/wrap/fitcircle' into wrap/fitcircle
willschlitzer Nov 6, 2021
d4eebb7
fix variable names
willschlitzer Jan 14, 2022
913a70b
Merge branch 'main' into wrap/fitcircle
willschlitzer Jan 14, 2022
f0033fa
Merge branch 'main' into wrap/fitcircle
willschlitzer Apr 13, 2022
d41d90e
Apply suggestions from code review
willschlitzer Apr 19, 2022
0c11f2c
Update pygmt/src/fitcircle.py
willschlitzer Apr 21, 2022
a6f6265
Update pygmt/src/fitcircle.py
willschlitzer May 2, 2022
8ab458a
Merge branch 'main' into wrap/fitcircle
willschlitzer May 2, 2022
bbbb84d
run make format
willschlitzer May 2, 2022
00ffffd
Apply suggestions from code review
willschlitzer May 2, 2022
af7c4f9
add normalize and small_circle parameters
willschlitzer May 5, 2022
8803d43
Merge branch 'main' into wrap/fitcircle
willschlitzer May 5, 2022
d166335
Apply suggestions from code review
willschlitzer May 6, 2022
9227dbe
change "normalize" to "norm"
willschlitzer May 6, 2022
a10ed09
Merge branch 'main' into wrap/fitcircle
willschlitzer May 6, 2022
d75863d
Apply suggestions from code review
willschlitzer May 23, 2022
bfa8fa6
Merge branch 'main' into wrap/fitcircle
willschlitzer May 23, 2022
fa0aa4b
add docstring for "data"
willschlitzer May 23, 2022
e885b54
Update pygmt/src/fitcircle.py
willschlitzer May 24, 2022
c890477
Merge branch 'main' into wrap/fitcircle
willschlitzer Dec 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ Operations on tabular data
blockmedian
blockmode
filter1d
fitcircle
nearneighbor
project
select
Expand Down
1 change: 1 addition & 0 deletions pygmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
config,
dimfilter,
filter1d,
fitcircle,
grd2cpt,
grd2xyz,
grdclip,
Expand Down
1 change: 1 addition & 0 deletions pygmt/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pygmt.src.contour import contour
from pygmt.src.dimfilter import dimfilter
from pygmt.src.filter1d import filter1d
from pygmt.src.fitcircle import fitcircle
from pygmt.src.grd2cpt import grd2cpt
from pygmt.src.grd2xyz import grd2xyz
from pygmt.src.grdclip import grdclip
Expand Down
131 changes: 131 additions & 0 deletions pygmt/src/fitcircle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""
fitcircle - Find mean position and great [or small] circle fit to points on
sphere.
"""
import warnings

import pandas as pd
from pygmt.clib import Session
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import GMTTempFile, build_arg_string, fmt_docstring, use_alias


@fmt_docstring
@use_alias(
L="norm",
S="small_circle",
V="verbose",
)
def fitcircle(data, output_type="pandas", outfile=None, **kwargs):
r"""
Find mean position and great [or small] circle fit to points on sphere.

**fitcircle** reads lon,lat [or lat,lon] values from the first two
columns of the table. These are converted to
Cartesian three-vectors on the unit sphere. Then two locations are
found: the mean of the input positions, and the pole to the great circle
which best fits the input positions. The user may choose one or both of
two possible solutions to this problem. When the data are closely grouped
along a great circle both solutions are similar. If the data have large
dispersion, the pole to the great circle will be less well determined
than the mean. Compare both solutions as a qualitative check.

Setting ``norm`` to **1** approximates the
minimization of the sum of absolute values of cosines of angular
distances. This solution finds the mean position as the Fisher average
of the data, and the pole position as the Fisher average of the
cross-products between the mean and the data. Averaging cross-products
gives weight to points in proportion to their distance from the mean,
analogous to the "leverage" of distant points in linear regression in
the plane.

Setting ``norm`` to **2** approximates the
minimization of the sum of squares of cosines of angular distances. It
creates a 3 by 3 matrix of sums of squares of components of the data
vectors. The eigenvectors of this matrix give the mean and pole
locations. This method may be more subject to roundoff errors when there
are thousands of data. The pole is given by the eigenvector
corresponding to the smallest eigenvalue; it is the least-well
represented factor in the data and is not easily estimated by either
method.

Full option list at :gmt-docs:`fitcircle.html`

{aliases}

Parameters
----------
data : str or list or {table-like}
Pass in either a file name to an ASCII data table, a Python list, a 2D
{table-classes} containing longitude and latitude values.
output_type : str
Determine the format the xyz data will be returned in [Default is
``pandas``]:

- ``numpy`` - :class:`numpy.ndarray`
- ``pandas``- :class:`pandas.DataFrame`
- ``file`` - ASCII file (requires ``outfile``)
outfile : str
The file name for the output ASCII file.
norm : int or bool
Specify the desired *norm* as **1** or **2**\ , or use ``True``
or **3** to see both solutions.
small_circle : float
Attempt to fit a small circle instead of a great circle. The pole
will be constrained to lie on the great circle connecting the pole
of the best-fit great circle and the mean location of the data.
Optionally append the desired fixed latitude of the small circle
[Default will determine the optimal latitude].
{V}

Returns
-------
ret : pandas.DataFrame or numpy.ndarray or None
Return type depends on ``outfile`` and ``output_type``:

- None if ``outfile`` is set (output will be stored in file set by
``outfile``)
- :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is
not set (depends on ``output_type`` [Default is
:class:`pandas.DataFrame`])

"""
if kwargs.get("L") is None:
raise GMTInvalidInput("Pass a required argument to 'norm'.")
if output_type not in ["numpy", "pandas", "file"]:
raise GMTInvalidInput("Must specify format as either numpy, pandas, or file.")
if outfile is not None and output_type != "file":
msg = (
f"Changing `output_type` of fitcirle from '{output_type}' to 'file' "
"since `outfile` parameter is set. Please use `output_type='file'` "
"to silence this warning."
)
warnings.warn(msg, category=RuntimeWarning, stacklevel=2)
output_type = "file"
elif output_type == "file" and outfile is None:
raise GMTInvalidInput("Must specify outfile for ASCII output.")
with GMTTempFile() as tmpfile:
with Session() as lib:
file_context = lib.virtualfile_from_data(check_kind="vector", data=data)
with file_context as infile:
if outfile is None:
outfile = tmpfile.name
lib.call_module(
module="fitcircle",
args=build_arg_string(kwargs, infile=infile, outfile=outfile),
)

# Read temporary csv output to a pandas table
if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame
result = pd.read_csv(
tmpfile.name,
sep="\t",
names=["longitude", "latitude", "method"],
comment=">",
)
Comment on lines +119 to +125
Copy link
Member

@seisman seisman May 31, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

$ gmt fitcircle @sat_03.txt -L2     
330.249933144	-18.4048179232	Points read: 703 Average Position (Flat Earth)
330.169184777	-18.4206532702	L2 Average Position (Eigenval Method)
52.7451972868	21.2040074195	L2 N Hemisphere Great Circle Pole (Eigenval Method)
232.745197287	-21.2040074195	L2 S Hemisphere Great Circle Pole (Eigenval Method)
$ gmt fitcircle @sat_03.txt -L2 -Fms 
330.169184777	-18.4206532702	232.745197287	-21.2040074195

As you can see, the output of fitcircle is quite different with or without -F option, so the read_csv call doesn't work with -F.

I'm wondering should we return a dict instead, e.g.,:

>>> solution = fitcircle(data="@sat_03.txt", norm=2)
>>> print(solution)
{'average': {'latitude': -18.4048179232, 'longitude': 330.249933144},
 'L2_average': {'latitude': -18.4206532702, 'longitude': 330.169184777},
 'L2_north': {'latitude': 21.2040074195, 'longitude': 52.7451972868},
 'L2_south': {'latitude': -21.2040074195, 'longitude': 232.745197287}}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can work on this, but I'm about to move so I may not be too speedy

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No hurry. Better to have more discussions about the output before working on it.

elif outfile != tmpfile.name: # return None if outfile set, output in outfile
result = None

if output_type == "numpy":
result = result.to_numpy()
return result
111 changes: 111 additions & 0 deletions pygmt/tests/test_fitcircle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""
Tests for fitcircle.
"""

import os

import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest
from pygmt import fitcircle
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import GMTTempFile
from pygmt.src import which


@pytest.fixture(scope="module", name="data")
def fixture_data():
"""
Load the sample data from the sat_03 remote file.
"""
fname = which("@sat_03.txt", download="c")
data = pd.read_csv(
fname,
header=None,
skiprows=1,
sep="\t",
names=["longitutde", "latitude", "z"],
)
return data


def test_fitcircle_no_outfile(data):
"""
Test fitcircle with no set outfile.
"""
result = fitcircle(data=data, norm=True)
assert result.shape == (7, 3)
# Test longitude results
npt.assert_allclose(result.longitude.min(), 52.7434273422)
npt.assert_allclose(result.longitude.max(), 330.243649573)
npt.assert_allclose(result.longitude.mean(), 223.078116476)
npt.assert_allclose(result.longitude.median(), 232.7449849)
# Test latitude results
npt.assert_allclose(result.latitude.min(), -21.2085369093)
npt.assert_allclose(result.latitude.max(), 21.2085369093)
npt.assert_allclose(result.latitude.mean(), -7.8863683297)
npt.assert_allclose(result.latitude.median(), -18.406777)


def test_fitcircle_file_output(data):
"""
Test that fitcircle returns a file output when it is specified.
"""
with GMTTempFile(suffix=".txt") as tmpfile:
result = fitcircle(
data=data, norm=True, outfile=tmpfile.name, output_type="file"
)
assert result is None # return value is None
assert os.path.exists(path=tmpfile.name) # check that outfile exists


def test_fitcircle_invalid_format(data):
"""
Test that fitcircle fails with an incorrect format for output_type.
"""
with pytest.raises(GMTInvalidInput):
fitcircle(data=data, norm=True, output_type="a")


def test_fitcircle_no_normalize(data):
"""
Test that fitcircle fails with an argument is missing for normalize.
"""
with pytest.raises(GMTInvalidInput):
fitcircle(data=data)


def test_fitcircle_no_outfile_specified(data):
"""
Test that fitcircle fails when outpput_type is set to 'file' but no output
file name is specified.
"""
with pytest.raises(GMTInvalidInput):
fitcircle(data=data, norm=True, output_type="file")


def test_filter1d_outfile_incorrect_output_type(data):
"""
Test that filter1d raises a warning when an outfile filename is set but the
output_type is not set to 'file'.
"""
with pytest.warns(RuntimeWarning):
with GMTTempFile(suffix=".txt") as tmpfile:
result = fitcircle(
data=data, norm=True, outfile=tmpfile.name, output_type="numpy"
)
assert result is None # return value is None
assert os.path.exists(path=tmpfile.name) # check that outfile exists


def test_fitcircle_format(data):
"""
Test that correct formats are returned.
"""
circle_default = fitcircle(data=data, norm=True)
assert isinstance(circle_default, pd.DataFrame)
circle_array = fitcircle(data=data, norm=True, output_type="numpy")
assert isinstance(circle_array, np.ndarray)
circle_df = fitcircle(data=data, norm=True, output_type="pandas")
assert isinstance(circle_df, pd.DataFrame)