-
Notifications
You must be signed in to change notification settings - Fork 1
/
na1001.py
301 lines (261 loc) · 10.2 KB
/
na1001.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# -*- coding: utf-8 -*-
"""NASA Ames FFI 1001 text file format reader / writer."""
from datetime import datetime, timezone
from pathlib import Path
from typing import Union
import numpy as np
import pandas as pd
import polars as pl
from .na1001_backend import nasa_ames_1001_rw as rw
from .na1001_backend import nasa_ames_1001_tools as tools
###############################################################################
class FFI1001(object):
r"""
A class to work with NASA Ames files with format index 1001.
Parameters
----------
file : str or pathlib.Path or file-like
data source.
sep : str, optional
General delimiter. The default is " ".
sep_data : str, optional
Delimiter used exclusively in data block. The default is "\t".
strip_lines : bool, optional
Remove surrounding whitespaces from all lines before parsing.
The default is True.
auto_nncoml : bool, optional
Automatically determine number of lines in comment blocks.
The default is True.
rmv_repeated_seps : bool, optional
Remove repeated delimiters (e.g. double space). The default is False.
vscale_vmiss_vertical : bool, optional
VSCALE and VMISS parameters are arranged vertically over multiple
lines (1 entry per line) instead of in one line each.
The default is False.
vmiss_to_None : bool, optional
Set True if missing values should be replaced with None. The default is False.
ensure_ascii : bool, optional
Enforce ASCII-decoding of the input. The default is False.
allow_emtpy_data : bool, optional
Allow header-only input. The default is False.
Returns
-------
FFI 1001 class instance.
"""
def __init__(self, file=None, **kwargs):
today = datetime.now(timezone.utc).date()
self.NLHEAD = 14 # minimum number of header lines is 14
self.ONAME = "data origin"
self.ORG = "organization"
self.SNAME = "sampling description"
self.MNAME = "mission name"
self.IVOL = 1
self.NVOL = 1
self.DATE = (1970, 1, 1)
self.RDATE = (today.year, today.month, today.day)
self.DX = 0
self.XNAME = "x name"
self.NV = 1
self.VSCAL = ["1"]
self.VMISS = ["-9999"]
self._VNAME = ["v names"]
self.NSCOML = 0
self._SCOM = ["special comments"]
self.NNCOML = 0
self._NCOM = ["normal comments"]
self._X = [""]
self.V = [[""]]
self._SRC = "path to file"
self._HEADER = "file header"
if file is not None:
self.__from_file(file, **kwargs)
@property
def FFI(self):
"""FFI is always 1001, as the class name indicates..."""
return 1001
@property
def SCOM(self) -> list[str]:
"""Special comments block"""
return self._SCOM
@SCOM.setter
def SCOM(self, value: list[str]):
self._SCOM, self.NSCOML = value, len(value)
self.NLHEAD = 14 + self.NSCOML + self.NNCOML + self.NV
@property
def NCOM(self) -> list[str]:
"""Normal comments block"""
return self._NCOM
@NCOM.setter
def NCOM(self, value: list[str]):
self._NCOM, self.NNCOML = value, len(value)
self.NLHEAD = 14 + self.NSCOML + self.NNCOML + self.NV
@property
def VNAME(self) -> list[str]:
"""Variables name block"""
return self._VNAME
@VNAME.setter
def VNAME(self, value: list[str]):
self._VNAME, self.NV = value, len(value)
self.NLHEAD = 14 + self.NSCOML + self.NNCOML
@property
def X(self) -> list[str]:
"""Independent variable"""
return self._X
@X.setter
def X(self, xarr: list[str]):
self._X = xarr
# calculate dx as unique diffs in X,
# unique with floats might fail, so add a round to 4 decimals:
dx = np.unique(np.diff(np.array(xarr, dtype=float)).round(4))
# let dx be 0 if there's more than one unique diff
dx = dx[0] if dx.size == 1 else 0
# use an integer if dx is close to its integer value, else float:
dx = int(dx) if np.isclose(dx, int(dx)) else dx
self.DX = dx
@property
def V(self) -> list[list[str],]:
"""Dependent variable"""
return self._V
@V.setter
def V(self, vlists: list[list[str],]):
# assert (
# len(vlists) == self.NV
# ), f"try to set {len(vlists)} dependent variables, but VNAMES specify {self.NV}"
self._V = vlists
# ------------------------------------------------------------------------------
def __from_file(self, file: Union[str, Path], **kwargs):
"""Load NASA Ames 1001 from text file."""
nadict = rw.na1001_cls_read(file, **kwargs)
for k in rw.KEYS:
setattr(self, k, nadict[k])
# ------------------------------------------------------------------------------
def to_file(self, file: Union[str, Path], **kwargs):
"""
Write NASA Ames 1001 file from populated ffi_1001 class.
Output text is UTF-8 encoded, to allow for a more contemporary set of characters.
Parameters
----------
file : str or pathlib.Path
filepath and -name of the destination.
sep : str, optional
General delimiter. The default is " ".
sep_data : str, optional
Delimiter to separate data columns. The default is "\t".
overwrite : int, optional
Set to 1 to overwrite existing files. The default is 0 (no overwrite).
verbose : bool, optional
Verbose print output if True. The default is False.
Returns
-------
int
0 -> failed, 1 -> successful write, 2 -> successful overwrite.
"""
return rw.na1001_cls_write(file, self.__dict__, **kwargs)
# ------------------------------------------------------------------------------
def to_dict_nparray(self, **kwargs) -> dict[str, np.ndarray]:
"""
Make dictionary of numpy 1D arrays from FFI1001 class instance.
Parameters
----------
sel_vnames : list of string, optional
VNAMEs to be converted. The default is None.
clean_vnames : boolean, optional
remove whitespaces from variable names. The default is False.
vname_delimiter : string, optional
Where to split entries in VNAME. The default is ';'.
split_idx : int, optional
Which part of split result to use, see splitVname. The default is 0.
xdtype : data type, optional
Data type for independent variable X. The default is np.float.
vdtype : data type, optional
Data type for dependent variable(s). The default is np.float.
vmiss : missing value identifier, optional
The default is np.nan.
Returns
-------
dict
dictionary holding numpy arrays for variables from the na1001 class
instance.
"""
return tools.naDict_2_npndarr(self.__dict__, **kwargs)
# ------------------------------------------------------------------------------
def to_pddf(self, **kwargs) -> pd.DataFrame:
"""
Make a pandas DataFrame from NA 1001 data (X and V).
Parameters
----------
sep_colhdr : str, optional
separator used in column header (last line of NCOM). Default is tab.
idx_colhdr : int, optional
look for column header in NCOM at index idx_colhdr. Default is -1.
clean_colnames : boolean, optional
remove whitespaces from variable names. The default is False.
dtype : numpy array data type, optional
data type to use for conversion to DataFrame. Default is float.
add_datetime_index: boolean, optional
add a DateTime index to the df. The default is False.
Returns
-------
pandas.DataFrame
dataframe with a column for X and one for each parameter in V.
"""
return tools.naDict_2_pddf(self.__dict__, **kwargs)
# ------------------------------------------------------------------------------
def to_poldf(self, **kwargs) -> pl.DataFrame:
"""
Make a polars DataFrame from NA 1001 data (X and V).
Parameters
----------
sep_colhdr : str, optional
separator used in column header (last line of NCOM). Default is tab.
idx_colhdr : int, optional
look for column header in NCOM at index idx_colhdr. Default is -1.
clean_colnames : boolean, optional
remove whitespaces from variable names. The default is False.
add_datetime: boolean, optional
add a DateTime column to the df. The default is False.
nan_to_none: boolean, optional
fill NaN values with polars' Null. The default is False.
Returns
-------
polars.DataFrame
dataframe with a column for X and one for each parameter in V.
"""
return tools.naDict_2_poldf(self.__dict__, **kwargs)
# ------------------------------------------------------------------------------
def __repr__(self) -> str:
s = f"NASA Ames {self.FFI}\n---\n"
s += "".join(
[
f"{k.strip('_')} : {self.__dict__[k]}\n"
for k in (
"_SRC",
"NLHEAD",
"ONAME",
"ORG",
"SNAME",
"MNAME",
"IVOL",
"NVOL",
"DATE",
"RDATE",
"DX",
"XNAME",
"NV",
"VSCAL",
"VMISS",
"_VNAME",
"NSCOML",
"_SCOM",
"NNCOML",
"_NCOM",
)
]
)
return s
def __str__(self) -> str:
s = "NASA Ames 1001\n"
s += f"SRC: {self._SRC}\n---\n"
s += "\n".join(f"{k}: {getattr(self, k)}" for k in ["ONAME", "ORG", "SNAME"])
s += "\n" + ", ".join(f"{k}: {getattr(self, k)}" for k in ["DATE", "RDATE"])
return s