Skip to content

Commit

Permalink
Merge pull request #192 from dstansby/VDRInfo
Browse files Browse the repository at this point in the history
Add VDRInfo
  • Loading branch information
dstansby committed May 22, 2023
2 parents 658341b + 55f77a0 commit 8db0ce1
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 69 deletions.
2 changes: 1 addition & 1 deletion cdflib/cdf_to_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def _convert_cdf_to_dicts(filename, to_datetime=False, to_unixtime=False):
var_atts_temp[att] = cdf_file.attget(att, var_name)
variable_properties[var_name] = cdf_file.varinq(var_name)
# Gather the actual variable data
if variable_properties[var_name]["Last_Rec"] < 0:
if variable_properties[var_name].Last_Rec < 0:
var_data_temp[var_name] = np.array([])
else:
var_data_temp[var_name] = cdf_file.varget(var_name)
Expand Down
94 changes: 36 additions & 58 deletions cdflib/cdfread.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,15 @@
import numpy as np

import cdflib.epochs as epoch
from cdflib.dataclasses import AEDR, VDR, ADRInfo, AttData, CDRInfo, GDRInfo
from cdflib.dataclasses import (
AEDR,
VDR,
ADRInfo,
AttData,
CDRInfo,
GDRInfo,
VDRInfo,
)

__all__ = ["CDF"]

Expand Down Expand Up @@ -227,63 +235,33 @@ def cdf_info(self):
mycdf_info["LeapSecondUpdated"] = self._leap_second_updated
return mycdf_info

def varinq(self, variable: str) -> Dict[str, Any]:
def varinq(self, variable: str) -> VDRInfo:
"""
Returns a dictionary that shows the basic variable information.
Get basic variable information.
This information includes
+-----------------+--------------------------------------------------------------------------------+
| ['Variable'] | the name of the variable |
+-----------------+--------------------------------------------------------------------------------+
| ['Num'] | the variable number |
+-----------------+--------------------------------------------------------------------------------+
| ['Var_Type'] | the variable type: zVariable or rVariable |
+-----------------+--------------------------------------------------------------------------------+
| ['Data_Type'] | the variable's CDF data type |
+-----------------+--------------------------------------------------------------------------------+
| ['Num_Elements']| the number of elements of the variable |
+-----------------+--------------------------------------------------------------------------------+
| ['Num_Dims'] | the dimensionality of the variable record |
+-----------------+--------------------------------------------------------------------------------+
| ['Dim_Sizes'] | the shape of the variable record |
+-----------------+--------------------------------------------------------------------------------+
| ['Sparse'] | the variable's record sparseness |
+-----------------+--------------------------------------------------------------------------------+
| ['Last_Rec'] | the maximum written record number (0-based) |
+-----------------+--------------------------------------------------------------------------------+
| ['Dim_Vary'] | the dimensional variance(s) |
+-----------------+--------------------------------------------------------------------------------+
| ['Rec_Vary'] | the record variance |
+-----------------+--------------------------------------------------------------------------------+
| ['Pad'] | the padded value if set |
+-----------------+--------------------------------------------------------------------------------+
| ['Compress'] | the GZIP compression level, 0 to 9. 0 if not compressed |
+-----------------+--------------------------------------------------------------------------------+
| ['Block_Factor']| the blocking factor if the variable is compressed |
+-----------------+--------------------------------------------------------------------------------+
Returns
-------
VDRInfo
"""
vdr_info = self.vdr_info(variable)

var: Dict[str, Any] = {}
var["Variable"] = vdr_info.name
var["Num"] = vdr_info.variable_number
var["Var_Type"] = self._variable_token(vdr_info.section_type)
var["Data_Type"] = vdr_info.data_type
var["Data_Type_Description"] = self._datatype_token(vdr_info.data_type)
var["Num_Elements"] = vdr_info.num_elements
var["Num_Dims"] = vdr_info.num_dims
var["Dim_Sizes"] = vdr_info.dim_sizes
var["Sparse"] = self._sparse_token(vdr_info.sparse)
var["Last_Rec"] = vdr_info.max_rec
var["Rec_Vary"] = vdr_info.record_vary
var["Dim_Vary"] = vdr_info.dim_vary
if vdr_info.pad is not None:
var["Pad"] = vdr_info.pad
var["Compress"] = vdr_info.compression_level
if vdr_info.blocking_factor is not None:
var["Block_Factor"] = vdr_info.blocking_factor

return var
return VDRInfo(
vdr_info.name,
vdr_info.variable_number,
self._variable_token(vdr_info.section_type),
vdr_info.data_type,
self._datatype_token(vdr_info.data_type),
vdr_info.num_elements,
vdr_info.num_dims,
vdr_info.dim_sizes,
self._sparse_token(vdr_info.sparse),
vdr_info.max_rec,
vdr_info.record_vary,
vdr_info.dim_vary,
vdr_info.compression_level,
vdr_info.pad,
vdr_info.blocking_factor,
)

def attinq(self, attribute: Union[str, int]) -> ADRInfo:
"""
Expand Down Expand Up @@ -1935,11 +1913,11 @@ def _findtimerecords(
vdr_info = self.varinq(epoch)
if vdr_info is None:
raise ValueError("Epoch not found")
if vdr_info["Data_Type"] == 31 or vdr_info["Data_Type"] == 32 or vdr_info["Data_Type"] == 33:
if vdr_info.Data_Type == 31 or vdr_info.Data_Type == 32 or vdr_info.Data_Type == 33:
epochtimes = self.varget(epoch)
else:
vdr_info = self.varinq(var_name)
if vdr_info["Data_Type"] == 31 or vdr_info["Data_Type"] == 32 or vdr_info["Data_Type"] == 33:
if vdr_info.Data_Type == 31 or vdr_info.Data_Type == 32 or vdr_info.Data_Type == 33:
epochtimes = self.varget(var_name)
else:
# acquire depend_0 variable
Expand All @@ -1952,14 +1930,14 @@ def _findtimerecords(
)

vdr_info = self.varinq(dependVar.Data)
if vdr_info["Data_Type"] != 31 and vdr_info["Data_Type"] != 32 and vdr_info["Data_Type"] != 33:
if vdr_info.Data_Type != 31 and vdr_info.Data_Type != 32 and vdr_info.Data_Type != 33:
raise ValueError(
"Corresponding variable from 'DEPEND_0' attribute "
"for variable: {}".format(var_name) + " is not a CDF epoch type"
)
epochtimes = self.varget(dependVar.Data)

return self._findrangerecords(vdr_info["Data_Type"], epochtimes, starttime, endtime)
return self._findrangerecords(vdr_info.Data_Type, epochtimes, starttime, endtime)

def _findrangerecords(
self, data_type: int, epochtimes: epoch.epochs_type, starttime: datetime.datetime, endtime: datetime.datetime
Expand Down Expand Up @@ -2172,7 +2150,7 @@ def _file_or_url_or_s3_handler(

return bdata

def _unstream_file(self, f) -> None: # type: ignore
def _unstream_file(self, f) -> None: # type: ignore
"""
Typically for S3 or URL, writes the current file stream
into a file in the temporary directory.
Expand Down
50 changes: 50 additions & 0 deletions cdflib/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,56 @@ class GDRInfo:
leapsecond_updated: Optional[int] = None


@dataclass
class VDRInfo:
    """
    Variable data record info.

    Instances are built positionally by ``CDF.varinq``, so the field order
    below is part of the interface and must not be changed.

    Attributes
    ----------
    Variable : str
        Name of the variable.
    Num : int
        Variable number.
    Var_Type : str
        Variable type: zVariable or rVariable.
    Data_Type : int
        Variable CDF data type (numeric code).
    Data_Type_Description : str
        Textual description of the CDF data type.
    Num_Elements : int
        Number of elements of the variable.
    Num_Dims : int
        Dimensionality of variable record.
    Dim_Sizes : list of int
        Shape of the variable record.
    Sparse : str
        Variable's record sparseness.
    Last_Rec : int
        Maximum written record number (0-based).
    Rec_Vary : int
        Record variance.
    Dim_Vary : list of int or list of bool
        Dimensional variance(s).
    Compress : int
        GZIP compression level, 0 to 9. 0 if not compressed.
    Pad : str or numpy.ndarray, optional
        Padded value (if set).
    Block_Factor : int, optional
        Blocking factor (if variable is compressed).
    """

    Variable: str
    Num: int
    Var_Type: str
    Data_Type: int
    Data_Type_Description: str
    Num_Elements: int
    Num_Dims: int
    Dim_Sizes: List[int]
    Sparse: str
    Last_Rec: int
    Rec_Vary: int
    Dim_Vary: Union[List[int], List[bool]]
    Compress: int
    # Callers index the pad value (``varinq.Pad[0]``), so it is not a bare int.
    # NOTE(review): presumably an ndarray for numeric types and a str for
    # character types -- confirm against the reader's pad decoding.
    Pad: Optional[Union[str, np.ndarray]] = None
    Block_Factor: Optional[int] = None


@dataclass
class AEDR:
entry: np.ndarray
Expand Down
1 change: 1 addition & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Breaking changes
- ``CDF.attinq`` now returns a dataclass instead of a dict.
- ``CDF.attget`` now returns a dataclass instead of a dict.
- ``CDF.varget`` now returns a dataclass instead of a dict.
- ``CDF.varinq`` now returns a dataclass instead of a dict.

Bugfixes
--------
Expand Down
20 changes: 10 additions & 10 deletions tests/test_cdfwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def test_create_zvariable(tmp_path):

# Test CDF info
varinfo = reader.varinq("Variable1")
assert varinfo["Data_Type"] == 1
assert varinfo.Data_Type == 1

var = reader.varget("Variable1")
assert (var == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).all()
Expand All @@ -202,7 +202,7 @@ def test_create_rvariable(tmp_path):

# Test CDF info
varinfo = reader.varinq("Variable1")
assert varinfo["Data_Type"] == 12
assert varinfo.Data_Type == 12

var = reader.varget("Variable1")
for x in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
Expand Down Expand Up @@ -230,7 +230,7 @@ def test_create_zvariable_no_recvory(tmp_path):

# Test CDF info
varinfo = reader.varinq("Variable1")
assert varinfo["Data_Type"] == 8
assert varinfo.Data_Type == 8

var = reader.varget("Variable1")
assert var == 2
Expand Down Expand Up @@ -363,7 +363,7 @@ def test_sparse_virtual_zvariable_blocking(tmp_path):
varinq = reader.varinq("Variable1")
var = reader.varget("Variable1")

pad_num = varinq["Pad"][0]
pad_num = varinq.Pad[0]
assert var[30001] == pad_num
assert var[70001] == 70001

Expand Down Expand Up @@ -399,7 +399,7 @@ def test_sparse_zvariable_blocking(tmp_path):
# tfile = cdf_create(fn, {'rDim_sizes': [1]})
varinq = reader.varinq("Variable1")
var = reader.varget("Variable1")
pad_num = varinq["Pad"][0]
pad_num = varinq.Pad[0]

assert var[30001] == pad_num
assert var[70001] == 30000
Expand Down Expand Up @@ -427,7 +427,7 @@ def test_sparse_zvariable_pad(tmp_path):
# Test CDF info
varinq = reader.varinq("Variable1")
var = reader.varget("Variable1")
pad_num = varinq["Pad"][0]
pad_num = varinq.Pad[0]

assert var[100] == pad_num
assert var[3000] == 1
Expand Down Expand Up @@ -456,7 +456,7 @@ def test_sparse_zvariable_previous(tmp_path):
# Test CDF info
varinq = reader.varinq("Variable1")
var = reader.varget("Variable1")
pad_num = varinq["Pad"][0]
pad_num = varinq.Pad[0]

assert var[100] == pad_num
assert var[6001] == var[6000]
Expand Down Expand Up @@ -486,7 +486,7 @@ def test_create_2d_rvariable(tmp_path):

# Test CDF info
varinfo = reader.varinq("Variable1")
assert varinfo["Data_Type"] == 14
assert varinfo.Data_Type == 14

var = reader.varget("Variable1")
for x in [0, 1, 2, 3, 4]:
Expand Down Expand Up @@ -521,7 +521,7 @@ def test_create_2d_rvariable_dimvary(tmp_path):
# Test CDF info
varinfo = reader.varinq("Variable1")

assert varinfo["Data_Type"] == 21
assert varinfo.Data_Type == 21
var = reader.varget("Variable1")
for x in [0, 1, 2, 3, 4]:
assert var[x][0] == 2 * x
Expand Down Expand Up @@ -557,7 +557,7 @@ def test_create_2d_r_and_z_variables(tmp_path):

# Test CDF info
varinfo = reader.varinq("Variable1")
assert varinfo["Data_Type"] == 22
assert varinfo.Data_Type == 22

var = reader.varget("Variable1")
for x in [0, 1, 2, 3, 4]:
Expand Down

0 comments on commit 8db0ce1

Please sign in to comment.