Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0501835
Additional test data files
Joseph-Ellaway Mar 8, 2026
389225a
Expected compressed JSON files
Joseph-Ellaway Mar 8, 2026
f4abbd2
Test for compressed cif and JSONs
Joseph-Ellaway Mar 8, 2026
a9cb6c9
Enable compression for assembly parser
Joseph-Ellaway Mar 8, 2026
7c21de0
Test files for compressed interfaces
Joseph-Ellaway Mar 8, 2026
d8a76e0
Tests for compressing interfaces
Joseph-Ellaway Mar 8, 2026
dd6c746
Test for compressed interface summary JSON
Joseph-Ellaway Mar 8, 2026
b1dca9c
Test files for interface summary compression
Joseph-Ellaway Mar 8, 2026
ec4d40b
Compress JSONs for interface summary results
Joseph-Ellaway Mar 8, 2026
5d91d5b
Scratch folder
Joseph-Ellaway Mar 8, 2026
cbefbd3
List file parsers updated to allow compression
Joseph-Ellaway Mar 8, 2026
8dedd8b
CLI entry point updated to take compression flags
Joseph-Ellaway Mar 8, 2026
00a19cf
Updated output test files for component_id
Joseph-Ellaway Mar 8, 2026
41fc58e
Increased test verbosity
Joseph-Ellaway Mar 8, 2026
c3caa92
Models updated to accept component_id
Joseph-Ellaway Mar 8, 2026
e6d2548
component_id fed into models from parsers
Joseph-Ellaway Mar 8, 2026
332eb72
Version bump
Joseph-Ellaway Mar 8, 2026
89ae27d
Unnecessary compressionlevel removed
Joseph-Ellaway Mar 8, 2026
7edd177
Update pisa_utils/parsers.py
Joseph-Ellaway Mar 8, 2026
b2cee91
Update pisa_utils/run.py
Joseph-Ellaway Mar 8, 2026
c42a911
Update pisa_utils/run.py
Joseph-Ellaway Mar 8, 2026
7643eaf
Fix missing `.gz` extension on `path_assembly_json` when `--compress_…
Copilot Mar 8, 2026
dd45da9
Linting fix
Joseph-Ellaway Mar 8, 2026
b5f062d
Abstract gzip and saving in XML->JSON parsers
Joseph-Ellaway Mar 8, 2026
008a43f
Fix CompileInterfaceSummaryJSON.parse() to respect compressed mode (#45)
Copilot Mar 9, 2026
8898816
Tests fixed, point to compressed int. folder
Joseph-Ellaway Mar 9, 2026
35dfb72
Tests updated to compress on the fly
Joseph-Ellaway Mar 10, 2026
c252cb5
Compressed files removed
Joseph-Ellaway Mar 10, 2026
518ebc3
Compress with Python native approach
Joseph-Ellaway Mar 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,4 @@ tests/data/actual_output/
tests/data/actual_output/*
pisa_cfg_tmp
tmp_example_data/
scratch/
26 changes: 26 additions & 0 deletions pisa_utils/models/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
COMPLEX_TYPE,
COMPLEXES_IN_ASU,
COMPLEXES_IN_PQS_SET,
COMPONENT_ID,
COMPONENT_NUMBER,
COMPONENT_TOTAL_ATOMS,
COMPONENT_TOTAL_RESIDUES,
Expand Down Expand Up @@ -391,6 +392,11 @@ def ensure_residues_is_list(cls, v: Union[dict, list[dict]]) -> list[dict]:

class Molecule(StrictModel):
mol_id: int = Field(..., validation_alias="id")
component_id: str = Field(
...,
description=COMPONENT_ID,
examples=["Protein"],
)
auth_asym_id: str = Field(
...,
description=AUTH_ASYM_ID,
Expand Down Expand Up @@ -1536,6 +1542,11 @@ class Component(StrictModel):
molecule_type_id: int = Field(
..., description=COMPONENT_TYPE_ID, examples=[1], validation_alias="monomer_id"
)
component_id: str = Field(
...,
description=COMPONENT_ID,
examples=["A", "[NA]A:301", "o7", "[GOL]A:302"],
)
auth_asym_id: str = Field(
...,
description=AUTH_ASYM_ID,
Expand Down Expand Up @@ -1618,6 +1629,7 @@ class Components(StrictModel):
Component(
serial_number=1,
monomer_id=1,
component_id="A",
chain_id="A",
monomer_class="Protein",
total_atoms=1846,
Expand All @@ -1630,6 +1642,7 @@ class Components(StrictModel):
Component(
serial_number=2,
monomer_id=2,
component_id="B",
chain_id="B",
monomer_class="Protein",
total_atoms=1500,
Expand All @@ -1639,6 +1652,19 @@ class Components(StrictModel):
area=9500.3,
delta_g=-180.5,
),
Component(
serial_number=3,
monomer_id=3,
component_id="[NA]A:301",
chain_id="A",
monomer_class="Ligand",
total_atoms=10,
total_residues=1,
surface_atoms=5,
surface_residues=1,
area=50.2,
delta_g=-5.0,
),
]
],
)
Expand Down
3 changes: 3 additions & 0 deletions pisa_utils/models/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@
VISUAL_ID = """PISA-defined chain ID used in the 'formula' field. Chains with
sufficiently similar structural similarity are assigned the same visual ID."""

COMPONENT_ID = """Component identifier. For macromolecular components, this is the
auth_asym_id. For ligands, this is the CCD, auth_asym_id and sequence IDs."""

COMPONENT_NUMBER = """Unique ID corresponding to component instance in the (crystal)
structure"""

Expand Down
24 changes: 24 additions & 0 deletions pisa_utils/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,27 @@ class DataModes(str, Enum):
ASSEMBLIES = "assemblies"
INTERFACES = "interfaces"
MONOMERS = "monomers"


class OutputFormats(str, Enum):
    """Output format identifiers, each valued as a file suffix string.

    Members also subclass ``str``, so a member compares equal to its suffix
    (for example ``OutputFormats.JSON == ".json"``) and can be looked up by
    value (``OutputFormats(".json.gz")``).
    """

    # JSON output, plain and with a ".gz" suffix
    JSON = ".json"
    JSON_GZ = ".json.gz"

    # XML output, plain and with a ".gz" suffix
    XML = ".xml"
    XML_GZ = ".xml.gz"


class AllowedModelFileFormats(str, Enum):
    """Model file extensions, grouped as text, binary and compressed.

    Members also subclass ``str``, so a member compares equal to its
    extension string (for example ``AllowedModelFileFormats.CIF == ".cif"``).
    """

    # Plain-text formats
    CIF = ".cif"
    PDB = ".pdb"
    MMCIF = ".mmcif"
    ENT = ".ent"

    # Binary format
    BCIF = ".bcif"

    # Compressed (".gz") variants of the text formats above
    CIF_GZ = ".cif.gz"
    PDB_GZ = ".pdb.gz"
    MMCIF_GZ = ".mmcif.gz"
    ENT_GZ = ".ent.gz"
Loading