diff --git a/odml/validation.py b/odml/validation.py
index 21498f15..7ea1492b 100644
--- a/odml/validation.py
+++ b/odml/validation.py
@@ -3,6 +3,7 @@
Generic odML validation framework.
"""
+import re
from . import dtypes
LABEL_ERROR = 'error'
@@ -386,3 +387,56 @@ def property_values_check(prop):
Validation.register_handler('property', property_values_check)
+
+
# Candidate dtypes for values stored as "string", tried in this order.
# Every pattern is anchored on both ends and therefore has to cover the
# complete, whitespace-stripped value.
_STRING_DTYPE_PATTERNS = (
    ('int', re.compile(r'^-?\d+$')),
    ('date', re.compile(r'^\d{2,4}-\d{1,2}-\d{1,2}$')),
    ('datetime', re.compile(r'^\d{2,4}-\d{1,2}-\d{1,2} \d{2}:\d{2}(:\d{2})?$')),
    ('time', re.compile(r'^\d{2}:\d{2}(:\d{2})?$')),
    ('float', re.compile(r'^-?\d+\.\d+$')),
    ('tuple', re.compile(r'^\((.*?)\)$')),
    # The alternatives are grouped so that both anchors apply to each of them.
    ('boolean', re.compile(r'^(TRUE|FALSE|True|False|t|f)$')),
)

# A line break anywhere inside the stripped value marks it as "text".
_STRING_LINE_BREAK = re.compile(r'[\r\n]')


def _guess_string_dtype(val):
    """
    Returns the name of the dtype a single string value looks like,
    or "string" if none of the known patterns fits.
    """
    stripped = val.strip()

    for dtype_name, pattern in _STRING_DTYPE_PATTERNS:
        if pattern.match(stripped):
            # Tuple elements are separated by ';', report the arity as well.
            if dtype_name == "tuple" and val.count(';') > 0:
                return str(val.count(';') + 1) + "-" + dtype_name
            return dtype_name

    if _STRING_LINE_BREAK.search(stripped):
        return "text"

    return "string"


def property_values_string_check(prop):
    """
    PROTOTYPE

    Tests whether values with dtype "string" are maybe of different dtype.

    A warning is yielded only if every value of the property looks like
    the same non-string dtype; properties with mixed or plain string
    values are not reported.

    :param prop: property the validation is applied on.
    """

    if prop.dtype != "string" or not prop.values:
        return

    guessed = set(_guess_string_dtype(val) for val in prop.values)

    # Values that do not agree on a single dtype are treated as plain strings.
    if len(guessed) != 1:
        return

    res_dtype = guessed.pop()

    if res_dtype != "string":
        msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!' % (prop.name, res_dtype)
        yield ValidationError(prop, msg, LABEL_WARNING)
+
+
+Validation.register_handler('property', property_values_string_check)
diff --git a/test/resources/validation_dtypes.json b/test/resources/validation_dtypes.json
new file mode 100644
index 00000000..d3feb9c7
--- /dev/null
+++ b/test/resources/validation_dtypes.json
@@ -0,0 +1,370 @@
+{
+ "Document": {
+ "id": "2c9bddd2-0082-410c-89d4-cd6ea44a404b",
+ "sections": [
+ {
+ "id": "f545ca92-1d4c-4d02-8170-1837b9e16395",
+ "type": "no_dtypes",
+ "name": "no_dtypes",
+ "sections": [
+ {
+ "id": "24d75996-d19d-4138-8d79-8b90d7affbc2",
+ "name": "sec_string",
+ "sections": [],
+ "properties": [
+ {
+ "id": "951db657-ba2d-43b9-9827-f83ef3ce8e64",
+ "name": "words_no",
+ "value": [
+ "hello",
+ "-world",
+ "3",
+ "True"
+ ]
+ }
+ ]
+ },
+ {
+ "id": "2b3540fc-a422-4279-b10d-f87e16e0a2c3",
+ "type": "int",
+ "name": "sec_int",
+ "sections": [],
+ "properties": [
+ {
+ "id": "55829af7-1058-4cd6-8454-de1d77143935",
+ "name": "members_no",
+ "value": [
+ "-13",
+ "101",
+ "-11",
+ "0",
+ "-8"
+ ]
+ }
+ ]
+ },
+ {
+ "id": "725b997a-16af-436f-b6a4-bb7a16822958",
+ "type": "float",
+ "name": "sec_float",
+ "sections": [],
+ "properties": [
+ {
+ "id": "d26b8eed-fd92-4d04-8434-f0846186ceee",
+ "name": "potential_no",
+ "value": [
+ "-4.8",
+ "10.0",
+ "-11.9",
+ "-10.0",
+ "18.0"
+ ]
+ }
+ ]
+ },
+ {
+ "id": "f8a620b1-b71c-42eb-917b-c5879b4f9e96",
+ "type": "date",
+ "name": "sec_date",
+ "sections": [],
+ "properties": [
+ {
+ "id": "9d207f37-ca27-4df7-9409-02e369cced7a",
+ "name": "dates_no",
+ "value": [
+ "1997-12-14",
+ "00-12-14",
+ "89-07-04"
+ ]
+ }
+ ]
+ },
+ {
+ "id": "857f3300-d6ac-4c48-9d95-12a1d70b9092",
+ "type": "datetime",
+ "name": "sec_datetime",
+ "sections": [],
+ "properties": [
+ {
+ "id": "8fa99c25-4cc9-491b-9398-59afb563f103",
+ "name": "datetimes_no",
+ "value": [
+ "97-12-14 11:11:11",
+ "97-12-14 12:12",
+ "1997-12-14 03:03"
+ ]
+ }
+ ]
+ },
+ {
+ "id": "922d2ab9-bfa6-413d-ba8d-f57cc775b6ac",
+ "type": "time",
+ "name": "sec_time",
+ "sections": [],
+ "properties": [
+ {
+ "id": "a275b4cc-e850-4a46-95eb-a01772706468",
+ "name": "times_no",
+ "value": [
+ "11:11:11",
+ "12:12:12",
+ "03:03:03"
+ ]
+ }
+ ]
+ },
+ {
+ "id": "f18728fc-1e75-43dc-810f-19c9c64f29c0",
+ "type": "boolean",
+ "name": "sec_boolean",
+ "sections": [],
+ "properties": [
+ {
+ "id": "4080b4ad-fe08-4abc-8071-a84a8169cf30",
+ "name": "sent_no",
+ "value": [
+ "False",
+ "True",
+ "TRUE",
+ "False",
+ "t"
+ ]
+ }
+ ]
+ },
+ {
+ "id": "8c49b24b-0b86-4dd6-a4da-45e1c2c504c6",
+ "type": "2-tuple",
+ "name": "sec_2_tuple",
+ "sections": [],
+ "properties": [
+ {
+ "id": "4f0cfc1b-0175-46f9-9ad4-62ab19fc6625",
+ "name": "Location_no",
+ "value": [
+ "(39.12; 67.19)",
+ "(39.12; 67.19)",
+ "(39.12; 67.18)"
+ ]
+ }
+ ]
+ },
+ {
+ "id": "62531fb3-8b5a-4490-a421-8e15cb787f5f",
+ "type": "3-tuple",
+ "name": "sec_3_tuple",
+ "sections": [],
+ "properties": [
+ {
+ "id": "45eae816-9023-414d-b15f-ae7a8a845f45",
+ "name": "Coos_no",
+ "value": [
+ "(39.12; 89; 67.19)",
+ "(39.12; 78; 67.19)",
+ "(39.12; 56; 67.18)"
+ ]
+ }
+ ]
+ }
+ ],
+ "properties": []
+ },
+ {
+ "id": "bc217fca-d51a-4d01-b37f-7270f02f88a4",
+ "type": "mislabelled_dtypes",
+ "name": "mislabelled_dtypes",
+ "sections": [
+ {
+ "id": "3fd7dd7e-246f-4002-a577-534792fb5357",
+ "type": "string",
+ "name": "sec_string",
+ "sections": [],
+ "properties": [
+ {
+ "id": "7c987cf5-7bc7-452d-a87f-69a8e68397ea",
+ "name": "words_mislabelled",
+ "value": [
+ "hello",
+ "-world",
+ "3",
+ "True"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "196e9408-3d8a-478a-829d-0834d14c9a32",
+ "type": "int",
+ "name": "sec_int",
+ "sections": [],
+ "properties": [
+ {
+ "id": "c581ffae-8d7f-44e7-92cd-b96c253e680d",
+ "name": "members_mislabelled",
+ "value": [
+ "-13",
+ "101",
+ "-11",
+ "0",
+ "-8"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "fbe32258-9b80-43b4-8399-856780e1c67e",
+ "type": "float",
+ "name": "sec_float",
+ "sections": [],
+ "properties": [
+ {
+ "id": "7302729f-ccc0-463d-8c39-78ec1136237f",
+ "name": "potential_mislabelled",
+ "value": [
+ "-4.8",
+ "10.0",
+ "-11.9",
+ "-10.0",
+ "18.0"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "daa8fc2f-fc44-408f-b824-d6f890974e36",
+ "type": "date",
+ "name": "sec_date",
+ "sections": [],
+ "properties": [
+ {
+ "id": "5818ae19-80d5-42c1-ae05-61a36c2b9546",
+ "name": "dates_mislabelled",
+ "value": [
+ "1997-12-14",
+ "00-12-14",
+ "89-07-04"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "d2d0b323-4701-4fc1-ac9c-9bb9b04c2710",
+ "type": "datetime",
+ "name": "sec_datetime",
+ "sections": [],
+ "properties": [
+ {
+ "id": "2be4f2c2-5364-4c6e-86c7-8b940cc8ed5f",
+ "name": "datetimes_mislabelled",
+ "value": [
+ "97-12-14 11:11:11",
+ "97-12-14 12:12",
+ "1997-12-14 03:03"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "aa38c228-7870-41a0-89bd-f579406468c0",
+ "type": "time",
+ "name": "sec_time",
+ "sections": [],
+ "properties": [
+ {
+ "id": "d20c097f-162c-456a-beb4-0a55e261c9b8",
+ "name": "times_mislabelled",
+ "value": [
+ "11:11:11",
+ "12:12:12",
+ "03:03:03"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "b4d969e7-78b3-4bc3-942b-0126046ba538",
+ "type": "boolean",
+ "name": "sec_boolean",
+ "sections": [],
+ "properties": [
+ {
+ "id": "d7f0cf25-a87a-4b77-a7a5-41c4b31763a5",
+ "name": "sent_mislabelled",
+ "value": [
+ "False",
+ "True",
+ "TRUE",
+ "False",
+ "t"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "fe16efe1-5f94-475e-b652-70b7f5495de5",
+ "type": "text",
+ "name": "sec_text",
+ "sections": [],
+ "properties": [
+ {
+ "id": "ae863fc4-b998-4da1-b586-96c0d0124ddb",
+ "name": "texts_mislabelled",
+ "value": [
+ "lineA \n lineB",
+ "lineC\n lineD",
+ "\nlineE\n lineF"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "513ce09d-2d15-40b2-8d65-10438ba55752",
+ "type": "2-tuple",
+ "name": "sec_2_tuple",
+ "sections": [],
+ "properties": [
+ {
+ "id": "99318b7a-ce26-4763-bd32-9d3dfb4b956a",
+ "name": "Location_mislabelled",
+ "value": [
+ "(39.12; 67.19)",
+ "(39.12; 67.19)",
+ "(39.12; 67.18)"
+ ],
+ "type": "string"
+ }
+ ]
+ },
+ {
+ "id": "f5bb809b-e621-4524-9858-1004e2232a57",
+ "type": "3-tuple",
+ "name": "sec_3_tuple",
+ "sections": [],
+ "properties": [
+ {
+ "id": "1c41cc57-1704-4494-a463-71552a3f90bb",
+ "name": "Coos_mislabelled",
+ "value": [
+ "(39.12; 89; 67.19)",
+ "(39.12; 78; 67.19)",
+ "(39.12; 56; 67.18)"
+ ],
+ "type": "string"
+ }
+ ]
+ }
+ ],
+ "properties": []
+ }
+ ]
+ },
+ "odml-version": "1.1"
+}
\ No newline at end of file
diff --git a/test/resources/validation_dtypes.xml b/test/resources/validation_dtypes.xml
new file mode 100644
index 00000000..a3ecdd9e
--- /dev/null
+++ b/test/resources/validation_dtypes.xml
@@ -0,0 +1,217 @@
+
+
+
+ bfc85613-0e25-44e4-a85a-386b32c3c495
+
+ e72e64cd-e6eb-4399-b679-140d01f87365
+ no_dtypes
+ no_dtypes
+
+ a028642b-bebf-4392-9575-8cd751c3cdea
+ sec_string
+
+ 313d13ce-8a46-44de-a333-71c19f54752c
+ words_no
+ [hello,-world,3,True]
+
+
+
+ 2deb48f3-f23b-427c-99c2-73345d5c34bb
+ int
+ sec_int
+
+ 5b37aa19-76ef-48dd-bbd5-193a47213f36
+ members_no
+ [-13,101,-11,0,-8]
+
+
+
+ 4965ce85-5da7-4411-9e66-6af7066fbc46
+ float
+ sec_float
+
+ 48c06a25-a10a-489a-990a-f592f0d87615
+ potential_no
+ [-4.8,10.0,-11.9,-10.0,18.0]
+
+
+
+ 3d68d344-1fc2-43a5-9561-9fdc52b82fb2
+ date
+ sec_date
+
+ d0780269-20b1-4bc1-a023-4b502ffe63be
+ dates_no
+ [1997-12-14,00-12-14,89-07-04]
+
+
+
+ 9176ec80-7aa2-4a83-acb3-07e9efe7cf66
+ datetime
+ sec_datetime
+
+ 7ad1974a-3102-4e98-b1db-e36143bb8424
+ datetimes_no
+ [97-12-14 11:11:11,97-12-14 12:12,1997-12-14 03:03]
+
+
+
+ 96756686-8652-46f3-9f7e-23137872fc72
+ time
+ sec_time
+
+ 0201b643-a871-4b8b-9e3a-820f43b81f5a
+ times_no
+ [11:11:11,12:12:12,03:03:03]
+
+
+
+ c7a6950f-dbf0-4478-afc3-f5a045f85914
+ boolean
+ sec_boolean
+
+ c550e679-4d5a-42c8-bbac-41adb9553e8c
+ sent_no
+ [False,True,TRUE,False,t]
+
+
+
+ d5269b4d-5ef9-4e35-986a-9780fa96a5d6
+ 2-tuple
+ sec_2_tuple
+
+ ba9976ca-ce85-417b-86a7-18a19122fdc5
+ Location_no
+ [(39.12; 67.19),(39.12; 67.19),(39.12; 67.18)]
+
+
+
+ 67d066e7-ee4c-4ffb-ab8c-4539e6a97fbc
+ 3-tuple
+ sec_3_tuple
+
+ df72cfbb-22fc-4470-bd4c-bf9d1c98b170
+ Coos_no
+ [(39.12; 89; 67.19),(39.12; 78; 67.19),(39.12; 56; 67.18)]
+
+
+
+
+ 2fc36b6f-cf3d-4aef-ae4e-0f56d081c515
+ mislabelled_dtypes
+ mislabelled_dtypes
+
+ 7a7ee997-365d-4b3f-9edd-e7580a2bd76e
+ string
+ sec_string
+
+ 16037a96-072b-431c-ae7b-2d3312eb8f42
+ words_mislabelled
+ [hello,-world,3,True]
+ string
+
+
+
+ 65fe690a-9a0b-4557-8271-e5f10956b4e0
+ int
+ sec_int
+
+ 0c78a624-f60c-4555-8440-2299aaead89d
+ members_mislabelled
+ [-13,101,-11,0,-8]
+ string
+
+
+
+ fa8b21aa-52d0-414e-b93c-c2197f3020cf
+ float
+ sec_float
+
+ 50af87b7-4797-40fa-850a-c8033ba0b85b
+ potential_mislabelled
+ [-4.8,10.0,-11.9,-10.0,18.0]
+ string
+
+
+
+ 6e56b7da-c779-499d-a4fe-92ed48750421
+ date
+ sec_date
+
+ 2aede904-71a3-4897-8614-1dcdd9ed0ddf
+ dates_mislabelled
+ [1997-12-14,00-12-14,89-07-04]
+ string
+
+
+
+ 7dec0155-657d-4cba-8c06-8be2ceaf4049
+ datetime
+ sec_datetime
+
+ dc50b6ac-0993-432e-b401-06d13ed9e720
+ datetimes_mislabelled
+ [97-12-14 11:11:11,97-12-14 12:12,1997-12-14 03:03]
+ string
+
+
+
+ d0278536-a486-43c2-9708-ef16efc7440a
+ time
+ sec_time
+
+ afda136e-70e1-40ee-abdd-0e80f56cf624
+ times_mislabelled
+ [11:11:11,12:12:12,03:03:03]
+ string
+
+
+
+ aea047b0-abea-4c6c-9a8c-5b6089fde35c
+ boolean
+ sec_boolean
+
+ d959337d-ec34-468e-a374-1b1213fa23d5
+ sent_mislabelled
+ [False,True,TRUE,False,t]
+ string
+
+
+
+ 1b078e40-1e23-4a74-bf34-c948c46b8bba
+ text
+ sec_text
+
+ 4b7430d6-ba74-4f66-88e5-8009bd289815
+ texts_mislabelled
+ ["lineA
+ lineB","lineC
+ lineD","lineE
+ lineF"]
+ string
+
+
+
+ fcf33912-8635-4495-a41e-4f1f92e0dae0
+ 2-tuple
+ sec_2_tuple
+
+ d447d0b1-9586-43f5-9d5a-ae426cab43e9
+ Location_mislabelled
+ [(39.12; 67.19),(39.12; 67.19),(39.12; 67.18)]
+ string
+
+
+
+ 23091dcb-3949-400e-8740-7cd26f997edf
+ 3-tuple
+ sec_3_tuple
+
+ 66fc02ba-c908-4552-8b91-363348c39f28
+ Coos_mislabelled
+ [(39.12; 89; 67.19),(39.12; 78; 67.19),(39.12; 56; 67.18)]
+ string
+
+
+
+
diff --git a/test/resources/validation_dtypes.yaml b/test/resources/validation_dtypes.yaml
new file mode 100644
index 00000000..bfa63fc7
--- /dev/null
+++ b/test/resources/validation_dtypes.yaml
@@ -0,0 +1,246 @@
+Document:
+ id: 2af46ee0-48c3-4874-a67f-5f8b2951cf51
+ sections:
+ - id: ebb1607f-a450-409f-b286-b9a8aef0900f
+ name: no_dtypes
+ properties: []
+ sections:
+ - id: d7f47035-4e49-4438-b61f-227717a8bbe4
+ name: sec_string
+ properties:
+ - id: a211dd69-a2b2-46f1-95f1-6b30d8b96ba0
+ name: words_no
+ value:
+ - hello
+ - -world
+ - '3'
+ - 'True'
+ sections: []
+ - id: efa678af-e8a5-491e-a0ea-29653bcb8c7b
+ name: sec_int
+ properties:
+ - id: f1e31bef-3652-4890-b570-b92c3903a8cf
+ name: members_no
+ value:
+ - '-13'
+ - '101'
+ - '-11'
+ - '0'
+ - '-8'
+ sections: []
+ type: int
+ - id: ba3cd001-033b-4991-a205-9d4fd1138848
+ name: sec_float
+ properties:
+ - id: 06ebdcd1-9635-4afc-bf2c-1ef44946f387
+ name: potential_no
+ value:
+ - '-4.8'
+ - '10.0'
+ - '-11.9'
+ - '-10.0'
+ - '18.0'
+ sections: []
+ type: float
+ - id: e0a47494-1d45-4e9e-a1c9-2d9987cc6625
+ name: sec_date
+ properties:
+ - id: c9d46221-a403-4a48-870c-16f7632b24d7
+ name: dates_no
+ value:
+ - '1997-12-14'
+ - 00-12-14
+ - 89-07-04
+ sections: []
+ type: date
+ - id: 1e06a7d7-076b-45f2-b3e1-8795f9cd71c9
+ name: sec_datetime
+ properties:
+ - id: 7f98fcff-4ac5-4c38-a071-8f0801ca0aa8
+ name: datetimes_no
+ value:
+ - 97-12-14 11:11:11
+ - 97-12-14 12:12
+ - 1997-12-14 03:03
+ sections: []
+ type: datetime
+ - id: a8c624fc-1e0a-4da4-9862-64a0ad84fa0e
+ name: sec_time
+ properties:
+ - id: b2d169dd-1a00-43ad-b481-a85561c4d001
+ name: times_no
+ value:
+ - '11:11:11'
+ - '12:12:12'
+ - 03:03:03
+ sections: []
+ type: time
+ - id: 4aafafb0-4d07-4689-a54c-565b4805f740
+ name: sec_boolean
+ properties:
+ - id: 650e60e1-7647-40cb-8371-9f65e025c51c
+ name: sent_no
+ value:
+ - 'False'
+ - 'True'
+ - 'TRUE'
+ - 'False'
+ - t
+ sections: []
+ type: boolean
+ - id: ffe14c58-0ad7-420c-a97f-7805a1ba8bf6
+ name: sec_2_tuple
+ properties:
+ - id: e52973a8-9186-4739-a789-73286745cf4c
+ name: Location_no
+ value:
+ - (39.12; 67.19)
+ - (39.12; 67.19)
+ - (39.12; 67.18)
+ sections: []
+ type: 2-tuple
+ - id: e47bc5f7-38f6-4575-891a-91bcf64e7872
+ name: sec_3_tuple
+ properties:
+ - id: 443ef8a0-5e6e-480f-83ad-e305398dd828
+ name: Coos_no
+ value:
+ - (39.12; 89; 67.19)
+ - (39.12; 78; 67.19)
+ - (39.12; 56; 67.18)
+ sections: []
+ type: 3-tuple
+ type: no_dtypes
+ - id: cd3e02f7-ff2f-4b12-85d1-cd0a23ca8a83
+ name: mislabelled_dtypes
+ properties: []
+ sections:
+ - id: 2874da49-418d-452d-b4f8-540764d513aa
+ name: sec_string
+ properties:
+ - id: 8a6360e6-dce7-42c3-9477-eca7f9f51926
+ name: words_mislabelled
+ type: string
+ value:
+ - hello
+ - -world
+ - '3'
+ - 'True'
+ sections: []
+ type: string
+ - id: 8c926b02-4d6f-4e8f-aa92-78460eeeec5e
+ name: sec_int
+ properties:
+ - id: f46c9d64-28f8-4cc8-9fcc-31a0e5337ffe
+ name: members_mislabelled
+ type: string
+ value:
+ - '-13'
+ - '101'
+ - '-11'
+ - '0'
+ - '-8'
+ sections: []
+ type: int
+ - id: 0785a91c-e126-41c1-9c64-dabee9750ea8
+ name: sec_float
+ properties:
+ - id: 07f877fc-5190-4f21-a834-0bfea38dc5b0
+ name: potential_mislabelled
+ type: string
+ value:
+ - '-4.8'
+ - '10.0'
+ - '-11.9'
+ - '-10.0'
+ - '18.0'
+ sections: []
+ type: float
+ - id: 7684924a-5875-48f5-838d-509a99274097
+ name: sec_date
+ properties:
+ - id: 36d587fd-0d83-4f71-b2bd-b08628378e03
+ name: dates_mislabelled
+ type: string
+ value:
+ - '1997-12-14'
+ - 00-12-14
+ - 89-07-04
+ sections: []
+ type: date
+ - id: ecc8be3a-e17c-48e5-92a4-07f829713531
+ name: sec_datetime
+ properties:
+ - id: b7d415c1-8c1b-4f94-b425-37c3f6c2f131
+ name: datetimes_mislabelled
+ type: string
+ value:
+ - 97-12-14 11:11:11
+ - 97-12-14 12:12
+ - 1997-12-14 03:03
+ sections: []
+ type: datetime
+ - id: 1a5d94f6-6ebc-4f5e-b621-93cfa2bebb50
+ name: sec_time
+ properties:
+ - id: edc1fa24-f3a4-448f-9f1e-9387383a0fa3
+ name: times_mislabelled
+ type: string
+ value:
+ - '11:11:11'
+ - '12:12:12'
+ - 03:03:03
+ sections: []
+ type: time
+ - id: 75fb464d-9ea5-49a0-a820-343b08cfba2d
+ name: sec_boolean
+ properties:
+ - id: d2c2c6d1-1ca7-4544-8379-d8451954d38f
+ name: sent_mislabelled
+ type: string
+ value:
+ - 'False'
+ - 'True'
+ - 'TRUE'
+ - 'False'
+ - t
+ sections: []
+ type: boolean
+ - id: 889124e7-09a5-400a-b7de-7d0e6d019d7e
+ name: sec_text
+ properties:
+ - id: 0a05a652-cfe7-46c2-bba4-fa9d1714a635
+ name: texts_mislabelled
+ type: string
+ value:
+ - "lineA \n lineB"
+ - "lineC\n lineD"
+ - "\nlineE\n lineF"
+ sections: []
+ type: text
+ - id: db9d7872-86d4-4155-9a14-2a44a4402b1a
+ name: sec_2_tuple
+ properties:
+ - id: 508517b1-e67a-42d3-aca1-6bd55ddea0e6
+ name: Location_mislabelled
+ type: string
+ value:
+ - (39.12; 67.19)
+ - (39.12; 67.19)
+ - (39.12; 67.18)
+ sections: []
+ type: 2-tuple
+ - id: a75c955a-6bef-40a7-99fd-a26160ae79d2
+ name: sec_3_tuple
+ properties:
+ - id: 84aa21f3-c571-420c-963f-59c5785a45f2
+ name: Coos_mislabelled
+ type: string
+ value:
+ - (39.12; 89; 67.19)
+ - (39.12; 78; 67.19)
+ - (39.12; 56; 67.18)
+ sections: []
+ type: 3-tuple
+ type: mislabelled_dtypes
+odml-version: '1.1'
diff --git a/test/resources/validation_section.json b/test/resources/validation_section.json
new file mode 100644
index 00000000..d7748252
--- /dev/null
+++ b/test/resources/validation_section.json
@@ -0,0 +1,21 @@
+{
+ "Document": {
+ "id": "318c4323-f83c-4abc-adbd-daf47816fe87",
+ "sections": [
+ {
+ "id": "552c620d-ac5f-46c4-869e-f2b8170e7a1e",
+ "name": "sec_type_undefined",
+ "sections": [],
+ "properties": []
+ },
+ {
+ "id": "52540350-3533-42bc-b438-f031f5ac3641",
+ "type": "",
+ "name": "sec_type_empty",
+ "sections": [],
+ "properties": []
+ }
+ ]
+ },
+ "odml-version": "1.1"
+}
\ No newline at end of file
diff --git a/test/resources/validation_section.xml b/test/resources/validation_section.xml
new file mode 100644
index 00000000..b6853446
--- /dev/null
+++ b/test/resources/validation_section.xml
@@ -0,0 +1,14 @@
+
+
+
+ 69585a6e-bdfd-4a49-b227-d4f00e42723e
+
+ e50cfe1b-1c0b-4f47-b76b-5500508b661a
+ sec_type_undefined
+
+
+ d2fd6a82-2568-4ebc-9bd4-93e386a980b7
+
+ sec_type_empty
+
+
diff --git a/test/resources/validation_section.yaml b/test/resources/validation_section.yaml
new file mode 100644
index 00000000..93da8572
--- /dev/null
+++ b/test/resources/validation_section.yaml
@@ -0,0 +1,13 @@
+Document:
+ id: 468dfc92-d794-4ae9-9ffe-a0944a957d36
+ sections:
+ - id: 5279b30a-3dfd-4c44-ac18-2a80e7647449
+ name: sec_type_undefined
+ properties: []
+ sections: []
+ - id: 2033d777-fc2b-4b73-a8f0-ae164bb1ea10
+ name: sec_type_empty
+ properties: []
+ sections: []
+ type: ''
+odml-version: '1.1'
diff --git a/test/test_validation.py b/test/test_validation.py
index d020cd26..7f4a170e 100644
--- a/test/test_validation.py
+++ b/test/test_validation.py
@@ -1,5 +1,6 @@
import unittest
import odml
+import os
import odml.validation
import odml.terminology
from . import test_samplefile as samplefile
@@ -12,6 +13,7 @@ class TestValidation(unittest.TestCase):
def setUp(self):
    """
    Creates a fresh sample document and stores the directory of this
    test module for use by the individual tests.
    """
    creator = samplefile.SampleFileCreator()
    self.doc = creator.create_document()
    self.maxDiff = None
    module_file = os.path.realpath(__file__)
    self.dir_path = os.path.dirname(module_file)
def filter_repository_errors(self, errors):
return filter(lambda x: "A section should have an associated "
@@ -152,3 +154,296 @@ def test_standalone_property(self):
for err in validate(prop).errors:
assert not err.is_error
+
def test_prop_string_values(self):
    """
    Test if property values set as dtype string but could be of different dtype
    raise validation warning.
    """
    msg_template = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!'

    # Values that do not agree on one alternative dtype must not be reported.
    mixed = odml.Property(name='words', dtype="string",
                          values=['-13', '101', '-11', 'hello'])
    assert len(validate(mixed).errors) == 0

    # (property name, suggested dtype, values) - checked in this order.
    cases = [
        ('members', 'int', ['-13', '101', '-11', '0', '-8']),
        ('potential', 'float', ['-4.8', '10.0', '-11.9', '-10.0', '18.0']),
        ('dates', 'date', ['1997-12-14', '00-12-14', '89-07-04']),
        ('datetimes', 'datetime',
         ['97-12-14 11:11:11', '97-12-14 12:12', '1997-12-14 03:03']),
        ('times', 'time', ['11:11:11', '12:12:12', '03:03:03']),
        ('sent', 'boolean', ['False', True, 'TRUE', False, 't']),
        ('texts', 'text', ['line1\n line2', 'line3\n line4', '\nline5\nline6']),
        ('Location', '2-tuple',
         ['(39.12; 67.19)', '(39.12; 67.19)', '(39.12; 67.18)']),
        ('Coos', '3-tuple',
         ['(39.12; 89; 67.19)', '(39.12; 78; 67.19)', '(39.12; 56; 67.18)']),
    ]

    for prop_name, suggested, prop_values in cases:
        prop = odml.Property(name=prop_name, dtype="string", values=prop_values)
        self.assertError(validate(prop), msg_template % (prop_name, suggested))
+
def test_load_section_xml(self):
    """
    Test if loading xml document raises validation errors for Sections with undefined type.
    """
    path = os.path.join(self.dir_path, "resources", "validation_section.xml")
    doc = odml.load(path)

    # Names of all sections reported with an undefined type.
    flagged = {err.obj.name for err in validate(doc).errors
               if err.msg == "Section type undefined"}

    assert "sec_type_undefined" in flagged
    assert "sec_type_empty" in flagged
+
def test_load_dtypes_xml(self):
    """
    Test if loading xml document raises validation warnings for Properties
    whose dtype is "string" although all values fit a different dtype.
    """
    path = os.path.join(self.dir_path, "resources", "validation_dtypes.xml")
    doc = odml.load(path)

    # Validate the document once and check every expected message against
    # that single result instead of re-validating for each assertion.
    # NOTE(review): assumes assertError only inspects the result - confirm.
    res = validate(doc)

    msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!'

    # (property name, suggested dtype) - checked in this order.
    expected = [
        ('members_no', 'int'),
        ('potential_no', 'float'),
        ('dates_no', 'date'),
        ('datetimes_no', 'datetime'),
        ('times_no', 'time'),
        ('sent_no', 'boolean'),
        ('Location_no', '2-tuple'),
        ('Coos_no', '3-tuple'),
        ('members_mislabelled', 'int'),
        ('potential_mislabelled', 'float'),
        ('dates_mislabelled', 'date'),
        ('datetimes_mislabelled', 'datetime'),
        ('times_mislabelled', 'time'),
        ('sent_mislabelled', 'boolean'),
        ('texts_mislabelled', 'text'),
        ('Location_mislabelled', '2-tuple'),
        ('Coos_mislabelled', '3-tuple'),
    ]

    for prop_name, suggested in expected:
        self.assertError(res, msg % (prop_name, suggested))
+
def test_load_section_json(self):
    """
    Test if loading json document raises validation errors for Sections with undefined type.
    """
    path = os.path.join(self.dir_path, "resources", "validation_section.json")
    doc = odml.load(path, "JSON")

    # Names of all sections reported with an undefined type.
    flagged = {err.obj.name for err in validate(doc).errors
               if err.msg == "Section type undefined"}

    assert "sec_type_undefined" in flagged
    assert "sec_type_empty" in flagged
+
def test_load_dtypes_json(self):
    """
    Test if loading json document raises validation warnings for Properties
    whose dtype is "string" although all values fit a different dtype.
    """
    path = os.path.join(self.dir_path, "resources", "validation_dtypes.json")
    doc = odml.load(path, "JSON")

    # Validate the document once and check every expected message against
    # that single result instead of re-validating for each assertion.
    # NOTE(review): assumes assertError only inspects the result - confirm.
    res = validate(doc)

    msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!'

    # (property name, suggested dtype) - checked in this order.
    expected = [
        ('members_no', 'int'),
        ('potential_no', 'float'),
        ('dates_no', 'date'),
        ('datetimes_no', 'datetime'),
        ('times_no', 'time'),
        ('sent_no', 'boolean'),
        ('Location_no', '2-tuple'),
        ('Coos_no', '3-tuple'),
        ('members_mislabelled', 'int'),
        ('potential_mislabelled', 'float'),
        ('dates_mislabelled', 'date'),
        ('datetimes_mislabelled', 'datetime'),
        ('times_mislabelled', 'time'),
        ('sent_mislabelled', 'boolean'),
        ('texts_mislabelled', 'text'),
        ('Location_mislabelled', '2-tuple'),
        ('Coos_mislabelled', '3-tuple'),
    ]

    for prop_name, suggested in expected:
        self.assertError(res, msg % (prop_name, suggested))
+
def test_load_section_yaml(self):
    """
    Test if loading yaml document raises validation errors for Sections with undefined type.
    """
    path = os.path.join(self.dir_path, "resources", "validation_section.yaml")
    doc = odml.load(path, "YAML")

    # Names of all sections reported with an undefined type.
    flagged = {err.obj.name for err in validate(doc).errors
               if err.msg == "Section type undefined"}

    assert "sec_type_undefined" in flagged
    assert "sec_type_empty" in flagged
+
def test_load_dtypes_yaml(self):
    """
    Test if loading yaml document raises validation warnings for Properties
    whose dtype is "string" although all values fit a different dtype.
    """
    path = os.path.join(self.dir_path, "resources", "validation_dtypes.yaml")
    doc = odml.load(path, "YAML")

    # Validate the document once and check every expected message against
    # that single result instead of re-validating for each assertion.
    # NOTE(review): assumes assertError only inspects the result - confirm.
    res = validate(doc)

    msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!'

    # (property name, suggested dtype) - checked in this order.
    expected = [
        ('members_no', 'int'),
        ('potential_no', 'float'),
        ('dates_no', 'date'),
        ('datetimes_no', 'datetime'),
        ('times_no', 'time'),
        ('sent_no', 'boolean'),
        ('Location_no', '2-tuple'),
        ('Coos_no', '3-tuple'),
        ('members_mislabelled', 'int'),
        ('potential_mislabelled', 'float'),
        ('dates_mislabelled', 'date'),
        ('datetimes_mislabelled', 'datetime'),
        ('times_mislabelled', 'time'),
        ('sent_mislabelled', 'boolean'),
        ('texts_mislabelled', 'text'),
        ('Location_mislabelled', '2-tuple'),
        ('Coos_mislabelled', '3-tuple'),
    ]

    for prop_name, suggested in expected:
        self.assertError(res, msg % (prop_name, suggested))