import re

# Candidate dtypes for values stored as "string", tried in this order.
# Every pattern is anchored at both ends and keeps its alternatives inside a
# group, so it has to describe the complete, whitespace-stripped value.
# (An ungrouped "^a|b|c$" anchors only the first and the last alternative.)
_STRING_DTYPE_PATTERNS = (
    ('int', re.compile(r'^[-+]?\d+$')),
    ('date', re.compile(r'^\d{2,4}-\d{1,2}-\d{1,2}$')),
    ('datetime', re.compile(r'^\d{2,4}-\d{1,2}-\d{1,2} \d{2}:\d{2}(:\d{2})?$')),
    ('time', re.compile(r'^\d{2}:\d{2}(:\d{2})?$')),
    ('float', re.compile(r'^[-+]?\d+\.\d+$')),
    # A tuple needs at least one ';' separator; a single parenthesised item
    # stays a plain string instead of being reported as a bare "tuple".
    # NOTE(review): assumes "tuple" without a size prefix is not a usable
    # odML dtype - confirm against odml.dtypes.
    ('tuple', re.compile(r'^\([^()\r\n]*;[^()\r\n]*\)$')),
    ('boolean', re.compile(r'^(TRUE|FALSE|True|False|true|false|t|f)$')),
)

# Any line break inside a value hints at dtype "text".
_LINE_BREAK = re.compile(r'[\r\n]')


def _guess_string_dtype(value):
    """
    Returns the name of the dtype a single string value looks like.

    :param value: string value of a property.
    :return: 'int', 'date', 'datetime', 'time', 'float', 'boolean', 'text',
             '<n>-tuple' (n being the number of ';' separated items) or
             'string' if no more specific dtype fits.
    """
    stripped = value.strip()

    for dtype, pattern in _STRING_DTYPE_PATTERNS:
        if pattern.match(stripped):
            if dtype == 'tuple':
                return "%d-tuple" % (stripped.count(';') + 1)
            return dtype

    # Checked last and with search(): after stripping, a line break can only
    # occur inside the value, never at its start.
    if _LINE_BREAK.search(stripped):
        return 'text'

    return 'string'


def property_values_string_check(prop):
    """
    PROTOTYPE

    Tests whether the values of a property with dtype "string" would all fit
    one and the same, more specific dtype.

    A warning is yielded only if every value of the property looks like the
    same dtype; properties with mixed looking values are left alone.

    :param prop: property the validation is applied on.
    """

    if prop.dtype != "string" or not prop.values:
        return

    guessed_dtypes = set(_guess_string_dtype(val) for val in prop.values)

    # More than one candidate means the values do not agree on a dtype,
    # in which case "string" is the only dtype fitting all of them.
    if len(guessed_dtypes) != 1:
        return

    res_dtype = guessed_dtypes.pop()
    if res_dtype == "string":
        return

    msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!' \
          % (prop.name, res_dtype)
    yield ValidationError(prop, msg, LABEL_WARNING)


Validation.register_handler('property', property_values_string_check)
"922d2ab9-bfa6-413d-ba8d-f57cc775b6ac", + "type": "time", + "name": "sec_time", + "sections": [], + "properties": [ + { + "id": "a275b4cc-e850-4a46-95eb-a01772706468", + "name": "times_no", + "value": [ + "11:11:11", + "12:12:12", + "03:03:03" + ] + } + ] + }, + { + "id": "f18728fc-1e75-43dc-810f-19c9c64f29c0", + "type": "boolean", + "name": "sec_boolean", + "sections": [], + "properties": [ + { + "id": "4080b4ad-fe08-4abc-8071-a84a8169cf30", + "name": "sent_no", + "value": [ + "False", + "True", + "TRUE", + "False", + "t" + ] + } + ] + }, + { + "id": "8c49b24b-0b86-4dd6-a4da-45e1c2c504c6", + "type": "2-tuple", + "name": "sec_2_tuple", + "sections": [], + "properties": [ + { + "id": "4f0cfc1b-0175-46f9-9ad4-62ab19fc6625", + "name": "Location_no", + "value": [ + "(39.12; 67.19)", + "(39.12; 67.19)", + "(39.12; 67.18)" + ] + } + ] + }, + { + "id": "62531fb3-8b5a-4490-a421-8e15cb787f5f", + "type": "3-tuple", + "name": "sec_3_tuple", + "sections": [], + "properties": [ + { + "id": "45eae816-9023-414d-b15f-ae7a8a845f45", + "name": "Coos_no", + "value": [ + "(39.12; 89; 67.19)", + "(39.12; 78; 67.19)", + "(39.12; 56; 67.18)" + ] + } + ] + } + ], + "properties": [] + }, + { + "id": "bc217fca-d51a-4d01-b37f-7270f02f88a4", + "type": "mislabelled_dtypes", + "name": "mislabelled_dtypes", + "sections": [ + { + "id": "3fd7dd7e-246f-4002-a577-534792fb5357", + "type": "string", + "name": "sec_string", + "sections": [], + "properties": [ + { + "id": "7c987cf5-7bc7-452d-a87f-69a8e68397ea", + "name": "words_mislabelled", + "value": [ + "hello", + "-world", + "3", + "True" + ], + "type": "string" + } + ] + }, + { + "id": "196e9408-3d8a-478a-829d-0834d14c9a32", + "type": "int", + "name": "sec_int", + "sections": [], + "properties": [ + { + "id": "c581ffae-8d7f-44e7-92cd-b96c253e680d", + "name": "members_mislabelled", + "value": [ + "-13", + "101", + "-11", + "0", + "-8" + ], + "type": "string" + } + ] + }, + { + "id": "fbe32258-9b80-43b4-8399-856780e1c67e", + "type": "float", + 
"name": "sec_float", + "sections": [], + "properties": [ + { + "id": "7302729f-ccc0-463d-8c39-78ec1136237f", + "name": "potential_mislabelled", + "value": [ + "-4.8", + "10.0", + "-11.9", + "-10.0", + "18.0" + ], + "type": "string" + } + ] + }, + { + "id": "daa8fc2f-fc44-408f-b824-d6f890974e36", + "type": "date", + "name": "sec_date", + "sections": [], + "properties": [ + { + "id": "5818ae19-80d5-42c1-ae05-61a36c2b9546", + "name": "dates_mislabelled", + "value": [ + "1997-12-14", + "00-12-14", + "89-07-04" + ], + "type": "string" + } + ] + }, + { + "id": "d2d0b323-4701-4fc1-ac9c-9bb9b04c2710", + "type": "datetime", + "name": "sec_datetime", + "sections": [], + "properties": [ + { + "id": "2be4f2c2-5364-4c6e-86c7-8b940cc8ed5f", + "name": "datetimes_mislabelled", + "value": [ + "97-12-14 11:11:11", + "97-12-14 12:12", + "1997-12-14 03:03" + ], + "type": "string" + } + ] + }, + { + "id": "aa38c228-7870-41a0-89bd-f579406468c0", + "type": "time", + "name": "sec_time", + "sections": [], + "properties": [ + { + "id": "d20c097f-162c-456a-beb4-0a55e261c9b8", + "name": "times_mislabelled", + "value": [ + "11:11:11", + "12:12:12", + "03:03:03" + ], + "type": "string" + } + ] + }, + { + "id": "b4d969e7-78b3-4bc3-942b-0126046ba538", + "type": "boolean", + "name": "sec_boolean", + "sections": [], + "properties": [ + { + "id": "d7f0cf25-a87a-4b77-a7a5-41c4b31763a5", + "name": "sent_mislabelled", + "value": [ + "False", + "True", + "TRUE", + "False", + "t" + ], + "type": "string" + } + ] + }, + { + "id": "fe16efe1-5f94-475e-b652-70b7f5495de5", + "type": "text", + "name": "sec_text", + "sections": [], + "properties": [ + { + "id": "ae863fc4-b998-4da1-b586-96c0d0124ddb", + "name": "texts_mislabelled", + "value": [ + "lineA \n lineB", + "lineC\n lineD", + "\nlineE\n lineF" + ], + "type": "string" + } + ] + }, + { + "id": "513ce09d-2d15-40b2-8d65-10438ba55752", + "type": "2-tuple", + "name": "sec_2_tuple", + "sections": [], + "properties": [ + { + "id": 
"99318b7a-ce26-4763-bd32-9d3dfb4b956a", + "name": "Location_mislabelled", + "value": [ + "(39.12; 67.19)", + "(39.12; 67.19)", + "(39.12; 67.18)" + ], + "type": "string" + } + ] + }, + { + "id": "f5bb809b-e621-4524-9858-1004e2232a57", + "type": "3-tuple", + "name": "sec_3_tuple", + "sections": [], + "properties": [ + { + "id": "1c41cc57-1704-4494-a463-71552a3f90bb", + "name": "Coos_mislabelled", + "value": [ + "(39.12; 89; 67.19)", + "(39.12; 78; 67.19)", + "(39.12; 56; 67.18)" + ], + "type": "string" + } + ] + } + ], + "properties": [] + } + ] + }, + "odml-version": "1.1" +} \ No newline at end of file diff --git a/test/resources/validation_dtypes.xml b/test/resources/validation_dtypes.xml new file mode 100644 index 00000000..a3ecdd9e --- /dev/null +++ b/test/resources/validation_dtypes.xml @@ -0,0 +1,217 @@ + + + + bfc85613-0e25-44e4-a85a-386b32c3c495 +
+ e72e64cd-e6eb-4399-b679-140d01f87365 + no_dtypes + no_dtypes +
+ a028642b-bebf-4392-9575-8cd751c3cdea + sec_string + + 313d13ce-8a46-44de-a333-71c19f54752c + words_no + [hello,-world,3,True] + +
+
+ 2deb48f3-f23b-427c-99c2-73345d5c34bb + int + sec_int + + 5b37aa19-76ef-48dd-bbd5-193a47213f36 + members_no + [-13,101,-11,0,-8] + +
+
+ 4965ce85-5da7-4411-9e66-6af7066fbc46 + float + sec_float + + 48c06a25-a10a-489a-990a-f592f0d87615 + potential_no + [-4.8,10.0,-11.9,-10.0,18.0] + +
+
+ 3d68d344-1fc2-43a5-9561-9fdc52b82fb2 + date + sec_date + + d0780269-20b1-4bc1-a023-4b502ffe63be + dates_no + [1997-12-14,00-12-14,89-07-04] + +
+
+ 9176ec80-7aa2-4a83-acb3-07e9efe7cf66 + datetime + sec_datetime + + 7ad1974a-3102-4e98-b1db-e36143bb8424 + datetimes_no + [97-12-14 11:11:11,97-12-14 12:12,1997-12-14 03:03] + +
+
+ 96756686-8652-46f3-9f7e-23137872fc72 + time + sec_time + + 0201b643-a871-4b8b-9e3a-820f43b81f5a + times_no + [11:11:11,12:12:12,03:03:03] + +
+
+ c7a6950f-dbf0-4478-afc3-f5a045f85914 + boolean + sec_boolean + + c550e679-4d5a-42c8-bbac-41adb9553e8c + sent_no + [False,True,TRUE,False,t] + +
+
+ d5269b4d-5ef9-4e35-986a-9780fa96a5d6 + 2-tuple + sec_2_tuple + + ba9976ca-ce85-417b-86a7-18a19122fdc5 + Location_no + [(39.12; 67.19),(39.12; 67.19),(39.12; 67.18)] + +
+
+ 67d066e7-ee4c-4ffb-ab8c-4539e6a97fbc + 3-tuple + sec_3_tuple + + df72cfbb-22fc-4470-bd4c-bf9d1c98b170 + Coos_no + [(39.12; 89; 67.19),(39.12; 78; 67.19),(39.12; 56; 67.18)] + +
+
+
+ 2fc36b6f-cf3d-4aef-ae4e-0f56d081c515 + mislabelled_dtypes + mislabelled_dtypes +
+ 7a7ee997-365d-4b3f-9edd-e7580a2bd76e + string + sec_string + + 16037a96-072b-431c-ae7b-2d3312eb8f42 + words_mislabelled + [hello,-world,3,True] + string + +
+
+ 65fe690a-9a0b-4557-8271-e5f10956b4e0 + int + sec_int + + 0c78a624-f60c-4555-8440-2299aaead89d + members_mislabelled + [-13,101,-11,0,-8] + string + +
+
+ fa8b21aa-52d0-414e-b93c-c2197f3020cf + float + sec_float + + 50af87b7-4797-40fa-850a-c8033ba0b85b + potential_mislabelled + [-4.8,10.0,-11.9,-10.0,18.0] + string + +
+
+ 6e56b7da-c779-499d-a4fe-92ed48750421 + date + sec_date + + 2aede904-71a3-4897-8614-1dcdd9ed0ddf + dates_mislabelled + [1997-12-14,00-12-14,89-07-04] + string + +
+
+ 7dec0155-657d-4cba-8c06-8be2ceaf4049 + datetime + sec_datetime + + dc50b6ac-0993-432e-b401-06d13ed9e720 + datetimes_mislabelled + [97-12-14 11:11:11,97-12-14 12:12,1997-12-14 03:03] + string + +
+
+ d0278536-a486-43c2-9708-ef16efc7440a + time + sec_time + + afda136e-70e1-40ee-abdd-0e80f56cf624 + times_mislabelled + [11:11:11,12:12:12,03:03:03] + string + +
+
+ aea047b0-abea-4c6c-9a8c-5b6089fde35c + boolean + sec_boolean + + d959337d-ec34-468e-a374-1b1213fa23d5 + sent_mislabelled + [False,True,TRUE,False,t] + string + +
+
+ 1b078e40-1e23-4a74-bf34-c948c46b8bba + text + sec_text + + 4b7430d6-ba74-4f66-88e5-8009bd289815 + texts_mislabelled + ["lineA + lineB","lineC + lineD","lineE + lineF"] + string + +
+
+ fcf33912-8635-4495-a41e-4f1f92e0dae0 + 2-tuple + sec_2_tuple + + d447d0b1-9586-43f5-9d5a-ae426cab43e9 + Location_mislabelled + [(39.12; 67.19),(39.12; 67.19),(39.12; 67.18)] + string + +
+
+ 23091dcb-3949-400e-8740-7cd26f997edf + 3-tuple + sec_3_tuple + + 66fc02ba-c908-4552-8b91-363348c39f28 + Coos_mislabelled + [(39.12; 89; 67.19),(39.12; 78; 67.19),(39.12; 56; 67.18)] + string + +
+
+
diff --git a/test/resources/validation_dtypes.yaml b/test/resources/validation_dtypes.yaml new file mode 100644 index 00000000..bfa63fc7 --- /dev/null +++ b/test/resources/validation_dtypes.yaml @@ -0,0 +1,246 @@ +Document: + id: 2af46ee0-48c3-4874-a67f-5f8b2951cf51 + sections: + - id: ebb1607f-a450-409f-b286-b9a8aef0900f + name: no_dtypes + properties: [] + sections: + - id: d7f47035-4e49-4438-b61f-227717a8bbe4 + name: sec_string + properties: + - id: a211dd69-a2b2-46f1-95f1-6b30d8b96ba0 + name: words_no + value: + - hello + - -world + - '3' + - 'True' + sections: [] + - id: efa678af-e8a5-491e-a0ea-29653bcb8c7b + name: sec_int + properties: + - id: f1e31bef-3652-4890-b570-b92c3903a8cf + name: members_no + value: + - '-13' + - '101' + - '-11' + - '0' + - '-8' + sections: [] + type: int + - id: ba3cd001-033b-4991-a205-9d4fd1138848 + name: sec_float + properties: + - id: 06ebdcd1-9635-4afc-bf2c-1ef44946f387 + name: potential_no + value: + - '-4.8' + - '10.0' + - '-11.9' + - '-10.0' + - '18.0' + sections: [] + type: float + - id: e0a47494-1d45-4e9e-a1c9-2d9987cc6625 + name: sec_date + properties: + - id: c9d46221-a403-4a48-870c-16f7632b24d7 + name: dates_no + value: + - '1997-12-14' + - 00-12-14 + - 89-07-04 + sections: [] + type: date + - id: 1e06a7d7-076b-45f2-b3e1-8795f9cd71c9 + name: sec_datetime + properties: + - id: 7f98fcff-4ac5-4c38-a071-8f0801ca0aa8 + name: datetimes_no + value: + - 97-12-14 11:11:11 + - 97-12-14 12:12 + - 1997-12-14 03:03 + sections: [] + type: datetime + - id: a8c624fc-1e0a-4da4-9862-64a0ad84fa0e + name: sec_time + properties: + - id: b2d169dd-1a00-43ad-b481-a85561c4d001 + name: times_no + value: + - '11:11:11' + - '12:12:12' + - 03:03:03 + sections: [] + type: time + - id: 4aafafb0-4d07-4689-a54c-565b4805f740 + name: sec_boolean + properties: + - id: 650e60e1-7647-40cb-8371-9f65e025c51c + name: sent_no + value: + - 'False' + - 'True' + - 'TRUE' + - 'False' + - t + sections: [] + type: boolean + - id: ffe14c58-0ad7-420c-a97f-7805a1ba8bf6 + 
name: sec_2_tuple + properties: + - id: e52973a8-9186-4739-a789-73286745cf4c + name: Location_no + value: + - (39.12; 67.19) + - (39.12; 67.19) + - (39.12; 67.18) + sections: [] + type: 2-tuple + - id: e47bc5f7-38f6-4575-891a-91bcf64e7872 + name: sec_3_tuple + properties: + - id: 443ef8a0-5e6e-480f-83ad-e305398dd828 + name: Coos_no + value: + - (39.12; 89; 67.19) + - (39.12; 78; 67.19) + - (39.12; 56; 67.18) + sections: [] + type: 3-tuple + type: no_dtypes + - id: cd3e02f7-ff2f-4b12-85d1-cd0a23ca8a83 + name: mislabelled_dtypes + properties: [] + sections: + - id: 2874da49-418d-452d-b4f8-540764d513aa + name: sec_string + properties: + - id: 8a6360e6-dce7-42c3-9477-eca7f9f51926 + name: words_mislabelled + type: string + value: + - hello + - -world + - '3' + - 'True' + sections: [] + type: string + - id: 8c926b02-4d6f-4e8f-aa92-78460eeeec5e + name: sec_int + properties: + - id: f46c9d64-28f8-4cc8-9fcc-31a0e5337ffe + name: members_mislabelled + type: string + value: + - '-13' + - '101' + - '-11' + - '0' + - '-8' + sections: [] + type: int + - id: 0785a91c-e126-41c1-9c64-dabee9750ea8 + name: sec_float + properties: + - id: 07f877fc-5190-4f21-a834-0bfea38dc5b0 + name: potential_mislabelled + type: string + value: + - '-4.8' + - '10.0' + - '-11.9' + - '-10.0' + - '18.0' + sections: [] + type: float + - id: 7684924a-5875-48f5-838d-509a99274097 + name: sec_date + properties: + - id: 36d587fd-0d83-4f71-b2bd-b08628378e03 + name: dates_mislabelled + type: string + value: + - '1997-12-14' + - 00-12-14 + - 89-07-04 + sections: [] + type: date + - id: ecc8be3a-e17c-48e5-92a4-07f829713531 + name: sec_datetime + properties: + - id: b7d415c1-8c1b-4f94-b425-37c3f6c2f131 + name: datetimes_mislabelled + type: string + value: + - 97-12-14 11:11:11 + - 97-12-14 12:12 + - 1997-12-14 03:03 + sections: [] + type: datetime + - id: 1a5d94f6-6ebc-4f5e-b621-93cfa2bebb50 + name: sec_time + properties: + - id: edc1fa24-f3a4-448f-9f1e-9387383a0fa3 + name: times_mislabelled + type: string + value: 
+ - '11:11:11' + - '12:12:12' + - 03:03:03 + sections: [] + type: time + - id: 75fb464d-9ea5-49a0-a820-343b08cfba2d + name: sec_boolean + properties: + - id: d2c2c6d1-1ca7-4544-8379-d8451954d38f + name: sent_mislabelled + type: string + value: + - 'False' + - 'True' + - 'TRUE' + - 'False' + - t + sections: [] + type: boolean + - id: 889124e7-09a5-400a-b7de-7d0e6d019d7e + name: sec_text + properties: + - id: 0a05a652-cfe7-46c2-bba4-fa9d1714a635 + name: texts_mislabelled + type: string + value: + - "lineA \n lineB" + - "lineC\n lineD" + - "\nlineE\n lineF" + sections: [] + type: text + - id: db9d7872-86d4-4155-9a14-2a44a4402b1a + name: sec_2_tuple + properties: + - id: 508517b1-e67a-42d3-aca1-6bd55ddea0e6 + name: Location_mislabelled + type: string + value: + - (39.12; 67.19) + - (39.12; 67.19) + - (39.12; 67.18) + sections: [] + type: 2-tuple + - id: a75c955a-6bef-40a7-99fd-a26160ae79d2 + name: sec_3_tuple + properties: + - id: 84aa21f3-c571-420c-963f-59c5785a45f2 + name: Coos_mislabelled + type: string + value: + - (39.12; 89; 67.19) + - (39.12; 78; 67.19) + - (39.12; 56; 67.18) + sections: [] + type: 3-tuple + type: mislabelled_dtypes +odml-version: '1.1' diff --git a/test/resources/validation_section.json b/test/resources/validation_section.json new file mode 100644 index 00000000..d7748252 --- /dev/null +++ b/test/resources/validation_section.json @@ -0,0 +1,21 @@ +{ + "Document": { + "id": "318c4323-f83c-4abc-adbd-daf47816fe87", + "sections": [ + { + "id": "552c620d-ac5f-46c4-869e-f2b8170e7a1e", + "name": "sec_type_undefined", + "sections": [], + "properties": [] + }, + { + "id": "52540350-3533-42bc-b438-f031f5ac3641", + "type": "", + "name": "sec_type_empty", + "sections": [], + "properties": [] + } + ] + }, + "odml-version": "1.1" +} \ No newline at end of file diff --git a/test/resources/validation_section.xml b/test/resources/validation_section.xml new file mode 100644 index 00000000..b6853446 --- /dev/null +++ b/test/resources/validation_section.xml @@ 
-0,0 +1,14 @@ + + + + 69585a6e-bdfd-4a49-b227-d4f00e42723e +
+ e50cfe1b-1c0b-4f47-b76b-5500508b661a + sec_type_undefined +
+
+ d2fd6a82-2568-4ebc-9bd4-93e386a980b7 + + sec_type_empty +
+
diff --git a/test/resources/validation_section.yaml b/test/resources/validation_section.yaml new file mode 100644 index 00000000..93da8572 --- /dev/null +++ b/test/resources/validation_section.yaml @@ -0,0 +1,13 @@ +Document: + id: 468dfc92-d794-4ae9-9ffe-a0944a957d36 + sections: + - id: 5279b30a-3dfd-4c44-ac18-2a80e7647449 + name: sec_type_undefined + properties: [] + sections: [] + - id: 2033d777-fc2b-4b73-a8f0-ae164bb1ea10 + name: sec_type_empty + properties: [] + sections: [] + type: '' +odml-version: '1.1' diff --git a/test/test_validation.py b/test/test_validation.py index d020cd26..7f4a170e 100644 --- a/test/test_validation.py +++ b/test/test_validation.py @@ -1,5 +1,6 @@ import unittest import odml +import os import odml.validation import odml.terminology from . import test_samplefile as samplefile @@ -12,6 +13,7 @@ class TestValidation(unittest.TestCase): def setUp(self): self.doc = samplefile.SampleFileCreator().create_document() self.maxDiff = None + self.dir_path = os.path.dirname(os.path.realpath(__file__)) def filter_repository_errors(self, errors): return filter(lambda x: "A section should have an associated " @@ -152,3 +154,296 @@ def test_standalone_property(self): for err in validate(prop).errors: assert not err.is_error + + def test_prop_string_values(self): + """ + Test if property values set as dtype string but could be of different dtype + raise validation warning. 
+ """ + + prop0 = odml.Property(name='words', dtype="string", + values=['-13', '101', '-11', 'hello']) + assert len(validate(prop0).errors) == 0 + + prop1 = odml.Property(name='members', dtype="string", + values=['-13', '101', '-11', '0', '-8']) + self.assertError(validate(prop1), 'Dtype of property "members" currently is "string",' + ' but might fit dtype "int"!') + + prop2 = odml.Property(name='potential', dtype="string", + values=['-4.8', '10.0', '-11.9', '-10.0', '18.0']) + self.assertError(validate(prop2),'Dtype of property "potential" currently is "string", ' + 'but might fit dtype "float"!') + + prop3 = odml.Property(name='dates', dtype="string", + values=['1997-12-14', '00-12-14', '89-07-04']) + self.assertError(validate(prop3), 'Dtype of property "dates" currently is "string", ' + 'but might fit dtype "date"!') + + prop4 = odml.Property(name='datetimes', dtype="string", + values=['97-12-14 11:11:11', '97-12-14 12:12', '1997-12-14 03:03']) + self.assertError(validate(prop4), 'Dtype of property "datetimes" currently is "string", ' + 'but might fit dtype "datetime"!') + + prop5 = odml.Property(name='times', dtype="string", + values=['11:11:11', '12:12:12', '03:03:03']) + self.assertError(validate(prop5), 'Dtype of property "times" currently is "string", ' + 'but might fit dtype "time"!') + + prop6 = odml.Property(name='sent', dtype="string", + values=['False', True, 'TRUE', False, 't']) + self.assertError(validate(prop6), 'Dtype of property "sent" currently is "string", ' + 'but might fit dtype "boolean"!') + + prop7 = odml.Property(name='texts', dtype="string", + values=['line1\n line2', 'line3\n line4', '\nline5\nline6']) + self.assertError(validate(prop7), 'Dtype of property "texts" currently is "string", ' + 'but might fit dtype "text"!') + + prop8 = odml.Property(name="Location", dtype='string', + values=['(39.12; 67.19)', '(39.12; 67.19)', '(39.12; 67.18)']) + self.assertError(validate(prop8), 'Dtype of property "Location" currently is "string", ' + 
'but might fit dtype "2-tuple"!') + + prop9 = odml.Property(name="Coos", dtype='string', + values=['(39.12; 89; 67.19)', '(39.12; 78; 67.19)', + '(39.12; 56; 67.18)']) + self.assertError(validate(prop9), 'Dtype of property "Coos" currently is "string", ' + 'but might fit dtype "3-tuple"!') + + def test_load_section_xml(self): + """ + Test if loading xml document raises validation errors for Sections with undefined type. + """ + + path = os.path.join(self.dir_path, "resources", "validation_section.xml") + doc = odml.load(path) + + sec_type_undefined_err = False + sec_type_empty_err = False + + for err in validate(doc).errors: + if err.msg == "Section type undefined" and err.obj.name == "sec_type_undefined": + sec_type_undefined_err = True + elif err.msg == "Section type undefined" and err.obj.name == "sec_type_empty": + sec_type_empty_err = True + + assert sec_type_undefined_err + assert sec_type_empty_err + + def test_load_dtypes_xml(self): + """ + Test if loading xml document raises validation errors for Properties with undefined dtypes. 
+ """ + + path = os.path.join(self.dir_path, "resources", "validation_dtypes.xml") + doc = odml.load(path) + + self.assertError(validate(doc), 'Dtype of property "members_no" currently is "string", ' + 'but might fit dtype "int"!') + + self.assertError(validate(doc), 'Dtype of property "potential_no" currently is "string", ' + 'but might fit dtype "float"!') + + self.assertError(validate(doc), 'Dtype of property "dates_no" currently is "string", ' + 'but might fit dtype "date"!') + + self.assertError(validate(doc), 'Dtype of property "datetimes_no" currently is "string", ' + 'but might fit dtype "datetime"!') + + self.assertError(validate(doc), 'Dtype of property "times_no" currently is "string", ' + 'but might fit dtype "time"!') + + self.assertError(validate(doc), 'Dtype of property "sent_no" currently is "string", ' + 'but might fit dtype "boolean"!') + + self.assertError(validate(doc), 'Dtype of property "Location_no" currently is "string", ' + 'but might fit dtype "2-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "Coos_no" currently is "string", ' + 'but might fit dtype "3-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "members_mislabelled" currently is ' + '"string", but might fit dtype "int"!') + + self.assertError(validate(doc), 'Dtype of property "potential_mislabelled" currently is ' + '"string", but might fit dtype "float"!') + + self.assertError(validate(doc), 'Dtype of property "dates_mislabelled" currently is ' + '"string", but might fit dtype "date"!') + + self.assertError(validate(doc), 'Dtype of property "datetimes_mislabelled" currently is ' + '"string", but might fit dtype "datetime"!') + + self.assertError(validate(doc), 'Dtype of property "times_mislabelled" currently is ' + '"string", but might fit dtype "time"!') + + self.assertError(validate(doc), 'Dtype of property "sent_mislabelled" currently is ' + '"string", but might fit dtype "boolean"!') + + self.assertError(validate(doc), 'Dtype of property 
"texts_mislabelled" currently is ' + '"string", but might fit dtype "text"!') + + self.assertError(validate(doc), 'Dtype of property "Location_mislabelled" currently is ' + '"string", but might fit dtype "2-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "Coos_mislabelled" currently is ' + '"string", but might fit dtype "3-tuple"!') + + def test_load_section_json(self): + """ + Test if loading json document raises validation errors for Sections with undefined type. + """ + + path = os.path.join(self.dir_path, "resources", "validation_section.json") + doc = odml.load(path, "JSON") + + sec_type_undefined_err = False + sec_type_empty_err = False + + for err in validate(doc).errors: + if err.msg == "Section type undefined" and err.obj.name == "sec_type_undefined": + sec_type_undefined_err = True + elif err.msg == "Section type undefined" and err.obj.name == "sec_type_empty": + sec_type_empty_err = True + + assert sec_type_undefined_err + assert sec_type_empty_err + + def test_load_dtypes_json(self): + """ + Test if loading json document raises validation errors for Properties with undefined dtypes. 
+ """ + + path = os.path.join(self.dir_path, "resources", "validation_dtypes.json") + doc = odml.load(path, "JSON") + + self.assertError(validate(doc), 'Dtype of property "members_no" currently is "string", ' + 'but might fit dtype "int"!') + + self.assertError(validate(doc), 'Dtype of property "potential_no" currently is "string", ' + 'but might fit dtype "float"!') + + self.assertError(validate(doc), 'Dtype of property "dates_no" currently is "string", ' + 'but might fit dtype "date"!') + + self.assertError(validate(doc), 'Dtype of property "datetimes_no" currently is "string", ' + 'but might fit dtype "datetime"!') + + self.assertError(validate(doc), 'Dtype of property "times_no" currently is "string", ' + 'but might fit dtype "time"!') + + self.assertError(validate(doc), 'Dtype of property "sent_no" currently is "string", ' + 'but might fit dtype "boolean"!') + + self.assertError(validate(doc), 'Dtype of property "Location_no" currently is "string", ' + 'but might fit dtype "2-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "Coos_no" currently is "string", ' + 'but might fit dtype "3-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "members_mislabelled" currently is ' + '"string", but might fit dtype "int"!') + + self.assertError(validate(doc), 'Dtype of property "potential_mislabelled" currently is ' + '"string", but might fit dtype "float"!') + + self.assertError(validate(doc), 'Dtype of property "dates_mislabelled" currently is ' + '"string", but might fit dtype "date"!') + + self.assertError(validate(doc), 'Dtype of property "datetimes_mislabelled" currently is ' + '"string", but might fit dtype "datetime"!') + + self.assertError(validate(doc), 'Dtype of property "times_mislabelled" currently is ' + '"string", but might fit dtype "time"!') + + self.assertError(validate(doc), 'Dtype of property "sent_mislabelled" currently is ' + '"string", but might fit dtype "boolean"!') + + self.assertError(validate(doc), 'Dtype of 
property "texts_mislabelled" currently is ' + '"string", but might fit dtype "text"!') + + self.assertError(validate(doc), 'Dtype of property "Location_mislabelled" currently is ' + '"string", but might fit dtype "2-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "Coos_mislabelled" currently is ' + '"string", but might fit dtype "3-tuple"!') + + def test_load_section_yaml(self): + """ + Test if loading yaml document raises validation errors for Sections with undefined type. + """ + + path = os.path.join(self.dir_path, "resources", "validation_section.yaml") + doc = odml.load(path, "YAML") + + sec_type_undefined_err = False + sec_type_empty_err = False + + for err in validate(doc).errors: + if err.msg == "Section type undefined" and err.obj.name == "sec_type_undefined": + sec_type_undefined_err = True + elif err.msg == "Section type undefined" and err.obj.name == "sec_type_empty": + sec_type_empty_err = True + + assert sec_type_undefined_err + assert sec_type_empty_err + + def test_load_dtypes_yaml(self): + """ + Test if loading yaml document raises validation errors for Properties with undefined dtypes. 
+ """ + + path = os.path.join(self.dir_path, "resources", "validation_dtypes.yaml") + doc = odml.load(path, "YAML") + + self.assertError(validate(doc), 'Dtype of property "members_no" currently is "string", ' + 'but might fit dtype "int"!') + + self.assertError(validate(doc), 'Dtype of property "potential_no" currently is "string", ' + 'but might fit dtype "float"!') + + self.assertError(validate(doc), 'Dtype of property "dates_no" currently is "string", ' + 'but might fit dtype "date"!') + + self.assertError(validate(doc), 'Dtype of property "datetimes_no" currently is "string", ' + 'but might fit dtype "datetime"!') + + self.assertError(validate(doc), 'Dtype of property "times_no" currently is "string", ' + 'but might fit dtype "time"!') + + self.assertError(validate(doc), 'Dtype of property "sent_no" currently is "string", ' + 'but might fit dtype "boolean"!') + + self.assertError(validate(doc), 'Dtype of property "Location_no" currently is "string", ' + 'but might fit dtype "2-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "Coos_no" currently is "string", ' + 'but might fit dtype "3-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "members_mislabelled" currently is ' + '"string", but might fit dtype "int"!') + + self.assertError(validate(doc), 'Dtype of property "potential_mislabelled" currently is ' + '"string", but might fit dtype "float"!') + + self.assertError(validate(doc), 'Dtype of property "dates_mislabelled" currently is ' + '"string", but might fit dtype "date"!') + + self.assertError(validate(doc), 'Dtype of property "datetimes_mislabelled" currently is ' + '"string", but might fit dtype "datetime"!') + + self.assertError(validate(doc), 'Dtype of property "times_mislabelled" currently is ' + '"string", but might fit dtype "time"!') + + self.assertError(validate(doc), 'Dtype of property "sent_mislabelled" currently is ' + '"string", but might fit dtype "boolean"!') + + self.assertError(validate(doc), 'Dtype of 
property "texts_mislabelled" currently is ' + '"string", but might fit dtype "text"!') + + self.assertError(validate(doc), 'Dtype of property "Location_mislabelled" currently is ' + '"string", but might fit dtype "2-tuple"!') + + self.assertError(validate(doc), 'Dtype of property "Coos_mislabelled" currently is ' + '"string", but might fit dtype "3-tuple"!')