-
Notifications
You must be signed in to change notification settings - Fork 0
/
NLP - Categories Testing Framework.step
1 lines (1 loc) · 41.8 KB
/
NLP - Categories Testing Framework.step
1
{"creationTimeStamp":"2023-12-13T17:17:34.377Z","modifiedTimeStamp":"2024-01-08T18:14:21.252Z","createdBy":"viya_admin","modifiedBy":"viya_admin","name":"NLP - Categories Testing Framework.step","displayName":"NLP - Categories Testing Framework.step","localDisplayName":"NLP - Categories Testing Framework.step","properties":{},"links":[{"method":"GET","rel":"self","href":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","uri":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","type":"application/vnd.sas.data.flow.step"},{"method":"GET","rel":"alternate","href":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","uri":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","type":"application/vnd.sas.data.flow.step.summary"},{"method":"GET","rel":"up","href":"/dataFlows/steps","uri":"/dataFlows/steps","type":"application/vnd.sas.collection","itemType":"application/vnd.sas.data.flow.step.summary"},{"method":"PUT","rel":"update","href":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","uri":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","type":"application/vnd.sas.data.flow.step","responseType":"application/vnd.sas.data.flow.step"},{"method":"DELETE","rel":"delete","href":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","uri":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8"},{"method":"GET","rel":"transferExport","href":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","uri":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","responseType":"application/vnd.sas.transfer.object"},{"method":"PUT","rel":"transferImportUpdate","href":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","uri":"/dataFlows/steps/c6906cb1-f6df-436c-a331-1b43dc4489b8","type":"application/vnd.sas.transfer.object","responseType":"application/vnd.sas.summary"}],"metadataVersion":0.0,"version":2,"type":"code","flowMetadata":{"inputPorts":[{"name":"inputTable","displayName":"inputTable","localDisplayName":"inputTable","minEntries":1,"maxEntries
":1,"defaultEntries":0,"type":"table"}],"outputPorts":[{"name":"outputTable","displayName":"outputTable","localDisplayName":"outputTable","minEntries":1,"maxEntries":1,"defaultEntries":0,"type":"table","supportsView":false,"requiresStructure":false}]},"ui":"{\n\t\"showPageContentOnly\": true,\n\t\"pages\": [\n\t\t{\n\t\t\t\"id\": \"page1\",\n\t\t\t\"type\": \"page\",\n\t\t\t\"label\": \"Parameters\",\n\t\t\t\"children\": [\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"param_input\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Input Parameters\",\n\t\t\t\t\t\"open\": true,\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"inputTable\",\n\t\t\t\t\t\t\t\"type\": \"inputtable\",\n\t\t\t\t\t\t\t\"label\": \"Select input table:\",\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"analyticsProjectLocation\",\n\t\t\t\t\t\t\t\"type\": \"textfield\",\n\t\t\t\t\t\t\t\"label\": \"Analytics project caslib:\",\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"text1\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"Paste the location of your Visual Text Analytics project in Model Studio. Refer About tab for details on how to obtain the caslib.\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"modelBinary\",\n\t\t\t\t\t\t\t\"type\": \"textfield\",\n\t\t\t\t\t\t\t\"label\": \"Model binary:\",\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"text2\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"Provide name of the binary file. 
Refer About tab for details on how to identify the binary.\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"docId\",\n\t\t\t\t\t\t\t\"type\": \"columnselector\",\n\t\t\t\t\t\t\t\"label\": \"Select unique identifier:\",\n\t\t\t\t\t\t\t\"order\": false,\n\t\t\t\t\t\t\t\"columntype\": \"a\",\n\t\t\t\t\t\t\t\"max\": 1,\n\t\t\t\t\t\t\t\"min\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"table\": \"inputTable\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"textVar\",\n\t\t\t\t\t\t\t\"type\": \"columnselector\",\n\t\t\t\t\t\t\t\"label\": \"Select text variable:\",\n\t\t\t\t\t\t\t\"order\": false,\n\t\t\t\t\t\t\t\"columntype\": \"c\",\n\t\t\t\t\t\t\t\"max\": 1,\n\t\t\t\t\t\t\t\"min\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"table\": \"inputTable\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"actualVar\",\n\t\t\t\t\t\t\t\"type\": \"columnselector\",\n\t\t\t\t\t\t\t\"label\": \"Select 1 actual (ground truth) variable for assessment:\",\n\t\t\t\t\t\t\t\"order\": false,\n\t\t\t\t\t\t\t\"columntype\": \"a\",\n\t\t\t\t\t\t\t\"max\": 1,\n\t\t\t\t\t\t\t\"min\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"table\": \"inputTable\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"section2\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Output Specifications\",\n\t\t\t\t\t\"open\": true,\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"outputTablePromote\",\n\t\t\t\t\t\t\t\"type\": \"checkbox\",\n\t\t\t\t\t\t\t\"label\": \"Automatically promote output table\",\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"enabled\": [\n\t\t\t\t\t\t\t\t\"true\",\n\t\t\t\t\t\t\t\t\"=\",\n\t\t\t\t\t\t\t\t\"false\"\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"outputTable\",\n\t\t\t\t\t\t\t\"type\": \"outputtable\",\n\t\t\t\t\t\t\t\"label\": \"Output table:\",\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"placeholder\": 
\"\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t}\n\t\t\t]\n\t\t},\n\t\t{\n\t\t\t\"id\": \"about\",\n\t\t\t\"type\": \"page\",\n\t\t\t\"label\": \"About\",\n\t\t\t\"children\": [\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"about_description\",\n\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\"text\": \"NLP - Categories Testing Framework\\n=========================\\n\\nThis custom step facilitates testing, assessment and continuous improvement of a SAS Visual Text Analytics (VTA) categorization model. \\n\\nRapid AI prototyping improves time to value. Implement processes based on this custom step (and custom reporting templates provided in the same repo) to assess the current progress of your model.\\n\\nAn example application is to run this step in parallel with your NLP model development and visualize assessment output using SAS Visual Analytics.\\n\",\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"about_parameters\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Parameters\",\n\t\t\t\t\t\"open\": true,\n\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"parameters_text\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"This custom step requires a SAS Visual Text Analytics (VTA) license. \\n\\nIt runs on data loaded to a SAS Cloud Analytics Services (CAS) library (known as a caslib). Ensure you are connected to CAS before running this step. 
\\n\\nMake sure the step has permissions to write to the output caslib, and also that caslibs are assigned once you've established a connection to CAS.\\n\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"parameters_input\",\n\t\t\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\t\t\"label\": \"Input Parameters\",\n\t\t\t\t\t\t\t\"open\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"id\": \"input_parameters_text\",\n\t\t\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\t\t\"text\": \"1. Input table containing a text column (input port, required): attach a CAS table to this port.\\n\\n2. Analytics project caslib (text field, required): paste the location of your VTA project. Here's how you find the location:\\n\\n i. Open the project in Model Studio \\n ii. Navigate to the Data tab\\n iii. Copy the location from the properties menu (right-hand side). Feel free to copy the entire string instead of just the \\\"Analytics_project...\\\" portion.\\n\\n3. Model binary name (text field, required): paste the model binary filename here. To identify the model binary, \\n i. Right-click on your categories node and select Results.\\n ii. Copy the binary referred in the score code.\\n\\n4. Unique ID (column selector, required): select the unique identifier for each observation in your table.\\n\\n5. Text variable (column selector, required): select a character variable containing text you wish to analyze.\\n\\n6. 
Actuals (column selector, required): select a column to serve as the basis for assessment (also known as target, ground truth or category role).\",\n\t\t\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"parameters_output_specs\",\n\t\t\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\t\t\"label\": \"Output Specifications\",\n\t\t\t\t\t\t\t\"open\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"id\": \"output_parameters_text\",\n\t\t\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\t\t\"text\": \"This custom step creates a single output table and automatically promotes it to global scope. Promotion facilitates downstream analysis in SAS Visual Analytics. \\n\\n1. Output table (output table port, required): attach a CAS table. Note that you will obtain multiple observations per document ranging across candidate categories. \\n\\n2. Promote to global scope (checked): currently, the option to promote the output table is frozen. Future versions may provide you the flexibility to choose whether to promote or not.\\n\\nA selection of some important columns in the output table:\\n\\n1. Category_Level: all candidate categories which are assessed\\n2. Record_Type: a flag indicating whether the record is a True Positive, False Positive, True Negative or a False Negative\\n3. Unique_Obs_ID: a new unique identifier variable created for each observation in the output table to facilitate further discovery\\n4. 
_match_text_: keywords contributing to the categories matched in the output table, to facilitate qualitative analysis\\n\",\n\t\t\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"about_runtimecontrol\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Run-time Control\",\n\t\t\t\t\t\"open\": 0,\n\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"runtimecontrol_text\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"Note: Run-time control is optional. You may choose whether to execute the main code of this step or not, based on upstream conditions set by earlier SAS programs. This includes nodes run prior to this custom step earlier in a SAS Studio Flow, or a previous program in the same session.\\n\\nRefer this blog (https://communities.sas.com/t5/SAS-Communities-Library/Switch-on-switch-off-run-time-control-of-SAS-Studio-Custom-Steps/ta-p/885526) for more details on the concept.\\n\\nThe following macro variable,\\n\\n_nctf_run_trigger\\n\\nwill initialize with a value of 1 by default, indicating an \\\"enabled\\\" status and allowing the custom step to run.\\n\\nIf you wish to control execution of this custom step, include code in an upstream SAS program to set this variable to 0. This \\\"disables\\\" execution of the custom step.\\n\\nTo \\\"disable\\\" this step, run the following code upstream:\\n\\n%global _nctf_run_trigger;\\n%let _nctf_run_trigger =0;\\n\\nTo \\\"enable\\\" this step again, run the following (it's assumed that this has already been set as a global variable):\\n\\n%let _nctf_run_trigger =1;\\n\\nIMPORTANT: Be aware that disabling this step means that none of its main execution code will run, and any downstream code which was dependent on this code may fail. 
Change this setting only if it aligns with the objective of your SAS Studio program.\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"about_documentation\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Documentation\",\n\t\t\t\t\t\"open\": 0,\n\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"documentation_text\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"1. Documentation on the textRuleScore.applyCategory action: https://go.documentation.sas.com/doc/en/pgmsascdc/default/casvtapg/cas-textrulescore-applycategory.htm\\n\\n2. This custom step uses FedSQL to join CAS tables. The dynamic cardinality feature proves useful for joins involving 3+ tables (also taking their size into consideration). Thanks to Andy Therber, SAS, for details. Documentation: https://go.documentation.sas.com/doc/en/pgmsascdc/default/casfedsql/p0lrihvbn5xnfdn1a86poyhemp9f.htm\\n\\n3. Documentation on the fedsql.execDirect action: https://go.documentation.sas.com/doc/en/pgmsascdc/default/caspg/cas-fedsql-execdirect.htm\\n\\n4. 
From a use-case perspective, refer this (slightly old but still relevant) SAS Communities article for an example application: https://communities.sas.com/t5/SAS-Communities-Library/Priming-the-pump-for-better-risk-assessment/ta-p/565370\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"version_text\",\n\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\"text\": \"Version: 1.0 (03JAN2024)\",\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"contact_text\",\n\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\"text\": \"Created/contact: \\n\\n- Sundaresh Sankaran (sundaresh.sankaran@sas.com)\\n\\n- Renato Luppi (renato.luppi@sas.com)\\n\",\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t}\n\t\t\t]\n\t\t}\n\t],\n\t\"syntaxversion\": \"1.3.0\",\n\t\"values\": {\n\t\t\"inputTable\": {\n\t\t\t\"library\": \"\",\n\t\t\t\"table\": \"\"\n\t\t},\n\t\t\"analyticsProjectLocation\": \"\",\n\t\t\"modelBinary\": \"\",\n\t\t\"docId\": [],\n\t\t\"textVar\": [],\n\t\t\"actualVar\": [],\n\t\t\"outputTablePromote\": true,\n\t\t\"outputTable\": {\n\t\t\t\"library\": \"\",\n\t\t\t\"table\": \"\"\n\t\t}\n\t},\n\t\"columnExclusions\": [\n\t\t[\n\t\t\t\"textVar\",\n\t\t\t\"docId\"\n\t\t],\n\t\t[\n\t\t\t\"actualVar\",\n\t\t\t\"docId\"\n\t\t],\n\t\t[\n\t\t\t\"actualVar\",\n\t\t\t\"textVar\"\n\t\t]\n\t]\n}","templates":{"SAS":"/*-----------------------------------------------------------------------------------------*\n START MACRO DEFINITIONS.\n*------------------------------------------------------------------------------------------*/\n\n/* -----------------------------------------------------------------------------------------* \n Macro to create an error flag for capture during code execution.\n\n Input:\n 1. errorFlagName: The name of the error flag you wish to create. Ensure you provide a \n unique value to this parameter since it will be declared as a global variable.\n\n Output:\n 2. 
&errorFlagName : A global variable which takes the name provided to errorFlagName.\n\n Also available at: \n https://github.com/SundareshSankaran/sas_utility_programs/blob/main/code/Error%20Flag%20Creation/macro_create_error_flag.sas\n*------------------------------------------------------------------------------------------ */\n\n\n%macro _create_error_flag(errorFlagName);\n\n %global &errorFlagName.;\n %let &errorFlagName.=0;\n\n%mend _create_error_flag;\n\n/* -------------------------------------------------------------------------------------------* \n Macro to initialize a run-time trigger global macro variable to run SAS Studio Custom Steps. \n A value of 1 (the default) enables this custom step to run. A value of 0 (provided by \n upstream code) sets this to disabled.\n\n Input:\n 1. triggerName: The name of the runtime trigger you wish to create. Ensure you provide a \n unique value to this parameter since it will be declared as a global variable.\n\n Output:\n 2. &triggerName : A global variable which takes the name provided to triggerName.\n \n Also available at:\n https://github.com/SundareshSankaran/sas_utility_programs/blob/main/code/Create_Run_Time_Trigger/macro_create_runtime_trigger.sas\n*-------------------------------------------------------------------------------------------- */\n\n%macro _create_runtime_trigger(triggerName);\n\n %global &triggerName.;\n\n %if %sysevalf(%superq(&triggerName.)=, boolean) %then %do;\n \n %put NOTE: Trigger macro variable &triggerName. does not exist. 
Creating it now.;\n %let &triggerName.=1;\n\n %end;\n\n%mend _create_runtime_trigger;\n\n/*-----------------------------------------------------------------------------------------*\n Macro variable to capture indicator of a currently active CAS session\n*------------------------------------------------------------------------------------------*/\n\n%global casSessionExists;\n%global _current_uuid_;\n\n/*-----------------------------------------------------------------------------------------*\n Macro to capture indicator and UUID of any currently active CAS session.\n UUID is not expensive and can be used in future to consider graceful reconnect.\n\n casSessionExists is reset to 0 on every call, so that a missing _SESSREF_ or a value\n left over from an earlier run is never mistaken for an active session by the caller.\n*------------------------------------------------------------------------------------------*/\n\n%macro _nctf_checkSession;\n %let casSessionExists=0;\n %if %sysfunc(symexist(_SESSREF_)) %then %do;\n %let casSessionExists= %sysfunc(sessfound(&_SESSREF_.));\n %if &casSessionExists.=1 %then %do;\n proc cas;\n session.sessionId result = sessresults;\n call symputx(\"_current_uuid_\", sessresults[1]);\n quit;\n /* Report the UUID only after PROC CAS has ended, so that symputx has already run. */\n %put NOTE: A CAS session &_SESSREF_. is currently active with UUID &_current_uuid_. ;\n %end;\n %end;\n%mend _nctf_checkSession;\n\n/*-----------------------------------------------------------------------------------------*\n This macro creates a global macro variable called _usr_nameCaslib\n that contains the caslib name (aka. 
caslib-reference-name) associated with the libname \n and assumes that the libname is using the CAS engine.\n\n As sysvalue has a length of 1024 chars, we use the trimmed option in proc sql\n to remove leading and trailing blanks in the caslib name.\n From macro provided by Wilbram Hazejager\n*------------------------------------------------------------------------------------------*/\n\n%macro _usr_getNameCaslib(_usr_LibrefUsingCasEngine); \n\n %global _usr_nameCaslib;\n %let _usr_nameCaslib=;\n\n proc sql noprint;\n select sysvalue into :_usr_nameCaslib trimmed from dictionary.libnames\n where libname = upcase(\"&_usr_LibrefUsingCasEngine.\") and upcase(sysname)=\"CASLIB\";\n quit;\n\n%mend _usr_getNameCaslib;\n\n/* -----------------------------------------------------------------------------------------* \n Macro to split the Model Studio Project location and obtain the caslib portion for \n downstream use.\n\n Input:\n 1. projectLocation: A string provided by a text field in a SAS Studio Custom step.\n \n\n Output:\n 1. 
analyticsProjectCaslib: Set inside macro, a global variable indicating the caslib portion\n of the model studio path.\n also available at: https://raw.githubusercontent.com/SundareshSankaran/sas_utility_programs/main/code/Project_Caslib_from_Model_Studio_Location/project_caslib_from_model_studio.sas\n*------------------------------------------------------------------------------------------ */\n\n%macro _extract_caslib_from_ms_location(projectLocation);\n\n %global analyticsProjectCaslib;\n\n data _null_;\n if index(\"&projectLocation.\",\"/\") > 0 then do;\n call symput(\"analyticsProjectCaslib\",scan(\"&projectLocation.\",2,\"/\",\"MO\"));\n end;\n else do;\n call symput(\"analyticsProjectCaslib\",\"&projectLocation.\");\n end;\n run;\n\n %put NOTE: The macro variable analyticsProjectCaslib resolves to: &analyticsProjectCaslib.;\n\n%mend _extract_caslib_from_ms_location;\n\n/* -----------------------------------------------------------------------------------------* \n Macro to promote a CAS table.\n\n FUTURE PLACEHOLDER: The following functions will be converted to user defined functions\n instead of being called within the same proc cas block.\n\n Inputs:\n 1. casLib: A valid caslib.\n 2. 
casTable: A name of a CAS table.\n \n also available at: https://raw.githubusercontent.com/SundareshSankaran/sas_utility_programs/main/code/Promote%20a%20CAS%20Table/promote_cas_table.sas\n*------------------------------------------------------------------------------------------ */\n\n%macro _promote_cas_table(casLib, casTable, targetCaslib, targetTable);\n \n proc cas;\n\n function doesTableExist(_casLib, _casTable);\n table.tableExists result=tableExistsResultTable status=rc / \n caslib = _casLib, \n table = _casTable;\n tableExists = dictionary(tableExistsResultTable, \"exists\");\n print tableExists;\n return tableExists;\n end;\n\n function dropTableIfExists(casLib,casTable);\n print \"Entering dropTableIfExists\";\n print casLib;\n tableExists = doesTableExist(casLib, casTable);\n if tableExists != 0 then do;\n print \"Dropping table: \"||casLib||\".\"||casTable;\n table.dropTable status=rc / caslib=casLib, table=casTable, quiet=True;\n if rc.statusCode != 0 then do;\n exit();\n end;\n dropTableIfExists(casLib, casTable);\n end;\n end;\n\n function promoteTable(casLib, casTable, targetCaslib, targetTable); \n table.promote /\n target = targetTable,\n targetcaslib = targetCaslib,\n name = casTable,\n caslib = casLib\n ; \n end;\n\n dropTableIfExists(\"&targetCaslib.\", \"&targetTable.\");\n promoteTable(\"&casLib.\",\"&casTable.\", \"&targetCaslib.\", \"&targetTable.\");\n\n quit;\n\n%mend _promote_cas_table;\n\n\n\n/*-----------------------------------------------------------------------------------------*\n EXECUTION CODE MACRO \n*------------------------------------------------------------------------------------------*/\n\n%macro _nctf_main_execution_code;\n\n/*-----------------------------------------------------------------------------------------*\n Create an error flag. 
\n*------------------------------------------------------------------------------------------*/\n\n %_create_error_flag(_nctf_error_flag);\n\n/*-----------------------------------------------------------------------------------------*\n Check if an active CAS session exists. \n*------------------------------------------------------------------------------------------*/\n\n %_nctf_checkSession;\n\n %if &casSessionExists. = 0 %then %do;\n %put ERROR: A CAS session does not exist. Connect to a CAS session upstream. ;\n %let _nctf_error_flag = 1;\n %end;\n %else %do;\n\n/*-----------------------------------------------------------------------------------------*\n Check Input table libref to ensure it points to a valid caslib.\n*------------------------------------------------------------------------------------------*/\n\n %if &_nctf_error_flag. = 0 %then %do;\n\n %global inputCaslib;\n \n %_usr_getNameCaslib(&inputTable_lib.);\n %let inputCaslib=&_usr_nameCaslib.;\n %put NOTE: &inputCaslib. is the caslib for the input table.;\n %let _usr_nameCaslib=;\n\n %if \"&inputCaslib.\" = \"\" %then %do;\n %put ERROR: Input table caslib is blank. Check if Base table is a valid CAS table. ;\n %let _nctf_error_flag=1;\n %end;\n\n %end;\n\n/*-----------------------------------------------------------------------------------------*\n Check Output table libref to ensure it points to a valid caslib.\n*------------------------------------------------------------------------------------------*/\n\n %if &_nctf_error_flag. = 0 %then %do;\n\n %global outputCaslib;\n \n %_usr_getNameCaslib(&outputTable_lib.);\n %let outputCaslib=&_usr_nameCaslib.;\n %put NOTE: &outputCaslib. is the output caslib.;\n %let _usr_nameCaslib=;\n\n %if \"&outputCaslib.\" = \"\" %then %do;\n %put ERROR: Output table caslib is blank. Check if Output table is a valid CAS table. ;\n %let _nctf_error_flag=1;\n %end;\n\n %end;\n\n\n %if &_nctf_error_flag. 
= 0 %then %do;\n\n /*-----------------------------------------------------------------------------------------*\n Extract caslib from the Model Studio project location.\n *------------------------------------------------------------------------------------------*/\n\n %_extract_caslib_from_ms_location(&analyticsProjectLocation.);\n\n /*-----------------------------------------------------------------------------------------*\n Run CAS statements.\n *------------------------------------------------------------------------------------------*/\n\n proc cas;\n\n /*------------------------------------------------------------------------------------*\n Helper functions. As mentioned in previous note, a future placeholder is to convert\n these to user-defined functions and avoid having to declare them within every \n proc cas block.\n *------------------------------- -----------------------------------------------------*/\n\n function doesTableExist(_casLib, _casTable);\n table.tableExists result=tableExistsResultTable status=rc / \n caslib = _casLib, \n table = _casTable;\n tableExists = dictionary(tableExistsResultTable, \"exists\");\n print tableExists;\n return tableExists;\n end;\n\n function dropTableIfExists(casLib,casTable);\n print \"Entering dropTableIfExists\";\n print casLib;\n tableExists = doesTableExist(casLib, casTable);\n if tableExists != 0 then do;\n print \"Dropping table: \"||casLib||\".\"||casTable;\n table.dropTable status=rc / caslib=casLib, table=casTable, quiet=True;\n if rc.statusCode != 0 then do;\n exit();\n end;\n dropTableIfExists(casLib, casTable);\n end;\n end;\n\n /*------------------------------------------------------------------------------------*\n Obtain parameters from the UI\n *------------------------------- -----------------------------------------------------*/\n inputTableName = symget(\"inputTable_name_base\");\n inputCaslib = symget(\"inputCaslib\");\n analyticsProjectCaslib = symget(\"analyticsProjectCaslib\");\n modelBinary 
= symget(\"modelBinary\");\n docId = symget(\"docId\");\n textVar = symget(\"textVar\");\n outputCaslib = symget(\"outputCaslib\");\n outputTableName = symget(\"outputTable_name_base\");\n\n /*------------------------------------------------------------------------------------*\n Apply the NLP model (currently supports only categories)\n *------------------------------- -----------------------------------------------------*/\n textRuleScore.applyCategory/\n model = {caslib=analyticsProjectCaslib, name=modelBinary},\n table = {caslib=inputCaslib, name=inputTableName},\n docId = docId,\n text = textVar,\n casOut = {caslib=outputCaslib, name=\"__TEMP_CAT\", replace=TRUE},\n matchOut = {caslib=outputCaslib, name=\"__TEMP_MAT\", replace=TRUE}\n ;\n\n\n /*------------------------------------------------------------------------------------*\n note this query text string is depicted in separate strings to reduce chances of \n error (due to unbalanced quotation marks) which affects readability.\n *------------------------------- -----------------------------------------------------*/\n\n \n queryText_1 = \"create table &outputCaslib.._&outputTable_name. {options replace = True} as select a.*, b._category_, b._score_, b._match_text_, b._start_, b._end_ from (select &docId., &textVar., &actualVar. from &inputCaslib..&inputTable_name.) a \";\n queryText_2 = \" left join (select __TEMP_CAT.&docId., __TEMP_CAT._category_, __TEMP_CAT._score_, __TEMP_MAT._match_text_, __TEMP_MAT._start_, __TEMP_MAT._end_ from &outputCaslib..__TEMP_CAT\" ;\n queryText_3 = \" left join &outputCaslib..__TEMP_MAT on __TEMP_CAT.&docId. = __TEMP_MAT.&docId.) b on a.&docId. = b.&docId.\";\n\n /*------------------------------------------------------------------------------------*\n The following section on the use of the dynamic cardinality feature within\n fed sql is a very useful reference. 
Note that this is suitable for situations\n involving joins of 3+ tables (the case here) and also taking the size of table\n into consideration. Considering use / disuse of this flag based on any \n prior knowledge you might have about your category output tables.\n\n FUTURE PLACEHOLDER: Provide an option in the UI.\n\n https://go.documentation.sas.com/doc/en/pgmsascdc/default/casfedsql/p0lrihvbn5xnfdn1a86poyhemp9f.htm \n *------------------------------- -----------------------------------------------------*/\n\n fedsql.execDirect /\n cntl = {dynamicCardinality=TRUE},\n query = queryText_1||queryText_2||queryText_3\n ;\n\n /*------------------------------------------------------------------------------------*\n Create candidate category tables for assessment.\n *------------------------------- -----------------------------------------------------*/\n\n fedsql.execDirect / \n query = \"create table &outputCaslib..__TEMPCAT_1 {options replace = True} as select distinct &actualVar. as Category_Level from &inputCaslib..&inputTable_name. \";\n\n fedsql.execDirect / \n query = \"create table &outputCaslib..__TEMPCAT_2 {options replace = True} as select distinct scan(_category_,-1,'/') as Category_Level from &outputCaslib.._&outputTable_name.\";\n\n datastep.runCode /\n code = \"data &outputCaslib..__TEMPCAT_3 (replace = Yes) ; length Category_Level varchar(*); Category_Level = ''; if _threadid_ = 1;run;\";\n\n /*------------------------------------------------------------------------------------*\n For practical purposes, the number of records in the following temp tables\n allow us to append (and then subsequently select distinct records). 
A merge\n data step is preferred but may emit a warning about multiple lengths.\n *------------------------------- -----------------------------------------------------*/\n\n datastep.runCode /\n code = \"data &outputCaslib..__TEMPCAT (replace = Yes) ; set &outputCaslib..__TEMPCAT_1 &outputCaslib..__TEMPCAT_2 &outputCaslib..__TEMPCAT_3 ; run;\";\n\n /*------------------------------------------------------------------------------------*\n Create a 'candidate' dataset. Text categorization differs from other \n \"classification\" problems in that it may allow for multiple labels \n and even multiple truths in some rare cases. Every record is assessed as if it \n were a candidate for all possible categories. \n\n It's tempting to consider a nice single FedSQL query to output the final table,\n but we'll prioritise functionality over form and put in a \n FUTURE PLACEHOLDER: consider optimizing all FedSQL queries below.\n\n *------------------------------- -----------------------------------------------------*/\n\n fedsql.execDirect / \n query = \"select distinct Category_Level from &outputCaslib..__TEMPCAT\";\n\n\n fedsql.execDirect / \n query = \"create table &outputCaslib..__TEMP1 {options replace = True} as select a.*, b.* from (select distinct &docId. from &inputCaslib..&inputTable_name.) a, (select distinct Category_Level from &outputCaslib..__TEMPCAT) b \";\n \n fedsql.execDirect /\n query = \"create table &outputCaslib..__TEMP2 {options replace = True} as select a.*, b.nbr_predictions from (select * from &outputCaslib..__TEMP1) a left join (select &docId., scan(_category_,-1,'/') as Category_Level, count(*) as nbr_predictions from &outputCaslib.._&outputTable_name. group by &docId., _category_) b on a.&docId. = b.&docId. 
and a.Category_Level = b.Category_Level\";\n\n fedsql.execDirect /\n query = \"create table &outputCaslib..__TEMP3 {options replace = True} as select a.*, b.nbr_actuals from (select * from &outputCaslib..__TEMP1) a left join (select &docId., &actualVar. as Category_Level, count(*) as nbr_actuals from &outputCaslib.._&outputTable_name. group by &docId., &actualVar.) b on a.&docId. = b.&docId. and a.Category_Level = b.Category_Level\";\n\n fedsql.execDirect /\n query = \"create table &outputCaslib..__TEMP4 {options replace = True} as select a.*, b.nbr_actuals from (select * from &outputCaslib..__TEMP2) a inner join (select * from &outputCaslib..__TEMP3) b on a.&docId. = b.&docId. and a.Category_Level = b.Category_Level\";\n\n /*------------------------------------------------------------------------------------*\n Delete intermediate tables.\n *------------------------------- -----------------------------------------------------*/\n\n dropTableIfExists(\"&outputCaslib.\", \"__TEMP_CAT\");\n dropTableIfExists(\"&outputCaslib.\", \"__TEMP_MAT\");\n dropTableIfExists(\"&outputCaslib.\", \"__TEMPCAT_1\");\n dropTableIfExists(\"&outputCaslib.\", \"__TEMPCAT_2\");\n dropTableIfExists(\"&outputCaslib.\", \"__TEMPCAT_3\");\n dropTableIfExists(\"&outputCaslib.\", \"__TEMPCAT\");\n dropTableIfExists(\"&outputCaslib.\", \"__TEMP1\");\n dropTableIfExists(\"&outputCaslib.\", \"__TEMP2\");\n dropTableIfExists(\"&outputCaslib.\", \"__TEMP3\");\n\n\n quit;\n \n data &outputCaslib..__TEMP4;\n length Record_Type $15.;\n set &outputCaslib..__TEMP4;\n /*------------------------------------------------------------------------------------*\n Calculation of True Positives\n \"Win the lottery.\"\n *------------------------------- -----------------------------------------------------*/\n if nbr_predictions > 0 and nbr_actuals > 0 then do;\n Record_Type = \"True Positive\";\n naive_true_positives = nbr_predictions;\n end;\n 
/*------------------------------------------------------------------------------------*\n Calculation of False Positives\n \"Make a wrong call.\"\n *------------------------------- -----------------------------------------------------*/\n if nbr_predictions > nbr_actuals then do;\n Record_Type = \"False Positive\";\n naive_false_positives = max(0,nbr_predictions) - max(0,nbr_actuals);\n end;\n /*------------------------------------------------------------------------------------*\n Calculation of True Negatives\n \"Don't consider the unlikely.\"\n *------------------------------- -----------------------------------------------------*/\n if nbr_predictions = . and nbr_actuals = . then do;\n Record_Type = \"True Negative\";\n end;\n /*------------------------------------------------------------------------------------*\n Calculation of False Negatives\n \"Ignore this at your peril.\"\n *------------------------------- -----------------------------------------------------*/\n if nbr_predictions = . and nbr_actuals > 0 then do;\n Record_Type = \"False Negative\";\n naive_false_negatives = nbr_actuals;\n end;\n\n run;\n\n proc cas;\n\n fedsql.execDirect /\n query = \"create table &outputCaslib.._&outputTable_name. {options replace = True} as select a.*, b.&textVar., b.&actualVar., b._category_, b._score_, b._match_text_, b._start_, b._end_ from &outputCaslib..__TEMP4 a left join &outputCaslib.._&outputTable_name. b on a.&docId. = b.&docId. and a.Category_Level = scan(b._category_,-1,'/') \";\n\n /*------------------------------------------------------------------------------------*\n Unique IDs per observation is useful for downstream analysis in VA.\n Unfortunately, the generateIds action doesn't allow for adding a unique ID\n to the same table. 
Therefore, we make a copy and then promote.\n *------------------------------- -----------------------------------------------------*/\n\n \n textManagement.generateIds / \n casOut={name=\"__&outputTable_name.\", caslib=\"&outputCaslib.\", replace=TRUE },\n id=\"Unique_Obs_ID\",\n table={name=\"_&outputTable_name.\", caslib=\"&outputCaslib.\"}\n ;\n\n quit;\n\n /*------------------------------------------------------------------------------------*\n FUTURE PLACEHOLDER: Make promotion an option (even though VA is a preferred \n application for analysis)\n *------------------------------- -----------------------------------------------------*/\n\n %_promote_cas_table(&outputCaslib., __&outputTable_name., &outputCaslib., &outputTable_name. );\n\n\n proc cas;\n\n function doesTableExist(_casLib, _casTable);\n table.tableExists result=tableExistsResultTable status=rc / \n caslib = _casLib, \n table = _casTable;\n tableExists = dictionary(tableExistsResultTable, \"exists\");\n print tableExists;\n return tableExists;\n end;\n\n function dropTableIfExists(casLib,casTable);\n print \"Entering dropTableIfExists\";\n print casLib;\n tableExists = doesTableExist(casLib, casTable);\n if tableExists != 0 then do;\n print \"Dropping table: \"||casLib||\".\"||casTable;\n table.dropTable status=rc / caslib=casLib, table=casTable, quiet=True;\n if rc.statusCode != 0 then do;\n exit();\n end;\n dropTableIfExists(casLib, casTable);\n end;\n end;\n\n\n dropTableIfExists(\"&outputCaslib.\", \"__TEMP4\");\n dropTableIfExists(\"&outputCaslib.\", \"_&outputTable_name.\");\n dropTableIfExists(\"&outputCaslib.\", \"__&outputTable_name.\");\n\n quit;\n\n %end;\n\n %end;\n\n%mend _nctf_main_execution_code;\n\n/*-----------------------------------------------------------------------------------------*\n END MACRO 
DEFINITIONS.\n*------------------------------------------------------------------------------------------*/\n\n\n/*-----------------------------------------------------------------------------------------*\n EXECUTION CODE\n The execution code is controlled by the trigger variable defined in this custom step. This\n trigger variable is in an \"enabled\" (value of 1) state by default, but in some cases, as \n dictated by logic, could be set to a \"disabled\" (value of 0) state.\n*------------------------------------------------------------------------------------------*/\n/*-----------------------------------------------------------------------------------------*\n Create a run-time trigger. \n*------------------------------------------------------------------------------------------*/\n\n%_create_runtime_trigger(_nctf_run_trigger);\n\n%if &_nctf_run_trigger. = 1 %then %do;\n\n %_nctf_main_execution_code;\n\n%end;\n%if &_nctf_run_trigger. = 0 %then %do;\n\n %put NOTE: This step has been disabled. 
Nothing to do.;\n\n%end;\n\n\n/*-----------------------------------------------------------------------------------------*\n Clean up existing macro variables and macro definitions.\n Macro variables are deleted only when %symexist confirms that they exist. Macro\n definitions are deleted with the NOWARN option so that a macro which was never compiled\n in this session does not add a warning to the log.\n*------------------------------------------------------------------------------------------*/\n%if %symexist(_nctf_error_flag) %then %do;\n %symdel _nctf_error_flag;\n%end;\n\n%if %symexist(casSessionExists) %then %do;\n %symdel casSessionExists;\n%end;\n\n%if %symexist(_nctf_run_trigger) %then %do;\n %symdel _nctf_run_trigger;\n%end;\n\n%if %symexist(_current_uuid) %then %do;\n %symdel _current_uuid;\n%end;\n\n%if %symexist(_usr_nameCaslib) %then %do;\n %symdel _usr_nameCaslib;\n%end;\n\n%if %symexist(analyticsProjectCaslib) %then %do;\n %symdel analyticsProjectCaslib;\n%end;\n\n%if %symexist(inputCaslib) %then %do;\n %symdel inputCaslib;\n%end;\n\n%if %symexist(outputCaslib) %then %do;\n %symdel outputCaslib;\n%end;\n\n%sysmacdelete _create_error_flag / nowarn;\n%sysmacdelete _create_runtime_trigger / nowarn;\n%sysmacdelete _nctf_checkSession / nowarn;\n%sysmacdelete _usr_getNameCaslib / nowarn;\n%sysmacdelete _extract_caslib_from_ms_location / nowarn;\n%sysmacdelete _promote_cas_table / nowarn;\n%sysmacdelete _nctf_main_execution_code / nowarn;\n"}}