Skip to content

Commit

Permalink
added HemOnc to the automation
Browse files Browse the repository at this point in the history
  • Loading branch information
Timur committed May 20, 2021
1 parent 050c873 commit 172a9bc
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 1 deletion.
22 changes: 22 additions & 0 deletions working/packages/vocabulary_download/bash_functions_hemonc.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
--Creates (or replaces) the PL/sh helper vocabulary_download.get_hemonc_prepare.
--The CREATE FUNCTION text is kept in a variable so that carriage returns can be
--stripped before execution: bash cannot run a script with CRLF line endings.
DO $_$
DECLARE
z text;
BEGIN
z:=$FUNCTIONBODY$
CREATE OR REPLACE FUNCTION vocabulary_download.get_hemonc_prepare (iPath text, iFilename text)
RETURNS void AS
$BODY$#!/bin/bash
#Moves the downloaded *.tab files from the "work" subfolder into the load folder.
#  $1 (iPath)     - load folder of the vocabulary
#  $2 (iFilename) - not used by this script

#set permissions=775 by default
umask 002

#move result to original folder
#abort if the folder cannot be entered: otherwise the cleanup below would run
#in whatever directory the script happened to start in
cd "$1" || exit 1
#remove files left in the load folder; "--" protects against names starting with "-"
rm -f -- *.*
mv work/*.tab .
$BODY$
LANGUAGE 'plsh'
SECURITY DEFINER;
$FUNCTIONBODY$;
--convert CRLF to LF for bash
EXECUTE REPLACE(z,E'\r','');
END $_$;
175 changes: 175 additions & 0 deletions working/packages/vocabulary_download/get_hemonc.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
CREATE OR REPLACE FUNCTION vocabulary_download.get_hemonc(
iOperation text default null,
out session_id int4,
out last_status INT,
out result_output text
)
AS
$BODY$
--Downloads the HemOnc source files and loads them into the input tables.
--
--Parameters:
--  iOperation - step to start from: 'ALL' (default when NULL),
--               'JUMP_TO_HEMONC_PREPARE' or 'JUMP_TO_HEMONC_IMPORT'.
--               Every step after the starting one is executed as well.
--Returns:
--  session_id    - value taken from vocabulary_download.log_seq for this run
--  last_status   - 3 when all tasks are done, 2 when an error was caught
--  result_output - '<src date> -> <new date>, <src version> -> <new version>' on success,
--                  the error message and context on failure
--
--Progress is reported through write_log with status 0 (started), 1 (step complete),
--3 (all tasks done) or 2 (error). Errors are not re-raised: they are logged and
--returned to the caller via the OUT parameters.
DECLARE
    pVocabularyID constant text:='HEMONC';
    --files published in the dataset (without the .tab extension), in download order
    pSourceTables constant text[]:=array['concept_relationship_stage','concept_stage','concept_synonym_stage'];
    pVocabulary_url vocabulary_access.vocabulary_url%type;
    pVocabularySrcDate date;
    pVocabularySrcVersion text;
    pVocabularyNewDate date;
    pVocabularyNewVersion text;
    pContent text;
    pDownloadURL text;
    pDownloadURL2 text;
    pFileID text;
    pErrorDetails text;
    pVocabularyOperation text;
    pJumpToOperation text; --ALL (default), JUMP_TO_HEMONC_PREPARE, JUMP_TO_HEMONC_IMPORT
    cRet text;
    CRLF constant text:=E'\r\n';
    pSession int4;
    pVocabulary_load_path text;
BEGIN
    pVocabularyOperation:='GET_HEMONC';
    select nextval('vocabulary_download.log_seq') into pSession;

    select new_date, new_version, src_date, src_version
    into pVocabularyNewDate, pVocabularyNewVersion, pVocabularySrcDate, pVocabularySrcVersion
    from vocabulary_pack.CheckVocabularyUpdate(pVocabularyID);

    --from here on unqualified helpers (write_log, run_wget, py_http_get, ...) resolve to vocabulary_download
    set local search_path to vocabulary_download;

    perform write_log (
        iVocabularyID=>pVocabularyID,
        iSessionID=>pSession,
        iVocabulary_operation=>pVocabularyOperation||' started',
        iVocabulary_status=>0
    );

    if pVocabularyNewDate is null then raise exception '% already updated',pVocabularyID; end if;

    pJumpToOperation:=coalesce(iOperation,'ALL');
    if pJumpToOperation not in ('ALL', 'JUMP_TO_HEMONC_PREPARE', 'JUMP_TO_HEMONC_IMPORT') then raise exception 'Wrong iOperation %',iOperation; end if;

    --transaction-level lock: prevents two concurrent runs for the same vocabulary
    if not pg_try_advisory_xact_lock(hashtext(pVocabularyID)) then raise exception 'Processing of % already started',pVocabularyID; end if;

    select var_value||pVocabularyID into pVocabulary_load_path from devv5.config$ where var_name='vocabulary_load_path';

    if pJumpToOperation='ALL' then
        --get the start page URL
        select vocabulary_url into pVocabulary_url from devv5.vocabulary_access where vocabulary_id=pVocabularyID and vocabulary_order=1;
        if pVocabulary_url is null then raise exception 'Download URL for % is not configured',pVocabularyID; end if;

        --find the link to the 'HemOnc ontology' dataset page: scheme+host of the start page + relative href
        pDownloadURL := SUBSTRING(pVocabulary_url,'^(https?://([^/]+))')||SUBSTRING(http_content,'<a href="(/dataset.xhtml\?persistentId=.+?)"><span style=.+?>HemOnc ontology</span></a>') from py_http_get(url=>pVocabulary_url,allow_redirects=>true);
        if pDownloadURL is null then raise exception 'Link to the HemOnc ontology dataset not found on %',pVocabulary_url; end if;

        --fetch the dataset page once; the ids of all files are parsed from the same content
        select http_content into pContent from py_http_get(url=>pDownloadURL,allow_redirects=>true);

        for i in 1..array_length(pSourceTables,1) loop
            --start downloading the file
            pVocabularyOperation:='GET_HEMONC '||pSourceTables[i]||' downloading';

            pFileID := SUBSTRING(LOWER(pContent),'.+<a href="/file.xhtml\?fileid=([\d]+).+?">.+?'||pSourceTables[i]||'\.tab.+?</a>.+');
            if pFileID is null then raise exception 'File id of % not found on %',pSourceTables[i]||'.tab',pDownloadURL; end if;
            pDownloadURL2 := 'https://dataverse.harvard.edu/api/access/datafile/'||pFileID;

            if i=1 then
                --first file: run_wget is called with its default iDeleteAll
                perform run_wget (
                    iPath=>pVocabulary_load_path,
                    iFilename=>pSourceTables[i]||'.tab',
                    iDownloadLink=>pDownloadURL2
                );
            else
                --next files: iDeleteAll=>0, presumably to keep what has been downloaded so far (confirm in run_wget)
                perform run_wget (
                    iPath=>pVocabulary_load_path,
                    iFilename=>pSourceTables[i]||'.tab',
                    iDownloadLink=>pDownloadURL2,
                    iDeleteAll=>0
                );
            end if;

            perform write_log (
                iVocabularyID=>pVocabularyID,
                iSessionID=>pSession,
                iVocabulary_operation=>'GET_HEMONC downloading complete',
                iVocabulary_status=>1
            );
        end loop;
    end if;

    if pJumpToOperation in ('ALL','JUMP_TO_HEMONC_PREPARE') then
        --reset to ALL so that the following steps are executed too
        pJumpToOperation:='ALL';
        --extraction
        pVocabularyOperation:='GET_HEMONC prepare';
        perform get_hemonc_prepare (
            iPath=>pVocabulary_load_path,
            iFilename=>lower(pVocabularyID)
        );
        perform write_log (
            iVocabularyID=>pVocabularyID,
            iSessionID=>pSession,
            iVocabulary_operation=>'GET_HEMONC prepare complete',
            iVocabulary_status=>1
        );
    end if;

    if pJumpToOperation in ('ALL','JUMP_TO_HEMONC_IMPORT') then
        pJumpToOperation:='ALL';
        --finally we have all input tables, we can start importing
        pVocabularyOperation:='GET_HEMONC load_input_tables';
        perform sources.load_input_tables(pVocabularyID,pVocabularyNewDate,pVocabularyNewVersion);
        perform write_log (
            iVocabularyID=>pVocabularyID,
            iSessionID=>pSession,
            iVocabulary_operation=>'GET_HEMONC load_input_tables complete',
            iVocabulary_status=>1
        );
    end if;

    perform write_log (
        iVocabularyID=>pVocabularyID,
        iSessionID=>pSession,
        iVocabulary_operation=>'GET_HEMONC all tasks done',
        iVocabulary_status=>3
    );

    session_id:=pSession;
    last_status:=3;
    --NOTE(review): || yields NULL if any operand is NULL, so result_output is NULL
    --when the source date/version is missing - confirm whether that is intended
    result_output:=to_char(pVocabularySrcDate,'YYYYMMDD')||' -> '||to_char(pVocabularyNewDate,'YYYYMMDD')||', '||pVocabularySrcVersion||' -> '||pVocabularyNewVersion;
    return;

EXCEPTION WHEN OTHERS THEN
    get stacked diagnostics cRet = pg_exception_context;
    cRet:='ERROR: '||SQLERRM||CRLF||'CONTEXT: '||cRet;
    --set the search_path again so that write_log can be resolved in the handler
    set local search_path to vocabulary_download;
    --pVocabularyOperation holds the step that was running when the error occurred
    perform write_log (
        iVocabularyID=>pVocabularyID,
        iSessionID=>pSession,
        iVocabulary_operation=>pVocabularyOperation,
        iVocabulary_error=>cRet,
        iError_details=>pErrorDetails,
        iVocabulary_status=>2
    );

    session_id:=pSession;
    last_status:=2;
    result_output:=cRet;
    return;
END;
$BODY$
LANGUAGE 'plpgsql'
SECURITY DEFINER;
2 changes: 1 addition & 1 deletion working/packages/vocabulary_pack/CheckVocabularyUpdate.sql
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ BEGIN
cVocabVer := SUBSTRING (LOWER(cVocabHTML),'.+?<h3>version actuelle</h3><div class="telechargement_bas"><h4>ccam version ([\d.]+)</h4>.+');
WHEN cVocabularyName = 'HEMONC'
THEN
cVocabDate := TO_DATE (SUBSTRING (LOWER(cVocabHTML),'.+?>hemonc ontology ([\d-]+)</span>.+'),'yyyy-mm-dd');
cVocabDate := TO_DATE (SUBSTRING (LOWER(cVocabHTML),'.+?>hemonc ontology</span>.+?<span class="text-muted">(.+?)</span>.+'),'month dd, yyyy');
cVocabVer := 'HemOnc '||to_char(cVocabDate,'yyyy-mm-dd');
WHEN cVocabularyName = 'DMD'
THEN
Expand Down

0 comments on commit 172a9bc

Please sign in to comment.