Skip to content

Commit

Permalink
Merge pull request #41 from EGA-archive/bugfix/fix-dataout-view
Browse files Browse the repository at this point in the history
Upgrade database structure for data-out
  • Loading branch information
silverdaz committed Mar 7, 2019
2 parents 1dc0e12 + f501fc7 commit e6aa534
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 199 deletions.
192 changes: 76 additions & 116 deletions deploy/images/db/download.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,35 @@ CREATE SCHEMA local_ega_download;

SET search_path TO local_ega_download;

CREATE TABLE local_ega_download.status (
id INTEGER,
code VARCHAR(32) NOT NULL,
description TEXT,
-- contraints
PRIMARY KEY(id), UNIQUE (id), UNIQUE (code)
);

INSERT INTO local_ega_download.status(id,code,description)
VALUES (10, 'INIT' , 'Initializing a download request'),
(20, 'REENCRYPTING', 'Re-Encrypting the header for a given user'),
(30, 'STREAMING' , 'Streaming file from the Archive'),
(40, 'DONE' , 'Download completed'), -- checksums are in the Crypt4GH formatted file
-- and validated by the decryptor
(0, 'ERROR' , 'An Error occured, check the error table');


CREATE TABLE local_ega_download.main (
CREATE TABLE local_ega_download.requests (
id SERIAL, PRIMARY KEY(id), UNIQUE (id),

-- which files was downloaded
file_id INTEGER NOT NULL REFERENCES local_ega.main(id), -- No "ON DELETE CASCADE"
start_coordinate BIGINT DEFAULT 0,
end_coordinate BIGINT NULL, -- might be missing

-- Status/Progress
status VARCHAR NOT NULL REFERENCES local_ega_download.status (code), -- No "ON DELETE CASCADE"
-- DEFAULT 'INIT' ?
-- user info
user_info TEXT NULL,
client_ip TEXT NULL,

-- Stats
start_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT clock_timestamp(),
end_timestamp TIMESTAMP,
bytes INTEGER DEFAULT 0,
speed FLOAT DEFAULT 0.0, -- bytes per seconds

-- table logs
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT clock_timestamp(),
last_modified TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT clock_timestamp()
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT clock_timestamp()
);


-- Insert new request, and return some archive information
CREATE TYPE request_type AS (req_id INTEGER, -- local_ega_download.main.id%TYPE,
file_id INTEGER, -- local_ega.archive_files.id%TYPE,
header TEXT, -- local_ega.archive_files.header%TYPE,
archive_path TEXT, -- local_ega.archive_files.archive_file_reference%TYPE,
archive_type local_ega.storage);--local_ega.archive_files.archive_file_type%TYPE);

CREATE FUNCTION make_request(sid local_ega.main.stable_id%TYPE)
CREATE TYPE request_type AS (req_id INTEGER,
header TEXT,
archive_path TEXT,
archive_type local_ega.storage,
file_size INTEGER,
unencrypted_checksum VARCHAR,
unencrypted_checksum_type local_ega.checksum_algorithm);

CREATE FUNCTION make_request(sid local_ega.main.stable_id%TYPE,
uinfo local_ega_download.requests.user_info%TYPE,
cip local_ega_download.requests.client_ip%TYPE,
scoord local_ega_download.requests.start_coordinate%TYPE DEFAULT 0,
ecoord local_ega_download.requests.end_coordinate%TYPE DEFAULT NULL)
RETURNS request_type AS $make_request$
#variable_conflict use_column
DECLARE
Expand All @@ -57,106 +39,84 @@ DECLARE
rid INTEGER;
BEGIN

-- Find the file
SELECT * INTO archive_rec FROM local_ega.archive_files WHERE stable_id = sid LIMIT 1;

IF archive_rec IS NULL THEN
RAISE EXCEPTION 'Archive file not found for stable_id: % ', sid;
RAISE EXCEPTION 'archived file not found for stable_id: % ', sid;
END IF;

INSERT INTO local_ega_download.main (file_id, status)
VALUES (archive_rec.id, 'INIT')
RETURNING local_ega_download.main.id INTO rid;

req.req_id := rid;
req.file_id := archive_rec.id;
req.header := archive_rec.header;
req.archive_path := archive_rec.archive_file_reference;
req.archive_type := archive_rec.archive_file_type;
-- New entry, or reuse old entry
INSERT INTO local_ega_download.requests (file_id, user_info, client_ip, start_coordinate, end_coordinate)
VALUES (archive_rec.file_id, uinfo, cip, scoord, ecoord)
ON CONFLICT (id) DO NOTHING
RETURNING local_ega_download.requests.id INTO rid;

-- result
req.req_id := rid;
req.header := archive_rec.header;
req.archive_path := archive_rec.archive_path;
req.archive_type := archive_rec.archive_type;
req.file_size := archive_rec.archive_filesize;
req.unencrypted_checksum := archive_rec.unencrypted_checksum;
req.unencrypted_checksum_type := archive_rec.unencrypted_checksum_type;
RETURN req;
END;
$make_request$ LANGUAGE plpgsql;

-- When there is an updated, remember the timestamp
CREATE FUNCTION download_updated()
RETURNS TRIGGER AS $download_updated$
BEGIN
NEW.last_modified = clock_timestamp();
RETURN NEW;
END;
$download_updated$ LANGUAGE plpgsql;

CREATE TRIGGER download_updated AFTER UPDATE ON local_ega_download.main FOR EACH ROW EXECUTE PROCEDURE download_updated();
CREATE TABLE local_ega_download.success (
id SERIAL, PRIMARY KEY(id), UNIQUE (id),

-- which requested file it was
req_id INTEGER NOT NULL REFERENCES local_ega_download.requests(id), -- No "ON DELETE CASCADE"

-- Stats
bytes BIGINT DEFAULT 0,
speed FLOAT DEFAULT 0.0, -- bytes per seconds

-- table logs
occured_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT clock_timestamp()
);


-- Mark a download as complete, and calculate the speed
CREATE FUNCTION download_complete(reqid local_ega_download.main.id%TYPE,
dlsize local_ega_download.main.bytes%TYPE)
RETURNS void AS $download_complete$
DECLARE
fid local_ega.main.id%TYPE;
curr_timestamp TIMESTAMP;
CREATE FUNCTION download_complete(rid local_ega_download.requests.id%TYPE,
dlsize local_ega_download.success.bytes%TYPE,
s local_ega_download.success.speed%TYPE)
RETURNS void AS $insert_success$
BEGIN
curr_timestamp := clock_timestamp();
UPDATE local_ega_download.main
SET status = 'DONE',
end_timestamp = curr_timestamp,
bytes = dlsize,
speed = dlsize / extract( epoch from (curr_timestamp - start_timestamp)) -- extract (epoch for interval) = elapsed seconds
-- now pray for no div by zero
WHERE id = reqid
RETURNING file_id INTO fid;

-- turn off active errors
UPDATE local_ega_download.main_errors SET active = FALSE WHERE file_id = fid;
INSERT INTO local_ega_download.success(req_id,bytes,speed)
VALUES(rid,dlsize,s);
END;
$download_complete$ LANGUAGE plpgsql;
$insert_success$ LANGUAGE plpgsql;


-- ##################################################
-- ERRORS
-- ##################################################
CREATE TABLE local_ega_download.main_errors (
id SERIAL, PRIMARY KEY(id), UNIQUE (id),
active BOOLEAN NOT NULL DEFAULT TRUE,
file_id INTEGER NOT NULL REFERENCES local_ega.main(id), -- ON DELETE CASCADE,
req_id INTEGER NOT NULL REFERENCES local_ega_download.main(id), -- ON DELETE CASCADE,

code TEXT NOT NULL,
description TEXT NOT NULL,

client_ip TEXT, -- where from
hostname TEXT, -- where it happened

-- table logs
occured_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT clock_timestamp()
);
CREATE TABLE local_ega_download.errors (
id SERIAL, PRIMARY KEY(id), UNIQUE (id),
req_id INTEGER NOT NULL REFERENCES local_ega_download.requests(id), -- ON DELETE CASCADE,

-- Just showing the current/active errors
CREATE VIEW local_ega_download.errors AS
SELECT id, code, description, client_ip, hostname, occured_at FROM local_ega_download.main_errors
WHERE active = TRUE;

CREATE FUNCTION insert_error(req_id local_ega_download.main.id%TYPE,
h local_ega_download.errors.hostname%TYPE,
etype local_ega_download.errors.code%TYPE,
msg local_ega_download.errors.description%TYPE,
client_ip local_ega_download.errors.client_ip%TYPE)
RETURNS void AS $download_error$
DECLARE
fid local_ega_download.main.file_id%TYPE;
BEGIN
code TEXT NOT NULL,
description TEXT NOT NULL,

UPDATE local_ega_download.main
SET status = 'ERROR'
WHERE id = req_id
RETURNING file_id INTO fid;

IF fid IS NULL THEN
RAISE EXCEPTION 'Request id not found: %', req_id;
END IF;
-- where it happened
hostname TEXT,

INSERT INTO local_ega_download.main_errors (file_id,req_id,hostname,code,description,client_ip)
VALUES (fid,req_id, h, etype, msg, client_ip);
-- table logs
occured_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT clock_timestamp()
);

CREATE FUNCTION insert_error(rid local_ega_download.requests.id%TYPE,
h local_ega_download.errors.hostname%TYPE,
etype local_ega_download.errors.code%TYPE,
msg local_ega_download.errors.description%TYPE)
RETURNS void AS $insert_error$
BEGIN
INSERT INTO local_ega_download.errors (req_id,hostname,code,description)
VALUES (rid, h, etype, msg);
END;
$download_error$ LANGUAGE plpgsql;
$insert_error$ LANGUAGE plpgsql;

106 changes: 34 additions & 72 deletions deploy/images/db/ebi.sql
Original file line number Diff line number Diff line change
@@ -1,85 +1,47 @@
CREATE SCHEMA local_ega_ebi;

SET search_path TO local_ega_ebi;

-- Special view for EBI Data-Out
CREATE VIEW local_ega.ebi_files AS
SELECT id AS file_id,
stable_id AS file_name,
archive_file_reference AS file_path,
archive_file_type AS file_type,
archive_file_size AS file_size,
archive_file_checksum AS unencrypted_checksum,
archive_file_checksum_type AS unencrypted_checksum_type,
header AS header,
created_by AS created_by,
last_modified_by AS last_updated_by,
created_at AS created,
last_modified AS last_updated
CREATE VIEW local_ega_ebi.file AS
SELECT stable_id AS file_id,
archive_file_reference AS file_name,
archive_file_reference AS file_path,
reverse(split_part(reverse(submission_file_path::text), '/'::text, 1)) AS display_file_name,
archive_file_size AS file_size,
NULL::text AS checksum,
NULL::text AS checksum_type,
archive_file_checksum AS unencrypted_checksum,
archive_file_checksum_type AS unencrypted_checksum_type,
status AS file_status,
header AS header
FROM local_ega.main
WHERE status = 'READY';


-- Relation File <-> Index File
CREATE TABLE local_ega.index_files (
id SERIAL, PRIMARY KEY(id), UNIQUE (id),

file_id INTEGER NOT NULL REFERENCES local_ega.main (id) ON DELETE CASCADE,

index_file_reference TEXT NOT NULL, -- file path if POSIX, object id if S3
index_file_type local_ega.storage -- S3 or POSIX file system
);

-- Relation File EGAF <-> Dataset EGAD
CREATE TABLE local_ega.file2dataset (
CREATE TABLE local_ega_ebi.filedataset (
id SERIAL, PRIMARY KEY(id), UNIQUE (id),

file_id INTEGER NOT NULL REFERENCES local_ega.main (id) ON DELETE CASCADE, -- not stable_id

dataset_id TEXT NOT NULL
dataset_stable_id TEXT NOT NULL
);

-- Event
CREATE TABLE local_ega.event (
event_id SERIAL, UNIQUE (event_id),

client_ip varchar(45) NOT NULL,

event varchar(256) NOT NULL,

event_type varchar(256) NOT NULL,

email varchar(256) NOT NULL,

created timestamp NOT NULL DEFAULT now(),

CONSTRAINT event_pkey PRIMARY KEY (event_id)
-- This view was created to be in sync with the entity eu.elixir.ega.ebi.downloader.domain.entity.FileDataset
-- which uses a view and has an @Id annotation in file_id
CREATE VIEW local_ega_ebi.file_dataset AS
SELECT m.stable_id AS file_id, dataset_stable_id as dataset_id FROM local_ega_ebi.filedataset fd
INNER JOIN local_ega.main m ON fd.file_id=m.id;

-- Relation File <-> Index File
CREATE TABLE local_ega_ebi.fileindexfile (
id SERIAL, PRIMARY KEY(id), UNIQUE (id),
file_id INTEGER NOT NULL REFERENCES local_ega.main (id) ON DELETE CASCADE, -- not stable_id
index_file_id TEXT,
index_file_reference TEXT NOT NULL, -- file path if POSIX, object id if S3
index_file_type local_ega.storage -- S3 or POSIX file system
);

-- Download Log
CREATE TABLE local_ega.download_log (
download_log_id SERIAL, PRIMARY KEY(download_log_id), UNIQUE (download_log_id),

client_ip varchar(45) NOT NULL,

api varchar(45) NOT NULL,

email varchar(256) NOT NULL,

file_id varchar(15) NOT NULL,

download_speed float8 NOT NULL DEFAULT '-1'::integer,

download_status varchar(256) NOT NULL DEFAULT 'success'::character varying,

encryption_type varchar(256) NOT NULL,

start_coordinate int8 NOT NULL DEFAULT 0,

end_coordinate int8 NOT NULL DEFAULT 0,

bytes int8 NOT NULL DEFAULT 0,

created timestamp NOT NULL DEFAULT now(),

token_source varchar(255) NOT NULL

);
-- This view was created to be in sync with the entity eu.elixir.ega.ebi.downloader.domain.entity.FileIndexFile
-- which seems to use a view and has an @Id annotation in file_id
CREATE VIEW local_ega_ebi.file_index_file AS
SELECT m.stable_id AS file_id, index_file_id FROM local_ega_ebi.fileindexfile fif
INNER JOIN local_ega.main m ON fif.file_id=m.id;
10 changes: 5 additions & 5 deletions deploy/images/db/grants.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ GRANT USAGE ON SCHEMA local_ega TO lega_in, lega_out;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA local_ega TO lega_in; -- Read/Write access on local_ega.* for lega_in
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA local_ega TO lega_in; -- Don't forget the sequences
GRANT SELECT ON local_ega.archive_files TO lega_out; -- Read-Only access for lega_out
GRANT SELECT ON local_ega.ebi_files TO lega_out; -- Used by EBI
GRANT SELECT ON local_ega.index_files TO lega_out; -- Used by EBI
GRANT SELECT ON local_ega.file2dataset TO lega_out; -- Used by EBI
GRANT SELECT ON local_ega.event TO lega_out; -- Used by EBI
GRANT SELECT ON local_ega.download_log TO lega_out; -- Used by EBI

-- Set up rights access for audit schema
GRANT USAGE ON SCHEMA local_ega_download TO lega_out;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA local_ega_download TO lega_out; -- Read/Write on audit.* for lega_out
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA local_ega_download TO lega_out; -- Don't forget the sequences

-- Set up rights access for local_ega_ebi schema
GRANT USAGE ON SCHEMA local_ega_ebi TO lega_out;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA local_ega_ebi TO lega_out;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA local_ega_ebi TO lega_out;
15 changes: 9 additions & 6 deletions deploy/images/db/main.sql
Original file line number Diff line number Diff line change
Expand Up @@ -277,14 +277,17 @@ CREATE TRIGGER mark_ready

-- View on the archive files
CREATE VIEW local_ega.archive_files AS
SELECT id,
stable_id,
archive_file_reference,
archive_file_type,
header
SELECT id AS file_id
, stable_id AS stable_id
, archive_file_reference AS archive_path
, archive_file_type AS archive_type
, archive_file_size AS archive_filesize
, archive_file_checksum AS unencrypted_checksum
, archive_file_checksum_type AS unencrypted_checksum_type
, header AS header
, version AS version
FROM local_ega.main
WHERE status = 'READY';

-- ##########################################################################
-- About the encryption
-- ##########################################################################
Expand Down

0 comments on commit e6aa534

Please sign in to comment.