diff --git a/.gitignore b/.gitignore index bb3e30b2a..1e3537bf3 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,6 @@ scripts/importer/downloaded_files postgres-data # local personal things personal +# travis test remnants +master-schema.sql +settings.json.tmp diff --git a/.travis.yml b/.travis.yml index e9d8926e4..9323e2edd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,3 +10,8 @@ install: - pip install coverage coveralls script: - test/travis_script.sh +addons: + postgresql: "10" + apt: + packages: + - postgresql-client-10 diff --git a/backend/test.py b/backend/test.py index 4834e81e8..6eb43006a 100644 --- a/backend/test.py +++ b/backend/test.py @@ -101,7 +101,7 @@ def tearDown(self): try: u = db.User.select(db.User).where(db.User.email==self.USER).get() try: - u.access.get().delete_instance() + u.dataset_access.get().delete_instance() except peewee.PeeweeException: pass try: diff --git a/settings_sample.json b/settings_sample.json index a42391034..7eef537e3 100644 --- a/settings_sample.json +++ b/settings_sample.json @@ -5,11 +5,17 @@ "googleSecret" : "a secret from google", "redirectUri" : "https://google oauth redirect uri", - "postgresHost": "postgres host", - "postgresPort": 5432, - "postgresUser": "postgres", - "postgresPass": "", - "postgresName": "", + "mysqlHost" : "127.0.0.1", + "mysqlPasswd" : "password", + "mysqlSchema" : "swefreq", + "mysqlUser" : "swefreq", + "mysqlPort" : 3306, + + "postgresHost" : "postgres host", + "postgresPort" : 5432, + "postgresUser" : "postgres", + "postgresPass" : "", + "postgresName" : "", "mongoHost" : "mongodb host", "mongoPassword" : "password", diff --git a/sql/beacon_schema.sql b/sql/beacon_schema.sql deleted file mode 100644 index a43a41af6..000000000 --- a/sql/beacon_schema.sql +++ /dev/null @@ -1,47 +0,0 @@ -------------------------------------------------------------------------------- --- --- - --------------------------------------------------------------------------------- --- Beacon consent codes. --- --- These tables are only used by the beacon, and are thus copied directly from --- the default beacon schema. - -CREATE TABLE beacon.consent_code_category_table ( - id serial PRIMARY KEY, - name character varying(11) -); - -INSERT INTO beacon.consent_code_category_table(name) VALUES ('PRIMARY'); -INSERT INTO beacon.consent_code_category_table(name) VALUES ('SECONDARY'); -INSERT INTO beacon.consent_code_category_table(name) VALUES ('REQUIREMENT'); - -CREATE TABLE beacon.consent_code_table ( - id serial PRIMARY KEY, - name character varying(100) NOT NULL, - abbr character varying(20) NOT NULL, - description character varying(400) NOT NULL, - additional_constraint_required boolean NOT NULL, - category_id int NOT NULL REFERENCES beacon.consent_code_category_table(id) -); - -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('No restrictions', 'NRES', 'No restrictions on data use.', false, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('General research use and clinical care', 'GRU(CC)', 'For health/medical/biomedical purposes, including the study of population origins or ancestry.', false, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Health/medical/biomedical research and clinical care', 'HMB(CC)', 'Use of the data is limited to health/medical/biomedical purposes; does not include the study of population origins or ancestry.', false, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Disease-specific research and clinical care', 'DS-[XX](CC)', 'Use of the data must be related to [disease].', true, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Population origins/ancestry research', 'POA', 'Use of the data is limited to the study of population origins or ancestry.', false, 1); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Oher research-specific restrictions', 'RS-[XX]', 'Use of the data is limited to studies of [research type] (e.g., pediatric research).', true, 2); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Research use only', 'RUO', 'Use of data is limited to research purposes (e.g., does not include its use in clinical care).', false, 2); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('No “general methods” research', 'NMDS', 'Use of the data includes methods development research (e.g., development of software or algorithms) ONLY within the bounds of other data use limitations.', false, 2); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Genetic studies only', 'GSO', 'Use of the data is limited to genetic studies only (i.e., no “phenotype-only” research).', false, 2); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Not-for-profit use only', 'NPU', 'Use of the data is limited to not-for-profit organizations.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Publication required', 'PUB', 'Requestor agrees to make results of studies using the data available to the larger scientific community.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Collaboration required', 'COL-[XX]', 'Requestor must agree to collaboration with the primary study investigator(s).', true, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Ethics approval required', 'IRB', 'Requestor must provide documentation of local IRB/REC approval.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Geographical restrictions', 'GS-[XX]', 'Use of the data is limited to within [geographic region].', true, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Publication moratorium/embargo', 'MOR-[XX]', 'Requestor agrees not to publish results of studies until [date].', true, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Time limits on use', 'TS-[XX]', 'Use of data is approved for [x months].', true, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('User-specific restrictions', 'US', 'Use of data is limited to use by approved users.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Project-specific restrictions', 'PS', 'Use of data is limited to use within an approved project.', false, 3); -INSERT INTO beacon.consent_code_table(name, abbr, description, additional_constraint_required, category_id) VALUES ('Institution-specific restrictions', 'IS', 'Use of data is limited to use within an approved institution.', false, 3); diff --git a/sql/data_schema.sql b/sql/data_schema.sql index 94fcd2d42..3b192034c 100644 --- a/sql/data_schema.sql +++ b/sql/data_schema.sql @@ -5,6 +5,7 @@ -- (reference-data, variants, and coverage) the goes into the Swefreq system. -- -- -- -------------------------------------------------------------------------------- +CREATE SCHEMA IF NOT EXISTS data; -------------------------------------------------------------------------------- -- dbSNP tables. @@ -22,13 +23,12 @@ CREATE TABLE IF NOT EXISTS data.dbsnp_versions ( CREATE TABLE IF NOT EXISTS data.dbsnp ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, version_id integer REFERENCES data.dbsnp_versions, - rsid bigint UNIQUE, + rsid bigint, chrom varchar(10), - pos integer + pos integer, + UNIQUE(version_id, rsid) ); -CREATE INDEX IF NOT EXISTS rsid_index ON data.dbsnp USING hash (rsid); - -------------------------------------------------------------------------------- -- Reference Set tables -- @@ -36,6 +36,7 @@ CREATE INDEX IF NOT EXISTS rsid_index ON data.dbsnp USING hash (rsid); CREATE TABLE IF NOT EXISTS data.reference_sets ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, dbsnp_version integer REFERENCES data.dbsnp_versions, + reference_build varchar UNIQUE, -- should be ^(GRCh[0-9]+([.]p[0-9]+)?)$ reference_name varchar, ensembl_version varchar, gencode_version varchar, @@ -49,7 +50,6 @@ CREATE TABLE IF NOT EXISTS data.genes ( gene_id varchar(15), gene_name varchar, full_name varchar, - other_names varchar[], canonical_transcript varchar(15), chrom varchar(10), start_pos integer, @@ -57,6 +57,12 @@ CREATE TABLE IF NOT EXISTS data.genes ( strand varchar ); +CREATE TABLE IF NOT EXISTS data.gene_other_names ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + gene integer REFERENCES data.genes, + name varchar +); + CREATE TABLE IF NOT EXISTS data.transcripts ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, transcript_id varchar(15), @@ -111,6 +117,7 @@ CREATE TABLE IF NOT EXISTS data.datasets ( full_name varchar(100) NOT NULL, browser_uri varchar(200) DEFAULT NULL, beacon_uri varchar(200) DEFAULT NULL, + beacon_description text DEFAULT NULL, avg_seq_depth real DEFAULT NULL, seq_type varchar(50) DEFAULT NULL, seq_tech varchar(50) DEFAULT NULL, @@ -143,7 +150,9 @@ CREATE TABLE IF NOT EXISTS data.dataset_versions ( available_from timestamp DEFAULT current_timestamp, ref_doi varchar(100) DEFAULT NULL, data_contact_name varchar(100) DEFAULT NULL, - data_contact_link varchar(100) DEFAULT NULL + data_contact_link varchar(100) DEFAULT NULL, + num_variants integer DEFAULT NULL, + coverage_levels integer[] DEFAULT NULL ); CREATE TABLE IF NOT EXISTS data.dataset_files ( @@ -161,14 +170,13 @@ CREATE TABLE IF NOT EXISTS data.dataset_files ( CREATE TABLE IF NOT EXISTS data.variants ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, dataset_version integer REFERENCES data.dataset_versions, + variant_type varchar, -- variants go here `"enum": ["DEL", "INS", "DUP", "INV", "CNV", "SNP", "DUP:TANDEM", "DEL:ME", "INS:ME"]` rsid integer, chrom varchar(10), pos integer, ref varchar, alt varchar, site_quality real, - genes varchar[], - transcripts varchar[], orig_alt_alleles varchar[], hom_count integer, allele_freq real, @@ -180,6 +188,18 @@ CREATE TABLE IF NOT EXISTS data.variants ( vep_annotations jsonb ); +CREATE TABLE IF NOT EXISTS data.variant_genes ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + variant integer REFERENCES data.variants, + gene integer REFERENCES data.genes +); + +CREATE TABLE IF NOT EXISTS data.variant_transcripts ( + id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, + variant integer REFERENCES data.variants, + transcript integer REFERENCES data.transcripts +); + CREATE TABLE IF NOT EXISTS data.coverage ( id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, dataset_version integer REFERENCES data.dataset_versions, @@ -187,15 +207,7 @@ CREATE TABLE IF NOT EXISTS data.coverage ( pos integer, mean real, median real, - cov1 real, - cov5 real, - cov10 real, - cov15 real, - cov20 real, - cov25 real, - cov30 real, - cov50 real, - cov100 real + coverage real[] ); CREATE TABLE IF NOT EXISTS data.metrics ( @@ -205,3 +217,25 @@ CREATE TABLE IF NOT EXISTS data.metrics ( mids integer[], hist integer ); + +-------------------------------------------------------------------------------- +-- Data views +-- + +CREATE OR REPLACE VIEW data.dataset_version_current AS + SELECT * FROM data.dataset_versions + WHERE (dataset, id) + IN (SELECT dataset, MAX(id) FROM data.dataset_versions + WHERE available_from < now() + GROUP BY dataset); + +-------------------------------------------------------------------------------- +-- Indexes +-- + +CREATE INDEX variant_pos ON data.variants (pos); +CREATE INDEX dbsnp_chrom_pos ON data.dbsnp (chrom, pos); +CREATE INDEX coverage_pos_chrom ON data.coverage (chrom, pos); +CREATE INDEX variants_rsid ON data.variants (rsid); +CREATE INDEX variants_chrom_pos ON data.variants (chrom, pos); +CREATE INDEX transcripts_transcript_id ON data.transcripts (transcript_id); diff --git a/sql/schema.dot b/sql/schema.dot deleted file mode 100644 index c085353e6..000000000 --- a/sql/schema.dot +++ /dev/null @@ -1,108 +0,0 @@ -digraph { - ranksep=1 nodesep=1 rankdir=BT - node [ shape = none ] - - user [ label=< - - - - - - -
user
user_pkint
nametext|null
emailtext
affiliationtext|null
countrytext|null
> ]; - - dataset [ label=< - - - - - - - - - - - -
dataset
dataset_pkint
sample_set_pkint
name text
browser_uri text|null
beacon_uri text|null
avg_seq_depth float|null
seq_type text|null
seq_tech text|null
seq_center text|null
dataset_size uint|null
> ]; - - user_log [ label=< - - - - - - -
user_log
user_log_pkint
user_pkint
dataset_pkint
actionenum|null
tstimestamp
> ]; - - dataset_access [ label=< - - - - - - - - -
dataset_access
dataset_access_pkint
dataset_pkint
user_pkint
wants_newsletterbool|false
is_adminbool|false
has_consentedbool|false
has_accessbool|false
> ]; - - - dataset_logo [ label=< - - - - - -
dataset_logo
dataset_logo_pkint
dataset_pkint
mimetypetext
datablob
> ]; - - dataset_version [ label=< - - - - - - - - - -
dataset_version
dataset_version_pkint
dataset_pkint
versiontext
tstimestamp
is_currentbool|true
descriptiontext
termstext
var_call_reftext|null
> ]; - - dataset_file [ label=< - - - - - -
dataset_file
dataset_file_pkint
dataset_version_pkint
nametext
uritext
> ]; - - study [ label=< - - - - - - - - - - -
study
study_pkint
pi_nametext
pi_emailtext
contact_nametext
contact_emailtext
titletext
descriptiontext|null
tstimestamp
ref_doitext|null
> ]; - - sample_set [ label=< - - - - - - -
sample_set
sample_set_pkint
study_pkint
ethnicitytext|null
collectiontext|null
sample_sizeint
> ]; - - sample_set:study_pk -> study:pk; - dataset:sample_set_pk -> sample_set:pk; - user_log:user_pk -> user:pk; - dataset_access:user -> user:pk - dataset_access:dataset -> dataset:pk - dataset_version:dataset -> dataset:pk - dataset_file:dv -> dataset_version:pk - user_log:dataset -> dataset:pk - dataset_logo:dataset -> dataset:pk -} diff --git a/sql/schema.dot.png b/sql/schema.dot.png deleted file mode 100644 index aa918f5ea..000000000 Binary files a/sql/schema.dot.png and /dev/null differ diff --git a/sql/swefreq.sql b/sql/swefreq.sql deleted file mode 100644 index b253e21a0..000000000 --- a/sql/swefreq.sql +++ /dev/null @@ -1,262 +0,0 @@ --- Script for creating the swefreq tables. To run this file use: --- mysql databasename revoked.ts) - GROUP BY granted.user_pk, granted.dataset_pk, granted.action - ); - -CREATE OR REPLACE VIEW dataset_access_pending AS - SELECT DISTINCT - access.*, - FALSE AS has_access, - request.ts AS access_requested - FROM dataset_access AS access - JOIN ( SELECT user_pk, dataset_pk, MAX(ts) AS ts - FROM user_access_log WHERE action = "access_requested" - GROUP BY user_pk, dataset_pk ) AS request - ON access.user_pk = request.user_pk AND - access.dataset_pk = request.dataset_pk - WHERE (access.user_pk, access.dataset_pk) IN ( - -- get user_pk for all users that have pending access requests - SELECT requested.user_pk, requested.dataset_pk - FROM _user_access_log_summary AS requested - LEFT JOIN _user_access_log_summary AS granted - ON requested.user_pk = granted.user_pk AND - requested.dataset_pk = granted.dataset_pk AND - granted.action = 'access_granted' - LEFT JOIN _user_access_log_summary AS revoked - ON requested.user_pk = revoked.user_pk AND - requested.dataset_pk = revoked.dataset_pk AND - revoked.action = 'access_revoked' - WHERE requested.action = 'access_requested' AND - (granted.user_pk IS NULL OR requested.ts > granted.ts) AND - (revoked.user_pk IS NULL OR requested.ts > revoked.ts) - GROUP BY requested.user_pk, requested.dataset_pk, requested.action - ); - -CREATE TABLE IF NOT EXISTS dataset_logo ( - dataset_logo_pk INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT, - dataset_pk INTEGER NOT NULL, - mimetype VARCHAR(50) NOT NULL, - data MEDIUMBLOB NOT NULL, - CONSTRAINT UNIQUE (dataset_pk), - CONSTRAINT FOREIGN KEY (dataset_pk) REFERENCES dataset(dataset_pk) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; - -CREATE TABLE IF NOT EXISTS linkhash ( - linkhash_pk INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT, - dataset_version_pk INTEGER NOT NULL, - user_pk INTEGER NOT NULL, - hash VARCHAR(64) NOT NULL, - expires_on TIMESTAMP NOT NULL, - CONSTRAINT UNIQUE (hash), - CONSTRAINT FOREIGN KEY (dataset_version_pk) - REFERENCES dataset_version(dataset_version_pk), - CONSTRAINT FOREIGN KEY (user_pk) REFERENCES user(user_pk) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; - --- dataset_version_current, a view that only contains the (most) current --- version of each entry dataset_version - -CREATE OR REPLACE VIEW dataset_version_current AS - SELECT * FROM dataset_version - WHERE (dataset_pk, dataset_version_pk) IN ( - SELECT dataset_pk, MAX(dataset_version_pk) FROM dataset_version - WHERE available_from < now() - GROUP BY dataset_pk ); diff --git a/sql/user_schema.sql b/sql/user_schema.sql index 6ca1f32f8..ccefa8197 100644 --- a/sql/user_schema.sql +++ b/sql/user_schema.sql @@ -6,6 +6,8 @@ -- -- -------------------------------------------------------------------------------- +CREATE SCHEMA IF NOT EXISTS users; + -------------------------------------------------------------------------------- -- User fields -- @@ -73,3 +75,69 @@ CREATE TABLE IF NOT EXISTS users.user_download_log ( dataset_file integer NOT NULL REFERENCES data.dataset_files, ts timestamp NOT NULL DEFAULT current_timestamp ); + +-------------------------------------------------------------------------------- +-- User views +-- + +CREATE OR REPLACE VIEW users.user_access_log_summary AS + SELECT MAX(id) AS id, + user_id, + dataset, + "action", + MAX(ts) AS ts + FROM users.user_access_log + GROUP BY user_id, dataset, "action" +; + +CREATE OR REPLACE VIEW users.dataset_access_current AS + SELECT DISTINCT + access.*, + TRUE AS has_access, + request.ts AS access_requested + FROM users.dataset_access AS access + JOIN ( SELECT user_id, dataset, MAX(ts) AS ts + FROM users.user_access_log WHERE action = 'access_requested' + GROUP BY user_id, dataset ) AS request + ON access.user_id = request.user_id AND + access.dataset = request.dataset + WHERE (access.user_id, access.dataset) IN ( + SELECT granted.user_id, granted.dataset + FROM users.user_access_log_summary AS granted + LEFT JOIN users.user_access_log_summary AS revoked + ON granted.user_id = revoked.user_id AND + granted.dataset = revoked.dataset AND + revoked.action = 'access_revoked' + WHERE granted.action = 'access_granted' AND + (revoked.user_id IS NULL OR granted.ts > revoked.ts) + GROUP BY granted.user_id, granted.dataset, granted.action + ); + +CREATE OR REPLACE VIEW users.dataset_access_pending AS + SELECT DISTINCT + access.*, + FALSE AS has_access, + request.ts AS access_requested + FROM users.dataset_access AS access + JOIN ( SELECT user_id, dataset, MAX(ts) AS ts + FROM users.user_access_log WHERE action = 'access_requested' + GROUP BY user_id, dataset ) AS request + ON access.user_id = request.user_id AND + access.dataset = request.dataset + WHERE (access.user_id, access.dataset) IN ( + -- get user_id for all users that have pending access requests + SELECT requested.user_id, requested.dataset + FROM users.user_access_log_summary AS requested + LEFT JOIN users.user_access_log_summary AS granted + ON requested.user_id = granted.user_id AND + requested.dataset = granted.dataset AND + granted.action = 'access_granted' + LEFT JOIN users.user_access_log_summary AS revoked + ON requested.user_id = revoked.user_id AND + requested.dataset = revoked.dataset AND + revoked.action = 'access_revoked' + WHERE requested.action = 'access_requested' AND + (granted.user_id IS NULL OR requested.ts > granted.ts) AND + (revoked.user_id IS NULL OR requested.ts > revoked.ts) + GROUP BY requested.user_id, requested.dataset, requested.action + ); diff --git a/test/data/clean_dummy_data.sql b/test/data/clean_dummy_data.sql index 1e9e3dba5..6355b8491 100644 --- a/test/data/clean_dummy_data.sql +++ b/test/data/clean_dummy_data.sql @@ -1,21 +1,23 @@ -- Delete test data -DELETE FROM user_access_log WHERE user_pk > 1000000 OR dataset_pk > 1000000; -DELETE FROM dataset_access WHERE user_pk > 1000000 OR dataset_pk > 1000000; -DELETE FROM user WHERE user_pk > 1000000; -DELETE FROM dataset_file WHERE dataset_file_pk > 1000000; -DELETE FROM dataset_version WHERE dataset_pk > 1000000; -DELETE FROM sample_set WHERE sample_set_pk > 1000000; -DELETE FROM collection WHERE collection_pk > 1000000; -DELETE FROM dataset WHERE dataset_pk > 1000000; -DELETE FROM study WHERE study_pk > 1000000; +DELETE FROM users.user_access_log WHERE id > 1000000 OR dataset > 1000000; +DELETE FROM users.dataset_access WHERE id > 1000000 OR dataset > 1000000; +DELETE FROM users.users WHERE id > 1000000; +DELETE FROM data.dataset_files WHERE id > 1000000; +DELETE FROM data.dataset_versions WHERE id > 1000000; +DELETE FROM data.sample_sets WHERE id > 1000000; +DELETE FROM data.datasets WHERE id > 1000000; +DELETE FROM data.reference_sets WHERE id > 1000000; +DELETE FROM data.dbsnp_versions WHERE id > 1000000; +DELETE FROM data.collections WHERE id > 1000000; +DELETE FROM data.studies WHERE id > 1000000; -- Reset auto increment counters -ALTER TABLE user_access_log AUTO_INCREMENT = 1; -ALTER TABLE dataset_access AUTO_INCREMENT = 1; -ALTER TABLE user AUTO_INCREMENT = 1; -ALTER TABLE dataset_file AUTO_INCREMENT = 1; -ALTER TABLE dataset_version AUTO_INCREMENT = 1; -ALTER TABLE collection AUTO_INCREMENT = 1; -ALTER TABLE sample_set AUTO_INCREMENT = 1; -ALTER TABLE dataset AUTO_INCREMENT = 1; -ALTER TABLE study AUTO_INCREMENT = 1; +ALTER SEQUENCE data.dataset_files_id_seq RESTART WITH 1; +ALTER SEQUENCE data.dataset_versions_id_seq RESTART WITH 1; +ALTER SEQUENCE data.collections_id_seq RESTART WITH 1; +ALTER SEQUENCE data.sample_sets_id_seq RESTART WITH 1; +ALTER SEQUENCE data.datasets_id_seq RESTART WITH 1; +ALTER SEQUENCE data.studies_id_seq RESTART WITH 1; +ALTER SEQUENCE users.users_id_seq RESTART WITH 1; +ALTER SEQUENCE users.user_access_log_id_seq RESTART WITH 1; +ALTER SEQUENCE users.dataset_access_id_seq RESTART WITH 1; diff --git a/test/data/load_dummy_data.sql b/test/data/load_dummy_data.sql index d8238201b..1277d8796 100644 --- a/test/data/load_dummy_data.sql +++ b/test/data/load_dummy_data.sql @@ -1,35 +1,49 @@ -INSERT INTO study (study_pk, pi_name, pi_email, contact_name, contact_email, title, description, publication_date, ref_doi) +-- dbSNP tables. + +INSERT INTO data.dbsnp_versions (id, version_id) + VALUES (1000001, 'dummy 1'), + (1000002, 'dummy 2'); + +-- Reference Set tables + +INSERT INTO data.reference_sets (id, dbsnp_version, reference_build, reference_name, ensembl_version, gencode_version, dbnsfp_version, omim_version) + VALUES (1000001, 1000002, 'GRCh1p2', 'Dummyman', 'homo_sapiens_core_0_3', '11', 'b142', 'ominfo'), + (1000002, 1000001, 'GRCh2p1', 'Mummydam', 'homo_sapiens_core_1_2', '23', 'b131', 'omimeme'); + +-- Study and Dataset fields + +INSERT INTO data.studies (id, pi_name, pi_email, contact_name, contact_email, title, study_description, publication_date, ref_doi) VALUES (1000001, 'PI_STUDY1', 'pi1@example.com', 'Contact Study 1', 'contact1@example.com', 'Study 1', 'Study 1 description', '2017-01-01', 'study1DOI'), (1000002, 'PI_STUDY2', 'pi2@example.com', 'Contact Study 2', 'contact2@example.com', 'Study 2', 'Study 2 description', '2017-02-01', 'study2DOI'); -INSERT INTO collection (collection_pk, name, ethnicity) VALUES +INSERT INTO data.collections (id, study_name, ethnicity) VALUES (1000001, 'Collection1', 'CollEth1'), (1000002, 'Collection2', 'CollEth2'), (1000003, 'Collection3', 'CollEth3'); -INSERT INTO dataset (dataset_pk, study_pk, short_name, full_name, browser_uri, beacon_uri, avg_seq_depth, seq_type, seq_tech, seq_center, dataset_size, mongodb_collection) - VALUES (1000001, 1000001, 'Dataset 1', 'Dataset 1 Long name', 'http://example.com/browser1', 'http://example.com/beacon1', 1.0, 'SeqType1', 'SeqTech1', 'SeqCenter1', 1001, 'na'), - (1000002, 1000002, 'Dataset 2', 'Dataset 2 Long name', 'http://example.com/browser2', 'http://example.com/beacon2', 2.0, 'SeqType2', 'SeqTech2', 'SeqCenter2', 1002, 'na'); +INSERT INTO data.datasets (id, study, reference_set, short_name, full_name, browser_uri, beacon_uri, beacon_description, avg_seq_depth, seq_type, seq_tech, seq_center, dataset_size) + VALUES (1000001, 1000001, 1000001, 'Dataset 1', 'Dataset 1 Long name', 'http://example.com/browser1', 'http://example.com/beacon1', 'Dummy Dataset 1', 1.0, 'SeqType1', 'SeqTech1', 'SeqCenter1', 1001), + (1000002, 1000002, 1000002, 'Dataset 2', 'Dataset 2 Long name', 'http://example.com/browser2', 'http://example.com/beacon2', 'Dummy Dataset 2', 2.0, 'SeqType2', 'SeqTech2', 'SeqCenter2', 1002); -INSERT INTO sample_set (sample_set_pk, dataset_pk, collection_pk, sample_size, phenotype) +INSERT INTO data.sample_sets (id, dataset, "collection", sample_size, phenotype) VALUES (1000001, 1000001, 1000001, 10, 'SamplePheno1'), (1000002, 1000001, 1000002, 15, 'SamplePheno2 Coll1'), (1000003, 1000002, 1000003, 20, 'SamplePheno2 Coll2'); -INSERT INTO dataset_version (dataset_version_pk, dataset_pk, version, description, terms, var_call_ref, available_from, ref_doi, data_contact_name, data_contact_link) - VALUES (1000001, 1000001, 'Version 1-1', 'Dataset 1-1, description', 'Dataset 1-1, terms', 'CallRef11', '2017-01-01', 'datset11DOI', "Gunnar Green", "gunnar.green@example.com"), - (1000002, 1000002, 'Version 2-1', 'Dataset 2-1, description', 'Dataset 2-1, terms', 'CallRef21', '2017-02-01', 'datset21DOI', NULL, NULL), - (1000003, 1000002, 'Version 2-2', 'Dataset 2-2, description', 'Dataset 2-2, terms', 'CallRef22', '2017-02-02', 'datset22DOI', "Strummer project", "https://example.com/strummer"), - (1000004, 1000002, 'InvVer 2-3', 'Dataset 2-3, description', 'Dataset 2-3, terms', 'CallRef23', '2030-02-03', 'datset23DOI', "Drummer project", "https://example.com/drummer"); +INSERT INTO data.dataset_versions (id, dataset, dataset_version, dataset_description, terms, var_call_ref, available_from, ref_doi, data_contact_name, data_contact_link, num_variants, coverage_levels) + VALUES (1000001, 1000001, 'Version 1-1', 'Dataset 1-1, description', 'Dataset 1-1, terms', 'CallRef11', '2017-01-01', 'datset11DOI', 'Gunnar Green', 'gunnar.green@example.com', 10, ARRAY[1,5,10]), + (1000002, 1000002, 'Version 2-1', 'Dataset 2-1, description', 'Dataset 2-1, terms', 'CallRef21', '2017-02-01', 'datset21DOI', NULL, NULL, 100, ARRAY[1,5,10]), + (1000003, 1000002, 'Version 2-2', 'Dataset 2-2, description', 'Dataset 2-2, terms', 'CallRef22', '2017-02-02', 'datset22DOI', 'Strummer project', 'https://example.com/strummer', 1000, ARRAY[1,5,10]), + (1000004, 1000002, 'InvVer 2-3', 'Dataset 2-3, description', 'Dataset 2-3, terms', 'CallRef23', '2030-02-03', 'datset23DOI', 'Drummer project', 'https://example.com/drummer', 10000, ARRAY[1,5,10]); -INSERT INTO dataset_file(dataset_file_pk, dataset_version_pk, name, uri, bytes) +INSERT INTO data.dataset_files(id, dataset_version, basename, uri, file_size) VALUES (1000001, 1000001, 'File11-1', '/release/file111.txt', 100), (1000002, 1000001, 'File11-2', '/release/file112.txt', 100000), (1000003, 1000002, 'File21-1', '/release/file211.txt', 1000000000), (1000004, 1000003, 'File22-1', '/release/file221.txt', 973826482736), (1000005, 1000004, 'File23-1', '/release/file231.txt', 239847293874293874); -INSERT INTO user(user_pk, name, email, affiliation, country, identity, identity_type) VALUES +INSERT INTO users.users(id, username, email, affiliation, country, identity, identity_type) VALUES (1000100, 'Not req yet', 'email0', 'i', '', 'email0', 'elixir'), (1000101, 'Requested access', 'email1', 'w1', '', 'email1', 'google'), (1000102, 'Approved access', 'email2', 'c1', '', 'email2', 'elixir'), @@ -47,15 +61,15 @@ INSERT INTO user(user_pk, name, email, affiliation, country, identity, identity_ (1000114, 'Admin2', 'admin2', 'Rootspace', '', 'admin2', 'elixir'), (1000115, 'Admin12', 'admin12', 'Rootspace', '', 'admin12', 'google'); -INSERT INTO dataset_access(user_pk, dataset_pk) VALUES +INSERT INTO users.dataset_access(user_id, dataset) VALUES (1000100, 1000001), (1000101, 1000001), (1000102, 1000001), (1000103, 1000001), (1000104, 1000001), (1000105, 1000001), (1000106, 1000001), (1000107, 1000001), (1000108, 1000001), (1000108, 1000002), (1000109, 1000001), (1000109, 1000002), (1000110, 1000001), (1000110, 1000002), (1000111, 1000001), (1000111, 1000002), (1000112, 1000001), (1000112, 1000002); -INSERT INTO dataset_access(user_pk, dataset_pk, is_admin) VALUES - (1000113, 1000001, 1), (1000114, 1000002, 1), (1000115, 1000001, 1), (1000115, 1000002, 1); +INSERT INTO users.dataset_access(user_id, dataset, is_admin) VALUES + (1000113, 1000001, TRUE), (1000114, 1000002, TRUE), (1000115, 1000001, TRUE), (1000115, 1000002, TRUE); -INSERT INTO user_access_log(user_pk, dataset_pk, action, ts) VALUES +INSERT INTO users.user_access_log(user_id, dataset, "action", ts) VALUES (1000101, 1000001, 'access_requested', '2017-01-01'), (1000102, 1000001, 'access_requested', '2017-01-02'), (1000103, 1000001, 'access_requested', '2017-01-03'), @@ -104,13 +118,20 @@ INSERT INTO user_access_log(user_pk, dataset_pk, action, ts) VALUES (1000115, 1000002, 'access_requested', '2017-02-15'), (1000115, 1000002, 'access_granted', '2017-02-16'); -SELECT "Waiting", user.name, user.affiliation as visibility, user.user_pk, - dataset_access_pending.dataset_pk, - dataset_access_pending.dataset_access_pk -FROM dataset_access_pending JOIN user ON (dataset_access_pending.user_pk = user.user_pk) -WHERE dataset_pk > 1000000; +SELECT 'Waiting', users.users.username, users.users.affiliation AS visibility, + users.users.id, users.dataset_access_pending.dataset, + users.dataset_access_pending.id + FROM users.dataset_access_pending + JOIN users.users + ON (users.dataset_access_pending.user_id = users.users.id) + WHERE dataset > 1000000; + +SELECT 'Current', users.users.username, users.users.affiliation AS visibility, + users.users.id, users.dataset_access_current.dataset, + users.dataset_access_current.id + FROM users.dataset_access_current + JOIN users.users + ON (users.dataset_access_current.user_id = users.users.id) + WHERE dataset > 1000000; -SELECT "Current", user.name, user.affiliation as visibility, user.user_pk, dataset_access_current.dataset_pk, - dataset_access_current.dataset_access_pk -FROM dataset_access_current JOIN user ON (dataset_access_current.user_pk = user.user_pk) -WHERE dataset_pk > 1000000; +-- Variant and coverage data fields diff --git a/test/travis_before_install.sh b/test/travis_before_install.sh index 3e5190a9e..07592decd 100755 --- a/test/travis_before_install.sh +++ b/test/travis_before_install.sh @@ -1,11 +1,9 @@ #!/usr/bin/env bash set -x -docker pull mysql:5.7 -docker pull ubuntu:16.04 +PSQL_VERSION="10" +PSQL_PORT="5433" -VOLUME='mysql-data-volume' -MYSQL_PORT=3366 +docker pull postgres:"${PSQL_VERSION}" -scripts/download_and_create_docker_db_volume.sh -docker run -v $VOLUME:/var/lib/mysql --rm --name mysql -d -p $MYSQL_PORT:3306 mysql:5.7 +docker run --rm -d -p $PSQL_PORT:5432 postgres:"${PSQL_VERSION}" diff --git a/test/travis_script.sh b/test/travis_script.sh index 2ec534f29..046f4752f 100755 --- a/test/travis_script.sh +++ b/test/travis_script.sh @@ -3,9 +3,8 @@ set -xe ## SETUP SETTINGS cp settings_sample.json settings.json -sed -i 's/password//' settings.json -sed -i 's/"mysqlSchema" : "swefreq"/"mysqlSchema" : "swefreq_test"/' settings.json -sed -i 's/"mysqlPort" : 3306/"mysqlPort" : 3366/' settings.json +sed -i.tmp 's/"postgresHost" : "postgres host"/"postgresHost" : "127.0.0.1"/' settings.json +sed -i.tmp 's/"postgresPort" : 5432/"postgresPort" : 5433/' settings.json echo "SETTINGS" cat settings.json @@ -14,21 +13,22 @@ echo "/SETTINGS" echo ">>> Test 1. The SQL Patch" LATEST_RELEASE=$(git tag | grep '^v' | sort -V | tail -n 1) -git show ${LATEST_RELEASE}:sql/swefreq.sql > master-schema.sql +git show ${LATEST_RELEASE}:sql/*_schema.sql > master-schema.sql + +psql -U postgres -h 127.0.0.1 -p 5433 -f master-schema.sql +psql -U postgres -h 127.0.0.1 -p 5433 -f sql/patch-master-db.sql -mysql -u swefreq -h 127.0.0.1 -P 3366 swefreq_test < master-schema.sql -mysql -u swefreq -h 127.0.0.1 -P 3366 swefreq_test < sql/patch-master-db.sql # Empty the database -mysql -u swefreq -h 127.0.0.1 -P 3366 swefreq_test <<__END__ -DROP DATABASE swefreq_test; -CREATE DATABASE swefreq_test; +psql -U postgres -h 127.0.0.1 -p 5433 <<__END__ +DROP SCHEMA data; +DROP SCHEMA users; __END__ echo ">>> Test 2. Load the swefreq schema" -mysql -u swefreq -h 127.0.0.1 -P 3366 swefreq_test < sql/swefreq.sql -mysql -u swefreq -h 127.0.0.1 -P 3366 swefreq_test < test/data/load_dummy_data.sql - +psql -U postgres -h 127.0.0.1 -p 5433 -f sql/data_schema.sql +psql -U postgres -h 127.0.0.1 -p 5433 -f sql/user_schema.sql +psql -U postgres -h 127.0.0.1 -p 5433 -f test/data/load_dummy_data.sql echo ">>> Test 3. Check that the backend starts" @@ -44,7 +44,7 @@ sleep 2 # Lets wait a little bit so the server has started exit_handler() { rv=$? # Ignore errors in the exit handler - set +e + set +e # We want to make sure the background process has stopped, otherwise the # travis test will stall for a long time. kill -9 $BACKEND_PID