diff --git a/data/genomics/homo_sapiens/README.md b/data/genomics/homo_sapiens/README.md index 7aabb60e1596..fc0bdcd4c0c4 100644 --- a/data/genomics/homo_sapiens/README.md +++ b/data/genomics/homo_sapiens/README.md @@ -111,7 +111,7 @@ Downloaded the gtf and gff3 files from Ensembl: 5. Change chromosome name to `chr22` 6. Replace spaces with tabs -7. The coordinates in `genome.gtf` were adapted to start from 1 +7. The coordinates in `genome.gtf` were adapted to start from 1. The `ENSG00000283633` gene and transcript entries that ended at coordinates >40000 were truncated to end at coordinate 40000, and the `TPTEP1` (`ENSG00000100181`) gene and transcript entries, which extended beyond coordinate 40000, were removed. ## Index files diff --git a/data/genomics/homo_sapiens/genome/genome.gtf b/data/genomics/homo_sapiens/genome/genome.gtf index ed2ae84a2c76..857300a6bab5 100644 --- a/data/genomics/homo_sapiens/genome/genome.gtf +++ b/data/genomics/homo_sapiens/genome/genome.gtf @@ -34,11 +34,7 @@ chr22 havana gene 3337 10681 . - . gene_id "ENSG00000239435"; gene_version "2"; chr22 havana transcript 3337 10681 . - . gene_id "ENSG00000239435"; gene_version "2"; transcript_id "ENST00000493696"; transcript_version "2"; gene_name "KCNMB3P1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "KCNMB3P1-202"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA"; chr22 havana_tagene gene 20869 28084 . + . gene_id "ENSG00000287285"; gene_version "1"; gene_name "AP000547.4"; gene_source "havana_tagene"; gene_biotype "lncRNA"; chr22 havana_tagene transcript 20869 28084 . + . gene_id "ENSG00000287285"; gene_version "1"; transcript_id "ENST00000656324"; transcript_version "1"; gene_name "AP000547.4"; gene_source "havana_tagene"; gene_biotype "lncRNA"; transcript_name "AP000547.4-201"; transcript_source "havana_tagene"; transcript_biotype "lncRNA"; tag "basic"; -chr22 havana gene 29885 43085 . + . 
gene_id "ENSG00000283633"; gene_version "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; -chr22 havana transcript 29885 43085 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592918"; transcript_version "5"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "1"; -chr22 havana transcript 29922 43085 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592107"; transcript_version "5"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-202"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "1"; -chr22 havana transcript 30024 42307 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000591299"; transcript_version "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-203"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2"; -chr22 havana gene 29861 126716 . + . gene_id "ENSG00000100181"; gene_version "22"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; -chr22 havana transcript 29861 126716 . + . gene_id "ENSG00000100181"; gene_version "22"; transcript_id "ENST00000558085"; transcript_version "6"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "TPTEP1-205"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic"; transcript_support_level "2"; -chr22 havana transcript 29885 76804 . + . 
gene_id "ENSG00000100181"; gene_version "22"; transcript_id "ENST00000400593"; transcript_version "6"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "TPTEP1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "1"; -chr22 havana transcript 30018 81783 . + . gene_id "ENSG00000100181"; gene_version "22"; transcript_id "ENST00000426585"; transcript_version "5"; gene_name "TPTEP1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "TPTEP1-204"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "1"; +chr22 havana gene 29885 40000 . + . gene_id "ENSG00000283633"; gene_version "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; +chr22 havana transcript 29885 40000 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592918"; transcript_version "5"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "1"; +chr22 havana transcript 29922 40000 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000592107"; transcript_version "5"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-202"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "1"; +chr22 havana transcript 30024 40000 . + . gene_id "ENSG00000283633"; gene_version "1"; transcript_id "ENST00000591299"; transcript_version "1"; gene_name "AP000547.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "AP000547.3-203"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2";