-
Notifications
You must be signed in to change notification settings - Fork 37
/
SOFA.obo
3243 lines (2941 loc) · 125 KB
/
SOFA.obo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
format-version: 1.2
subsetdef: Alliance_of_Genome_Resources "Alliance of Genome Resources Gene Biotype Slim"
subsetdef: biosapiens "biosapiens protein feature ontology"
subsetdef: DBVAR "database of genomic structural variation"
subsetdef: SOFA "SO feature annotation"
synonymtypedef: aa1 "amino acid 1 letter code"
synonymtypedef: aa3 "amino acid 3 letter code"
synonymtypedef: AAMOD "amino acid modification"
synonymtypedef: AGR "Alliance of Genome Resources"
synonymtypedef: BS "biosapiens"
synonymtypedef: dbsnp "dbsnp variant terms"
synonymtypedef: dbvar "DBVAR"
synonymtypedef: ebi_variants "ensembl variant terms"
synonymtypedef: RNAMOD "RNA modification" EXACT
synonymtypedef: VAR "variant annotation term"
ontology: so/subsets/SOFA
[Term]
id: SO:0000000
name: Sequence_Ontology
namespace: sequence
subset: SOFA
is_obsolete: true
[Term]
id: SO:0000001
name: region
namespace: sequence
def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke]
subset: SOFA
synonym: "sequence" EXACT []
is_a: SO:0000110 ! sequence_feature
[Term]
id: SO:0000004
name: interior_coding_exon
namespace: sequence
def: "A coding exon that is not the most 3-prime or the most 5-prime in a given transcript." []
subset: SOFA
synonym: "interior coding exon" EXACT []
is_a: SO:0000195 ! coding_exon
[Term]
id: SO:0000005
name: satellite_DNA
namespace: sequence
def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.insdc.org/files/feature_table.html]
subset: SOFA
synonym: "INSDC_feature:repeat_region" BROAD []
synonym: "INSDC_qualifier:satellite" EXACT []
synonym: "satellite DNA" EXACT []
xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki"
is_a: SO:0000705 ! tandem_repeat
[Term]
id: SO:0000006
name: PCR_product
namespace: sequence
def: "A region amplified by a PCR reaction." [SO:ke]
comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406.
subset: SOFA
synonym: "amplicon" RELATED []
synonym: "PCR product" EXACT []
xref: http://en.wikipedia.org/wiki/RAPD "wiki"
is_a: SO:0000695 ! reagent
[Term]
id: SO:0000007
name: read_pair
namespace: sequence
def: "One of a pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls]
subset: SOFA
synonym: "mate pair" EXACT []
synonym: "read-pair" EXACT []
is_a: SO:0000150 ! read
relationship: part_of SO:0000149 ! contig
relationship: part_of SO:0001790 ! paired_end_fragment
[Term]
id: SO:0000013
name: scRNA
namespace: sequence
def: "A small non coding RNA sequence, present in the cytoplasm." [SO:ke]
subset: SOFA
synonym: "INSDC_feature:ncRNA" BROAD []
synonym: "INSDC_qualifier:scRNA" EXACT []
synonym: "small cytoplasmic RNA" EXACT []
is_a: SO:0000655 ! ncRNA
relationship: derives_from SO:0000483 ! nc_primary_transcript
[Term]
id: SO:0000038
name: match_set
namespace: sequence
def: "A collection of match parts." [SO:ke]
subset: SOFA
is_obsolete: true
[Term]
id: SO:0000039
name: match_part
namespace: sequence
def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke]
subset: SOFA
synonym: "match part" EXACT []
is_a: SO:0001410 ! experimental_feature
relationship: part_of SO:0000343 ! match
[Term]
id: SO:0000050
name: gene_part
namespace: sequence
def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke]
subset: SOFA
is_obsolete: true
[Term]
id: SO:0000057
name: operator
namespace: sequence
def: "A regulatory element of an operon to which activators or repressors bind, thereby affecting transcription of genes in that operon." [SO:ma]
comment: Moved to transcriptional_cis_regulatory_region (SO:0001055) from gene_group_regulatory_region (SO:0000752) on 11 Feb 2021 when SO:0000752 was merged into SO:0001055. See GitHub Issue #529.
subset: SOFA
synonym: "operator segment" EXACT []
xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki"
is_a: SO:0001055 ! transcriptional_cis_regulatory_region
[Term]
id: SO:0000059
name: nuclease_binding_site
namespace: sequence
def: "A binding site, of a nucleotide molecule, that interacts selectively and non-covalently with polypeptide residues of a nuclease." [SO:cb]
subset: SOFA
synonym: "nuclease binding site" EXACT []
is_a: SO:0001654 ! nucleotide_to_protein_binding_site
[Term]
id: SO:0000101
name: transposable_element
namespace: sequence
def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html]
subset: SOFA
synonym: "transposable element" EXACT []
synonym: "transposon" EXACT []
xref: http://en.wikipedia.org/wiki/Transposable_element "wiki"
is_a: SO:0001039 ! integrated_mobile_genetic_element
[Term]
id: SO:0000102
name: expressed_sequence_match
namespace: sequence
def: "A match to an EST or cDNA sequence." [SO:ke]
subset: SOFA
synonym: "expressed sequence match" EXACT []
is_a: SO:0000347 ! nucleotide_match
[Term]
id: SO:0000103
name: clone_insert_end
namespace: sequence
def: "The end of the clone insert." [SO:ke]
subset: SOFA
synonym: "clone insert end" EXACT []
is_a: SO:0000699 ! junction
relationship: part_of SO:0000753 ! clone_insert
[Term]
id: SO:0000104
name: polypeptide
namespace: sequence
alt_id: SO:0000358
def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma]
comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute.
subset: SOFA
synonym: "protein" EXACT []
xref: http://en.wikipedia.org/wiki/Polypeptide "wiki"
is_a: SO:0001411 ! biological_region
relationship: derives_from SO:0000316 ! CDS
[Term]
id: SO:0000109
name: sequence_variant_obs
namespace: sequence
def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke]
subset: SOFA
synonym: "mutation" RELATED []
is_obsolete: true
[Term]
id: SO:0000110
name: sequence_feature
namespace: sequence
def: "Any extent of continuous biological sequence." [LAMHDI:mb, SO:ke]
subset: SOFA
synonym: "INSDC_feature:misc_feature" EXACT []
synonym: "INSDC_note:other" EXACT []
synonym: "INSDC_note:sequence_feature" EXACT []
synonym: "located sequence feature" RELATED []
synonym: "located_sequence_feature" EXACT []
synonym: "sequence feature" EXACT []
[Term]
id: SO:0000112
name: primer
namespace: sequence
def: "An oligo to which new deoxyribonucleotides can be added by DNA polymerase." [SO:ke]
subset: SOFA
synonym: "DNA primer" EXACT []
synonym: "primer oligonucleotide" EXACT []
synonym: "primer polynucleotide" EXACT []
synonym: "primer sequence" EXACT []
xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki"
is_a: SO:0000441 ! ss_oligo
[Term]
id: SO:0000113
name: proviral_region
namespace: sequence
def: "A viral sequence which has integrated into a host genome." [SO:ke]
subset: SOFA
synonym: "proviral region" EXACT []
synonym: "proviral sequence" RELATED []
is_a: SO:0001039 ! integrated_mobile_genetic_element
[Term]
id: SO:0000114
name: methylated_cytosine
namespace: sequence
def: "A methylated deoxy-cytosine." [SO:ke]
subset: SOFA
synonym: "methylated C" EXACT []
synonym: "methylated cytosine" EXACT []
synonym: "methylated cytosine base" EXACT []
synonym: "methylated cytosine residue" EXACT []
synonym: "methylated_C" EXACT []
is_a: SO:0000306 ! methylated_DNA_base_feature
[Term]
id: SO:0000120
name: protein_coding_primary_transcript
namespace: sequence
def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke]
comment: May contain introns.
subset: SOFA
synonym: "pre mRNA" RELATED []
synonym: "protein coding primary transcript" EXACT []
is_a: SO:0000185 ! primary_transcript
[Term]
id: SO:0000139
name: ribosome_entry_site
namespace: sequence
def: "Region in mRNA where ribosome assembles." [SO:ke]
subset: SOFA
synonym: "INSDC_feature:regulatory" BROAD []
synonym: "INSDC_qualifier:ribosome_binding_site" EXACT []
synonym: "ribosome entry site" EXACT []
is_a: SO:0000836 ! mRNA_region
relationship: part_of SO:0000204 ! five_prime_UTR
[Term]
id: SO:0000140
name: attenuator
namespace: sequence
def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as]
subset: SOFA
synonym: "attenuator sequence" EXACT []
synonym: "INSDC_feature:regulatory" BROAD []
synonym: "INSDC_qualifier:attenuator" EXACT []
xref: http://en.wikipedia.org/wiki/Attenuator "wiki"
is_a: SO:0005836 ! regulatory_region
relationship: part_of SO:0000234 ! mRNA
[Term]
id: SO:0000141
name: terminator
namespace: sequence
def: "The sequence of DNA located at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.insdc.org/files/feature_table.html]
comment: Moved from transcription_regulatory_region (SO:0001679) to transcriptional_cis_regulatory_region (SO:0001055) by Dave Sant on Feb 11, 2021 when transcription_regulatory_region was merged into transcriptional_cis_regulatory_region to be consistent with GO and reduce redundancy as part of the GREEKC consortium. See GitHub Issue #527.
subset: SOFA
synonym: "INSDC_feature:regulatory" BROAD []
synonym: "INSDC_qualifier:terminator" EXACT []
synonym: "terminator sequence" EXACT []
xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki"
is_a: SO:0001055 ! transcriptional_cis_regulatory_region
relationship: part_of SO:0000673 ! transcript
[Term]
id: SO:0000143
name: assembly_component
namespace: sequence
def: "A region of known length which may be used to manufacture a longer region." [SO:ke]
subset: SOFA
synonym: "assembly component" EXACT []
is_a: SO:0001410 ! experimental_feature
[Term]
id: SO:0000147
name: exon
namespace: sequence
def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke]
comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.
subset: SOFA
synonym: "INSDC_feature:exon" EXACT []
xref: http://en.wikipedia.org/wiki/Exon "wiki"
is_a: SO:0000833 ! transcript_region
[Term]
id: SO:0000148
name: supercontig
namespace: sequence
def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls]
subset: SOFA
synonym: "scaffold" RELATED []
is_a: SO:0000353 ! sequence_assembly
relationship: part_of SO:0000719 ! ultracontig
[Term]
id: SO:0000149
name: contig
namespace: sequence
def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls]
subset: SOFA
xref: http://en.wikipedia.org/wiki/Contig "wiki"
is_a: SO:0000143 ! assembly_component
is_a: SO:0000353 ! sequence_assembly
relationship: part_of SO:0000148 ! supercontig
[Term]
id: SO:0000150
name: read
namespace: sequence
def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd]
subset: SOFA
is_a: SO:0000143 ! assembly_component
relationship: part_of SO:0000149 ! contig
[Term]
id: SO:0000151
name: clone
namespace: sequence
def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke]
subset: SOFA
xref: http://en.wikipedia.org/wiki/Clone_(genetics) "wiki"
is_a: SO:0000695 ! reagent
[Term]
id: SO:0000159
name: deletion
namespace: sequence
alt_id: SO:1000033
def: "The point at which one or more contiguous nucleotides were excised." [SO:ke]
subset: SOFA
synonym: "deleted_sequence" EXACT []
synonym: "nucleotide deletion" EXACT []
synonym: "nucleotide_deletion" EXACT []
xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki"
xref: loinc:LA6692-3 "Deletion"
is_a: SO:0001059 ! sequence_alteration
is_a: SO:0001411 ! biological_region
[Term]
id: SO:0000161
name: methylated_adenine
namespace: sequence
def: "A modified base in which adenine has been methylated." [SO:ke]
subset: SOFA
synonym: "methylated A" EXACT []
synonym: "methylated adenine" EXACT []
synonym: "methylated adenine base" EXACT []
synonym: "methylated adenine residue" EXACT []
synonym: "methylated_A" EXACT []
is_a: SO:0000306 ! methylated_DNA_base_feature
[Term]
id: SO:0000162
name: splice_site
namespace: sequence
def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 bases and is adjacent_to splice_junction." [SO:cjm, SO:ke]
comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery.
subset: SOFA
synonym: "splice site" EXACT []
xref: http://en.wikipedia.org/wiki/Splice_site "wiki"
is_a: SO:0000835 ! primary_transcript_region
[Term]
id: SO:0000163
name: five_prime_cis_splice_site
namespace: sequence
def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke]
subset: SOFA
synonym: "5' splice site" EXACT []
synonym: "donor" RELATED []
synonym: "donor splice site" EXACT []
synonym: "five prime splice site" EXACT []
synonym: "splice donor site" EXACT []
is_a: SO:0001419 ! cis_splice_site
[Term]
id: SO:0000164
name: three_prime_cis_splice_site
namespace: sequence
def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke]
subset: SOFA
synonym: "3' splice site" RELATED []
synonym: "acceptor" RELATED []
synonym: "acceptor splice site" EXACT []
synonym: "splice acceptor site" EXACT []
synonym: "three prime splice site" EXACT []
is_a: SO:0001419 ! cis_splice_site
[Term]
id: SO:0000165
name: enhancer
namespace: sequence
def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.insdc.org/files/feature_table.html]
comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO.
subset: SOFA
synonym: "INSDC_feature:regulatory" BROAD []
synonym: "INSDC_qualifier:enhancer" EXACT []
xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki"
is_a: SO:0000727 ! cis_regulatory_module
[Term]
id: SO:0000167
name: promoter
namespace: sequence
def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the core transcription machinery. A region (DNA) to which RNA polymerase binds, to begin transcription." [SO:regcreative]
comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. Moved from is_a: SO:0001055 transcriptional_cis_regulatory_region as per request from GREEKC initiative in August 2020. Merged with RNA_polymerase_promoter (SO:0001203) Aug 2020. Moved up one level from is_a CRM (SO:0000727) to is_a transcriptional_cis_regulatory_region (SO:0001055) as part of the GREEKC work January 2021. Pascale Gaudet from Gene Ontology pointed out that CRM can be located upstream of the promoter and therefore cannot include the promoter.
subset: SOFA
synonym: "INSDC_feature:regulatory" BROAD []
synonym: "INSDC_qualifier:promoter" EXACT []
synonym: "promoter sequence" EXACT []
xref: http://en.wikipedia.org/wiki/Promoter "wiki"
is_a: SO:0000842 ! gene_component_region
is_a: SO:0001055 ! transcriptional_cis_regulatory_region
[Term]
id: SO:0000177
name: cross_genome_match
namespace: sequence
def: "A nucleotide match against a sequence from another organism." [SO:ma]
subset: SOFA
synonym: "cross genome match" EXACT []
is_a: SO:0000347 ! nucleotide_match
[Term]
id: SO:0000178
name: operon
namespace: sequence
def: "The DNA region of a group of adjacent genes whose transcription is coordinated on one or several mutually overlapping transcription units transcribed in the same direction and sharing at least one gene." [SO:ma]
comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Definition updated as per Mejia-Almonte et al., "Redefining fundamental concepts of transcription initiation in prokaryotes", Aug 5 2020.
subset: SOFA
synonym: "INSDC_feature:operon" EXACT []
xref: http://en.wikipedia.org/wiki/Operon "wiki"
is_a: SO:0005855 ! gene_group
[Term]
id: SO:0000179
name: clone_insert_start
namespace: sequence
def: "The start of the clone insert." [SO:ke]
subset: SOFA
synonym: "clone insert start" EXACT []
is_a: SO:0000699 ! junction
relationship: part_of SO:0000753 ! clone_insert
[Term]
id: SO:0000181
name: translated_nucleotide_match
namespace: sequence
def: "A match against a translated sequence." [SO:ke]
subset: SOFA
synonym: "translated nucleotide match" EXACT []
is_a: SO:0000347 ! nucleotide_match
[Term]
id: SO:0000183
name: non_transcribed_region
namespace: sequence
def: "A region of the gene which is not transcribed." [SO:ke]
subset: SOFA
synonym: "non transcribed region" EXACT []
synonym: "non-transcribed sequence" EXACT []
synonym: "nontranscribed region" EXACT []
synonym: "nontranscribed sequence" EXACT []
is_a: SO:0000842 ! gene_component_region
[Term]
id: SO:0000185
name: primary_transcript
namespace: sequence
def: "A transcript that in its initial state requires modification to be functional." [SO:ma]
subset: SOFA
synonym: "INSDC_feature:precursor_RNA" EXACT []
synonym: "INSDC_feature:prim_transcript" EXACT []
synonym: "precursor RNA" EXACT []
synonym: "primary transcript" EXACT []
xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki"
is_a: SO:0000673 ! transcript
[Term]
id: SO:0000187
name: repeat_family
namespace: sequence
def: "A group of characterized repeat sequences." [SO:ke]
subset: SOFA
is_obsolete: true
[Term]
id: SO:0000188
name: intron
namespace: sequence
def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.insdc.org/files/feature_table.html]
comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.
subset: SOFA
synonym: "INSDC_feature:intron" EXACT []
xref: http://en.wikipedia.org/wiki/Intron "wiki"
is_a: SO:0000835 ! primary_transcript_region
[Term]
id: SO:0000193
name: RFLP_fragment
namespace: sequence
def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj]
subset: SOFA
synonym: "restriction fragment length polymorphism" EXACT []
synonym: "RFLP" EXACT []
synonym: "RFLP fragment" EXACT []
xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki"
is_a: SO:0000412 ! restriction_fragment
[Term]
id: SO:0000195
name: coding_exon
namespace: sequence
def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke]
subset: SOFA
synonym: "coding exon" EXACT []
is_a: SO:0000147 ! exon
[Term]
id: SO:0000196
name: five_prime_coding_exon_coding_region
namespace: sequence
def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm]
subset: SOFA
synonym: "five prime exon coding region" EXACT []
is_a: SO:0001215 ! coding_region_of_exon
relationship: part_of SO:0000200 ! five_prime_coding_exon
[Term]
id: SO:0000197
name: three_prime_coding_exon_coding_region
namespace: sequence
def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm]
subset: SOFA
synonym: "three prime exon coding region" EXACT []
is_a: SO:0001215 ! coding_region_of_exon
relationship: part_of SO:0000195 ! coding_exon
[Term]
id: SO:0000198
name: noncoding_exon
namespace: sequence
def: "An exon that does not contain any codons." [SO:ke]
subset: SOFA
synonym: "noncoding exon" EXACT []
is_a: SO:0000147 ! exon
[Term]
id: SO:0000200
name: five_prime_coding_exon
namespace: sequence
def: "The 5' most coding exon." [SO:ke]
subset: SOFA
synonym: "5' coding exon" EXACT []
synonym: "five prime coding exon" EXACT []
is_a: SO:0000195 ! coding_exon
[Term]
id: SO:0000203
name: UTR
namespace: sequence
def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke]
subset: SOFA
synonym: "untranslated region" EXACT []
is_a: SO:0000836 ! mRNA_region
[Term]
id: SO:0000204
name: five_prime_UTR
namespace: sequence
def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.insdc.org/files/feature_table.html]
subset: SOFA
synonym: "5' UTR" EXACT []
synonym: "five prime UTR" EXACT []
synonym: "five_prime_untranslated_region" EXACT []
synonym: "INSDC_feature:5'UTR" EXACT []
xref: http://en.wikipedia.org/wiki/5'_UTR "wiki"
is_a: SO:0000203 ! UTR
[Term]
id: SO:0000205
name: three_prime_UTR
namespace: sequence
def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.insdc.org/files/feature_table.html]
subset: SOFA
synonym: "INSDC_feature:3'UTR" EXACT []
synonym: "three prime untranslated region" EXACT []
synonym: "three prime UTR" EXACT []
xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki"
is_a: SO:0000203 ! UTR
[Term]
id: SO:0000209
name: rRNA_primary_transcript
namespace: sequence
def: "A primary transcript encoding a ribosomal RNA." [SO:ke]
subset: SOFA
synonym: "ribosomal RNA primary transcript" EXACT []
synonym: "rRNA primary transcript" EXACT []
is_a: SO:0000483 ! nc_primary_transcript
[Term]
id: SO:0000233
name: mature_transcript
namespace: sequence
def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke]
comment: A processed transcript cannot contain introns.
subset: SOFA
synonym: "mature transcript" EXACT []
xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki"
is_a: SO:0000673 ! transcript
relationship: derives_from SO:0000185 ! primary_transcript
[Term]
id: SO:0000234
name: mRNA
namespace: sequence
def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma]
comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology.
subset: SOFA
synonym: "INSDC_feature:mRNA" EXACT []
synonym: "messenger RNA" EXACT []
synonym: "protein_coding_transcript" EXACT []
xref: http://en.wikipedia.org/wiki/MRNA "wiki"
xref: http://www.gencodegenes.org/gencode_biotypes.html "GENCODE"
is_a: SO:0000233 ! mature_transcript
[Term]
id: SO:0000235
name: TF_binding_site
namespace: sequence
def: "A DNA site where a transcription factor binds." [SO:ke]
comment: Definition updated along with definitions in Mejia-Almonte et al. PMID:32665585. Added relationship part_of SO:0000727 CRM in place of previous CRM relationship has_part TF_binding_site August 2020 in response to requests from GREEKC initiative. Moved from transcription_regulatory_region (SO:0001679) to transcriptional_cis_regulatory_region (SO:0001055) by Dave Sant on Feb 11, 2021 when transcription_regulatory_region was merged into transcriptional_cis_regulatory_region to be consistent with GO and reduce redundancy as part of the GREEKC consortium. See GitHub Issue #527.
subset: SOFA
synonym: "TF binding site" EXACT []
synonym: "transcription factor binding site" EXACT []
is_a: SO:0001055 ! transcriptional_cis_regulatory_region
is_a: SO:0001654 ! nucleotide_to_protein_binding_site
relationship: part_of SO:0000727 ! cis_regulatory_module
[Term]
id: SO:0000236
name: ORF
namespace: sequence
def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma]
comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology.
subset: SOFA
synonym: "open reading frame" EXACT []
is_a: SO:0000717 ! reading_frame
[Term]
id: SO:0000239
name: flanking_region
namespace: sequence
def: "The sequences extending on either side of a specific region." [SO:ke]
subset: SOFA
synonym: "flanking region" EXACT []
is_a: SO:0001412 ! topologically_defined_region
[Term]
id: SO:0000252
name: rRNA
namespace: sequence
def: "rRNA is an RNA component of a ribosome that can provide both structural scaffolding and catalytic activity." [http://www.insdc.org/files/feature_table.html, ISBN:0198506732]
comment: Definition updated 10 June 2021 as part of restructuring rRNA terms and reforming definitions to have similar structures. Request from EBI. See GitHub Issue #493
subset: SOFA
synonym: "INSDC_feature:rRNA" EXACT []
synonym: "INSDC_qualifier:unknown" BROAD []
synonym: "ribosomal ribonucleic acid" EXACT []
synonym: "ribosomal RNA" EXACT []
xref: http://en.wikipedia.org/wiki/RRNA "wiki"
is_a: SO:0000655 ! ncRNA
relationship: derives_from SO:0000209 ! rRNA_primary_transcript
[Term]
id: SO:0000253
name: tRNA
namespace: sequence
def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732]
comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.
subset: SOFA
synonym: "INSDC_feature:tRNA" EXACT []
synonym: "INSDC_qualifier:unknown" BROAD []
synonym: "transfer ribonucleic acid" RELATED []
synonym: "transfer RNA" RELATED []
xref: http://en.wikipedia.org/wiki/TRNA "wiki"
is_a: SO:0000655 ! ncRNA
relationship: derives_from SO:0000483 ! nc_primary_transcript
[Term]
id: SO:0000274
name: snRNA
namespace: sequence
def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.insdc.org/files/feature_table.html, PMID:11733745, WB:ems]
comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.
subset: SOFA
synonym: "INSDC_feature:ncRNA" BROAD []
synonym: "INSDC_qualifier:snRNA" EXACT []
synonym: "small nuclear RNA" EXACT []
xref: http://en.wikipedia.org/wiki/SnRNA "wiki"
is_a: SO:0000655 ! ncRNA
relationship: derives_from SO:0000483 ! nc_primary_transcript
[Term]
id: SO:0000275
name: snoRNA
namespace: sequence
def: "Small nucleolar RNAs (snoRNAs) are short non-coding RNAs enriched in the nucleolus as components of small nucleolar ribonucleoproteins. They guide ribose methylation and pseudouridylation of rRNAs and snRNAs, and a subgroup regulate excision of rRNAs from rRNA precursor transcripts. snoRNAs may also guide rRNA acetylation and tRNA methylation, and regulate mRNA abundance and alternative splicing." [GOC:kgc, PMID:31828325]
comment: Updated the definition of snoRNA (SO:0000275) from "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." to "Small nucleolar RNAs (snoRNAs) are short non-coding RNAs enriched in the nucleolus as components of small nucleolar ribonucleoproteins. They guide ribose methylation and pseudouridylation of rRNAs and snRNAs, and a subgroup regulate excision of rRNAs from rRNA precursor transcripts. snoRNAs may also guide rRNA acetylation and tRNA methylation, and regulate mRNA abundance and alternative splicing." to acknowledge that some snoRNAs functionally localize to other compartments (cytoplasm or even secreted). See GitHub Issue #578.
subset: SOFA
synonym: "INSDC_feature:ncRNA" BROAD []
synonym: "INSDC_qualifier:snoRNA" EXACT []
synonym: "small nucleolar RNA" EXACT []
is_a: SO:0000655 ! ncRNA
relationship: derives_from SO:0000483 ! nc_primary_transcript
[Term]
id: SO:0000276
name: miRNA
namespace: sequence
alt_id: SO:0000649
def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene (or the product of other non coding RNA genes). Micro RNAs are produced from precursor molecules (SO:0001244) that can form local hairpin structures, which ordinarily are processed (usually via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:11081512, PMID:12592000]
subset: SOFA
synonym: "INSDC_feature:ncRNA" BROAD []
synonym: "INSDC_qualifier:miRNA" EXACT []
synonym: "micro RNA" EXACT []
synonym: "microRNA" EXACT []
synonym: "small temporal RNA" EXACT []
synonym: "stRNA" EXACT []
xref: http://en.wikipedia.org/wiki/MiRNA "wiki"
xref: http://en.wikipedia.org/wiki/StRNA "wiki"
is_a: SO:0000370 ! small_regulatory_ncRNA
relationship: derives_from SO:0000835 ! primary_transcript_region
[Term]
id: SO:0000289
name: microsatellite
namespace: sequence
def: "A repeat_region containing repeat_units of 2 to 10 bp repeated in tandem." [http://www.informatics.jax.org/silver/glossary.shtml, NCBI:th]
subset: SOFA
synonym: "INSDC_feature:repeat_region" BROAD []
synonym: "INSDC_qualifier:microsatellite" EXACT []
synonym: "microsatellite locus" EXACT []
synonym: "microsatellite marker" EXACT []
synonym: "short tandem repeat" EXACT []
synonym: "STR" EXACT [http://www.ncbi.nlm.nih.gov/books/NBK21126/def-item/A9651/]
xref: http://en.wikipedia.org/wiki/Microsatellite "wiki"
is_a: SO:0000005 ! satellite_DNA
[Term]
id: SO:0000294
name: inverted_repeat
namespace: sequence
def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke]
subset: SOFA
synonym: "INSDC_feature:repeat_region" BROAD []
synonym: "INSDC_qualifier:inverted" EXACT []
synonym: "inverted repeat" EXACT []
synonym: "inverted repeat sequence" EXACT []
xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki"
is_a: SO:0000657 ! repeat_region
[Term]
id: SO:0000296
name: origin_of_replication
namespace: sequence
def: "A region of nucleic acid from which replication initiates; includes sequences that are recognized by replication proteins, the site from which the first separation of complementary strands occurs, and specific replication start sites." [http://www.insdc.org/files/feature_table.html, NCBI:cf]
subset: SOFA
synonym: "INSDC_feature:rep_origin" EXACT []
synonym: "ori" EXACT []
synonym: "origin of replication" EXACT []
xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki"
is_a: SO:0001411 ! biological_region
relationship: part_of SO:0001235 ! replicon
[Term]
id: SO:0000303
name: clip
namespace: sequence
def: "Part of the primary transcript that is clipped off during processing." [SO:ke]
subset: SOFA
is_a: SO:0000835 ! primary_transcript_region
[Term]
id: SO:0000305
name: modified_DNA_base
namespace: sequence
def: "A modified nucleotide, i.e. a nucleotide other than A, T, C, or G." [http://www.insdc.org/files/feature_table.html]
comment: Modified base:<modified_base>.
subset: SOFA
synonym: "INSDC_feature:modified_base" EXACT []
synonym: "modified base site" EXACT []
is_a: SO:0001236 ! base
is_a: SO:0001720 ! epigenetically_modified_region
[Term]
id: SO:0000306
name: methylated_DNA_base_feature
namespace: sequence
def: "A nucleotide modified by methylation." [SO:ke]
subset: SOFA
synonym: "methylated base feature" EXACT []
is_a: SO:0000305 ! modified_DNA_base
[Term]
id: SO:0000307
name: CpG_island
namespace: sequence
def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd]
subset: SOFA
synonym: "CG island" EXACT []
synonym: "CpG island" EXACT []
xref: http://en.wikipedia.org/wiki/CpG_island "wiki"
is_a: SO:0001411 ! biological_region
[Term]
id: SO:0000314
name: direct_repeat
namespace: sequence
def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-followed by-GCTGA." [SO:ke]
subset: SOFA
synonym: "direct repeat" EXACT []
synonym: "INSDC_feature:repeat_region" BROAD []
synonym: "INSDC_qualifier:direct" EXACT []
xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki"
is_a: SO:0000657 ! repeat_region
[Term]
id: SO:0000315
name: TSS
namespace: sequence
def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke]
comment: Added relationship is_a SO:0002309 core_promoter_element with the creation of core_promoter_element as part of GREEKC initiative August 2020 - Dave Sant.
subset: SOFA
synonym: "INSDC_feature:misc_feature" BROAD []
synonym: "INSDC_note:transcription_start_site" EXACT []
synonym: "transcription start site" EXACT []
synonym: "transcription_start_site" EXACT []
is_a: SO:0000714 ! nucleotide_motif
is_a: SO:0000835 ! primary_transcript_region
relationship: overlaps SO:0000235 ! TF_binding_site
relationship: part_of SO:0000167 ! promoter
[Term]
id: SO:0000316
name: CDS
namespace: sequence
def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma]
subset: SOFA
synonym: "coding sequence" EXACT []
synonym: "coding_sequence" EXACT []
synonym: "INSDC_feature:CDS" EXACT []
is_a: SO:0000836 ! mRNA_region
[Term]
id: SO:0000318
name: start_codon
namespace: sequence
def: "First codon to be translated by a ribosome." [SO:ke]
subset: SOFA
synonym: "initiation codon" EXACT []
synonym: "start codon" EXACT []
xref: http://en.wikipedia.org/wiki/Start_codon "wiki"
is_a: SO:0000360 ! codon
[Term]
id: SO:0000319
name: stop_codon
namespace: sequence
def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke]
subset: SOFA
synonym: "stop codon" EXACT []
xref: http://en.wikipedia.org/wiki/Stop_codon "wiki"
is_a: SO:0000360 ! codon
[Term]
id: SO:0000324
name: tag
namespace: sequence
def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke]
subset: SOFA
is_a: SO:0000696 ! oligo
[Term]
id: SO:0000325
name: rRNA_large_subunit_primary_transcript
namespace: sequence
def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke]
subset: SOFA
synonym: "35S rRNA primary transcript" EXACT []
synonym: "rRNA large subunit primary transcript" EXACT []
is_a: SO:0000209 ! rRNA_primary_transcript
[Term]
id: SO:0000326
name: SAGE_tag
namespace: sequence
def: "A short diagnostic sequence tag, generated by serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract]
subset: SOFA
synonym: "SAGE tag" EXACT []
is_a: SO:0000324 ! tag
[Term]
id: SO:0000330
name: conserved_region
namespace: sequence
def: "Region of sequence similarity by descent from a common ancestor." [SO:ke]
subset: SOFA
synonym: "conserved region" EXACT []
synonym: "INSDC_feature:misc_feature" BROAD []
synonym: "INSDC_note:conserved_region" EXACT []
xref: http://en.wikipedia.org/wiki/Conserved_region "wiki"
is_a: SO:0001410 ! experimental_feature
[Term]
id: SO:0000331
name: STS
namespace: sequence
def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com]
subset: SOFA
synonym: "INSDC_feature:STS" EXACT []
synonym: "sequence tag site" EXACT []
is_a: SO:0000324 ! tag
[Term]
id: SO:0000332
name: coding_conserved_region
namespace: sequence
def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke]
subset: SOFA
synonym: "coding conserved region" EXACT []
is_a: SO:0000330 ! conserved_region
[Term]
id: SO:0000333
name: exon_junction
namespace: sequence
def: "The boundary between two exons in a processed transcript." [SO:ke]
subset: SOFA
synonym: "exon junction" EXACT []
is_a: SO:0000699 ! junction
relationship: part_of SO:0000233 ! mature_transcript
[Term]
id: SO:0000334
name: nc_conserved_region
namespace: sequence
def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke]
subset: SOFA
synonym: "conserved non-coding element" EXACT []
synonym: "conserved non-coding sequence" EXACT []
synonym: "nc conserved region" EXACT []
synonym: "noncoding conserved region" EXACT []
is_a: SO:0000330 ! conserved_region
[Term]
id: SO:0000336
name: pseudogene
namespace: sequence
def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html]
subset: Alliance_of_Genome_Resources
subset: SOFA
synonym: "INSDC_feature:gene" BROAD []
synonym: "INSDC_qualifier:pseudo" EXACT []
synonym: "INSDC_qualifier:unknown" EXACT []
xref: http://en.wikipedia.org/wiki/Pseudogene "wiki"
is_a: SO:0001411 ! biological_region
relationship: non_functional_homolog_of SO:0000704 ! gene
[Term]
id: SO:0000337
name: RNAi_reagent
namespace: sequence
def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd]
subset: SOFA
synonym: "RNAi reagent" EXACT []
is_a: SO:0000442 ! ds_oligo