class Interactions(db.Model):
    """ORM mapping for the ``interactions`` table of the ``rice_interactions`` bind.

    One row describes one protein pair; the pair (``Protein1``, ``Protein2``)
    is the composite primary key, matching the table definition in
    ``config/databases/rice_interactions.sql``.
    """

    __bind_key__ = "rice_interactions"
    __tablename__ = "interactions"
    Protein1 = db.Column(db.String(14), primary_key=True)
    Protein2 = db.Column(db.String(14), primary_key=True)
    S_cerevisiae = db.Column(db.SmallInteger(), primary_key=False)
    S_pombe = db.Column(db.SmallInteger(), primary_key=False)
    Worm = db.Column(db.SmallInteger(), primary_key=False)
    Fly = db.Column(db.SmallInteger(), primary_key=False)
    Human = db.Column(db.SmallInteger(), primary_key=False)
    Mouse = db.Column(db.SmallInteger(), primary_key=False)
    # Declared in the table definition but previously absent from the model.
    # NOTE(review): confirm the production table also carries this column.
    E_coli = db.Column(db.SmallInteger(), primary_key=False)
    Total_hits = db.Column(db.SmallInteger(), primary_key=False)
    Num_species = db.Column(db.SmallInteger(), primary_key=False)
    Quality = db.Column(db.SmallInteger(), primary_key=False)
    Index = db.Column(db.SmallInteger(), primary_key=False)
    Pcc = db.Column(db.Float, primary_key=False)
    # The column is `tinytext` in the table definition, not an integer.
    Bind_id = db.Column(db.Text(), primary_key=False)


class Rice_mPLoc(db.Model):
    """ORM mapping for the ``Rice_mPLoc`` table, keyed by ``gene_id``.

    Every column other than ``gene_id`` is a nullable ``text`` column in the
    table definition, so they are mapped as ``db.Text`` (an unsized
    ``String()`` cannot be emitted as a MySQL VARCHAR).
    """

    __bind_key__ = "rice_interactions"
    __tablename__ = "Rice_mPLoc"
    gene_id = db.Column(db.String(20), primary_key=True)
    alias = db.Column(db.Text(), primary_key=False)
    lab_description = db.Column(db.Text(), primary_key=False)
    gfp = db.Column(db.Text(), primary_key=False)
    mass_spec = db.Column(db.Text(), primary_key=False)
    swissprot = db.Column(db.Text(), primary_key=False)
    amigo = db.Column(db.Text(), primary_key=False)
    annotation = db.Column(db.Text(), primary_key=False)
    pred_ipsort = db.Column(db.Text(), primary_key=False)
    pred_mitopred = db.Column(db.Text(), primary_key=False)
    pred_mitopred2 = db.Column(db.Text(), primary_key=False)
    pred_predator = db.Column(db.Text(), primary_key=False)
    pred_peroxp = db.Column(db.Text(), primary_key=False)
    pred_subloc = db.Column(db.Text(), primary_key=False)
    pred_targetp = db.Column(db.Text(), primary_key=False)
    pred_wolfpsort = db.Column(db.Text(), primary_key=False)
    pred_multiloc = db.Column(db.Text(), primary_key=False)
    pred_loctree = db.Column(db.Text(), primary_key=False)
    pred_mPLoc = db.Column(db.Text(), primary_key=False)


class RGI_annotation(db.Model):
    """ORM mapping for the ``RGI_annotation`` table.

    The table definition declares ``PRIMARY KEY (loc)`` only, so ``loc`` is
    the sole primary-key column here as well.
    """

    __bind_key__ = "rice_interactions"
    __tablename__ = "RGI_annotation"
    loc = db.Column(db.String(14), primary_key=True)
    # `longtext` in the table definition.
    annotation = db.Column(db.Text(), primary_key=False)
    date = db.Column(db.Date(), primary_key=False)
@loc.route("/<species>/<query_gene>")
class Localizations(Resource):
    @loc.param("species", _in="path", default="rice")
    @loc.param("query_gene", _in="path", default="LOC_Os01g52560.1")
    def get(self, species="", query_gene=""):
        """
        Returns the predicted sub-cellular localization for a particular query gene isoform
        Supported species: 'rice'
        """
        # Both values arrive as URL path segments; the route must name them
        # so that they are actually bound to these parameters.
        species = escape(species.lower())
        query_gene = escape(query_gene)

        # Guard clause: only rice isoform IDs (e.g. LOC_Os01g52560.1) are accepted.
        if species != "rice" or not BARUtils.is_rice_gene_valid(query_gene, True):
            return BARUtils.error_exit("Invalid species or gene ID"), 400

        # Keep the try body limited to the statement that talks to the database.
        try:
            rows = rice_loc_db.query.filter_by(gene_id=query_gene).all()
        except OperationalError:
            return BARUtils.error_exit("An internal error has occurred"), 500

        if not rows:
            return (
                BARUtils.error_exit("There are no data found for the given gene"),
                400,
            )

        # gene_id is the table's primary key, so at most one row matches.
        match = rows[0]
        return {
            "status": "success",
            "result": {
                "gene": match.gene_id,
                "predicted_location": match.pred_mPLoc,
            },
        }
@loc.route("/")
class LocalizationsPost(Resource):
    @loc.expect(loc_post_ex)
    def post(self):
        """
        Returns the predicted sub-cellular localizations for multiple gene isoforms
        Supported species: 'rice'
        """
        json_data = request.get_json()

        # Validate json
        try:
            json_data = GeneLocationsSchema().load(json_data)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        species = json_data["species"].lower()
        if species != "rice":
            return BARUtils.error_exit("Invalid species"), 400

        # The schema does not mark "genes" as required, so it can be absent
        # after a successful load; report that as a client error instead of
        # letting a KeyError surface as an HTTP 500.
        genes = json_data.get("genes")
        if genes is None:
            return (
                BARUtils.error_exit({"genes": ["Missing data for required field."]}),
                400,
            )

        if not all(BARUtils.is_rice_gene_valid(gene, True) for gene in genes):
            return BARUtils.error_exit("Invalid gene id"), 400

        try:
            rows = rice_loc_db.query.filter(rice_loc_db.gene_id.in_(genes)).all()
        except OperationalError:
            return BARUtils.error_exit("An internal error has occurred."), 500

        if not rows:
            return BARUtils.error_exit("No data for the given species/genes"), 400

        # Group predicted locations by gene id: {gene_id: [pred_mPLoc, ...]}.
        data = {}
        for row in rows:
            data.setdefault(row.gene_id, []).append(row.pred_mPLoc)

        return BARUtils.success_exit(data)
@staticmethod
def is_rice_gene_valid(gene, isoform_id=False):
    """This function verifies if rice gene is valid

    A gene ID has the form ``LOC_OsNNgNNNNN``; an isoform ID additionally
    carries a ``.N`` or ``.NN`` suffix. Matching is case-insensitive, must
    cover the whole string (a trailing newline is rejected) and only accepts
    ASCII letters and digits.

    :param gene: candidate identifier; anything that is not a string is invalid
    :param isoform_id: True if you want to verify isoform ID
    :return: True if valid
    """
    if not isinstance(gene, str):
        return False

    pattern = r"LOC_Os\d{2}g\d{5}"
    if isoform_id:
        pattern += r"\.\d{1,2}"

    # fullmatch (rather than search with ^...$) so that "ID\n" does not pass;
    # re.ASCII keeps \d and the case-insensitive letters within ASCII.
    return re.fullmatch(pattern, gene, re.I | re.ASCII) is not None
+-- ------------------------------------------------------ +-- Server version 8.0.23 + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!50503 SET NAMES utf8mb4 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; + +-- +-- Current Database: `rice_interactions` +-- + +CREATE DATABASE /*!32312 IF NOT EXISTS*/ `rice_interactions` /*!40100 DEFAULT CHARACTER SET latin1 */; + +USE `rice_interactions`; + +-- +-- Table structure for table `interactions` +-- + +DROP TABLE IF EXISTS `interactions`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!50503 SET character_set_client = utf8 */; +CREATE TABLE `interactions` ( + `Protein1` varchar(14) NOT NULL, + `Protein2` varchar(14) NOT NULL, + `S_cerevisiae` tinyint NOT NULL DEFAULT 0, + `S_pombe` tinyint NOT NULL DEFAULT 0, + `Worm` tinyint NOT NULL DEFAULT 0, + `Fly` tinyint NOT NULL DEFAULT 0, + `Human` tinyint NOT NULL DEFAULT 0, + `Mouse` tinyint NOT NULL DEFAULT 0, + `E_coli` tinyint NOT NULL DEFAULT 0, + `Total_hits` smallint NOT NULL DEFAULT 1, + `Num_species` tinyint NOT NULL DEFAULT 1, + `Quality` smallint NOT NULL DEFAULT 1, + `Index` tinyint NOT NULL DEFAULT 0, + `Pcc` float DEFAULT NULL, + `Bind_id` tinytext DEFAULT NULL, + PRIMARY KEY (`Protein1`,`Protein2`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `interactions` +-- + +LOCK TABLES `interactions` WRITE; +/*!40000 ALTER TABLE `interactions` DISABLE KEYS */; +INSERT INTO 
`interactions`(`Protein1`,`Protein2`,`Total_hits`,`Num_species`, `Quality`,`Pcc`) VALUES +('LOC_Os01g01080','LOC_Os01g52560',1,1,1,0.65), +('LOC_Os01g01080','LOC_Os01g62244',1,1,1,0), +('LOC_Os01g01080','LOC_Os01g70380',2,1,2,0.789), +('LOC_Os01g52560', 'LOC_Os01g73310',1,1,1,-0.116); +/*!40000 ALTER TABLE `interactions` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `Rice_mPLoc` +-- + +DROP TABLE IF EXISTS `Rice_mPLoc`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!50503 SET character_set_client = utf8 */; +CREATE TABLE `Rice_mPLoc` ( + `gene_id` varchar(20) NOT NULL, + `alias` text DEFAULT NULL, + `lab_description` text DEFAULT NULL, + `gfp` text DEFAULT NULL, + `mass_spec` text DEFAULT NULL, + `swissprot` text DEFAULT NULL, + `amigo` text DEFAULT NULL, + `annotation` text DEFAULT NULL, + `pred_ipsort` text DEFAULT NULL, + `pred_mitopred` text DEFAULT NULL, + `pred_mitopred2` text DEFAULT NULL, + `pred_predator` text DEFAULT NULL, + `pred_peroxp` text DEFAULT NULL, + `pred_subloc` text DEFAULT NULL, + `pred_targetp` text DEFAULT NULL, + `pred_wolfpsort` text DEFAULT NULL, + `pred_multiloc` text DEFAULT NULL, + `pred_loctree` text DEFAULT NULL, + `pred_mPLoc` text DEFAULT NULL, + PRIMARY KEY (`gene_id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `Rice_mPLoc` +-- + +LOCK TABLES `Rice_mPLoc` WRITE; +/*!40000 ALTER TABLE `Rice_mPLoc` DISABLE KEYS */; +INSERT INTO `Rice_mPLoc` (`gene_id`, `pred_mPLoc`) VALUES +('LOC_Os01g01080.1','Endoplasmic reticulum'), +('LOC_Os01g52560.1','Cellmembrane,Chloroplast'); +/*!40000 ALTER TABLE `Rice_mPLoc` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `RGI_annotation` +-- + +DROP TABLE IF EXISTS `RGI_annotation`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!50503 SET character_set_client = utf8 */; +CREATE TABLE `RGI_annotation` ( + `loc` varchar(14) NOT NULL, + 
`annotation` longtext NOT NULL, + `date` date NOT NULL DEFAULT '0000-00-00', + PRIMARY KEY (`loc`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `RGI_annotation` +-- + +LOCK TABLES `RGI_annotation` WRITE; +/*!40000 ALTER TABLE `RGI_annotation` DISABLE KEYS */; +INSERT INTO `RGI_annotation` VALUES ('LOC_Os01g01080','protein decarboxylase, putative, expressed', '2009-11-13'),('LOC_Os01g52560','protein Plant PDR ABC transporter associated domain containing protein, expressed', '2009-11-13'); +/*!40000 ALTER TABLE `RGI_annotation` ENABLE KEYS */; +UNLOCK TABLES; + +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +-- Dump completed on 2021-01-22 1:08:05 \ No newline at end of file diff --git a/config/init.sh b/config/init.sh index 3382a0a7..82cfd24b 100755 --- a/config/init.sh +++ b/config/init.sh @@ -18,6 +18,7 @@ mysql -u $DB_USER -p$DB_PASS < ./config/databases/tomato_nssnp.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/eplant_poplar.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/eplant_tomato.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/tomato_sequence.sql +mysql -u $DB_USER -p$DB_PASS < ./config/databases/rice_interactions.sql echo "Data are now loaded. Preparing API config" echo "Please manually edit config file!"