In [1]:
import requests
import webbrowser
import time
import pandas as pd
import numpy as np
import re
from io import StringIO

In [2]:
class reactome_gsa():
    """Minimal client for the Reactome gene set analysis web service.

    Datasets are registered with :meth:`add_dataset` and sent to the service
    with :meth:`submit_query`, which blocks until the analysis has finished.
    """

    # Analysis methods accepted by set_method().
    METHODS = ["PADOG", "Camera"]
    # Root of the service endpoints used by submit_query().
    BASE_URL = "https://gsa.reactome.org/0.1"
    # Seconds a single HTTP request may take before requests gives up.
    REQUEST_TIMEOUT = 60

    def __init__(self, method="PADOG"):
        """Create an empty request.

        Args:
            method: Name of the analysis method sent as ``methodName``. It is
                stored as given; use :meth:`set_method` for a validated change.
        """
        # Dataset payloads (dicts) in the order they were added.
        self.datasets = []
        self.method = method
        # Filled by submit_query() once an analysis completes.
        self.result = None
        self.result_url = None

    def set_method(self, method):
        """Select the analysis method.

        Args:
            method: One of the names in ``METHODS``.

        Raises:
            ValueError: If ``method`` is not a supported method name.
        """
        if method not in self.METHODS:
            # str() keeps the error a ValueError even for non-string input.
            raise ValueError(str(method) + " not in " + str(self.METHODS) + ".")
        self.method = method
        return None

    def add_dataset(self, df, name, dtype, analysisGroup="analysisGroup", comparison=["+", "-"], samples="samples"):
        """Register an expression matrix for the next query.

        Args:
            df: pandas DataFrame; the index holds the row identifiers and each
                column is one sample. Columns may be a plain Index or a
                MultiIndex.
            name: Unique dataset name.
            dtype: Value sent as the dataset ``type``.
            analysisGroup: For MultiIndex columns, the name of the column level
                holding the group labels. Otherwise a list with one group label
                per column.
            comparison: Two group labels ``[group1, group2]``; both must occur
                in the group labels. The default list is only read, never
                mutated.
            samples: For MultiIndex columns, the name of the column level
                holding the sample names. Otherwise a list with one sample name
                per column.

        Raises:
            ValueError: If a dataset called ``name`` was already added.
            AssertionError: If the arguments do not have the shapes described
                above.
        """
        assert isinstance(comparison, (list, tuple)), "comparison must be a list"
        assert len(comparison) == 2, "comparison must hold exactly two group labels"
        if any(ds["name"] == name for ds in self.datasets):
            raise ValueError("Dataset "+name+" already exists.")

        if isinstance(df.columns, pd.MultiIndex):
            # Multiindex dataframe: group labels and sample names are column levels.
            assert isinstance(analysisGroup, str), "analysisGroup must name a column level"
            assert isinstance(samples, str), "samples must name a column level"
            self.add_dataset_multiindex(df, name, dtype, analysisGroup, comparison, samples)
        else:
            # Single index dataframe: group labels and sample names are given as lists.
            assert isinstance(analysisGroup, list), "analysisGroup must be a list"
            assert len(analysisGroup) == len(df.columns), "need one analysisGroup entry per column"
            assert isinstance(samples, list), "samples must be a list"
            assert len(samples) == len(df.columns), "need one samples entry per column"
            self.add_dataset_lists(df, name, dtype, analysisGroup, comparison, samples)

    def add_dataset_multiindex(self, df, name, dtype, analysisGroup, comparison, samples):
        """Add a dataset whose columns are a MultiIndex.

        ``analysisGroup`` and ``samples`` are level names; their values are
        read from the column levels, and the matrix is sent with the sample
        level as its only header row. ``df`` itself is not modified.
        """
        group_labels = list(df.columns.get_level_values(analysisGroup))
        sample_names = list(df.columns.get_level_values(samples))
        flat = df.copy()  # work on a copy so the caller keeps the MultiIndex
        flat.columns = sample_names
        self.add_dataset_lists(flat, name, dtype, group_labels, comparison, sample_names)

    def add_dataset_lists(self, df, name, dtype, analysisGroup, comparison, samples):
        """Serialise ``df`` and append the dataset payload to ``self.datasets``.

        ``analysisGroup`` and ``samples`` are lists with one entry per column.
        NOTE(review): ``samples`` is presumably expected to equal the column
        labels of ``df``; this is not checked here.

        Raises:
            AssertionError: If either label in ``comparison`` is missing from
                ``analysisGroup``.
        """
        assert comparison[0] in analysisGroup, "comparison[0] is not an analysis group"
        assert comparison[1] in analysisGroup, "comparison[1] is not an analysis group"

        # to_csv keeps labels containing spaces intact and writes floats with
        # full precision; collapsing the padded to_string() output did neither.
        # na_rep="NaN" keeps the missing-value marker to_string() produced, and
        # the newline handling keeps the result identical across platforms.
        data = df.to_csv(sep="\t", na_rep="NaN").replace("\r\n", "\n").rstrip("\n")

        self.datasets.append({
            "data": data,
            "design": {
                # Copies, so later edits to the caller's lists cannot alter the payload.
                "analysisGroup": list(analysisGroup),
                "comparison": {"group1": comparison[0], "group2": comparison[1]},
                "samples": list(samples),
            },
            "name": name,
            "type": dtype
        })

    def _get_json(self, url):
        """GET ``url`` and return the decoded JSON body; raise on HTTP errors."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def submit_query(self, open_browser=True, poll_interval=5):
        """Send all datasets to the service and wait for the analysis to end.

        On success the decoded result is stored in ``self.result`` and the
        first Reactome link (if any) in ``self.result_url``.

        Args:
            open_browser: Open ``self.result_url`` in the web browser when the
                analysis completes.
            poll_interval: Seconds to sleep between status requests.

        Returns:
            True if the analysis completed, False otherwise.

        Raises:
            requests.RequestException: On connection problems, timeouts or
                HTTP error responses.
        """
        # Drop results of an earlier run so a failure cannot leave stale data.
        self.result = None
        self.result_url = None

        # submit query
        print("Submitting query to reactome ...\n")
        react = requests.post(
            self.BASE_URL + "/analysis",
            json={"datasets": self.datasets,
                  "methodName": self.method},
            headers={"content-type": "application/json"},
            timeout=self.REQUEST_TIMEOUT,
        )
        # Without this check an error body would be used as the analysis id below.
        react.raise_for_status()
        analysis_id = react.text

        # refresh status
        status_url = self.BASE_URL + "/status/" + analysis_id
        status_info = self._get_json(status_url)
        while status_info["status"] == "running":
            time.sleep(poll_interval)
            status_info = self._get_json(status_url)
        status = status_info["status"]
        print(str(status_info).replace(",", ",\n"))

        # failure or success:
        if status == "complete":
            self.result = self._get_json(self.BASE_URL + "/result/" + analysis_id)
            links = self.result.get("reactome_links") or []
            if links:
                self.result_url = links[0]["url"]
            if open_browser and self.result_url:
                webbrowser.open(self.result_url)
                print("Opening result in reactome browser.")
            return True

        if status == "failed":
            print("THE ANALYSIS FAILED")
        else:
            # Any other terminal status is reported instead of silently returning None.
            print("Unexpected analysis status: " + str(status))
        return False

    def __repr__(self):
        """Return the method plus the name and comparison of every dataset."""
        return str([self.method, [str([el["name"], el["design"]["comparison"]]) for el in self.datasets]])

In [3]:
# Toy expression matrix: 20 genes x 6 samples. Each of the four row patterns
# is repeated five times; columns carry a (sample, condition) MultiIndex.
gene_names = [
    "EGFR", "MCM2", "TP53", "CD19", "GLG1",
    "HNRNPK", "GOLGA2", "CLTA", "CLTB", "AP4M1",
    "RUSC2", "ATG9A", "GRB2", "SHC1", "TGN46",
    "AP1B1", "CDK3", "MTORC1", "SERINC3", "SERINC5",
]
sample_names = ["S1", "S2", "S3", "S4", "S5", "S6"]
conditions = ["a", "b", "a", "b", "a", "b"]
row_patterns = [
    [1, 2, 3, 4, 5, 6],
    [3, 4, 5, 6, 5, 3],
    [1, 2, 3, 4, 5, 6],
    [1, 2, 3, 4, 5, 6],
]

x = pd.DataFrame(
    np.repeat(row_patterns, 5, axis=0),
    index=gene_names,
    columns=pd.MultiIndex.from_arrays([sample_names, conditions], names=["sample", "condition"]),
)
x

sample,S1,S2,S3,S4,S5,S6
condition,a,b,a,b,a,b
EGFR,1,2,3,4,5,6
MCM2,1,2,3,4,5,6
TP53,1,2,3,4,5,6
CD19,1,2,3,4,5,6
GLG1,1,2,3,4,5,6
HNRNPK,3,4,5,6,5,3
GOLGA2,3,4,5,6,5,3
CLTA,3,4,5,6,5,3
CLTB,3,4,5,6,5,3
AP4M1,3,4,5,6,5,3


In [4]:
# Build a request that uses the Camera method and compares condition "a"
# with condition "b"; the group labels and sample names are read from the
# column levels of x.
r = reactome_gsa()
r.set_method("Camera")
r.add_dataset(
    x,
    name="bla",
    dtype="rnaseq_counts",
    analysisGroup="condition",
    comparison=["a", "b"],
    samples="sample",
)
r

['Camera', ["['bla', {'group1': 'a', 'group2': 'b'}]"]]

In [None]:
# Send the request; this call blocks while the service reports "running".
analysis_succeeded = r.submit_query()
analysis_succeeded

Submitting query to reactome ...



In [None]:
# Re-open the result page whose URL submit_query() stored on the client.
result_page = r.result_url
webbrowser.open(result_page)

In [None]:
# The "pathways" field of the first entry in the result's "results" list is
# tab-separated text; parse it into a DataFrame for display.
pathways_tsv = r.result["results"][0]["pathways"]
pd.read_csv(StringIO(pathways_tsv), sep="\t")