In [1]:
import json
import pandas as pd



# Absolute paths to the two gzipped input tables on the author's machine.
# Adjacent literals are joined by the parser, so the values are unchanged.
string_detailed = (
    '/Users/dsuveges/project_data/interactions/string_input/'
    '9606.protein.links.detailed.v11.0.txt.gz'
)
string_full = (
    '/Users/dsuveges/project_data/interactions/string_input/'
    '9606.protein.links.full.v11.0.txt.gz'
)

In [None]:


class StringInteractions(object):
    """
    Entry point of the StringInteractions module.

    * Downloads the Ensembl resource and builds the string network output file.
    * Records every produced file in ``list_files_downloaded`` together with
      its resource label and the configured ``gs_output_dir``.

    All settings are read from the configuration object handed to the
    constructor via attribute access (e.g. ``yaml_dict.string_info.uri``).
    """

    def __init__(self, yaml_dict):
        """
        Copy the configuration values used by the other methods onto the instance.

        :param yaml_dict: configuration object exposing ``gs_output_dir`` and
            ``string_info`` as attributes.
        """
        self.download = DownloadResource(PIS_OUTPUT_INTERACTIONS)
        self.output_folder = PIS_OUTPUT_INTERACTIONS

        self.yaml = yaml_dict
        self.gs_output_dir = yaml_dict.gs_output_dir

        string_cfg = yaml_dict.string_info
        self.string_url = string_cfg.uri
        self.score_limit = string_cfg.score_threshold
        self.output_string = string_cfg.output_string

        # 'additional_resouces' (sic) is the attribute name read from the
        # configuration, so the spelling must stay as it is.
        extra_cfg = string_cfg.additional_resouces
        ensembl_cfg = extra_cfg.ensembl_ftp
        self.ensembl_gtf_url = ensembl_cfg
        self.output_protein_mapping = ensembl_cfg.output_protein_mapping
        self.network_json_schema_url = extra_cfg.network_json_schema.url

        # Maps produced file -> {'resource': ..., 'gs_output_dir': ...}.
        self.list_files_downloaded = {}

    def _record_download(self, file_name, resource):
        """Register ``file_name`` in ``list_files_downloaded`` under ``resource``."""
        self.list_files_downloaded[file_name] = {
            'resource': resource,
            'gs_output_dir': self.gs_output_dir,
        }

    def getStringResources(self):
        """
        Produce the Ensembl file and the string network file, in that order.

        :return: ``list_files_downloaded``, with one entry per produced file.
        """
        mapping_file = self.get_ensembl_protein_mapping()
        self._record_download(mapping_file, self.ensembl_gtf_url.resource)

        network_file = self.fetch_data()
        self._record_download(network_file, self.yaml.string_info.resource)

        return self.list_files_downloaded

    def get_ensembl_protein_mapping(self):
        """Return whatever ``self.download.ftp_download`` yields for the Ensembl resource."""
        return self.download.ftp_download(self.ensembl_gtf_url)

    def fetch_data(self):
        """
        Build the network table, convert it and save it as a gzipped text file.

        The table obtained from ``PrepareStringData`` is stored on
        ``self.network_data``; the objects returned by ``generate_json`` are
        then written one per line (``str(obj)`` followed by a newline).

        :return: path of the written file
            (``output_folder + '/' + replace_suffix(output_string)``).
        """
        # Fetch the network data, then add the species information.
        preparer = PrepareStringData(self.string_url, score_limit=self.score_limit)
        preparer.fetch_network_data()
        preparer.map_organism()

        # generate_json() reads the table from this attribute.
        self.network_data = preparer.get_data()
        entries = self.generate_json()

        output_file = self.output_folder + '/' + replace_suffix(self.output_string)
        logging.info('Saving table to: .... ' + output_file)

        # Text-mode gzip stream; each entry becomes one line.
        with gzip.open(output_file, "wt") as handle:

            def write_entry(entry):
                return handle.write(str(entry) + '\n')

            entries.apply(write_entry)

        return output_file

    def generate_json(self):
        """
        Apply ``StringJsonGenerator.generate_network_object`` to every row of
        ``self.network_data``.

        :return: the result of the row-wise ``apply`` call.
        """
        generator = StringJsonGenerator(self.network_json_schema_url)
        return self.network_data.apply(generator.generate_network_object, axis=1)
    
    