In [None]:
# Third-party modules
import pandas as pd
from tqdm import tqdm
from multitax import NcbiTx

# Custom modules
from src.misc import path
from src.misc.logger import logger
from src.sources.paper import Paper
from src.sources.scoring import Scoring
from src.entities.protein import Protein
from src.databases.network import Network
from src.entities.interactor import Interactor
from src.entities.protein_protein import ProteinProtein

In [None]:
# Taxonomy object from multitax; the database cell below calls
# ncbi_tx.name_lineage() on it to translate taxon IDs into names.
# NOTE(review): built with no arguments — presumably this loads (or downloads)
# the default NCBI taxonomy, which may be slow on a fresh kernel; confirm.
ncbi_tx = NcbiTx()

### Database source

In [None]:
# Load the MADS-vs-MADS network of each interaction database.
# The options below are common to every database.
network_options = {'type': 'MADS_vs_MADS', 'standarized': True}
intact = Network(db='IntAct', version='2024-09-08', **network_options)
biogrid = Network(db='BioGRID', version='4.4.238', **network_options)
pplappisite = Network(db='PlaPPISite', **network_options)

# Negatives are deliberately NOT added for the database source:
#intact.add_negatives_per_species()
#biogrid.add_negatives_per_species()
#pplappisite.add_negatives_per_species()

# Tag every row with the database it comes from
for network, source_name in (
    (intact, 'IntAct'),
    (biogrid, 'BioGRID'),
    (pplappisite, 'PlaPPISite'),
):
    network.df['From'] = source_name

# Combine the three networks into a single one
databases = Network.merge(intact, biogrid, pplappisite)

# Keep only the first row for each value of 'Seq'
# NOTE(review): which 'From' label survives presumably follows the merge
# order (IntAct, BioGRID, PlaPPISite) — confirm against Network.merge.
databases.df.drop_duplicates(subset='Seq', inplace=True)

# Add constant columns: every database record gets Interaction = 1 and
# Discrepancy = 0, and the bioID columns start as empty strings.
constant_columns = {
    'Interaction': 1,
    'Discrepancy': 0,
    'bioID_A': '',
    'bioID_B': '',
}
for column, value in constant_columns.items():
    databases.df[column] = value

# Rename the identifier, taxon and sequence columns in one pass
renamed_columns = {
    'A': 'UniProtID_A',
    'B': 'UniProtID_B',
    'Species_A': 'TaxonID_A',
    'Species_B': 'TaxonID_B',
    'Seq_A': 'Sequence_A',
    'Seq_B': 'Sequence_B',
}
databases.df.rename(columns=renamed_columns, inplace=True)

# Map each distinct taxon ID (from either side of the pair) to the last
# entry of its name lineage, then derive the Species_* columns from it.
taxon_ids = pd.concat([databases.df['TaxonID_A'], databases.df['TaxonID_B']]).unique()
taxonID2species = {
    taxon_id: ncbi_tx.name_lineage(str(taxon_id))[-1]
    for taxon_id in taxon_ids
}
for taxon_col, species_col in (('TaxonID_A', 'Species_A'), ('TaxonID_B', 'Species_B')):
    databases.df[species_col] = databases.df[taxon_col].map(taxonID2species)

# Keep only the columns of interest, in their final order
column_order = [
    'bioID_A', 'bioID_B',
    'UniProtID_A', 'UniProtID_B',
    'Sequence_A', 'Sequence_B',
    'Interaction', 'Discrepancy', 'From',
    'TaxonID_A', 'TaxonID_B',
    'Species_A', 'Species_B',
]
databases.df = databases.df[column_order]


### Literature mining source