# mySQL Pandas Dataframe --> SQL Table
This script will create a pandas dataframe corresponding to a test SQL table I have created.

#### Code for creating in the database (written in MySQL workbench)
CREATE DATABASE test;

CREATE TABLE kinase_db (
	id INT PRIMARY KEY AUTO_INCREMENT, #ignore when making df
	db_name VARCHAR(255),
    db_link TEXT, 
	db_description VARCHAR(255), #to be added later
    notebooks TEXT,
    date_retrieved DATE,
    pubmed_ids TEXT,
    pubmed_links TEXT
);

CREATE TABLE kin_networks (
	id INT PRIMARY KEY AUTO_INCREMENT,
    kinase_nr INT,
    substrate_nr INT,
    unique_terms INT,
	avg_terms INT,
    GMT TEXT, 
    SIG TEXT, 
	FOREIGN KEY (database_fk) REFERENCES kinase_db(id)
);



In [1]:
import pandas as pd
import numpy as np

In [126]:
kinases = pd.DataFrame(columns = ['id', 'db_name', 'db_url', 'kinase_nr', 'substrate_nr'
                                  ,'unique_terms','avg_terms', 'date_retrieved'])

DROP DATABASE test; 
CREATE DATABASE test;
USE test;

CREATE TABLE kinase_db (
	id INT NOT NULL,
	db_name VARCHAR(255),
    #jupyter_notebook TEXT,
    kinase_nr INT,
    substrate_nr INT,
    unique_terms INT,
	avg_terms INT,
    #db_link TEXT, 
	#db_description VARCHAR(255),
    date_retrieved DATE,
    PRIMARY KEY(id)
);

CREATE TABLE kin_pubmed_data (
	id INT,
    pmid TEXT,
    link TEXT,
    database_fk INT,
	PRIMARY KEY(id),
    FOREIGN KEY(database_fk) REFERENCES kinase_db(id)
);

CREATE TABLE kin_processed_files (
	id INT,
    file_type TEXT,
    file_link TEXT,
    hover_name TEXT, 
    database_fk INT,
	PRIMARY KEY(id),
    FOREIGN KEY(database_fk) REFERENCES kinase_db(id)
);

In [127]:
kinases = kinases.append([{
    'id': 1,
    'db_name': 'Phospho.ELM',
    'db_url': "http://phospho.elm.eu.org/index.html", 
    #'jupyter-url': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/PhosphoELM/PhosphoELM%20Final%20Notebook-%20KEA3.ipynb", 
    
    'kinase_nr': 96,
    'substrate_nr': 949,
    'unique_terms': 990,
    'avg_terms': 17,
    'date_retrieved': '2017-7-07',
    
}, {
    'id': 2,
    'db_name': 'PhosphoSite',
    'db_url':"http://www.phosphosite.org/homeAction.action", 
    #'jupyter-url':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/PhosphoSite/PhosphoSite%20Final%20Notebook-KEA3.ipynb", 
    
    'kinase_nr': 544,
    'substrate_nr': 2774,
    'unique_terms': 2812,
    'avg_terms': 12,
    'date_retrieved': '2017-7-14', 
  
},

    {
    'id': 6,
    'db_name': 'RegPhos-Human',
    'db_url': "http://140.138.144.141/~RegPhos/", 
    #'jupyter-url': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/RegPhos/RegPhos%20-%20Human%20-%20KEA3.ipynb", 
    'kinase_nr': 86,
    'substrate_nr': 938,
    'unique_terms': 979,
    'avg_terms': 19,
    'date_retrieved': '2017-7-14',    

},
    
    {
    'id': 7,
    'db_name': 'RegPhos-Mouse',
    'db_url': "http://140.138.144.141/~RegPhos/", 
    #'jupyter-url': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/RegPhos/RegPhos%20-%20Mouse%20-%20KEA3.ipynb", 
    
    'kinase_nr': 43,
    'substrate_nr': 326,
    'unique_terms': 369,
    'avg_terms': 12,
    'date_retrieved': '2017-7-14',

},
    
    {
    'id':8,
    'db_name': 'NetworKIN',
    'db_url': "http://hprd.org/", 
    #'jupyter-url': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/HPRD/HPRD%20Final%20Notebook%20-%20KEA3.ipynb", 
    
    'kinase_nr': 181,
    'substrate_nr': 5094,
    'unique_terms': 5210,
    'avg_terms': 157,
    'date_retrieved': '2017-7-15',


},
    {
    'id': 9,
    'db_name': 'HPRD',
    'db_url': "http://networkin.info/", 
    #'jupyter-url': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/NetworKIN/NetworKIN-%20Working%20-%20KEA3.ipynb", 
    
    'kinase_nr': 80,
    'substrate_nr': 810,
    'unique_terms': 824,
    'avg_terms': 20,
    'date_retrieved': '2017-7-07',
    
}
], ignore_index = True)

In [128]:
articles = pd.DataFrame(columns = ['pmid', 'link', 'database_fk'])

In [129]:
articles = articles.append([{
    'id': 1,
    'pmid': '21062810',
    'link':"https://www.ncbi.nlm.nih.gov/pubmed/?term=21062810",
    'database_fk': 1,
    
},
{
    'id': 2,
    'pmid':'17962309',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/17962309",
    'database_fk': 1,
    },
    
    {
    'id': 3,
    'pmid': '15212693',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/15212693" ,
    'database_fk': 1,
    }, 
    
    {
    'id': 4,
    'pmid': '25514926',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=25514926",
    'database_fk': 2,
        
    },
    {
    'id': 5,
    'pmid': '22135298',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/22135298",
    'database_fk': 2,

},
    {
    'id': 6,
    'pmid':'22096227',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=22096227",
    'database_fk': 3,
    },
    
    {
    'id': 14,
    'pmid': '21037261',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/21037261",
    'database_fk': 6,
    },
    
    {
      'id': 15,
    'pmid': '24771658',
    'link':  "https://www.ncbi.nlm.nih.gov/pubmed/24771658",
    'database_fk': 6,
    },
    
    {
'id': 16,
    'pmid': '21037261',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/21037261",
    'database_fk': 7,
    },
    
    {
        'id': 17,
    'pmid': '24771658',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/24771658",
    'database_fk': 7,
    },
    
    {
'id': 18,
    'pmid': '17981841',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=17981841",
        'database_fk': 8,

},
    {
    'id': 19,
    'pmid': '18988627',
    'link':"https://www.ncbi.nlm.nih.gov/pubmed/?term=18988627",
    'database_fk': 9,
    },
    
    {
    'id': 20, 
    'pmid':'16381900',
    'link':"https://www.ncbi.nlm.nih.gov/pubmed/16381900",
     'database_fk': 9 ,  
    },
    
    {
    'id': 21, 
    'pmid':'14681466',
    'link': "https://www.ncbi.nlm.nih.gov/pubmed/14681466",
    'database_fk': 9,
    }
    ], ignore_index = True)

In [130]:
kinases

Unnamed: 0,id,db_name,db_url,kinase_nr,substrate_nr,unique_terms,avg_terms,date_retrieved
0,1,Phospho.ELM,http://phospho.elm.eu.org/index.html,96,949,990,17,2017-7-07
1,2,PhosphoSite,http://www.phosphosite.org/homeAction.action,544,2774,2812,12,2017-7-14
2,6,RegPhos-Human,http://140.138.144.141/~RegPhos/,86,938,979,19,2017-7-14
3,7,RegPhos-Mouse,http://140.138.144.141/~RegPhos/,43,326,369,12,2017-7-14
4,8,NetworKIN,http://hprd.org/,181,5094,5210,157,2017-7-15
5,9,HPRD,http://networkin.info/,80,810,824,20,2017-7-07


In [131]:
articles

Unnamed: 0,database_fk,id,link,pmid
0,1,1.0,https://www.ncbi.nlm.nih.gov/pubmed/?term=2106...,21062810
1,1,2.0,https://www.ncbi.nlm.nih.gov/pubmed/17962309,17962309
2,1,3.0,https://www.ncbi.nlm.nih.gov/pubmed/15212693,15212693
3,2,4.0,https://www.ncbi.nlm.nih.gov/pubmed/?term=2551...,25514926
4,2,5.0,https://www.ncbi.nlm.nih.gov/pubmed/22135298,22135298
5,3,6.0,https://www.ncbi.nlm.nih.gov/pubmed/?term=2209...,22096227
6,6,14.0,https://www.ncbi.nlm.nih.gov/pubmed/21037261,21037261
7,6,15.0,https://www.ncbi.nlm.nih.gov/pubmed/24771658,24771658
8,7,16.0,https://www.ncbi.nlm.nih.gov/pubmed/21037261,21037261
9,7,17.0,https://www.ncbi.nlm.nih.gov/pubmed/24771658,24771658


In [132]:
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://root:***@localhost:3306/test')

In [133]:
kinases.to_sql('kinase_db', con= engine, if_exists = 'replace', index = False)

In [134]:
articles.to_sql('kin_pubmed_data', con=engine, if_exists = 'replace', index = False)

In [135]:
## Create third table 'kin_notebooks'
kin_notebooks = pd.DataFrame(columns = ['id', 'notebook_name',
                                 'notebook_link', 'database_fk'])

In [136]:
kin_notebooks = kin_notebooks.append([{
    
    'id': 1,
    'notebook_name':"Phospho.ELM Data Formatting to GMT and SIG",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/PhosphoELM/PhosphoELM%20Final%20Notebook-%20KEA3.ipynb",
    'database_fk': 1,
},
    {
    
    'id':2,
    'notebook_name':"PhosphoSite Data Formatting to GMT and SIG",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/PhosphoSite/PhosphoSite%20Final%20Notebook-KEA3.ipynb",
    'database_fk': 2,
},
    
    {
    
    'id':6,
    'notebook_name': "RegPhos Human Data Formatting to GMT and SIG", 
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/RegPhos/RegPhos%20-%20Human%20-%20KEA3.ipynb",
    'database_fk': 6,
},
    
    
    {
    
    'id':7,
    'notebook_name': "RegPhos Mouse Data Formatting to GMT and SIG", 
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/RegPhos/RegPhos%20-%20Mouse%20-%20KEA3.ipynb",
    'database_fk': 7,
},
    
    {
    
    'id':8,
    'notebook_name':"NetworKIN Data Formatting to GMT and SIG",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/NetworKIN/NetworKIN-%20Working%20-%20KEA3.ipynb",
    'database_fk': 8,
},
    
{
    
    'id':9,
    'notebook_name': "HPRD Data Formatting to GMT and SIG",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/HPRD/HPRD%20Final%20Notebook%20-%20KEA3.ipynb",
    'database_fk': 9,
},

], ignore_index = True)

In [137]:
kin_notebooks.to_sql('kin_notebooks', con=engine, if_exists = 'replace', index = False)

In [138]:
kin_processed_files = pd.DataFrame(columns = ['id', 'file_type', 'file_link', 'database_fk'])

In [139]:
kin_processed_files = kin_processed_files.append([{
    
    'id': 1,
    'file_type': 'GMT',
    'file_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/PhosphoELM/PhosphoELM.gmt",
    'database_fk': 1,
    
},
    
    {
    'id': 2,
    'file_type': 'SIG',
    'file_link': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PhosphoELM/PhosphoELMsig.txt",
    'database_fk': 1,
},
    
    {
    
    'id': 3,
    'file_type': 'GMT',
    'file_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/PhosphoSite/PhosphoSite.gmt",
    'database_fk': 2,
    
},
    
    {
    'id': 4,
    'file_type': 'SIG',
    'file_link': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PhosphoSite/PhosphoSitesig.txt",
    'database_fk': 2,
},

    {
    
    'id': 11,
    'file_type': 'GMT',
    'file_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/RegPhos/RegPhosHuman.gmt",
    'database_fk' :6,
    
},
    
    {
    'id' : 12,
    'file_type': 'SIG',
    'file_link': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/RegPhos/RegPhosHumansig.txt",
    'database_fk': 6,
},
    
     {
    
    'id': 13,
    'file_type': 'GMT',
    'file_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/RegPhos/RegPhosMouse.gmt",
    'database_fk': 7,
    
},
    
    {
    'id' : 14,
    'file_type': 'SIG',
    'file_link': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/RegPhos/RegPhosMousesig.txt",
    'database_fk': 7,
},
    
    {
    
    'id': 15,
    'file_type': 'GMT',
    'file_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/NetworKIN/NetworKIN.gmt",
    'database_fk': 8,
    
},
    
    {
    'id' : 16,
    'file_type': 'SIG',
    'file_link': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/NetworKIN/NetworKINsig.txt",
    'database_fk' : 8,
},
    
    {
    
    'id': 17,
    'file_type': 'GMT',
    'file_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/working/HPRD/HPRD_PTM.gmt",
    'database_fk' : 9,
    
},
    
    {
    'id' : 18,
    'file_type': 'SIG',
    'file_link': "http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/HPRD/HPRDsig.txt",
    'database_fk': 9,
}]
    , ignore_index = True)

In [140]:
kin_processed_files.to_sql('kin_processed_files', con=engine, if_exists = 'replace', index = False)

In [3]:
ppi_articles = pd.DataFrame(columns = ['id', 'pmid', 'link', 'database_fk'])

In [5]:
ppi_articles = ppi_articles.append([{

'id': 1,
'pmid': '27980099',
'link': 'https://www.ncbi.nlm.nih.gov/pubmed/27980099',
'database_fk': 1
},

{

'id': 2,
'pmid': '25428363',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/25428363",
'database_fk': 1
},

{

'id': 3,
'pmid':'23203989',
'link': 'https://www.ncbi.nlm.nih.gov/pubmed/23203989',
'database_fk': 1
},

{

'id': 4,
'pmid':'21071413',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/21071413",
'database_fk': 1
},

{

'id': 5,
'pmid': '18000002',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=18000002",
'database_fk': 1
},

{
'id': 6,
'pmid': '16381927',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/16381927",
'database_fk': 1
},

{

'id': 7,
'pmid': '14681454',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=14681454",
'database_fk': 2
},

{

'id': 8,
'pmid': '11125102',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=11125102",
'database_fk': 2
},

{

'id': 9,
'pmid': '11752321',	
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=11752321",
'database_fk': 2
},

{

'id': 10,
'pmid': '10592249',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=10592249",
'database_fk': 2
},

{

'id': 11,
'pmid': '23180781',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=23180781",
'database_fk': 3
},

{

'id': 12,
'pmid': '18766178',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/18766178",
'database_fk': 3
},

{

'id': 13,
'pmid': '24234451',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=24234451",
'database_fk': 4
},

{

'id': 14,
'pmid':'22121220',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=22121220",
'database_fk': 4
},

{

'id': 15,
'pmid':'19850723',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=19850723",
'database_fk': 4
},

{

'id': 16,
'pmid':'17145710',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=17145710",
'database_fk': 4
},

{

'id': 17,
'pmid':'14681455',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=14681455",
'database_fk': 4
},

{

'id': 17,
'pmid': '19176546',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=19176546",
'database_fk': 5
},

{

'id': 18,
'pmid': '23900247',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=23900247",
'database_fk': 6
},

{
      'id': 19,
     'pmid': '19897547',
     'link':"https://www.ncbi.nlm.nih.gov/pubmed/19897547",
     'database_fk': 7,
    },
    
    {
        'id': 20,
        'pmid': '17135203' ,
        'link': "https://www.ncbi.nlm.nih.gov/pubmed/17135203",
        'database_fk': 7,
    },
    {
        
        'id': 21,
        'pmid':'11911893',
        'link': "https://www.ncbi.nlm.nih.gov/pubmed/11911893",
        'database_fk': 7,
    },

{

'id': 22,
'pmid':'19154595',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=19154595",
'database_fk': 8
},
    
    {

'id': 23,
'pmid': 'No PubMed ID',
'link': "#",
'database_fk': 9
},
    
    {

'id': 24,
'pmid': '10592173',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=10592173",
'database_fk': 10
},
    
    {

'id': 25,
'pmid': 'No PubMed ID',
'link': "#",
'database_fk': 11
},
    
    {

'id': 26,
'pmid': 'No PubMed ID',
'link': "#",
'database_fk': 12
},
    
    {

'id': 27,
'pmid': '28514442',
'link': "https://www.ncbi.nlm.nih.gov/pubmed/?term=28514442",
'database_fk': 13
},

], ignore_index=True)

In [None]:
ppi_articles.to_sql('ppi_pubmed_data', con=engine, if_exists = 'replace', index = False)

In [None]:
ppi_notebooks = pd.DataFrame('id', 'notebook_name', 'notebook_link', 'database_fk')

In [None]:
ppi_notebooks = ppi_notebooks.append([{
    
    'id': 1,
    'notebook_name':"BioGrid Protein Interaction Network Data Processing",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PPI/BioGrid/BioGrid%20Data%20Formatting.ipynb",
    'database_fk':  1
},
    
    {
    
    'id':2,
    'notebook_name':"DIP Protein Interaction Network Data Processing",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PPI/DIP/DIP%20Data%20Formatting.ipynb",
    'database_fk':  2
},
    
    {
    
    'id':3,
    'notebook_name': "InnateDB Protein Interaction Network Data Processing",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PPI/InnateDB/InnateDB%20Data%20Formatting.ipynb",
    'database_fk': 3  
},
    
    {
    
    'id': 4,
    'notebook_name':"IntAct Protein Interaction Network Data Processing",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PPI/IntAct/IntAct.ipynb",
    'database_fk':4   
},
    
    {
    
    'id': 5,
    'notebook_name':"KEA Protein Interaction Network Data Processing",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PPI/KEA/KEA%20Data%20Formatting.ipynb",
    'database_fk': 5  
},
    
    {
    
    'id': 6
    'notebook_name':"mentha Protein Interaction Network Data Processing",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PPI/mentha/mentha%20Data%20Formatting.ipynb",
    'database_fk':6   
},
    {
    
    'id':7,
    'notebook_name':"MINT Protein Interaction Network Data Processing",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PPI/MINT/MINT%20%28PPI%29%20Data%20Formatting.ipynb",
    'database_fk':   7
},
    
    {
    
    'id':8,
    'notebook_name':"SNAVI Protein Interaction Network Data Processing",
    'notebook_link':"http://nbviewer.jupyter.org/github/mlatif1/KEA3/blob/master/PPI/SAVI/SAVI%20Data%20Formatting.ipynb",
    'database_fk':   8
},
    
    {
    
    'id':9,
    'notebook_name':"BioCarta SIG to GMT Data Conversion",
    'notebook_link':"http://nbviewer.jupyter.org/github/MaayanLab/KEA3/blob/master/PPI/BioCarta/BioCarta%20SIG%20Conversion%20to%20GMT.ipynb",
    'database_fk':9
},
    
    {
    
    'id':10,
    'notebook_name':"KEGG SIG to GMT Data Conversion",
    'notebook_link':"http://nbviewer.jupyter.org/github/MaayanLab/KEA3/blob/master/PPI/KEGG/KEGG%20SIG%20Conversion.ipynb",
    'database_fk': 10, 
},
    
    {
    
    'id':11,
    'notebook_name':"huMAP SIG to GMT Data Conversion",
    'notebook_link':"http://nbviewer.jupyter.org/github/MaayanLab/KEA3/blob/master/PPI/humap/HuMap%20SIG%20to%20GMT%20Conversion.ipynb",
    'database_fk': 11  
},
    
    {
    
    'id':12,
    'notebook_name':"ppid SIG to GMT Data Conversion",
    'notebook_link':"http://nbviewer.jupyter.org/github/MaayanLab/KEA3/blob/master/PPI/ppid/ppid%20SIG%20Conversion%20to%20GMT.ipynb",
    'database_fk': 12
},
    
    {
    
    'id':13,
    'notebook_name':"BioPlex GMT to SIG Data Conversion",
    'notebook_link':"http://nbviewer.jupyter.org/github/MaayanLab/KEA3/blob/master/PPI/BioPlex_2017/BioPlex%20GMT%20to%20SIG%20Conversion.ipynb",
    'database_fk': 13 
},


]

In [None]:
ppi_notebooks.to_sql('ppi_notebooks', con=engine, if_exists = 'replace', index = False)

In [None]:
ppi_processed_files = pd.DataFrame(columns = ['id', 'file_type', 'file_link', 'database_fk'])

In [None]:
ppi_processed_files = ppi_processed_files.append([{
    
    'id': 1,
    'file_type': 'SIG',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/BioCarta/Biocarta.sig',
    'database_fk':9
    
},
    
    {
    
    'id':2,
    'file_type':'GMT',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/BioCarta/BioCarta_ppi.gmt',
    'database_fk':9
    
},
    
    {
    
    'id':3,
    'file_type': 'SIG (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/BioGrid/biogrid_filtered_ppi_2017_07_13.sig',
    'database_fk':1
    
},
    
    {
    
    'id':4,
    'file_type': 'GMT (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/BioGrid/biogrid_ppi_filtered.gmt',
    'database_fk':1
    
},
    
    {
    
    'id':5,
    'file_type': 'SIG (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/BioGrid/biogrid_unfiltered_ppi_2017_07_13.sig',
    'database_fk':1
    
},
    
    {
    
    'id':6,
    'file_type': 'GMT (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/BioGrid/biogrid_ppi_unfiltered.gmt',
    'database_fk':1
    
},
    
    {
    
    'id':7,
    'file_type':'SIG',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/BioPlex_2017/BioPlex.sig',
    'database_fk':13
    
},
    
    {
    
    'id':8,
    'file_type':'GMT',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/BioPlex_2017/BioPlex_ppi.%20gmt',
    'database_fk':13
    
},
    
    {
    
    'id':9,
    'file_type':'SIG (Filtered)',
    'file_link': 'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/DIP/dip_filtered_ppi_2017_07_12.sig',
    'database_fk':2
    
},
    
    {
    
    'id':10,
    'file_type':'GMT (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/DIP/dip_ppi_filtered.gmt',
    'database_fk':2
    
},
    
    {
    
    'id':11,
    'file_type': 'SIG (Unfiltered)',
    'file_link': 'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/DIP/dip_unfiltered_ppi_2017_07_12.sig',
    'database_fk':2
    
},
    
    {
    
    'id':12,
    'file_type': 'GMT (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/DIP/dip_ppi_unfiltered.gmt',
    'database_fk':2
    
},
    {
    
    'id':13,
    'file_type': 'SIG (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/InnateDB/innateDB_filtered_ppi_2017_07_12.sig',
    'database_fk':3
    
},
    
    {
    
    'id':14,
    'file_type': 'GMT (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/InnateDB/innatedb_ppi_filtered.gmt',
    'database_fk':3
    
},
    
     {
    
    'id':15,
    'file_type': 'SIG (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/InnateDB/innateDB_unfiltered_ppi_2017_07_12.sig',
    'database_fk':3
    
},
    
    {
    
    'id':16,
    'file_type': 'GMT (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/InnateDB/innatedb_ppi_unfiltered.gmt',
    'database_fk':3
    
},
    
         {
    
    'id':17,
    'file_type': 'SIG (Filtered)',
    'file_link':'https://github.com/MaayanLab/KEA3/blob/master/PPI/IntAct/intact_filtered_ppi_2017_07_13.sig',
    'database_fk':4
    
},
    
    {
    
    'id':18,
    'file_type': 'GMT (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/IntAct/intact_ppi_filtered.gmt',
    'database_fk':4
    
},
    
         {
    
    'id':19,
    'file_type': 'SIG (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/IntAct/intact_unfiltered_ppi_2017_07_12.sig',
    'database_fk':4
    
},
    
    {
    
    'id':20,
    'file_type': 'GMT (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/IntAct/intact_ppi_unfiltered.gmt',
    'database_fk':4
    
},
    
             {
    
    'id':21,
    'file_type': 'SIG (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/KEA/kea_filtered_ppi_2017_07_12.sig',
    'database_fk':5
    
},
    
    {
    
    'id':22,
    'file_type': 'GMT (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/KEA/kea_ppi_filtered.gmt',
    'database_fk':5
    
},
    
         {
    
    'id':23,
    'file_type': 'SIG (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/KEA/kea_unfiltered_ppi_2017_07_12.sig',
    'database_fk':5
    
},
    
    {
    
    'id':24,
    'file_type': 'GMT (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/KEA/kea_ppi_unfiltered.gmt',
    'database_fk':5
    
},
    
             {
    
    'id':25,
    'file_type': 'SIG',
    'file_link':'https://github.com/MaayanLab/KEA3/blob/master/PPI/KEGG/KEGG.sig',
    'database_fk':10
    
},
    
    {
    
    'id':26,
    'file_type': 'GMT',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/KEGG/KEGG_ppi.gmt',
    'database_fk':10
    
},
    
    {
    
    'id':27,
    'file_type': 'SIG (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/MINT/mint_filtered_ppi_2017_07_12.sig',
    'database_fk':7
    
},
    
    {
    
    'id':28,
    'file_type': 'GMT (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/MINT/mint_ppi_filtered.gmt',
    'database_fk':7
    
},
    
         {
    
    'id':29,
    'file_type': 'SIG (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/MINT/mint_unfiltered_ppi_2017_07_12.sig',
    'database_fk':7
    
},
    
    {
    
    'id':30,
    'file_type': 'GMT (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/MINT/mint_ppi_unfiltered.gmt',
    'database_fk':7
    
},
    
             {
    
    'id':31,
    'file_type': 'SIG (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/SAVI/snavi_filtered_ppi_2017_07_12.sig',
    'database_fk':8
    
},
    
    {
    
    'id':32,
    'file_type': 'GMT (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/SAVI/snavi_ppi_filtered.gmt',
    'database_fk':8
    
},
    
         {
    
    'id':33,
    'file_type': 'SIG (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/SAVI/snavi_unfiltered_ppi_2017_07_12.sig',
    'database_fk':8
    
},
    
    {
    
    'id':34,
    'file_type': 'GMT (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/SAVI/snavi_ppi_unfiltered.gmt',
    'database_fk':8
    
},
    
    {
    
    'id':35,
    'file_type': 'SIG',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/humap/huMAP.sig',
    'database_fk':11
    
},
    
    {
    
    'id':36,
    'file_type': 'GMT',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/humap/huMAP_ppi.gmt',
    'database_fk':11
    
},
    
    {
    
    'id':37,
    'file_type': 'SIG (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/mentha/mentha_filtered_ppi_2017_07_13.sig',
    'database_fk':6
    
},
    
    {
    
    'id':38,
    'file_type': 'GMT (Filtered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/mentha/mentha_ppi_filtered.gmt',
    'database_fk':6
    
},
    
         {
    
    'id':39,
    'file_type': 'SIG (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/mentha/mentha_unfiltered_ppi_2017_07_13.sig',
    'database_fk':6
    
},
    
    {
    
    'id':40,
    'file_type': 'GMT (Unfiltered)',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/mentha/mentha_ppi_unfiltered.gmt',
    'database_fk':6
    
},
    
    {
    
    'id':41,
    'file_type': 'SIG',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/ppid/ppid.sig',
    'database_fk':12
    
},
    
    {
    
    'id':42,
    'file_type': 'GMT',
    'file_link':'https://raw.githubusercontent.com/MaayanLab/KEA3/master/PPI/ppid/ppid_ppi.gmt',
    'database_fk':12
    
},

],ignore_index = True)

In [None]:
ppi_db = pd.DataFrame(columns = ['id', 'db_name', 'db_url', 'protein_nr', 'interaction_nr', 'hub_nr', 'avg_terms',
                                'date_retrieved'])

In [None]:
ppi_nr = pd.DataFrame(columns = ['id', 'type', 'protein_nr', 'interaction_nr', 'hub_nr', 'avg_terms', 'database_fk'])

In [None]:
ppi_nr = ppi_nr.append([{
 
    'id': 1,
    'type': 'Filtered',
    'protein_nr':7496,
    'interaction_nr':63022,
    'hub_nr':3493,
    'avg_terms':18,
    'database_fk':1
    
},

{
 
    'id':2, 
    'type': 'Unfiltered',
    'protein_nr':16009,
    'interaction_nr':470726, 
    'hub_nr':11428,
    'avg_terms':41,
    'database_fk':1
    
},

{
 
    'id':3,
    'type': 'Unfiltered',
    'protein_nr':2059,
    'interaction_nr':5119,
    'hub_nr':582,
    'avg_terms':9,
    'database_fk':2
    
},

{
 
    'id':4,
    'type': 'Filtered',
    'protein_nr':1552, 
    'interaction_nr':3170,
    'hub_nr':395,
    'avg_terms':8,
    'database_fk':2
    
},

{
 
    'id':5,
    'type': 'Filtered',
    'protein_nr':1752,
    'interaction_nr':6051,
    'hub_nr':507,
    'avg_terms':12,
    'database_fk':3
    
},

{
 
    'id':6,
    'type': 'Unfiltered',
    'protein_nr':4378,
    'interaction_nr':13876,
    'hub_nr':879,
    'avg_terms':16,
    'database_fk':3
    
},

{
 
    'id':7,
    'type': 'Filtered',
    'protein_nr':3941,
    'interaction_nr':13197,
    'hub_nr':1225,
    'avg_terms':11,
    'database_fk':4
    
},

{
 
    'id':8,
    'type': 'Unfiltered',
    'protein_nr':14522,
    'interaction_nr':299352,
    'hub_nr':9490,
    'avg_terms':32,
    'database_fk':4
    
},

{
 
    'id':9,
    'type': 'Filtered',
    'protein_nr':1575,
    'interaction_nr':5388,
    'hub_nr': 325,
    'avg_terms':17,
    'database_fk':5
    
},

{
 
    'id':10,
    'type': 'Unfiltered',
    'protein_nr':3121,
    'interaction_nr':16762,
    'hub_nr':848,
    'avg_terms':20,
    'database_fk':5
    
},

{
 
    'id':11,
    'type': 'Filtered',
    'protein_nr':9053,
    'interaction_nr':96569,
    'hub_nr':4766,
    'avg_terms':20,
    'database_fk':6
    
},

{
 
    'id':12,
    'type': 'Unfiltered',
    'protein_nr':15719,
    'interaction_nr':512389,
    'hub_nr':11856,
    'avg_terms':43,
    'database_fk':6
    
},

{
 
    'id':13,
    'type': 'Filtered',
    'protein_nr':2203,
    'interaction_nr':5517,
    'hub_nr':554,
    'avg_terms':10,
    'database_fk':7
    
},

{
 
    'id':14,
    'type': 'Unfiltered'
    'protein_nr':6902,
    'interaction_nr':35603,
    'hub_nr':2379,
    'avg_terms':15,
    'database_fk':7
    
},

{
 
    'id':15,
    'type': 'Filtered',
    'protein_nr':468,
    'interaction_nr':1622,
    'hub_nr':165,
    'avg_terms':10,
    'database_fk':8
    
},

{
 
    'id':16,
    'type': 'Unfiltered',
    'protein_nr':481,
    'interaction_nr':1675,
    'hub_nr':169,
    'avg_terms':10,
    'database_fk':8
    
},

{
 
    'id':17,
    'type': 'Filtered',
    'protein_nr':
    'interaction_nr':
    'hub_nr':
    'avg_terms':
    'database_fk':9
    
},

{
 
    'id':18,
    'type': 'Unfiltered',
    'protein_nr':
    'interaction_nr':
    'hub_nr':
    'avg_terms':
    'database_fk':9
    
},])

In [None]:
ppi_db = ppi_db.append([{
    
    'id': 1,
    'db_name': 'BioGrid',
    'db_url': "https://thebiogrid.org/",
    'date_retrieved': '2017-1-25'
    
    
},
    {
    
    'id': 2,
    'db_name': 'DIP',
    'db_url':"http://dip.doe-mbi.ucla.edu/dip/Main.cgi",
    'date_retrieved': '2017-1-25'
    
    
},
    {
    
    'id':3, 
    'db_name': 'InnateDB',
    'db_url':"http://www.innatedb.com/",
    'date_retrieved': '2017-1-25'
    
    
},
    {
    
    'id':4,
    'db_name': 'IntAct',
    'db_url':"http://www.ebi.ac.uk/intact/",

    'date_retrieved': '2017-1-26'
    
    
},
    {
    
    'id':5,
    'db_name':'KEA',
    'db_url':"http://www.maayanlab.net/KEA2/",

    'date_retrieved':'2017-2-01'
    
    
},
    {
    
    'id':6,
    'db_name':'mentha',
    'db_url':"http://mentha.uniroma2.it/about.php",

    'date_retrieved':'2017-1-26'
    
    
},
    {
    
    'id':7,
    'db_name':'MINT',
    'db_url':"http://mint.bio.uniroma2.it/index.py",

    'date_retrieved':'2017-1-23'
    
    
},
    {
    
    'id':8,
    'db_name': 'SNAVI',
    'db_url':"https://code.google.com/archive/p/snavi/",
    'date_retrieved':'2017-2-01'
    
    
},
    {
    
    'id':9,
    'db_name':"BioCarta",
    'db_url':"https://cgap.nci.nih.gov/Pathways/BioCarta_Pathways",
    'date_retrieved': 'Pre-2017'
    
    
},
    {
    
    'id':10,
    'db_name': 'KEGG',
    'db_url':"http://www.genome.jp/kegg/kegg1.html",
    'date_retrieved': 'Pre-2017'
    
    
},
    
    {
    
    'id':11,
    'db_name': 'hu.MAP',
    'db_url':"http://proteincomplexes.org/",
    'date_retrieved':'Pre-2017'
    
    
},
    {
    
    'id':12,
    'db_name': 'ppid',
    'date_retrieved':'2017-6-09'
    
    
},
    
    {
    
    'id':13,
    'db_name': 'BioPlex',
    'db_url': "http://bioplex.hms.harvard.edu/",
    'date_retrieved':'2017-7-09'
    
    
},

    ], ignore_index = True)