In [1]:
import os
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.types import Integer, Text, String, Float, DateTime
from datetime import datetime

In [2]:
def build_DB_URI(db_type, db_lib, user_id, password, db_name,  db_location='localhost', port='5432' ):
    '''
        Build a database URI for SQLAlchemy's create_engine. Written with
        PostgreSQL in mind, but generic over dialect and driver.

        args:

        db_type     --> the SQLAlchemy dialect name, e.g. 'postgresql', 'mysql'

        db_lib      --> the SQLAlchemy driver (DBAPI) for db_type,
                        e.g. 'psycopg2' or 'pymysql'

        user_id     --> the database user name, which must have the
                        appropriate permissions

        password    --> the password for user_id. None is treated as an
                        empty password.
        db_name     --> the name of the database, e.g. 'esomeprazole'
        db_location --> the host name / address of the database. DEFAULT = localhost
        port        --> the port of the database, as str or int. DEFAULT = 5432

        returns:
        db_URI     --> the URI for SQLAlchemy, of the form:
                       postgresql+psycopg2://user_id:password@db_location:5432/db_name

        The user name and password are percent-encoded, so characters such as
        '@', ':' or '/' inside them cannot be mistaken for URI delimiters.
    '''
    # Local import keeps the function self-contained within the notebook.
    from urllib.parse import quote_plus

    safe_user = quote_plus(str(user_id))
    safe_password = quote_plus('' if password is None else str(password))

    # str.format stringifies every field, so an integer port is accepted too.
    db_URI = '{}+{}://{}:{}@{}:{}/{}'.format(
        db_type, db_lib, safe_user, safe_password, db_location, port, db_name
    )

    return db_URI

In [3]:
# Connection settings for the openFDA database.
# SQLAlchemy 1.4+ only accepts the dialect name 'postgresql'; the legacy
# 'postgres' alias fails with NoSuchModuleError.
db_type = 'postgresql'
db_lib = 'psycopg2'
user_id = 'bhima'
# Read the password from the environment rather than hardcoding it; falls back
# to the empty password used previously when PGPASSWORD is not set.
password = os.environ.get('PGPASSWORD', '')
db_name = 'openfda'

db_URI = build_DB_URI(db_type, db_lib, user_id, password, db_name)
db_engine = create_engine(db_URI, echo=False)
# Open exactly one connection; a second, unassigned connect() call would only
# check out an extra connection that nothing uses.
connection = db_engine.connect()

In [6]:
# Report id, substance, generic name, reaction and reporter country from
# merged_table, ordered by report id. Adjacent literals are concatenated by
# the parser, so the resulting statement text is a single line.
sql_query = (
    'SELECT  m.safetyreportid, m.activesubstancename, m.openfda_generic_name, '
    'm.reactionmeddrapt, m.primarysource_reportercountry '
    'FROM merged_table m '
    'ORDER BY  m.safetyreportid;'
)

In [7]:
# Run the query through the engine and load the full result set as a DataFrame.
df = pd.read_sql(sql=sql_query, con=db_engine)

In [None]:
# Write the frame back to the database as public.drug_reaction_and_country,
# replacing any existing table of that name, in batches of 1000 rows and
# without the DataFrame index.
df.to_sql(
    name='drug_reaction_and_country',
    con=db_engine,
    schema='public',
    if_exists='replace',
    index=False,
    chunksize=1000,
)

In [None]:
# Summary statistics for the frame loaded from merged_table.
df.describe()

In [None]:
# Group the reports by (reaction, active substance) and print np.size applied
# to each group.
drug_and_raction_grouped = df.groupby(by=['reactionmeddrapt', 'activesubstancename'])
print(drug_and_raction_grouped.aggregate(np.size))

In [None]:
# Distinct (active substance, reaction) pairs.
drug_and_raction_df = (
    df.loc[:, ['activesubstancename', 'reactionmeddrapt']]
    .drop_duplicates()
)

In [None]:
# Summary statistics for the de-duplicated (active substance, reaction) pairs.
drug_and_raction_df.describe()

In [None]:
# Mapping of each (reaction, active substance) key to the row labels in its group.
(
    drug_and_raction_df
    .groupby(by=['reactionmeddrapt', 'activesubstancename'])
    .groups
)

In [None]:
# Group the reports by (active substance, openFDA generic name) and print
# np.size applied to each group.
# NOTE(review): the variable name mentions "reaction", but the grouping keys
# are substance name and generic name -- confirm which pairing is intended.
genericname_and_raction_grouped = df.groupby(by=['activesubstancename', 'openfda_generic_name'])
print(genericname_and_raction_grouped.aggregate(np.size))

In [None]:
# Distinct (active substance, openFDA generic name) pairs.
# NOTE(review): the variable name mentions "reaction", but 'reactionmeddrapt'
# is not selected here -- confirm which pairing is intended.
genericname_and_raction_df = (
    df.loc[:, ['activesubstancename', 'openfda_generic_name']]
    .drop_duplicates()
)

In [None]:
# Summary statistics for the de-duplicated pairs held in genericname_and_raction_df.
genericname_and_raction_df.describe()

In [None]:
# Mapping of each distinct key to the row labels in its group.
# The frame is built from 'activesubstancename' and 'openfda_generic_name'
# only, so grouping by 'reactionmeddrapt' would raise KeyError. Grouping by
# the frame's own columns keeps this cell valid for whichever pair it holds.
(
    genericname_and_raction_df
    .groupby(list(genericname_and_raction_df.columns))
    .groups
)

In [None]:
# Group the reports by (reaction, reporter country) and print np.size applied
# to each group.
# Stored under its own name so it does not overwrite the generic-name grouping
# object with unrelated data.
country_and_reaction_grouped = df.groupby(['reactionmeddrapt', 'primarysource_reportercountry'])
print(country_and_reaction_grouped.agg(np.size))

In [12]:
# Distinct (reaction, reporter country) pairs.
country_and_reaction_df = (
    df.loc[:, ['reactionmeddrapt', 'primarysource_reportercountry']]
    .drop_duplicates()
)

In [13]:

# Summary statistics for the de-duplicated (reaction, reporter country) pairs.
country_and_reaction_df.describe()

Unnamed: 0,reactionmeddrapt,primarysource_reportercountry
count,54110,54110
unique,9613,141
top,Pyrexia,US
freq,82,7771


In [None]:
# Mapping of each (reaction, reporter country) key to the row labels in its group.
(
    country_and_reaction_df
    .groupby(by=['reactionmeddrapt', 'primarysource_reportercountry'])
    .groups
)