In [1]:
import os
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.types import Integer, Text, String, Float
#import psycopg2



In [2]:
def build_DB_URI(db_type, db_lib, user_id, password, db_name,  db_location='localhost', port='5432' ):
    '''
        A method which generates a DB_URI for SQL-Alchemey. Assumption that this will be
        used with Postgresql, however written to be generic.

        arg:

        db_type     --> the type of database, e.g 'postgres', 'mysql'

        db_lib      --> the appropriate sql-alchemy plughin for 
                        db_type, e.g 'psycopg2' or 'pymysql'

        user_id     --> the user name for the database, who has 
                        appropriate permissions

        password    --> the password for the db-user-id.
        db_name     --> the name of the db, e.g. 'esomeprazole'
        db_location --> the address / URL for the database. DEFAULT = localhost
        port        --> the port for the database. DEFAULT = 5432
        
        returns:
        db_URI     --> The URI for SQL-Alchemy of the form:
                       postgres+psycop2://user_id:password@db_location:5432/db_name

    '''
    
    db_URI = db_type+'+'+db_lib+'://'+user_id+':'+password+'@'+db_location+':'+port+'/'+db_name

    return db_URI

====

In [3]:
def get_db_cols(df):
    '''
    A method which converts the col-dtype from Pandas/Numpy 
    to the SQLAlchemy equivelent. 
    args:
    
    df ---> A pandas DataFrame
    
    
    returns:
    
    db_cols --> a dictionary with column-name as key and SQL-Alchemy
                data type as values.
    
    '''
    
    col_info = dict(df.dtypes)
    db_cols = {}
    for k in col_info:

        if col_info[k] == 'object':
            db_cols[k] = String

        elif col_info[k] == 'int64':
            db_cols[k] = Integer

        elif col_info[k] == 'float64':
            db_cols[k] = Float  
        elif col_info[k] == 'string':
            db_cols[k] = String   
        else:
            print('Unaccounted for type:')
            print(k, col_info[k])
            return None
    return db_cols

====

In [4]:
def load_csv_file_as_df(data_file_path, file_name):
    
    df = pd.read_csv(data_file_path+file_name)
    
    new_columns = [column.replace(' ', '_').lower() for column in df]
    df.columns = new_columns
    return df

====

In [5]:
def get_file_names(data_file_path, file_name_pattern):

    all_file_list = os.listdir(data_file_path)

    all_file_list.sort()
    print(len(all_file_list))
    
    file_list = []

    for f in all_file_list:
        
        if file_name_pattern in f:
            file_list.append(f)
       
    return  file_list       

====

In [6]:

def clean_up_column_values(df, set_max_val=True, max_unique_col_vals=99):
    '''
    A method that aims to clean up columns in a data frame. When importing data from
    a CSV sometimes NaN values are put in empty spaces of a column containing strings.
    
    Args:
    ======
    
    df                    --> the data frame which needs values to be cleaned up.
    
    set_max_val           --> a boolean which allows the user to decide if they want to go
                              through all unique values in a column.
                              
    max_unique_col_values --> some columns have many unique values and it would take a
                              long time to check every value, so there is an option. 
                              
   Returns:
   ========
   
   df                    --> the data frame which has been cleaned up.
    
    '''
    
    for col in list(df.columns):

        #print('before', col, df[col].dtype)
        if df[col].dtype == 'object':


            #print('starting on a object-dtype:..')

            vals = pd.unique(df[col])

            col_dtype = np.nan


            if set_max_val and len(vals) > max_unique_col_vals:
                continue

            for v in vals:
                if col_dtype != np.nan:
                    col_dtype = type(v)
                elif col_dytpe != type(v):
                    raise TypeError("There are several data-types in this column:", col, vals, val_type)      

            if col_dtype == str:
                #print('got a string', col)
                df[col].fillna('', inplace=True)
                df[col] = df[col].astype(str)
            if col_dtype == 'int64':
                print('got an int', col)
                df[col].fillna(0, inplace=True)
                df[col] = df[col].astype(int)
            if col_dtype == 'float64':
                print('got a float', col)
                df[col].fillna(0.0, inplace=True)
                df[col] = df[col].astype(float)
    return df


In [7]:
def load_csv_data_to_db(filename_pattern_and_tablename_dict, data_file_path):
    '''
    
    
    '''
    
    current_pattern = ''
    last_file = ''
    total_files = 0
    loaded_files = []
   # print('outside first for')
    # 1. iterate through list of patterns, to load all file-types into the database.
    for pattern in filename_pattern_and_tablename_dict.keys():
        
        print('complete:', current_pattern, 'total number of files:', total_files)
        print('last_file', last_file)
        current_pattern = pattern
        # 2. Get the list of files from the data-folder:
        data_file_list = get_file_names(data_file_path, pattern)
                  
        table_name = filename_pattern_and_tablename_dict[pattern]
                
        # 3. load data into a data frame
        file_counter = 1
       # print('in first loop, outside second.. ')
        for data_file in data_file_list:
            last_file = data_file
            if data_file not in loaded_files:
                loaded_files.append(data_file)
#             print('top of second loop.')
            df = load_csv_file_as_df(data_file_path, data_file)
    
            df = clean_up_column_values(df)

            # Get the columns data types from the data frame and convert 
            # to SQL-Alchemy friend types.

            db_cols = get_db_cols(df)
            
#             if db_cols == None:
                
#                 print(data_file, f)
#                 return None

            if file_counter == 1:
                pass #DON'T FORGET THAT YOU HAVE PUT THIS HERE!!
                df.to_sql(table_name,
                                   db_engine,
                                   if_exists='replace',
                                schema='public',
                                   index=False,
                                   chunksize=1000,
                                   dtype=db_cols)            
                print('if counter = 1', data_file)
                
                
            else:
                try:
                    df.to_sql(table_name,
                                       db_engine,
                                       if_exists='append',
                                       schema='public',
                                       index=False,
                                       chunksize=1000,
                                       dtype=db_cols)
                    print('going through the list.. ', data_file)
                except:
                    print('skipped this file: ', data_file)
                    continue
            file_counter += 1
            total_files = file_counter
    return loaded_files

====

In [8]:
db_type = 'postgres'
db_lib = 'psycopg2'
user_id = 'bhima'
password= ''
db_name = 'openfda'

db_URI = build_DB_URI(db_type, db_lib, user_id, password, db_name)
db_engine = create_engine(db_URI, echo=False)
db_engine.connect()
connection= db_engine.connect()

In [9]:
filename_pattern_and_tablename_dict = {'patient.csv':'patients', \
                      'reaction.csv':'reactions', 'openfda.csv':'open_fda', 'drug.csv':'drugs'} #{'drug.csv':'drugs', 

#location of the data files:
data_file_path = '../Data/csv/'

loaded_files = load_csv_data_to_db(filename_pattern_and_tablename_dict, data_file_path)

complete:  total number of files: 0
last_file 
380
if counter = 1 2004q1_drug-event-0001-of-0001.json.patient.csv
going through the list..  2004q2_drug-event-0001-of-0001.json.patient.csv
going through the list..  2004q3_drug-event-0001-of-0001.json.patient.csv
going through the list..  2004q4_drug-event-0001-of-0001.json.patient.csv
going through the list..  2005q1_drug-event-0001-of-0001.json.patient.csv
going through the list..  2005q2_drug-event-0001-of-0001.json.patient.csv
going through the list..  2005q3_drug-event-0001-of-0001.json.patient.csv
going through the list..  2005q4_drug-event-0001-of-0001.json.patient.csv
going through the list..  2006q1_drug-event-0001-of-0001.json.patient.csv
going through the list..  2006q2_drug-event-0001-of-0001.json.patient.csv
going through the list..  2006q3_drug-event-0001-of-0001.json.patient.csv
going through the list..  2006q4_drug-event-0001-of-0001.json.patient.csv
going through the list..  2007q1_drug-event-0001-of-0001.json.patient.cs

  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2012q1_drug-event-0001-of-0002.json.patient.csv
skipped this file:  2012q1_drug-event-0002-of-0002.json.patient.csv
skipped this file:  2012q2_drug-event-0001-of-0002.json.patient.csv
skipped this file:  2012q2_drug-event-0002-of-0002.json.patient.csv
skipped this file:  2012q3_drug-event-0001-of-0002.json.patient.csv
skipped this file:  2012q3_drug-event-0002-of-0002.json.patient.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2012q4_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2012q4_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2012q4_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2013q1_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2013q1_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2013q1_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2013q2_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2013q2_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2013q2_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2013q3_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2013q3_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2013q3_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2013q4_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2013q4_drug-event-0002-of-0003.json.patient.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2013q4_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2014q1_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2014q1_drug-event-0002-of-0003.json.patient.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2014q1_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2014q2_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2014q2_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2014q2_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2014q3_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2014q3_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2014q3_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2014q4_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2014q4_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2014q4_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2015q1_drug-event-0001-of-0004.json.patient.csv
skipped this file:  2015q1_drug-event-0002-of-0004.json.patient.csv
skipped this file:  2015q1_drug-event-0003-of-0004.json.patient.csv
skipped this file:  2015q1_drug-event-0004-of-0004.json.patient.csv
skipped this file:  2015q2_drug-event-0001-of-00

  exec(code_obj, self.user_global_ns, self.user_ns)


going through the list..  2012q4_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2012q4_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2012q4_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2013q1_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2013q1_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2013q1_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2013q2_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2013q2_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2013q2_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2013q3_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2013q3_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2013q3_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2013q4_drug-event-0001-of-0003.json.reaction.csv
going through the list.. 

going through the list..  2014q1_drug-event-0002-of-0003.json.openfda.csv
skipped this file:  2014q1_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2014q2_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2014q2_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2014q2_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2014q3_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2014q3_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2014q3_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2014q4_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2014q4_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2014q4_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2015q1_drug-event-0001-of-0004.json.openfda.csv
going through the list..  2015q1_drug-event-0002-of-0004.json.openfda.csv
going through the list..  2015q1_drug-event-

  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2012q2_drug-event-0001-of-0002.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2012q2_drug-event-0002-of-0002.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2012q3_drug-event-0001-of-0002.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2012q3_drug-event-0002-of-0002.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2012q4_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2012q4_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2012q4_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2013q1_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2013q1_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2013q1_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2013q2_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2013q2_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2013q2_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2013q3_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2013q3_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2013q3_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2013q4_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2013q4_drug-event-0002-of-0003.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2013q4_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2014q1_drug-event-0001-of-0003.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2014q1_drug-event-0002-of-0003.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2014q1_drug-event-0003-of-0003.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2014q2_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2014q2_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2014q2_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2014q3_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2014q3_drug-event-0002-of-0003.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2014q3_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2014q4_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2014q4_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2014q4_drug-event-0003-of-0003.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2015q1_drug-event-0001-of-0004.json.drug.csv
skipped this file:  2015q1_drug-event-0002-of-0004.json.drug.csv
skipped this file:  2015q1_drug-event-0003-of-0004.json.drug.csv
skipped this file:  2015q1_drug-event-0004-of-0004.json.drug.csv


  exec(code_obj, self.user_global_ns, self.user_ns)


skipped this file:  2015q2_drug-event-0001-of-0004.json.drug.csv
skipped this file:  2015q2_drug-event-0002-of-0004.json.drug.csv
skipped this file:  2015q2_drug-event-0003-of-0004.json.drug.csv
skipped this file:  2015q2_drug-event-0004-of-0004.json.drug.csv
skipped this file:  2015q3_drug-event-0001-of-0005.json.drug.csv
skipped this file:  2015q3_drug-event-0002-of-0005.json.drug.csv
skipped this file:  2015q3_drug-event-0003-of-0005.json.drug.csv
skipped this file:  2015q3_drug-event-0004-of-0005.json.drug.csv
skipped this file:  2015q3_drug-event-0005-of-0005.json.drug.csv
going through the list..  all_other_drug-event-0001-of-0001.json.drug.csv


"complete:  total number of files: 0
last_file 
380
if counter = 1 2004q1_drug-event-0001-of-0001.json.patient.csv
going through the list..  2004q2_drug-event-0001-of-0001.json.patient.csv
going through the list..  2004q3_drug-event-0001-of-0001.json.patient.csv
going through the list..  2004q4_drug-event-0001-of-0001.json.patient.csv
going through the list..  2005q1_drug-event-0001-of-0001.json.patient.csv
going through the list..  2005q2_drug-event-0001-of-0001.json.patient.csv
going through the list..  2005q3_drug-event-0001-of-0001.json.patient.csv
going through the list..  2005q4_drug-event-0001-of-0001.json.patient.csv
going through the list..  2006q1_drug-event-0001-of-0001.json.patient.csv
going through the list..  2006q2_drug-event-0001-of-0001.json.patient.csv
going through the list..  2006q3_drug-event-0001-of-0001.json.patient.csv
going through the list..  2006q4_drug-event-0001-of-0001.json.patient.csv
going through the list..  2007q1_drug-event-0001-of-0001.json.patient.csv
going through the list..  2007q2_drug-event-0001-of-0001.json.patient.csv
going through the list..  2007q3_drug-event-0001-of-0002.json.patient.csv
going through the list..  2007q3_drug-event-0002-of-0002.json.patient.csv
going through the list..  2007q4_drug-event-0001-of-0002.json.patient.csv
going through the list..  2007q4_drug-event-0002-of-0002.json.patient.csv
going through the list..  2008q1_drug-event-0001-of-0002.json.patient.csv
going through the list..  2008q1_drug-event-0002-of-0002.json.patient.csv
going through the list..  2008q2_drug-event-0001-of-0002.json.patient.csv
going through the list..  2008q2_drug-event-0002-of-0002.json.patient.csv
going through the list..  2008q3_drug-event-0001-of-0001.json.patient.csv
going through the list..  2008q4_drug-event-0001-of-0001.json.patient.csv
going through the list..  2009q1_drug-event-0001-of-0002.json.patient.csv
going through the list..  2009q1_drug-event-0002-of-0002.json.patient.csv
going through the list..  2009q2_drug-event-0001-of-0002.json.patient.csv
going through the list..  2009q2_drug-event-0002-of-0002.json.patient.csv
going through the list..  2009q3_drug-event-0001-of-0002.json.patient.csv
going through the list..  2009q3_drug-event-0002-of-0002.json.patient.csv
going through the list..  2009q4_drug-event-0001-of-0002.json.patient.csv
going through the list..  2009q4_drug-event-0002-of-0002.json.patient.csv
going through the list..  2010q1_drug-event-0001-of-0002.json.patient.csv
going through the list..  2010q1_drug-event-0002-of-0002.json.patient.csv
going through the list..  2010q2_drug-event-0001-of-0002.json.patient.csv
going through the list..  2010q2_drug-event-0002-of-0002.json.patient.csv
going through the list..  2010q3_drug-event-0001-of-0002.json.patient.csv
going through the list..  2010q3_drug-event-0002-of-0002.json.patient.csv
going through the list..  2010q4_drug-event-0001-of-0002.json.patient.csv
going through the list..  2010q4_drug-event-0002-of-0002.json.patient.csv
going through the list..  2011q1_drug-event-0001-of-0002.json.patient.csv
going through the list..  2011q1_drug-event-0002-of-0002.json.patient.csv
going through the list..  2011q2_drug-event-0001-of-0002.json.patient.csv
going through the list..  2011q2_drug-event-0002-of-0002.json.patient.csv
going through the list..  2011q3_drug-event-0001-of-0002.json.patient.csv
going through the list..  2011q3_drug-event-0002-of-0002.json.patient.csv
going through the list..  2011q4_drug-event-0001-of-0002.json.patient.csv
going through the list..  2011q4_drug-event-0002-of-0002.json.patient.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (5,15,20) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2012q1_drug-event-0001-of-0002.json.patient.csv
skipped this file:  2012q1_drug-event-0002-of-0002.json.patient.csv
skipped this file:  2012q2_drug-event-0001-of-0002.json.patient.csv
skipped this file:  2012q2_drug-event-0002-of-0002.json.patient.csv
skipped this file:  2012q3_drug-event-0001-of-0002.json.patient.csv
skipped this file:  2012q3_drug-event-0002-of-0002.json.patient.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (0) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2012q4_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2012q4_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2012q4_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2013q1_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2013q1_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2013q1_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2013q2_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2013q2_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2013q2_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2013q3_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2013q3_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2013q3_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2013q4_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2013q4_drug-event-0002-of-0003.json.patient.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (11) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2013q4_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2014q1_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2014q1_drug-event-0002-of-0003.json.patient.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (1,11,12) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2014q1_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2014q2_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2014q2_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2014q2_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2014q3_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2014q3_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2014q3_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2014q4_drug-event-0001-of-0003.json.patient.csv
skipped this file:  2014q4_drug-event-0002-of-0003.json.patient.csv
skipped this file:  2014q4_drug-event-0003-of-0003.json.patient.csv
skipped this file:  2015q1_drug-event-0001-of-0004.json.patient.csv
skipped this file:  2015q1_drug-event-0002-of-0004.json.patient.csv
skipped this file:  2015q1_drug-event-0003-of-0004.json.patient.csv
skipped this file:  2015q1_drug-event-0004-of-0004.json.patient.csv
skipped this file:  2015q2_drug-event-0001-of-0004.json.patient.csv
skipped this file:  2015q2_drug-event-0002-of-0004.json.patient.csv
skipped this file:  2015q2_drug-event-0003-of-0004.json.patient.csv
skipped this file:  2015q2_drug-event-0004-of-0004.json.patient.csv
skipped this file:  2015q3_drug-event-0001-of-0005.json.patient.csv
skipped this file:  2015q3_drug-event-0002-of-0005.json.patient.csv
skipped this file:  2015q3_drug-event-0003-of-0005.json.patient.csv
skipped this file:  2015q3_drug-event-0004-of-0005.json.patient.csv
skipped this file:  2015q3_drug-event-0005-of-0005.json.patient.csv
going through the list..  all_other_drug-event-0001-of-0001.json.patient.csv
complete: patient.csv total number of files: 50
last_file all_other_drug-event-0001-of-0001.json.patient.csv
380
if counter = 1 2004q1_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2004q2_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2004q3_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2004q4_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2005q1_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2005q2_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2005q3_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2005q4_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2006q1_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2006q2_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2006q3_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2006q4_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2007q1_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2007q2_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2007q3_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2007q3_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2007q4_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2007q4_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2008q1_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2008q1_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2008q2_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2008q2_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2008q3_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2008q4_drug-event-0001-of-0001.json.reaction.csv
going through the list..  2009q1_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2009q1_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2009q2_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2009q2_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2009q3_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2009q3_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2009q4_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2009q4_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2010q1_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2010q1_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2010q2_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2010q2_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2010q3_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2010q3_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2010q4_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2010q4_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2011q1_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2011q1_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2011q2_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2011q2_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2011q3_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2011q3_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2011q4_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2011q4_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2012q1_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2012q1_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2012q2_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2012q2_drug-event-0002-of-0002.json.reaction.csv
going through the list..  2012q3_drug-event-0001-of-0002.json.reaction.csv
going through the list..  2012q3_drug-event-0002-of-0002.json.reaction.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (1) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

going through the list..  2012q4_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2012q4_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2012q4_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2013q1_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2013q1_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2013q1_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2013q2_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2013q2_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2013q2_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2013q3_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2013q3_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2013q3_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2013q4_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2013q4_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2013q4_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2014q1_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2014q1_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2014q1_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2014q2_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2014q2_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2014q2_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2014q3_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2014q3_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2014q3_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2014q4_drug-event-0001-of-0003.json.reaction.csv
going through the list..  2014q4_drug-event-0002-of-0003.json.reaction.csv
going through the list..  2014q4_drug-event-0003-of-0003.json.reaction.csv
going through the list..  2015q1_drug-event-0001-of-0004.json.reaction.csv
going through the list..  2015q1_drug-event-0002-of-0004.json.reaction.csv
going through the list..  2015q1_drug-event-0003-of-0004.json.reaction.csv
going through the list..  2015q1_drug-event-0004-of-0004.json.reaction.csv
going through the list..  2015q2_drug-event-0001-of-0004.json.reaction.csv
going through the list..  2015q2_drug-event-0002-of-0004.json.reaction.csv
going through the list..  2015q2_drug-event-0003-of-0004.json.reaction.csv
going through the list..  2015q2_drug-event-0004-of-0004.json.reaction.csv
going through the list..  2015q3_drug-event-0001-of-0005.json.reaction.csv
going through the list..  2015q3_drug-event-0002-of-0005.json.reaction.csv
going through the list..  2015q3_drug-event-0003-of-0005.json.reaction.csv
going through the list..  2015q3_drug-event-0004-of-0005.json.reaction.csv
going through the list..  2015q3_drug-event-0005-of-0005.json.reaction.csv
going through the list..  all_other_drug-event-0001-of-0001.json.reaction.csv
complete: reaction.csv total number of files: 96
last_file all_other_drug-event-0001-of-0001.json.reaction.csv
380
if counter = 1 2004q1_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2004q2_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2004q3_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2004q4_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2005q1_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2005q2_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2005q3_drug-event-0001-of-0001.json.openfda.csv
skipped this file:  2005q4_drug-event-0001-of-0001.json.openfda.csv
skipped this file:  2006q1_drug-event-0001-of-0001.json.openfda.csv
skipped this file:  2006q2_drug-event-0001-of-0001.json.openfda.csv
skipped this file:  2006q3_drug-event-0001-of-0001.json.openfda.csv
skipped this file:  2006q4_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2007q1_drug-event-0001-of-0001.json.openfda.csv
skipped this file:  2007q2_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2007q3_drug-event-0001-of-0002.json.openfda.csv
skipped this file:  2007q3_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2007q4_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2007q4_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2008q1_drug-event-0001-of-0002.json.openfda.csv
skipped this file:  2008q1_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2008q2_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2008q2_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2008q3_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2008q4_drug-event-0001-of-0001.json.openfda.csv
going through the list..  2009q1_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2009q1_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2009q2_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2009q2_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2009q3_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2009q3_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2009q4_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2009q4_drug-event-0002-of-0002.json.openfda.csv
skipped this file:  2010q1_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2010q1_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2010q2_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2010q2_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2010q3_drug-event-0001-of-0002.json.openfda.csv
skipped this file:  2010q3_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2010q4_drug-event-0001-of-0002.json.openfda.csv
skipped this file:  2010q4_drug-event-0002-of-0002.json.openfda.csv
skipped this file:  2011q1_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2011q1_drug-event-0002-of-0002.json.openfda.csv
skipped this file:  2011q2_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2011q2_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2011q3_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2011q3_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2011q4_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2011q4_drug-event-0002-of-0002.json.openfda.csv
skipped this file:  2012q1_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2012q1_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2012q2_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2012q2_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2012q3_drug-event-0001-of-0002.json.openfda.csv
going through the list..  2012q3_drug-event-0002-of-0002.json.openfda.csv
going through the list..  2012q4_drug-event-0001-of-0003.json.openfda.csv
skipped this file:  2012q4_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2012q4_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2013q1_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2013q1_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2013q1_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2013q2_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2013q2_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2013q2_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2013q3_drug-event-0001-of-0003.json.openfda.csv
skipped this file:  2013q3_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2013q3_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2013q4_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2013q4_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2013q4_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2014q1_drug-event-0001-of-0003.json.openfda.csv

going through the list..  2014q1_drug-event-0002-of-0003.json.openfda.csv
skipped this file:  2014q1_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2014q2_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2014q2_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2014q2_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2014q3_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2014q3_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2014q3_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2014q4_drug-event-0001-of-0003.json.openfda.csv
going through the list..  2014q4_drug-event-0002-of-0003.json.openfda.csv
going through the list..  2014q4_drug-event-0003-of-0003.json.openfda.csv
going through the list..  2015q1_drug-event-0001-of-0004.json.openfda.csv
going through the list..  2015q1_drug-event-0002-of-0004.json.openfda.csv
going through the list..  2015q1_drug-event-0003-of-0004.json.openfda.csv
skipped this file:  2015q1_drug-event-0004-of-0004.json.openfda.csv
going through the list..  2015q2_drug-event-0001-of-0004.json.openfda.csv
going through the list..  2015q2_drug-event-0002-of-0004.json.openfda.csv
going through the list..  2015q2_drug-event-0003-of-0004.json.openfda.csv
going through the list..  2015q2_drug-event-0004-of-0004.json.openfda.csv
going through the list..  2015q3_drug-event-0001-of-0005.json.openfda.csv
going through the list..  2015q3_drug-event-0002-of-0005.json.openfda.csv
going through the list..  2015q3_drug-event-0003-of-0005.json.openfda.csv
skipped this file:  2015q3_drug-event-0004-of-0005.json.openfda.csv
going through the list..  2015q3_drug-event-0005-of-0005.json.openfda.csv
skipped this file:  all_other_drug-event-0001-of-0001.json.openfda.csv
complete: openfda.csv total number of files: 76
last_file all_other_drug-event-0001-of-0001.json.openfda.csv
380
if counter = 1 2004q1_drug-event-0001-of-0001.json.drug.csv
going through the list..  2004q2_drug-event-0001-of-0001.json.drug.csv
going through the list..  2004q3_drug-event-0001-of-0001.json.drug.csv
going through the list..  2004q4_drug-event-0001-of-0001.json.drug.csv
going through the list..  2005q1_drug-event-0001-of-0001.json.drug.csv
going through the list..  2005q2_drug-event-0001-of-0001.json.drug.csv
going through the list..  2005q3_drug-event-0001-of-0001.json.drug.csv
going through the list..  2005q4_drug-event-0001-of-0001.json.drug.csv
going through the list..  2006q1_drug-event-0001-of-0001.json.drug.csv
going through the list..  2006q2_drug-event-0001-of-0001.json.drug.csv
going through the list..  2006q3_drug-event-0001-of-0001.json.drug.csv
going through the list..  2006q4_drug-event-0001-of-0001.json.drug.csv
going through the list..  2007q1_drug-event-0001-of-0001.json.drug.csv
going through the list..  2007q2_drug-event-0001-of-0001.json.drug.csv
going through the list..  2007q3_drug-event-0001-of-0002.json.drug.csv
going through the list..  2007q3_drug-event-0002-of-0002.json.drug.csv
going through the list..  2007q4_drug-event-0001-of-0002.json.drug.csv
going through the list..  2007q4_drug-event-0002-of-0002.json.drug.csv
going through the list..  2008q1_drug-event-0001-of-0002.json.drug.csv
going through the list..  2008q1_drug-event-0002-of-0002.json.drug.csv
going through the list..  2008q2_drug-event-0001-of-0002.json.drug.csv
going through the list..  2008q2_drug-event-0002-of-0002.json.drug.csv
going through the list..  2008q3_drug-event-0001-of-0001.json.drug.csv
going through the list..  2008q4_drug-event-0001-of-0001.json.drug.csv
going through the list..  2009q1_drug-event-0001-of-0002.json.drug.csv
going through the list..  2009q1_drug-event-0002-of-0002.json.drug.csv
going through the list..  2009q2_drug-event-0001-of-0002.json.drug.csv
going through the list..  2009q2_drug-event-0002-of-0002.json.drug.csv
going through the list..  2009q3_drug-event-0001-of-0002.json.drug.csv
going through the list..  2009q3_drug-event-0002-of-0002.json.drug.csv
going through the list..  2009q4_drug-event-0001-of-0002.json.drug.csv
going through the list..  2009q4_drug-event-0002-of-0002.json.drug.csv
going through the list..  2010q1_drug-event-0001-of-0002.json.drug.csv
going through the list..  2010q1_drug-event-0002-of-0002.json.drug.csv
going through the list..  2010q2_drug-event-0001-of-0002.json.drug.csv
going through the list..  2010q2_drug-event-0002-of-0002.json.drug.csv
going through the list..  2010q3_drug-event-0001-of-0002.json.drug.csv
going through the list..  2010q3_drug-event-0002-of-0002.json.drug.csv
going through the list..  2010q4_drug-event-0001-of-0002.json.drug.csv
going through the list..  2010q4_drug-event-0002-of-0002.json.drug.csv
going through the list..  2011q1_drug-event-0001-of-0002.json.drug.csv
going through the list..  2011q1_drug-event-0002-of-0002.json.drug.csv
going through the list..  2011q2_drug-event-0001-of-0002.json.drug.csv
going through the list..  2011q2_drug-event-0002-of-0002.json.drug.csv
going through the list..  2011q3_drug-event-0001-of-0002.json.drug.csv
going through the list..  2011q3_drug-event-0002-of-0002.json.drug.csv
going through the list..  2011q4_drug-event-0001-of-0002.json.drug.csv
going through the list..  2011q4_drug-event-0002-of-0002.json.drug.csv
skipped this file:  2012q1_drug-event-0001-of-0002.json.drug.csv
skipped this file:  2012q1_drug-event-0002-of-0002.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (11,16) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2012q2_drug-event-0001-of-0002.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (11,16,19) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2012q2_drug-event-0002-of-0002.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2012q3_drug-event-0001-of-0002.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (16) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2012q3_drug-event-0002-of-0002.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (1,28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2012q4_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2012q4_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2012q4_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2013q1_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2013q1_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2013q1_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2013q2_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2013q2_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2013q2_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2013q3_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2013q3_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2013q3_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2013q4_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2013q4_drug-event-0002-of-0003.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (3,16,28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2013q4_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2014q1_drug-event-0001-of-0003.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (16,28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2014q1_drug-event-0002-of-0003.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (3,28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2014q1_drug-event-0003-of-0003.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (19,28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2014q2_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2014q2_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2014q2_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2014q3_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2014q3_drug-event-0002-of-0003.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (16,19,28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2014q3_drug-event-0003-of-0003.json.drug.csv
skipped this file:  2014q4_drug-event-0001-of-0003.json.drug.csv
skipped this file:  2014q4_drug-event-0002-of-0003.json.drug.csv
skipped this file:  2014q4_drug-event-0003-of-0003.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (6,7,16,19,28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2015q1_drug-event-0001-of-0004.json.drug.csv
skipped this file:  2015q1_drug-event-0002-of-0004.json.drug.csv
skipped this file:  2015q1_drug-event-0003-of-0004.json.drug.csv
skipped this file:  2015q1_drug-event-0004-of-0004.json.drug.csv

/opt/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3331: DtypeWarning: Columns (6,7,28) have mixed types.Specify dtype option on import or set low_memory=False.
  exec(code_obj, self.user_global_ns, self.user_ns)

skipped this file:  2015q2_drug-event-0001-of-0004.json.drug.csv
skipped this file:  2015q2_drug-event-0002-of-0004.json.drug.csv
skipped this file:  2015q2_drug-event-0003-of-0004.json.drug.csv
skipped this file:  2015q2_drug-event-0004-of-0004.json.drug.csv
skipped this file:  2015q3_drug-event-0001-of-0005.json.drug.csv
skipped this file:  2015q3_drug-event-0002-of-0005.json.drug.csv
skipped this file:  2015q3_drug-event-0003-of-0005.json.drug.csv
skipped this file:  2015q3_drug-event-0004-of-0005.json.drug.csv
skipped this file:  2015q3_drug-event-0005-of-0005.json.drug.csv
going through the list..  all_other_drug-event-0001-of-0001.json.drug.csv"


In [None]:
df.primarysourcecoauntry.unique()

In [None]:
df.receiver_receiverorganization.unique()

In [None]:
df.occurcountry.unique()

In [None]:
df.sender_senderorganization.unique()

In [None]:
vals = df.duplicate.unique()

for v in vals:
    if type(v) == float:
        print(type(v))

In [None]:
vals = pd.unique(df['sender_senderorganization'])

In [None]:
type(vals[0])

In [None]:
print(df_cols[5])
print(df_cols[15])
print(df_cols[20])
    

In [None]:
df[df_cols[5]].dtype   

In [None]:
df[df_cols[5]].fillna('', inplace=True)

In [None]:
df.occurcountry.unique()

In [None]:
df.primarysourcecountry.unique()

In [None]:
df.receiver_receiverorganization.unique()

In [None]:
df['primarysourcecountry'] = df['primarysourcecountry'].astype("string")

In [None]:
df['primarysourcecountry'].dtype

In [None]:
type(df)

In [None]:
data_file_path = '../Data/csv/'
file_name = '2012q1_drug-event-0001-of-0002.json.patient.csv'

df = load_csv_file_as_df(data_file_path, file_name)

col_dtypes = []
#df.fillna('', inplace=True)

for col in list(df.columns):
    
    #print('before', col, df[col].dtype)
    if df[col].dtype == 'object':
        
        
        #print('starting on a object-dtype:..')
        
        vals = pd.unique(df[col])
        
        col_dtype = np.nan
        
        
        if len(vals) > 99:
            continue
        
        for v in vals:
            if col_dtype != np.nan:
                col_dtype = type(v)
            elif col_dytpe != type(v):
                raise TypeError("There are several data-types in this column:", col, vals, val_type)      
          
               
#         print('got col_dtypes', col_dtype) 
#         if len(val_type) > 2:
#             raise TypeError("There are several data-types in this column:", col, vals, val_type)          
         
        if col_dtype == str:
            print('got a string', col)
            df[col].fillna('', inplace=True)
            df[col] = df[col].astype(str)
        if col_dtype == 'int64':
            print('got an int', col)
            df[col].fillna(0, inplace=True)
            df[col] = df[col].astype(int)
        if col_dtype == 'float64':
            print('got a float', col)
            df[col].fillna(0.0, inplace=True)
            df[col] = df[col].astype(float)
            
        
        
#     for v in vals:
#         print(type(v))
# #         if len(vals) > 1:
#             print(vals)
#         else:
#             df[col] = df[col].astype(type(v[0]))

       
#     if str(df[col].dtype) == 'object':
        
#         df[col] = df[col].astype("string")
#         print('after', col, df[col].dtype)
