In [71]:
import requests
import json
import pandas as pd
import numpy as np

# Calling the API and Getting the Results

__Note:__ By default, the API returns only one matching record per search. Use the `limit` parameter to request more records; the maximum allowed is 1000 for any single API call.

In [2]:
# Request up to 1000 records (the per-call maximum) from the food event endpoint
url = 'https://api.fda.gov/food/event.json?limit=1000'

# A timeout keeps the cell from hanging indefinitely if the service does not answer
r = requests.get(url, timeout=30)

# Fail fast on anything other than 200: parsing an error payload would only
# surface later as a confusing KeyError on 'results'
if r.status_code != 200:
    print('API not called')
    r.raise_for_status()  # raises requests.HTTPError for 4xx/5xx responses
    raise RuntimeError(f'Unexpected HTTP status {r.status_code} from {url}')

print('API called sucessfully')

# Decode the JSON body of the response
raw_data = r.json()

# Keep only the list of report records
results = raw_data['results']

# Preview the first records only; displaying all of them floods the notebook output
results[:2]

API called sucessfully


[{'report_number': '174513',
  'outcomes': ['Other Outcome'],
  'date_created': '20140311',
  'reactions': ['DIARRHOEA', 'NAUSEA'],
  'date_started': '20140304',
  'consumer': {'age': '31', 'age_unit': 'year(s)', 'gender': 'Female'},
  'products': [{'role': 'SUSPECT',
    'name_brand': 'ORGANIC WHOLE CASHEWS, UNSALTED',
    'industry_code': '23',
    'industry_name': 'Nuts/Edible Seed'},
   {'role': 'CONCOMITANT',
    'name_brand': 'VITAMIN B6',
    'industry_code': '54',
    'industry_name': 'Vit/Min/Prot/Unconv Diet(Human/Animal)'}]},
 {'report_number': '177200',
  'outcomes': ['Other Serious or Important Medical Event',
   'Visited Emergency Room'],
  'date_created': '20140617',
  'reactions': ['ABDOMINAL PAIN UPPER',
   'BLOOD BILIRUBIN INCREASED',
   'CHOLELITHIASIS',
   'DECREASED APPETITE',
   'FAECES DISCOLOURED',
   'LIVER FUNCTION TEST ABNORMAL',
   'NAUSEA',
   'URINE ODOUR ABNORMAL'],
  'date_started': '20140512',
  'consumer': {'age': '58', 'age_unit': 'year(s)', 'gender':

# Transforming the Raw data into a Data Frame

In [3]:
# Show the container type of `results`, then the number of records it holds
# (one value per line, via a single print call)
print(type(results), len(results), sep='\n')

<class 'list'>
1000


In [4]:
# Preview the first two records instead of dumping the whole list into the output
results[:2]

[{'report_number': '174513',
  'outcomes': ['Other Outcome'],
  'date_created': '20140311',
  'reactions': ['DIARRHOEA', 'NAUSEA'],
  'date_started': '20140304',
  'consumer': {'age': '31', 'age_unit': 'year(s)', 'gender': 'Female'},
  'products': [{'role': 'SUSPECT',
    'name_brand': 'ORGANIC WHOLE CASHEWS, UNSALTED',
    'industry_code': '23',
    'industry_name': 'Nuts/Edible Seed'},
   {'role': 'CONCOMITANT',
    'name_brand': 'VITAMIN B6',
    'industry_code': '54',
    'industry_name': 'Vit/Min/Prot/Unconv Diet(Human/Animal)'}]},
 {'report_number': '177200',
  'outcomes': ['Other Serious or Important Medical Event',
   'Visited Emergency Room'],
  'date_created': '20140617',
  'reactions': ['ABDOMINAL PAIN UPPER',
   'BLOOD BILIRUBIN INCREASED',
   'CHOLELITHIASIS',
   'DECREASED APPETITE',
   'FAECES DISCOLOURED',
   'LIVER FUNCTION TEST ABNORMAL',
   'NAUSEA',
   'URINE ODOUR ABNORMAL'],
  'date_started': '20140512',
  'consumer': {'age': '58', 'age_unit': 'year(s)', 'gender':

In [70]:
# Keys of each product entry in the raw payload, mapped to the flat column
# names used in the final DataFrame (insertion order = column order)
PRODUCT_FIELDS = {
    'role': 'product_role',
    'name_brand': 'product_name_brand',
    'industry_code': 'product_industry_code',
    'industry_name': 'product_industry_name',
}

# Report-level columns, in the order they appear in the final DataFrame
REPORT_COLUMNS = [
    'report_number',
    'outcomes',
    'date_created',
    'reactions',
    'consumer_age',
    'consumer_age_unit',
    'consumer_gender',
]


def _flatten_report(report):
    """Convert one raw report dict into a flat record for the DataFrame.

    Every field is looked up individually, so a missing key only turns that
    one field into NaN instead of discarding the whole report.

    Parameters
    ----------
    report : dict
        One element of ``results``.

    Returns
    -------
    dict
        The report-level fields plus a ``'products'`` entry holding a list of
        dicts (one per product, keyed by the flat product column names). The
        list is rebuilt for every report, so products never carry over from
        one report to the next.
    """
    # 'consumer' may be absent (or None); fall back to an empty mapping
    consumer = report.get('consumer') or {}

    # A report can reference several products; keep them as a list so that
    # 'explode' can later create one row per product
    products = [
        {column: product.get(key, np.nan) for key, column in PRODUCT_FIELDS.items()}
        for product in report.get('products') or []
    ]

    return {
        'report_number': report.get('report_number', np.nan),
        'outcomes': report.get('outcomes', np.nan),
        'date_created': report.get('date_created', np.nan),
        'reactions': report.get('reactions', np.nan),
        'consumer_age': consumer.get('age', np.nan),
        'consumer_age_unit': consumer.get('age_unit', np.nan),
        'consumer_gender': consumer.get('gender', np.nan),
        'products': products,
    }


# Build one flat record per report, then create the DataFrame in a single step
records = [_flatten_report(report) for report in results]
df = pd.DataFrame(records, columns=REPORT_COLUMNS + ['products'])

# Create one row per element of the 'products' list
# (a report without products yields a single row with NaN in 'products')
df = df.explode('products')

# Pull the product details out of each product dict into their own columns;
# rows without a product dict get NaN instead of raising
for column in PRODUCT_FIELDS.values():
    df[column] = df['products'].apply(
        lambda product, column=column: product[column] if isinstance(product, dict) else np.nan
    )

# Drop the 'products' column as it is no longer needed
df = df.drop(columns='products')

# 'outcomes' and 'reactions' also hold lists, so create one row per element.
# NOTE: after the three explodes each report contributes
# (#products x #outcomes x #reactions) rows that all share the report's
# original index label, so count reports via 'report_number', not via rows.
df = df.explode('outcomes')
df = df.explode('reactions')

df

Unnamed: 0,report_number,outcomes,date_created,reactions,consumer_age,consumer_age_unit,consumer_gender,product_role,product_name_brand,product_industry_code,product_industry_name
0,174513,Other Outcome,20140311,DIARRHOEA,31,year(s),Female,SUSPECT,"ORGANIC WHOLE CASHEWS, UNSALTED",23,Nuts/Edible Seed
0,174513,Other Outcome,20140311,NAUSEA,31,year(s),Female,SUSPECT,"ORGANIC WHOLE CASHEWS, UNSALTED",23,Nuts/Edible Seed
0,174513,Other Outcome,20140311,DIARRHOEA,31,year(s),Female,CONCOMITANT,VITAMIN B6,54,Vit/Min/Prot/Unconv Diet(Human/Animal)
0,174513,Other Outcome,20140311,NAUSEA,31,year(s),Female,CONCOMITANT,VITAMIN B6,54,Vit/Min/Prot/Unconv Diet(Human/Animal)
1,177200,Other Serious or Important Medical Event,20140617,ABDOMINAL PAIN UPPER,58,year(s),Female,SUSPECT,PURITAN'S PRIDE MILK THISTLE EXTRACT 1000MG RA...,54,Vit/Min/Prot/Unconv Diet(Human/Animal)
...,...,...,...,...,...,...,...,...,...,...,...
998,197322,Other Serious or Important Medical Event,20160613,FLATULENCE,63,year(s),Female,CONCOMITANT,B-COMPLEX,54,Vit/Min/Prot/Unconv Diet(Human/Animal)
998,197322,Other Serious or Important Medical Event,20160613,IRRITABLE BOWEL SYNDROME,63,year(s),Female,CONCOMITANT,B-COMPLEX,54,Vit/Min/Prot/Unconv Diet(Human/Animal)
998,197322,Other Serious or Important Medical Event,20160613,MIGRAINE,63,year(s),Female,CONCOMITANT,B-COMPLEX,54,Vit/Min/Prot/Unconv Diet(Human/Animal)
998,197322,Other Serious or Important Medical Event,20160613,SNEEZING,63,year(s),Female,CONCOMITANT,B-COMPLEX,54,Vit/Min/Prot/Unconv Diet(Human/Animal)
