# Pivot Tables for Devices and Defects

List the devices from 2020 and 2021 along with their defects and the number of times each defect was reported.

- Use the `GENERIC_NAME` column for the device.
- Use the `DEVICE_PROBLEM_TEXT` column for the defect text.

- Use the 'complete' data from the working directories for each year
    - `./2020_reprocessed/2020_data_complete.csv`
    - `./2021_reprocessed/2021_data_complete.csv`

## Create table for 2020 Data
- Read the data into a pandas dataframe
- Create a dataframe for the devices using the `GENERIC_NAME` column of the dataframe, removing duplicates as needed
- Create a dataframe for the defects using the `DEVICE_PROBLEM_TEXT` column of the dataframe, removing duplicates as needed

In [1]:
import pandas as pd

# Location of the complete 2020 data set
data_file_2020 = './2020_reprocessed/2020_data_complete.csv'

# Columns that are not needed for the device/defect tables
unwanted_columns = [
    'MDR_REPORT_KEY', 'MDR_TEXT_KEY', 'TEXT_TYPE_CODE',
    'PATIENT_SEQUENCE_NUMBER', 'DATE_REPORT', 'FOI_TEXT',
    'DEVICE_SEQUENCE_NO', 'BRAND_NAME', 'MANUFACTURER_D_NAME',
    'MODEL_NUMBER', 'DEVICE_AVAILABILITY', 'DEVICE_REPORT_PRODUCT_CODE',
    'REPORT_NUMBER', 'REPORT_SOURCE_CODE', 'NUMBER_DEVICES_IN_EVENT',
    'DATE_RECEIVED', 'INITIAL_REPORT_TO_FDA', 'MANUFACTURER_G1_NAME',
    'REMEDIAL_ACTION', 'EVENT_TYPE', 'MANUFACTURER_NAME',
    'TYPE_OF_REPORT', 'SUMMARY_REPORT', 'NOE_SUMMARIZED',
    'UDI-DI', 'UDI-PUBLIC',
]

# Read the file and discard the unwanted columns in one step.
#   on_bad_lines='warn' -> malformed lines produce a warning instead of an error
#   dtype='str'         -> every column is read as text, so codes stay as written
data_2020 = pd.read_csv(
    data_file_2020,
    on_bad_lines='warn',
    dtype='str',
).drop(columns=unwanted_columns)


In [2]:
# Sanity check: show the (rows, columns) shape left after the unwanted columns were dropped
print(f"data_2020 creation complete: {data_2020.shape}")

data_2020 creation complete: (3856740, 3)


In [3]:
# Preview the first five rows to confirm which columns remain
data_2020.head()

Unnamed: 0,DEVICE_PROBLEM_CODE,DEVICE_PROBLEM_TEXT,GENERIC_NAME
0,2993,Adverse Event Without Identified Device or Use...,DEFIBRILLATION LEAD
1,2993,Adverse Event Without Identified Device or Use...,DEFIBRILLATION LEAD
2,1332,Failure to Interrogate,IMPLANTABLE CARDIOVERTER DEFIBRILLATOR
3,1332,Failure to Interrogate,IMPLANTABLE CARDIOVERTER DEFIBRILLATOR
4,1332,Failure to Interrogate,IMPLANTABLE CARDIOVERTER DEFIBRILLATOR


In [4]:
# Devices: one row per distinct `GENERIC_NAME` (held in the index) together with
# the number of records that use it. value_counts() collapses the duplicates and
# orders the result from most to least frequent; missing values are not counted
# by default.
generic_names_2020 = data_2020['GENERIC_NAME'].value_counts().to_frame()

# Defects: the same tally for the defect descriptions in `DEVICE_PROBLEM_TEXT`
defects_2020 = data_2020['DEVICE_PROBLEM_TEXT'].value_counts().to_frame()


In [5]:
# Preview the five most frequently reported 2020 device names and their counts
generic_names_2020.head()

Unnamed: 0,GENERIC_NAME
CONTINUOUS GLUCOSE MONITOR,496771
ENDOSSEOUS DENTAL IMPLANT,315749
"PUMP, INFUSION",312885
"ARTIFICIAL PANCREAS DEVICE SYSTEM, SINGLE HORMONAL CONTROL",269489
"PUMP, INFUSION, INSULIN, TO BE USED WITH INVASIVE GLUCOSE SENSOR",205467


In [6]:
# Number of distinct 2020 device names (one row per unique GENERIC_NAME)
generic_names_2020.shape

(15203, 1)

In [7]:
# Total number of 2020 records that carry a device name. value_counts() leaves out
# missing values by default, so this can be lower than the row count of data_2020.
generic_names_2020.sum()

GENERIC_NAME    3843862
dtype: int64

In [8]:
# Preview the five most frequently reported 2020 defect texts and their counts
defects_2020.head()

Unnamed: 0,DEVICE_PROBLEM_TEXT
Adverse Event Without Identified Device or Use Problem,331253
Failure to Osseointegrate,231019
Patient Device Interaction Problem,192077
Wireless Communication Problem,168761
No Device Output,163702


In [9]:
# Number of distinct 2020 defect texts (one row per unique DEVICE_PROBLEM_TEXT)
defects_2020.shape

(474, 1)

## Create table for 2021 Data
- Read the data into a pandas dataframe
- Create a dataframe for the devices using the `GENERIC_NAME` column of the dataframe, removing duplicates as needed
- Create a dataframe for the defects using the `DEVICE_PROBLEM_TEXT` column of the dataframe, removing duplicates as needed

In [10]:
import pandas as pd

# Location of the complete 2021 data set
data_file_2021 = './2021_reprocessed/2021_data_complete.csv'

# Columns that are not needed for the device/defect tables
unwanted_columns = [
    'MDR_REPORT_KEY', 'MDR_TEXT_KEY', 'TEXT_TYPE_CODE',
    'PATIENT_SEQUENCE_NUMBER', 'DATE_REPORT', 'FOI_TEXT',
    'DEVICE_SEQUENCE_NO', 'BRAND_NAME', 'MANUFACTURER_D_NAME',
    'MODEL_NUMBER', 'DEVICE_AVAILABILITY', 'DEVICE_REPORT_PRODUCT_CODE',
    'REPORT_NUMBER', 'REPORT_SOURCE_CODE', 'NUMBER_DEVICES_IN_EVENT',
    'DATE_RECEIVED', 'INITIAL_REPORT_TO_FDA', 'MANUFACTURER_G1_NAME',
    'REMEDIAL_ACTION', 'EVENT_TYPE', 'MANUFACTURER_NAME',
    'TYPE_OF_REPORT', 'SUMMARY_REPORT', 'NOE_SUMMARIZED',
    'UDI-DI', 'UDI-PUBLIC',
]

# Read the file and discard the unwanted columns in one step.
#   on_bad_lines='warn' -> malformed lines produce a warning instead of an error
#   dtype='str'         -> every column is read as text, so codes stay as written
data_2021 = pd.read_csv(
    data_file_2021,
    on_bad_lines='warn',
    dtype='str',
).drop(columns=unwanted_columns)


In [11]:
# Sanity check: show the (rows, columns) shape left after the unwanted columns were dropped
print(f"data_2021 creation complete: {data_2021.shape}")

data_2021 creation complete: (4454884, 3)


In [12]:
# Preview the first five rows to confirm which columns remain
data_2021.head()

Unnamed: 0,DEVICE_PROBLEM_CODE,DEVICE_PROBLEM_TEXT,GENERIC_NAME
0,1535,"Incorrect, Inadequate or Imprecise Resultor Re...",CORONAVIRUS ANTIGEN DETECTION SYSTEM
1,1535,"Incorrect, Inadequate or Imprecise Resultor Re...",CORONAVIRUS ANTIGEN DETECTION SYSTEM
2,1069,Break,"PUMP, INFUSION"
3,1135,Crack,"PUMP, INFUSION"
4,1153,Degraded,"PUMP, INFUSION"


In [13]:
# Devices: one row per distinct `GENERIC_NAME` (held in the index) together with
# the number of records that use it. value_counts() collapses the duplicates and
# orders the result from most to least frequent; missing values are not counted
# by default.
generic_names_2021 = data_2021['GENERIC_NAME'].value_counts().to_frame()

# Defects: the same tally for the defect descriptions in `DEVICE_PROBLEM_TEXT`
defects_2021 = data_2021['DEVICE_PROBLEM_TEXT'].value_counts().to_frame()


In [14]:
# Preview the 1000 most frequently reported 2021 device names; pandas abbreviates
# the display, so only the top and bottom of that slice are shown.
# NOTE(review): the 2020 preview uses the default head() (5 rows) — confirm that
# 1000 is intentional here (e.g. to see how quickly the counts fall off).
generic_names_2021.head(1000)

Unnamed: 0,GENERIC_NAME
CONTINUOUS GLUCOSE MONITOR,534114
"PUMP, INFUSION",526177
ENDOSSEOUS DENTAL IMPLANT,461672
"ARTIFICIAL PANCREAS DEVICE SYSTEM, SINGLE HORMONAL CONTROL",200411
FLASH GLUCOSE MONITORING SYSTEM,188914
...,...
VIDEO DUODENOSCOPE,150
INSULIN DELIVERY DEVICE,149
TEMPORARY NONROLLER TYPE LEFT HEART SUPPORT BLOOD PUMP,148
PRIMACONNEX TC TAPERED RD 4.1X10,148


In [15]:
# Number of distinct 2021 device names (one row per unique GENERIC_NAME)
generic_names_2021.shape

(14161, 1)

In [16]:
# Total number of 2021 records that carry a device name. value_counts() leaves out
# missing values by default, so this can be lower than the row count of data_2021.
generic_names_2021.sum()

GENERIC_NAME    4439679
dtype: int64

In [17]:
# Preview the five most frequently reported 2021 defect texts and their counts
defects_2021.head()

Unnamed: 0,DEVICE_PROBLEM_TEXT
Failure to Osseointegrate,336298
Adverse Event Without Identified Device or Use Problem,314336
Break,252603
Wireless Communication Problem,228677
"Incorrect, Inadequate or Imprecise Resultor Readings",195978


In [18]:
# Number of distinct 2021 defect texts (one row per unique DEVICE_PROBLEM_TEXT)
defects_2021.shape

(476, 1)

# YouTube video

- https://www.youtube.com/watch?v=O60CFmcWyEY — Nested for loops

## Example of using nested for loops to look up data in a dataframe

In [19]:
# import the YAML library for reporting
import yaml

def count_defects(rows, devices, defects):
    """Count how often each (device, defect) combination appears in ``rows``.

    Parameters
    ----------
    rows : iterable of two-item sequences
        Each row is ``[device, defect_text]`` -- the device comes first and the
        defect text second.
    devices : iterable
        Device names that should appear in the report.
    defects : iterable
        Defect texts that should appear under every device.

    Returns
    -------
    dict
        ``counts[device][defect]`` is the number of rows with exactly that
        device in the first column and that defect in the second. Every
        requested combination is present; combinations never seen stay at 0.
        Rows naming a device or defect outside the requested lists are ignored.
    """
    # nested for loops: for each device, and each defect, start the count at 0
    counts = {}
    for device in devices:
        counts[device] = {}
        for defect in defects:
            counts[device][defect] = 0

    # One pass over the data is enough: each row says exactly which counter to
    # bump. Comparing by column position (rather than "is this word anywhere in
    # the row") keeps a device name from ever being mistaken for a defect text,
    # and avoids re-reading every row for every device/defect pair.
    for device, defect in rows:
        if device in counts and defect in counts[device]:
            counts[device][defect] += 1

    return counts


# initialize the data; first column is the device, second column is the text
sample_data = {
    0: ['pump', 'fire'],
    1: ['pump', 'blew up'],
    2: ['knee', 'popped'],
    3: ['knee', 'popped'],
    4: ['foot', 'toe'],
    5: ['leg', 'green']
}

# create lists of the unique values in the data
generic_device = ['pump', 'knee', 'foot', 'leg']
text = ['fire', 'blew up', 'popped', 'toe', 'green']

# build the report: report[device][text] -> number of matching rows
report = count_defects(sample_data.values(), generic_device, text)

# print the report as block-style YAML: one device per top-level key with its
# "defect: count" pairs indented beneath it
print(yaml.dump(report, default_flow_style=False))

foot:
  blew up: 0
  fire: 0
  green: 0
  popped: 0
  toe: 1
knee:
  blew up: 0
  fire: 0
  green: 0
  popped: 2
  toe: 0
leg:
  blew up: 0
  fire: 0
  green: 1
  popped: 0
  toe: 0
pump:
  blew up: 1
  fire: 1
  green: 0
  popped: 0
  toe: 0



In [22]:
import yaml

# Initialize the report for the real 2020 data: one entry per device, each
# holding every known 2020 defect text with a starting count of 0.
# Only the five most frequently reported devices (head()) are used for now.
# The unique names live in the index of each value_counts() table, so they are
# read from there directly instead of walking the rows with iterrows().
top_devices_2020 = generic_names_2020.head().index
report = {
    device: dict.fromkeys(defects_2020.index, 0)
    for device in top_devices_2020
}

print(report)

{'CONTINUOUS GLUCOSE MONITOR': {'Adverse Event Without Identified Device or Use Problem': 0, 'Failure to Osseointegrate': 0, 'Patient Device Interaction Problem': 0, 'Wireless Communication Problem': 0, 'No Device Output': 0, 'Break': 0, 'Loss of Osseointegration': 0, 'Insufficient Information': 0, 'Crack': 0, 'Power Problem': 0, 'Mechanical Problem': 0, 'Appropriate Term/Code Not Available': 0, 'Device Displays Incorrect Message': 0, 'Imprecision': 0, 'Connection Problem': 0, 'No Apparent Adverse Event': 0, 'Fluid Leak': 0, 'Obstruction of Flow': 0, 'Material Rupture': 0, 'Battery Problem': 0, 'Pumping Stopped': 0, 'Incorrect, Inadequate or Imprecise Resultor Readings': 0, 'No Display/Image': 0, 'Fracture': 0, 'Device Difficult to Program or Calibrate': 0, 'Display or Visual Feedback Problem': 0, 'Over-Sensing': 0, 'Detachment of Device or Device Component': 0, 'Defective Device': 0, 'Device Sensing Problem': 0, 'Leak/Splash': 0, 'Device Dislodged or Dislocated': 0, 'Physical Resistan

In [21]:
from datetime import datetime
import pytz

# Current date and time, expressed in the US/Pacific time zone
now = datetime.now(tz=pytz.timezone('US/Pacific'))

# Render it like "2022-10-31 05:49 PM" (12-hour clock; %I zero-pads the hour).
# format() on a datetime applies the same strftime pattern.
date_time_string = format(now, "%Y-%m-%d %I:%M %p")

print(f"{date_time_string} Notebook has completed.")

2022-10-23 11:16 PM Notebook has completed.
