# Pivot Tables for Devices and Defects

List the devices from 2020 and 2021 along with their defects and the number of times each defect was reported.

- Use the `GENERIC_NAME` column for the device.
- Use the `DEVICE_PROBLEM_TEXT` column for the defect text.

- Use the 'complete' data from the working directories for each year
    - `./2020_reprocessed/2020_data_complete.csv`
    - `./2021_reprocessed/2021_data_complete.csv`

## Create table for 2020 Data
- Read the data into a pandas dataframe
- Create a dataframe for the devices using the `GENERIC_NAME` column of the dataframe, removing duplicates as needed
- Create a dataframe for the defects using the `DEVICE_PROBLEM_TEXT` column of the dataframe, removing duplicates as needed

In [None]:
import pandas as pd

# Location of the reprocessed 2020 export
data_file = './2020_reprocessed/2020_data_complete.csv'

# Load the CSV into a dataframe. Every column is read as a string, and
# malformed lines produce a warning instead of raising an error.
data = pd.read_csv(data_file, on_bad_lines='warn', dtype='str')

# Columns that are not needed for the device/defect tables
unwanted_columns = [
    'MDR_REPORT_KEY', 'MDR_TEXT_KEY', 'TEXT_TYPE_CODE',
    'PATIENT_SEQUENCE_NUMBER', 'DATE_REPORT', 'FOI_TEXT',
    'DEVICE_SEQUENCE_NO', 'BRAND_NAME', 'MANUFACTURER_D_NAME',
    'MODEL_NUMBER', 'DEVICE_AVAILABILITY', 'DEVICE_REPORT_PRODUCT_CODE',
    'REPORT_NUMBER', 'REPORT_SOURCE_CODE', 'NUMBER_DEVICES_IN_EVENT',
    'DATE_RECEIVED', 'INITIAL_REPORT_TO_FDA', 'MANUFACTURER_G1_NAME',
    'REMEDIAL_ACTION', 'EVENT_TYPE', 'MANUFACTURER_NAME',
    'TYPE_OF_REPORT', 'SUMMARY_REPORT', 'NOE_SUMMARIZED',
    'UDI-DI', 'UDI-PUBLIC',
]

# Remove those columns from the dataframe in place
data.drop(columns=unwanted_columns, inplace=True)


In [None]:
# Report the dataframe's shape as a (rows, columns) tuple
print(f"data frame creation complete: {data.shape}")

In [None]:
# Preview the data: display the first rows of the dataframe (`head()` defaults to five rows)
data.head()

In [None]:
# Tally how often each distinct `GENERIC_NAME` value occurs; the resulting
# dataframe has one row per device, so duplicates are collapsed into a count
generic_names = data['GENERIC_NAME'].value_counts().to_frame()

# Same tally for the defect text, which is read from the `DEVICE_PROBLEM_TEXT`
# column; the resulting dataframe has one row per distinct defect
defects = data['DEVICE_PROBLEM_TEXT'].value_counts().to_frame()


In [None]:
# Preview the device counts: display up to the first 1000 rows
# (`value_counts()` sorts by count, most frequent first, by default)
generic_names.head(1000)

In [None]:
# Shape of the device table; the row count is the number of distinct `GENERIC_NAME` values
generic_names.shape

In [None]:
# Add up the per-device counts; this totals the rows of `data` that have a
# non-missing `GENERIC_NAME` (`value_counts()` leaves out missing values by default)
generic_names.sum()

In [None]:
# Preview the defect counts: display the first rows (`head()` defaults to five rows)
defects.head()

# YouTube video

- https://www.youtube.com/watch?v=O60CFmcWyEY (Nested for loops)

In [None]:
## Example of using nested for loops to look up data in a dataframe

In [None]:
# import the YAML library for reporting
import yaml

# Sample data keyed by row number; in each row the first value is the device
# and the second value is the defect text
sample_data = {
    0: ['pump', 'fire'],
    1: ['pump', 'blew up'],
    2: ['knee', 'popped'],
    3: ['knee', 'popped'],
    4: ['foot', 'toe'],
    5: ['leg', 'green'],
}

# Lists of the unique values that appear in each column of the data
generic_device = ['pump', 'knee', 'foot', 'leg']
text = ['fire', 'blew up', 'popped', 'toe', 'green']

# Initialize the report: one counter per (device, defect) pair, all starting at 0.
# For the real data, the same structure can be keyed by the row labels of
# `generic_names` (devices) and `defects` (defect texts).
report = {i: {j: 0 for j in text} for i in generic_device}

# The processing starts here:
# for each device...
for i in generic_device:

    # and each defect....
    for j in text:

        # look at every row of data....
        for device, defect in sample_data.values():

            # and if this row holds exactly this device and this defect, add 1.
            # Each value is compared against its own column, so a device name
            # that happens to equal a defect text cannot be counted by mistake.
            if device == i and defect == j:
                report[i][j] += 1

# print the report
print(yaml.dump(report, default_flow_style=False))

In [None]:
import yaml

# Initialize the report with an empty inner dict for every device label
report = {device_name: {} for device_name in generic_names.index}

# Also create (or reset) an entry under the literal key 'GENERIC_NAME'
# NOTE(review): the reason for this extra key is not evident from this cell - confirm it is needed
report['GENERIC_NAME'] = {}

# Start every (device, defect) counter at 0
for device_name in generic_names.index:
    for defect_text in defects.index:
        report[device_name][defect_text] = 0

# Placeholder: visits every (device, defect) pair but does not count anything yet
for device_name in generic_names.index:
    for defect_text in defects.index:
        pass