## Import required libraries

In [2]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings('ignore')

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 350)

## Cleaning, Feature Engineering and Merging
In this notebook, we will clean the data for 4 tables, as follows:

**Uring Drug Screen** - Scheduled urine drug screen taken once per week for 24 weeks for 8 different drug classes <br><br>
**Self Reported Use** - Scheduled survey collection every 4 weeks, includes self reported drug use from the previous 4 weeks <br><br>
**Medication Doses** - Patients received methadone or buprenorphine, dosed daily for 24 weeks.<br><br>
**Demographics** - Includes pateints sex, ethnicity and race<br><br>
**RSA** - Research session attendance - records attendance for each patient for 24 weeks<br><br>

After each table is cleaned, we will create features to improve the data quality and then merge all tables to create a high quality dataset to feed into a machine learning model to make predictions.

## Helper Functions
Below are some reusable functions listed as follows:

**Clean Dataframes** - Annotates columns, drops erroneous columns and transforms appropriate data types where required<br><br>
**Backfill Nulls** - There is missing data that results from human error.  We will fill these in with the previous rows datapoint for imputation strategy for certain features<br><br>
**Aggregate columns** - For some clinical data, there are multiple rows per patient.  We will reduce rows with aggregation to improve data quality.<br><br>
**Flatten Dataframes** - This function will reshape and structure the data so that each patient is reflected as a single row, with all clinical data in properly encoded columns<br><br>
**Merge Dataframes** - Once all dataframes are transformed, they will be merged and processed for machine learning


## Clean Dataframes
Will apply appropriate cleaning tasks

In [3]:
def clean_df(df, keep_cols, rename_cols):
    """
    Clean the given DataFrame by dropping unnecessary columns, renaming columns, and reordering columns.

    Parameters:
    df (pandas.DataFrame): The DataFrame to be cleaned.
    keep_cols (list): A list of column names to keep in the DataFrame.
    rename_cols (dict): A dictionary mapping old column names to new column names.

    Returns:
    pandas.DataFrame: The cleaned DataFrame.
    """
    # drop columns that are not on keep_cols list
    df = df.drop(columns=[col for col in df.columns if col not in keep_cols])
    
    # cleans the VISIT column, removing text and converting to integers for ordinal value
    if 'VISIT' in df.columns:
        # remove 'VISIT' from VISIT column
        df['VISIT'] = df['VISIT'].str.replace('VISIT','')
        
        # if VISIT column contains 'BASELINE' replace with 0
        df['VISIT'] = df['VISIT'].str.replace('BASELINE','0')

        # remove WK in visit column then convert to int
        df['VISIT'] = df['VISIT'].str.replace('WK','')
        
        # change VISIT column to int
        df['VISIT'] = df['VISIT'].astype(int)
    else:
        pass

    
    # rename columns
    df = df.rename(columns=rename_cols)

    # bring the last column (patient_id) to first position as it will serve as the primary key column for all dfs
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    df = df[cols]


    return df


## Backfill Nulls
In columns for medication and total dose, there are missing values from human error in data entry

In [4]:
def backfill_nulls(df, cols):
    """
    Backfill null values in the given columns with the last non-null value.

    Parameters:
    df (pandas.DataFrame): The DataFrame to be cleaned.
    cols (list): A list of column names to backfill.

    Returns:
    pandas.DataFrame: The cleaned DataFrame.
    """
    for col in cols:
        df[col] = df[col].fillna(method='bfill')
    return df

## Aggregate columns
We will aggregate data stored in multiple rows to improve data quality.

In [5]:
def agg_df(df, index, agg):
    """
    Aggregate the given DataFrame by grouping by the given index and aggregating the given columns.

    Parameters:
    df (pandas.DataFrame): The DataFrame to be cleaned.
    index (list): A list of column names to group by.
    agg (dict): A dictionary mapping column names to aggregation functions.

    Returns:
    pandas.DataFrame: The aggregated DataFrame.
    """
    df = df.groupby(index).agg(agg)
    df = df.reset_index()
    return df   

## Flatten Dataframes
- This function will create a new DF for each week of treatment
- In each dataframe, the columns will be encoded to reflect clinical data for that week of treatment
- Each DF will then be merged to final dataset


In [6]:
def flatten_dataframe(df,start,stop,step):
    """
    Flattens a dataframe by creating separate dataframes for each week of clinical data,
    renaming columns with the corresponding week number, and merging all dataframes into one,
    reshaping dataframe to 1 row per patient, with all clinical data properly encoded into columns.

    Args:
        df (pandas.DataFrame): The input dataframe.
        start (int): The starting week number.
        stop (int): The stopping week number.
        step (int): The step size between weeks.

    Returns:
        pandas.DataFrame: The flattened dataframe.

    """
    # create a new dataframe for every week of clinical data
    # the name of the dataframe will be VISIT+number of visit
    for i in range(start,stop+1,step):
        globals()['VISIT%s' % i] = df[df['VISIT']==i]

    # for each dataframe beteween start and stop
    # add the value in VISIT to the end of the name of each column +"_"+"visit"
    for i in range(start,stop+1,step):
        for col in globals()['VISIT%s' % i].columns:
            if col != 'patdeid':
                globals()['VISIT%s' % i][col+'_'+str(i)] = globals()['VISIT%s' % i][col]
                # after columns are annoted, drop original columns            
                globals()['VISIT%s' % i] = globals()['VISIT%s' % i].drop(columns=col)
            else:
                pass

    # merge all dfs using left merge on patdeid
    for i in range(start,stop+1,step):
        if i == start:
            df = pd.merge(globals()['VISIT%s' % i], globals()['VISIT%s' % (i+step)], on='patdeid', how='left')
        elif i < stop:
            df = pd.merge(df, globals()['VISIT%s' % (i+step)], on='patdeid', how='inner')
        else:
            pass

            # drop erroneous visit columns, as the visit is encoded in each column
            df = df.drop(columns=[col for col in df.columns if col.startswith('VISIT')], axis=1)
            

            return df

## Merge Dataframes
- This function will merge a group of dataframes with common keys
- The dataframes must be stored on a list and will be treated as one iterable

In [7]:
# create function to merge dataframes using functools reduce
def merge_dfs(dfs):
    """
    Merge the given list of DataFrames into one DataFrame.

    Parameters:
    dfs (list): A list of DataFrames to be merged.

    Returns:
    pandas.DataFrame: The merged DataFrame.
    """
    from functools import reduce
    df = reduce(lambda left,right: pd.merge(left,right,on='patdeid'), dfs)
    return df

## Import the raw CSV files


In [8]:
# medication doses
med = pd.read_csv('../unlabeled_data/T_FRDOS.csv')

# urine drug screens
uds = pd.read_csv('../unlabeled_data/T_FRUDSAB.csv')

# self reported drug use
sru = pd.read_csv('../unlabeled_data/T_FRTFB.csv')

# patient demographics
demo = pd.read_csv('../unlabeled_data/T_FRDEM.csv')

# research session attendance (RSA)
rsa = pd.read_csv('../unlabeled_data/T_FRRSA.csv')

## Transform the medication doses data

In [9]:
# define parameters
med_cols = ['patdeid','VISIT','DOS002','DOS005']
rename_cols = {'DOS002':'medication','DOS005':'total_dose'}

# execute clean function
med = clean_df(med, med_cols, rename_cols)

med

Unnamed: 0,patdeid,VISIT,medication,total_dose
0,1,0,2.0,8.0
1,1,1,2.0,16.0
2,1,1,2.0,24.0
3,1,1,2.0,24.0
4,1,1,2.0,32.0
...,...,...,...,...
160903,1931,24,2.0,8.0
160904,1931,24,2.0,8.0
160905,1931,24,2.0,8.0
160906,1931,24,2.0,8.0


## Backfill Nulls
There is missing data that comes from human error, for the medication doses and admin location.  We will backfill those nulls with the previous rows data to maintain accuracy.

In [10]:
# define paramteres
back_fill_cols = ['total_dose','medication']

# execute backfill function
med = backfill_nulls(med, back_fill_cols)

# observe backfill columns in context with patient and visit data for first 25 rows
med.loc[:,['patdeid','VISIT','total_dose','medication']][:25] 

Unnamed: 0,patdeid,VISIT,total_dose,medication
0,1,0,8.0,2.0
1,1,1,16.0,2.0
2,1,1,24.0,2.0
3,1,1,24.0,2.0
4,1,1,32.0,2.0
5,1,1,32.0,2.0
6,1,1,32.0,2.0
7,1,2,32.0,2.0
8,1,2,32.0,2.0
9,1,2,32.0,2.0


## Aggregate columns
Data for medication doses was recorded daily, adding multiple rows per patient.  We will consolidate the data and aggregate total dose on a weekly basis, to improve data quality.

In [11]:
# define parameters
index_columns = ['patdeid','VISIT']

# dictionary with columsn to aggregate and aggregation functions
agg_dict = {'total_dose':'sum','medication':'first'}

# execute aggregation
med = agg_df(med, index_columns, agg_dict)

print(f'New shape for aggregated dataframe, reduced from 106,000 rows to {med.shape[0]}')
med[:5]

New shape for aggregated dataframe, reduced from 106,000 rows to 23528


Unnamed: 0,patdeid,VISIT,total_dose,medication
0,1,0,8.0,2.0
1,1,1,160.0,2.0
2,1,2,320.0,2.0
3,1,3,192.0,2.0
4,1,4,384.0,2.0


### Create df for medication type
We will create a separate dataframe for medication type; we will merge this later, which will help with applying filters for analysis

In [12]:
# create function to create df for medication column 
medication = med[['patdeid','medication']].drop_duplicates(subset='patdeid').reset_index(drop=True)

print(f'Medication dataframe shape{medication.shape}')
medication[:5]

Medication dataframe shape(1315, 2)


Unnamed: 0,patdeid,medication
0,1,2.0
1,2,2.0
2,3,1.0
3,4,2.0
4,6,2.0


## Feature Engineering
To improve data quality, we will segment the weekly medication doses into separate columns by medication (methadone or buprenorphine)


In [13]:
def med_features(df):
    """
    Process the medication dataframe by creating separate columns for methadone dose and buprenorphine dose,
    filling null values with 0, and dropping unnecessary columns.

    Parameters:
    df (pandas.DataFrame): The medication dataframe.

    Returns:
    pandas.DataFrame: The processed dataframe.
    """
    # create new columns for methadone and buprenorphine dose
    df['meds_methadone'] = df.loc[df.medication==1.0]['total_dose']
    df['meds_buprenorphine'] = df.loc[df.medication==2.0]['total_dose']

    # fill null values with 0
    df.meds_methadone.fillna(0, inplace=True)
    df.meds_buprenorphine.fillna(0, inplace=True)

    # drop original columns to remove redundancy
    df = df.drop(columns=['total_dose','medication'])

    return df


In [14]:
med = med_features(med)

med[:5]

Unnamed: 0,patdeid,VISIT,meds_methadone,meds_buprenorphine
0,1,0,0.0,8.0
1,1,1,0.0,160.0
2,1,2,0.0,320.0
3,1,3,0.0,192.0
4,1,4,0.0,384.0


## Flatten dataframe for machine learning
- This function will create a dataframe for each week of treatment
- The columns in each dataframe will be updated to reflect the week of treatment for interpretability
- Once the columns are properly encoded, the dataframes will be merged backed together

In [15]:
# parameters for merge function - 
start = 0 # start merge at week 0 reflects the baseline assessment
stop = 24 # stop merge at week 24, final week of treatment
step = 1 # looks for dataframes to merge in increments of 1 week

# execute flatten function
med = flatten_dataframe(med,start,stop,step)

# observe shape and first 5 rows of flattened dataframe
print(f'Shape of flattened dataframe: {med.shape}')
med[:5]

Shape of flattened dataframe: (746, 51)


Unnamed: 0,patdeid,meds_methadone_0,meds_buprenorphine_0,meds_methadone_1,meds_buprenorphine_1,meds_methadone_2,meds_buprenorphine_2,meds_methadone_3,meds_buprenorphine_3,meds_methadone_4,meds_buprenorphine_4,meds_methadone_5,meds_buprenorphine_5,meds_methadone_6,meds_buprenorphine_6,meds_methadone_7,meds_buprenorphine_7,meds_methadone_8,meds_buprenorphine_8,meds_methadone_9,meds_buprenorphine_9,meds_methadone_10,meds_buprenorphine_10,meds_methadone_11,meds_buprenorphine_11,meds_methadone_12,meds_buprenorphine_12,meds_methadone_13,meds_buprenorphine_13,meds_methadone_14,meds_buprenorphine_14,meds_methadone_15,meds_buprenorphine_15,meds_methadone_16,meds_buprenorphine_16,meds_methadone_17,meds_buprenorphine_17,meds_methadone_18,meds_buprenorphine_18,meds_methadone_19,meds_buprenorphine_19,meds_methadone_20,meds_buprenorphine_20,meds_methadone_21,meds_buprenorphine_21,meds_methadone_22,meds_buprenorphine_22,meds_methadone_23,meds_buprenorphine_23,meds_methadone_24,meds_buprenorphine_24
0,1,0.0,8.0,0.0,160.0,0.0,320.0,0.0,192.0,0.0,384.0,0.0,96.0,0.0,32.0,0.0,352.0,0.0,128.0,0.0,256.0,0.0,192.0,0.0,224.0,0.0,384.0,0.0,32.0,0.0,224.0,0.0,224.0,0.0,240.0,0.0,152.0,0.0,182.0,0.0,240.0,0.0,210.0,0.0,180.0,0.0,246.0,0.0,128.0,0.0,166.0
1,2,0.0,8.0,0.0,48.0,0.0,48.0,0.0,60.0,0.0,48.0,0.0,84.0,0.0,84.0,0.0,72.0,0.0,36.0,0.0,72.0,0.0,76.0,0.0,96.0,0.0,88.0,0.0,40.0,0.0,64.0,0.0,144.0,0.0,64.0,0.0,48.0,0.0,40.0,0.0,40.0,0.0,40.0,0.0,72.0,0.0,60.0,0.0,72.0,0.0,68.0
2,3,30.0,0.0,170.0,0.0,310.0,0.0,420.0,0.0,360.0,0.0,480.0,0.0,310.0,0.0,455.0,0.0,455.0,0.0,345.0,0.0,600.0,0.0,380.0,0.0,560.0,0.0,720.0,0.0,600.0,0.0,360.0,0.0,640.0,0.0,600.0,0.0,600.0,0.0,800.0,0.0,600.0,0.0,670.0,0.0,630.0,0.0,510.0,0.0,540.0,0.0
3,4,0.0,16.0,0.0,152.0,0.0,192.0,0.0,160.0,0.0,64.0,0.0,256.0,0.0,256.0,0.0,160.0,0.0,128.0,0.0,192.0,0.0,128.0,0.0,128.0,0.0,288.0,0.0,128.0,0.0,160.0,0.0,96.0,0.0,224.0,0.0,128.0,0.0,192.0,0.0,192.0,0.0,256.0,0.0,32.0,0.0,160.0,0.0,128.0,0.0,32.0
4,14,230.0,0.0,330.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,300.0,0.0,300.0,0.0,350.0,0.0,300.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,300.0,0.0,350.0,0.0,350.0,0.0


### Medication table properly transformed

## Transform the urine drug screen data

In [16]:
# define parameters
uds_cols = ['patdeid','VISIT', 'UDS005', 'UDS006', 'UDS007', 'UDS008', 'UDS009', 'UDS010', 'UDS011', 'UDS012', 'UDS013']

rename_cols = {'UDS005':'test_Amphetamines', 'UDS006':'test_Benzodiazepines','UDS007':'test_Methadone', 'UDS008':'test_Oxycodone', 'UDS009':'test_Cocaine', 'UDS010':'test_Methamphetamine', 'UDS011':'test_Opiate300', 'UDS012':'test_Cannabinoids', 'UDS013':'test_Propoxyphene'}

# execute clean function
uds = clean_df(uds, uds_cols, rename_cols)

# observe shape and first 5 rows of cleaned dataframe
print(f'Shape of cleaned dataframe: {uds.shape}')
uds[:5]

Shape of cleaned dataframe: (24930, 11)


Unnamed: 0,patdeid,VISIT,test_Propoxyphene,test_Amphetamines,test_Cannabinoids,test_Benzodiazepines,test_Methadone,test_Oxycodone,test_Cocaine,test_Methamphetamine,test_Opiate300
0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1,1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


## We will capture features from the dataset before we reshape
We will capture the total number of weeks each patient showed up for a urine drug screen

In [17]:
# create total visits column, populate total visits with number of unique visits for each patient
uds['total_visits'] = uds.groupby('patdeid')['VISIT'].transform('nunique')

# create dataframe with patdeid as key and total visits columns
total_visits = uds[['patdeid','total_visits']].drop_duplicates(subset='patdeid').reset_index(drop=True)

# drop total visits column from original dataframe
uds = uds.drop(columns='total_visits')

# observe shape and first 5 rows of total_visits
print(f'Shape of total_visits dataframe: {total_visits.shape}')
total_visits[:5]

Shape of total_visits dataframe: (1917, 2)


Unnamed: 0,patdeid,total_visits
0,1,26
1,2,26
2,3,26
3,4,26
4,5,1


## Dataframe is ready to be flattened
Each clinical data point will be encoded with week of treatment

In [18]:
# define parameters 
start = 0 # start merge at week 0 reflects the baseline assessment
stop = 24 # stop merge at week 24, final week of treatment
step = 1 # looks for dataframes to merge in increments of 1 week

uds = flatten_dataframe(uds,start,stop,step)

# review the first few rows and shape
print(f'Shape of uds dataframe: {uds.shape}')
uds[:5]

Shape of uds dataframe: (753, 226)


Unnamed: 0,patdeid,test_Propoxyphene_0,test_Amphetamines_0,test_Cannabinoids_0,test_Benzodiazepines_0,test_Methadone_0,test_Oxycodone_0,test_Cocaine_0,test_Methamphetamine_0,test_Opiate300_0,test_Propoxyphene_1,test_Amphetamines_1,test_Cannabinoids_1,test_Benzodiazepines_1,test_Methadone_1,test_Oxycodone_1,test_Cocaine_1,test_Methamphetamine_1,test_Opiate300_1,test_Propoxyphene_2,test_Amphetamines_2,test_Cannabinoids_2,test_Benzodiazepines_2,test_Methadone_2,test_Oxycodone_2,test_Cocaine_2,test_Methamphetamine_2,test_Opiate300_2,test_Propoxyphene_3,test_Amphetamines_3,test_Cannabinoids_3,test_Benzodiazepines_3,test_Methadone_3,test_Oxycodone_3,test_Cocaine_3,test_Methamphetamine_3,test_Opiate300_3,test_Propoxyphene_4,test_Amphetamines_4,test_Cannabinoids_4,test_Benzodiazepines_4,test_Methadone_4,test_Oxycodone_4,test_Cocaine_4,test_Methamphetamine_4,test_Opiate300_4,test_Propoxyphene_5,test_Amphetamines_5,test_Cannabinoids_5,test_Benzodiazepines_5,test_Methadone_5,test_Oxycodone_5,test_Cocaine_5,test_Methamphetamine_5,test_Opiate300_5,test_Propoxyphene_6,test_Amphetamines_6,test_Cannabinoids_6,test_Benzodiazepines_6,test_Methadone_6,test_Oxycodone_6,test_Cocaine_6,test_Methamphetamine_6,test_Opiate300_6,test_Propoxyphene_7,test_Amphetamines_7,test_Cannabinoids_7,test_Benzodiazepines_7,test_Methadone_7,test_Oxycodone_7,test_Cocaine_7,test_Methamphetamine_7,test_Opiate300_7,test_Propoxyphene_8,test_Amphetamines_8,test_Cannabinoids_8,test_Benzodiazepines_8,test_Methadone_8,test_Oxycodone_8,test_Cocaine_8,test_Methamphetamine_8,test_Opiate300_8,test_Propoxyphene_9,test_Amphetamines_9,test_Cannabinoids_9,test_Benzodiazepines_9,test_Methadone_9,test_Oxycodone_9,test_Cocaine_9,test_Methamphetamine_9,test_Opiate300_9,test_Propoxyphene_10,test_Amphetamines_10,test_Cannabinoids_10,test_Benzodiazepines_10,test_Methadone_10,test_Oxycodone_10,test_Cocaine_10,test_Methamphetamine_10,test_Opiate300_10,test_Propoxyphene_11,test_Amphetamines_11,test_Cannabinoids_11,test_Benzodiazepines_11,test_Methadone_11,test_Oxycodone_11,test_Cocaine_11,test_Methamphetamine_11,test_Opiate300_11,test_Propoxyphene_12,test_Amphetamines_12,test_Cannabinoids_12,test_Benzodiazepines_12,test_Methadone_12,test_Oxycodone_12,test_Cocaine_12,test_Methamphetamine_12,test_Opiate300_12,test_Propoxyphene_13,test_Amphetamines_13,test_Cannabinoids_13,test_Benzodiazepines_13,test_Methadone_13,test_Oxycodone_13,test_Cocaine_13,test_Methamphetamine_13,test_Opiate300_13,test_Propoxyphene_14,test_Amphetamines_14,test_Cannabinoids_14,test_Benzodiazepines_14,test_Methadone_14,test_Oxycodone_14,test_Cocaine_14,test_Methamphetamine_14,test_Opiate300_14,test_Propoxyphene_15,test_Amphetamines_15,test_Cannabinoids_15,test_Benzodiazepines_15,test_Methadone_15,test_Oxycodone_15,test_Cocaine_15,test_Methamphetamine_15,test_Opiate300_15,test_Propoxyphene_16,test_Amphetamines_16,test_Cannabinoids_16,test_Benzodiazepines_16,test_Methadone_16,test_Oxycodone_16,test_Cocaine_16,test_Methamphetamine_16,test_Opiate300_16,test_Propoxyphene_17,test_Amphetamines_17,test_Cannabinoids_17,test_Benzodiazepines_17,test_Methadone_17,test_Oxycodone_17,test_Cocaine_17,test_Methamphetamine_17,test_Opiate300_17,test_Propoxyphene_18,test_Amphetamines_18,test_Cannabinoids_18,test_Benzodiazepines_18,test_Methadone_18,test_Oxycodone_18,test_Cocaine_18,test_Methamphetamine_18,test_Opiate300_18,test_Propoxyphene_19,test_Amphetamines_19,test_Cannabinoids_19,test_Benzodiazepines_19,test_Methadone_19,test_Oxycodone_19,test_Cocaine_19,test_Methamphetamine_19,test_Opiate300_19,test_Propoxyphene_20,test_Amphetamines_20,test_Cannabinoids_20,test_Benzodiazepines_20,test_Methadone_20,test_Oxycodone_20,test_Cocaine_20,test_Methamphetamine_20,test_Opiate300_20,test_Propoxyphene_21,test_Amphetamines_21,test_Cannabinoids_21,test_Benzodiazepines_21,test_Methadone_21,test_Oxycodone_21,test_Cocaine_21,test_Methamphetamine_21,test_Opiate300_21,test_Propoxyphene_22,test_Amphetamines_22,test_Cannabinoids_22,test_Benzodiazepines_22,test_Methadone_22,test_Oxycodone_22,test_Cocaine_22,test_Methamphetamine_22,test_Opiate300_22,test_Propoxyphene_23,test_Amphetamines_23,test_Cannabinoids_23,test_Benzodiazepines_23,test_Methadone_23,test_Oxycodone_23,test_Cocaine_23,test_Methamphetamine_23,test_Opiate300_23,test_Propoxyphene_24,test_Amphetamines_24,test_Cannabinoids_24,test_Benzodiazepines_24,test_Methadone_24,test_Oxycodone_24,test_Cocaine_24,test_Methamphetamine_24,test_Opiate300_24
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
3,4,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,,,,,,,,,,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
4,14,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0


## Feature Engineering
We will create 3 features, to track treatment outcomes from tests<br><br>
**TNT** (Total negative tests) - we will count the total number of negative opiate tests per patient over 24 weeks<br><br>
**CNT** (Consecutive negative tests) - Count of consecutive weeks with negative test, with longer periods producing clinical benefit<br><br>
**responder** - A patient showing a response to treatment, by reaching pre-defined abstinence window, which is the final 4 weeks of treatment.  If patient shows four consecutive clean urine tests, they are a resopnder.

In [19]:
def uds_features(df):
    """
    Creates features (columns) from opiate test data, listed as follows:
    1) 'TNT' - Total Negative tests - counts total negative tests per patient
    
    3) 'CNT' - Consecutive Negative tests - counts number of consecutive weeks of negative tests
    
    2) 'responder' - A responder is defined as a patient who successfully meets the abstinence window
    with 4 consecutive clean urine tests at the final 4 weeks of treatment

    Parameters:
    df (pandas.DataFrame): The DataFrame containing the opiates data.

    Returns:
    pandas.DataFrame: The processed DataFrame.
    """
    # create df for opiates tests
    tests = df.loc[:, ['patdeid'] + [col for col in df.columns if 'test_Opiate300' in col]]

    # remove the prefix from the column names
    tests.columns = tests.columns.str.replace('test_Opiate300_', '')

    # null values will be treated as positive urine tests and filled with 1.0
    tests = tests.fillna(1.0)

    # create column tnt (total negative tests) counts total negative tests for each patient
    tests['TNT'] = (tests.iloc[:, 1:] == 0.0).astype(int).sum(axis=1)

    # create column 'CNT' (consecutive negative tests)
    tests['CNT'] = None

    # convert each column into a list
    # evaluate the values and count how many times 0.0 occurs consecutively
    # update the count in tests['CNT'] column

    # import itertools library
    import itertools

    for i in range(0,tests.shape[0]):
        values = [len(list(v)) for k, v in itertools.groupby(tests.iloc[i,1:26]) if k == 0.0]
        tests['CNT'][i] = max(values) if values else 0

    # create column 'responder' - defined as a patient that reaches abstinent window
    # observe the number in columns 21 - 24 if the sum is equal to zero then value in responder column is 1.0 else 0.0
    tests['responder'] = np.where((tests.iloc[:, 21:26].sum(axis=1) == 0), 1.0, 0.0).astype(int)


    return tests


In [20]:
# set options so that column width is NOT truncated
pd.set_option('display.max_columns', None)

# execute uds features function
tests = uds_features(uds)

# review shape and first 5 rows
print(f'Shape of opiates dataframe: {tests.shape}')
tests.sample(15) # sample 15 rows

Shape of opiates dataframe: (753, 29)


Unnamed: 0,patdeid,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,TNT,CNT,responder
422,1095,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,-5.0,0.0,0.0,0.0,0.0,15,4,0
492,1261,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23,21,1
539,1364,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23,23,1
98,253,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,23,21,0
490,1257,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,15,10,0
585,1486,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0,0,0
203,542,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,3,1,0
558,1408,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,1,0
741,1906,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21,13,1
552,1390,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17,15,1


## Create outcome dataframe
We will take the ```TNT```,```CNT```and ```responder``` columns and merge them to dataset later on

In [20]:
# outcome dataframe

outcome = tests.loc[:,['patdeid','TNT','CNT','responder']]

print(f'The shape of the outcome dataframe is {outcome.shape}') 

outcome[:5]

The shape of the outcome dataframe is (753, 4)


Unnamed: 0,patdeid,TNT,CNT,responder
0,1,20,8,1
1,2,6,4,0
2,3,0,0,0
3,4,4,1,0
4,14,15,14,0


### Urine drug screen data successfully transformed

## Transform Self Reported use table

In [21]:
# define parameters
sru_cols = ['VISIT','TFB001A','TFB002A','TFB003A','TFB004A','TFB005A','TFB006A','TFB007A','TFB008A','TFB009A','TFB010A','patdeid']

new_columns = {
    'TFB001A':'survey_alcohol',
    'TFB002A':'survey_cannabis','TFB003A':'survey_cocaine',
    'TFB010A':'survey_oxycodone','TFB009A':'survey_methadone','TFB004A':'survey_amphetamine','TFB005A':'survey_methamphetamine','TFB006A':'survey_opiates','TFB007A':'survey_benzodiazepines','TFB008A':'survey_propoxyphene'}

# execute clean function
sru = clean_df(sru, sru_cols, new_columns)

print(f'Shape of sru dataframe: {sru.shape}')
sru[:5]

Shape of sru dataframe: (100518, 12)


Unnamed: 0,patdeid,VISIT,survey_cannabis,survey_cocaine,survey_alcohol,survey_oxycodone,survey_methadone,survey_amphetamine,survey_methamphetamine,survey_opiates,survey_benzodiazepines,survey_propoxyphene
0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


### Aggregate columns
The self reported use surveys were entered manually with multiple rows per patient.  We will aggregate the survey results and reduce the number of rows in the dataset

In [22]:
index_cols = ['patdeid','VISIT']
agg_dict = {col:'sum' for col in sru.columns if col not in ['patdeid','VISIT','total_visits']}

# execute aggregation
sru = agg_df(sru, index_cols, agg_dict)

print(f'New shape for aggregated dataframe, reduced from 100518 rows to {sru.shape[0]}')
sru[:5]

New shape for aggregated dataframe, reduced from 100518 rows to 6008


Unnamed: 0,patdeid,VISIT,survey_cannabis,survey_cocaine,survey_alcohol,survey_oxycodone,survey_methadone,survey_amphetamine,survey_methamphetamine,survey_opiates,survey_benzodiazepines,survey_propoxyphene
0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0
1,1,4,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
2,1,8,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0
4,2,4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0


Now we will flatten the SRU dataset creating columns for all results, reducing rows to one row per patient

In [23]:
# flatten dataframe for sru
start = 0 # start merge at week 0 reflects the baseline assessment
stop = 24 # stop merge at week 24, final week of treatment
step = 4 # looks for dataframes to merge in increments of 4 weeks

sru = flatten_dataframe(sru,start,stop,step)

# review the first few rows and shape
print(f'Shape of sru dataframe: {sru.shape}')
sru[:5]

Shape of sru dataframe: (357, 71)


Unnamed: 0,patdeid,survey_cannabis_0,survey_cocaine_0,survey_alcohol_0,survey_oxycodone_0,survey_methadone_0,survey_amphetamine_0,survey_methamphetamine_0,survey_opiates_0,survey_benzodiazepines_0,survey_propoxyphene_0,survey_cannabis_4,survey_cocaine_4,survey_alcohol_4,survey_oxycodone_4,survey_methadone_4,survey_amphetamine_4,survey_methamphetamine_4,survey_opiates_4,survey_benzodiazepines_4,survey_propoxyphene_4,survey_cannabis_8,survey_cocaine_8,survey_alcohol_8,survey_oxycodone_8,survey_methadone_8,survey_amphetamine_8,survey_methamphetamine_8,survey_opiates_8,survey_benzodiazepines_8,survey_propoxyphene_8,survey_cannabis_12,survey_cocaine_12,survey_alcohol_12,survey_oxycodone_12,survey_methadone_12,survey_amphetamine_12,survey_methamphetamine_12,survey_opiates_12,survey_benzodiazepines_12,survey_propoxyphene_12,survey_cannabis_16,survey_cocaine_16,survey_alcohol_16,survey_oxycodone_16,survey_methadone_16,survey_amphetamine_16,survey_methamphetamine_16,survey_opiates_16,survey_benzodiazepines_16,survey_propoxyphene_16,survey_cannabis_20,survey_cocaine_20,survey_alcohol_20,survey_oxycodone_20,survey_methadone_20,survey_amphetamine_20,survey_methamphetamine_20,survey_opiates_20,survey_benzodiazepines_20,survey_propoxyphene_20,survey_cannabis_24,survey_cocaine_24,survey_alcohol_24,survey_oxycodone_24,survey_methadone_24,survey_amphetamine_24,survey_methamphetamine_24,survey_opiates_24,survey_benzodiazepines_24,survey_propoxyphene_24
0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0
1,3,0.0,23.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,9.0,18.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,8.0,4.0,0.0,0.0,0.0,0.0,28.0,1.0,0.0,0.0,5.0,4.0,0.0,0.0,0.0,1.0,26.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,28.0,1.0,0.0
2,4,1.0,2.0,0.0,1.0,0.0,0.0,0.0,30.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,8.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
3,24,0.0,8.0,0.0,0.0,22.0,0.0,0.0,30.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0


### Self reported use data successfully transofrmed

## Transform the Demo dataset

In [24]:
# define parameters
new_cols = ['patdeid','DEM002','DEM003A','DEM004A','DEM004B','DEM004C','DEM004D','DEM004E']

# rename cols
rename_cols = {'DEM002':'Sex','DEM003A':'Ethnicity','DEM004A':'race_amer_ind','DEM004B':'race_Asian','DEM004C':'race_Black','DEM004D':'race_pacific_isl','DEM004E':'race_White'}

# execute aggregation
demo = clean_df(demo, new_cols, rename_cols)

# imput nulls with 0.0
demo = demo.fillna(0.0)

print(f'New shape for demo df {demo.shape}')
demo[:5]


New shape for demo df (1920, 8)


Unnamed: 0,patdeid,Sex,Ethnicity,race_amer_ind,race_Asian,race_Black,race_pacific_isl,race_White
0,1,1.0,2.0,0.0,0.0,0.0,0.0,5.0
1,2,1.0,2.0,0.0,0.0,0.0,0.0,5.0
2,3,1.0,2.0,0.0,0.0,0.0,0.0,5.0
3,4,2.0,2.0,0.0,0.0,0.0,0.0,5.0
4,5,1.0,2.0,0.0,0.0,0.0,0.0,5.0


In [25]:

def clean_columns(df, cols):
    """
    Create dummy variables for the given columns in the given DataFrame.

    Parameters:
    df (pandas.DataFrame): The DataFrame containing the columns to be dummified.
    cols (list): A list of column names to be dummified.

    Returns:
    pandas.DataFrame: The DataFrame with the dummy variables added.
    """
    for col in cols:
        df[col] = np.where(df[col]>0.0, 1.0, 0.0)
    return df

In [26]:
# define parameters
clean_cols = ['race_amer_ind','race_Asian','race_Black','race_pacific_isl','race_White']

# execute function
demo = clean_columns(demo, clean_cols)

demo[:25]

Unnamed: 0,patdeid,Sex,Ethnicity,race_amer_ind,race_Asian,race_Black,race_pacific_isl,race_White
0,1,1.0,2.0,0.0,0.0,0.0,0.0,1.0
1,2,1.0,2.0,0.0,0.0,0.0,0.0,1.0
2,3,1.0,2.0,0.0,0.0,0.0,0.0,1.0
3,4,2.0,2.0,0.0,0.0,0.0,0.0,1.0
4,5,1.0,2.0,0.0,0.0,0.0,0.0,1.0
5,6,2.0,2.0,0.0,0.0,0.0,0.0,1.0
6,7,1.0,2.0,0.0,1.0,0.0,0.0,0.0
7,8,2.0,2.0,0.0,0.0,0.0,0.0,1.0
8,9,1.0,2.0,1.0,0.0,0.0,0.0,1.0
9,10,1.0,2.0,0.0,0.0,0.0,0.0,1.0


### All tables have been appropriately cleaned and enriched with new features
Data has been properly transofrmed and is ready for merge

In [27]:
# set parameters for merge

# add medication, urine drug screen, self reported use, demographics, medication type and outcome to merge
dfs = [med, uds, sru, demo, medication, total_visits, outcome]

# merge dataframes
new_df = merge_dfs(dfs)

# review shape and columns
print(f'Shape of new dataframe: {new_df.shape}')

new_df

Shape of new dataframe: (353, 358)


Unnamed: 0,patdeid,meds_methadone_0,meds_buprenorphine_0,meds_methadone_1,meds_buprenorphine_1,meds_methadone_2,meds_buprenorphine_2,meds_methadone_3,meds_buprenorphine_3,meds_methadone_4,meds_buprenorphine_4,meds_methadone_5,meds_buprenorphine_5,meds_methadone_6,meds_buprenorphine_6,meds_methadone_7,meds_buprenorphine_7,meds_methadone_8,meds_buprenorphine_8,meds_methadone_9,meds_buprenorphine_9,meds_methadone_10,meds_buprenorphine_10,meds_methadone_11,meds_buprenorphine_11,meds_methadone_12,meds_buprenorphine_12,meds_methadone_13,meds_buprenorphine_13,meds_methadone_14,meds_buprenorphine_14,meds_methadone_15,meds_buprenorphine_15,meds_methadone_16,meds_buprenorphine_16,meds_methadone_17,meds_buprenorphine_17,meds_methadone_18,meds_buprenorphine_18,meds_methadone_19,meds_buprenorphine_19,meds_methadone_20,meds_buprenorphine_20,meds_methadone_21,meds_buprenorphine_21,meds_methadone_22,meds_buprenorphine_22,meds_methadone_23,meds_buprenorphine_23,meds_methadone_24,meds_buprenorphine_24,test_Propoxyphene_0,test_Amphetamines_0,test_Cannabinoids_0,test_Benzodiazepines_0,test_Methadone_0,test_Oxycodone_0,test_Cocaine_0,test_Methamphetamine_0,test_Opiate300_0,test_Propoxyphene_1,test_Amphetamines_1,test_Cannabinoids_1,test_Benzodiazepines_1,test_Methadone_1,test_Oxycodone_1,test_Cocaine_1,test_Methamphetamine_1,test_Opiate300_1,test_Propoxyphene_2,test_Amphetamines_2,test_Cannabinoids_2,test_Benzodiazepines_2,test_Methadone_2,test_Oxycodone_2,test_Cocaine_2,test_Methamphetamine_2,test_Opiate300_2,test_Propoxyphene_3,test_Amphetamines_3,test_Cannabinoids_3,test_Benzodiazepines_3,test_Methadone_3,test_Oxycodone_3,test_Cocaine_3,test_Methamphetamine_3,test_Opiate300_3,test_Propoxyphene_4,test_Amphetamines_4,test_Cannabinoids_4,test_Benzodiazepines_4,test_Methadone_4,test_Oxycodone_4,test_Cocaine_4,test_Methamphetamine_4,test_Opiate300_4,test_Propoxyphene_5,test_Amphetamines_5,test_Cannabinoids_5,test_Benzodiazepines_5,test_Methadone_5,test_Oxycodone_5,test_Cocaine_5,test_Methamphetamine_5,test_Opiate300_5,test_Propoxyphene_6,test_Amphetamines_6,test_Cannabinoids_6,test_Benzodiazepines_6,test_Methadone_6,test_Oxycodone_6,test_Cocaine_6,test_Methamphetamine_6,test_Opiate300_6,test_Propoxyphene_7,test_Amphetamines_7,test_Cannabinoids_7,test_Benzodiazepines_7,test_Methadone_7,test_Oxycodone_7,test_Cocaine_7,test_Methamphetamine_7,test_Opiate300_7,test_Propoxyphene_8,test_Amphetamines_8,test_Cannabinoids_8,test_Benzodiazepines_8,test_Methadone_8,test_Oxycodone_8,test_Cocaine_8,test_Methamphetamine_8,test_Opiate300_8,test_Propoxyphene_9,test_Amphetamines_9,test_Cannabinoids_9,test_Benzodiazepines_9,test_Methadone_9,test_Oxycodone_9,test_Cocaine_9,test_Methamphetamine_9,test_Opiate300_9,test_Propoxyphene_10,test_Amphetamines_10,test_Cannabinoids_10,test_Benzodiazepines_10,test_Methadone_10,test_Oxycodone_10,test_Cocaine_10,test_Methamphetamine_10,test_Opiate300_10,test_Propoxyphene_11,test_Amphetamines_11,test_Cannabinoids_11,test_Benzodiazepines_11,test_Methadone_11,test_Oxycodone_11,test_Cocaine_11,test_Methamphetamine_11,test_Opiate300_11,test_Propoxyphene_12,test_Amphetamines_12,test_Cannabinoids_12,test_Benzodiazepines_12,test_Methadone_12,test_Oxycodone_12,test_Cocaine_12,test_Methamphetamine_12,test_Opiate300_12,test_Propoxyphene_13,test_Amphetamines_13,test_Cannabinoids_13,test_Benzodiazepines_13,test_Methadone_13,test_Oxycodone_13,test_Cocaine_13,test_Methamphetamine_13,test_Opiate300_13,test_Propoxyphene_14,test_Amphetamines_14,test_Cannabinoids_14,test_Benzodiazepines_14,test_Methadone_14,test_Oxycodone_14,test_Cocaine_14,test_Methamphetamine_14,test_Opiate300_14,test_Propoxyphene_15,test_Amphetamines_15,test_Cannabinoids_15,test_Benzodiazepines_15,test_Methadone_15,test_Oxycodone_15,test_Cocaine_15,test_Methamphetamine_15,test_Opiate300_15,test_Propoxyphene_16,test_Amphetamines_16,test_Cannabinoids_16,test_Benzodiazepines_16,test_Methadone_16,test_Oxycodone_16,test_Cocaine_16,test_Methamphetamine_16,test_Opiate300_16,test_Propoxyphene_17,test_Amphetamines_17,test_Cannabinoids_17,test_Benzodiazepines_17,test_Methadone_17,test_Oxycodone_17,test_Cocaine_17,test_Methamphetamine_17,test_Opiate300_17,test_Propoxyphene_18,test_Amphetamines_18,test_Cannabinoids_18,test_Benzodiazepines_18,test_Methadone_18,test_Oxycodone_18,test_Cocaine_18,test_Methamphetamine_18,test_Opiate300_18,test_Propoxyphene_19,test_Amphetamines_19,test_Cannabinoids_19,test_Benzodiazepines_19,test_Methadone_19,test_Oxycodone_19,test_Cocaine_19,test_Methamphetamine_19,test_Opiate300_19,test_Propoxyphene_20,test_Amphetamines_20,test_Cannabinoids_20,test_Benzodiazepines_20,test_Methadone_20,test_Oxycodone_20,test_Cocaine_20,test_Methamphetamine_20,test_Opiate300_20,test_Propoxyphene_21,test_Amphetamines_21,test_Cannabinoids_21,test_Benzodiazepines_21,test_Methadone_21,test_Oxycodone_21,test_Cocaine_21,test_Methamphetamine_21,test_Opiate300_21,test_Propoxyphene_22,test_Amphetamines_22,test_Cannabinoids_22,test_Benzodiazepines_22,test_Methadone_22,test_Oxycodone_22,test_Cocaine_22,test_Methamphetamine_22,test_Opiate300_22,test_Propoxyphene_23,test_Amphetamines_23,test_Cannabinoids_23,test_Benzodiazepines_23,test_Methadone_23,test_Oxycodone_23,test_Cocaine_23,test_Methamphetamine_23,test_Opiate300_23,test_Propoxyphene_24,test_Amphetamines_24,test_Cannabinoids_24,test_Benzodiazepines_24,test_Methadone_24,test_Oxycodone_24,test_Cocaine_24,test_Methamphetamine_24,test_Opiate300_24,survey_cannabis_0,survey_cocaine_0,survey_alcohol_0,survey_oxycodone_0,survey_methadone_0,survey_amphetamine_0,survey_methamphetamine_0,survey_opiates_0,survey_benzodiazepines_0,survey_propoxyphene_0,survey_cannabis_4,survey_cocaine_4,survey_alcohol_4,survey_oxycodone_4,survey_methadone_4,survey_amphetamine_4,survey_methamphetamine_4,survey_opiates_4,survey_benzodiazepines_4,survey_propoxyphene_4,survey_cannabis_8,survey_cocaine_8,survey_alcohol_8,survey_oxycodone_8,survey_methadone_8,survey_amphetamine_8,survey_methamphetamine_8,survey_opiates_8,survey_benzodiazepines_8,survey_propoxyphene_8,survey_cannabis_12,survey_cocaine_12,survey_alcohol_12,survey_oxycodone_12,survey_methadone_12,survey_amphetamine_12,survey_methamphetamine_12,survey_opiates_12,survey_benzodiazepines_12,survey_propoxyphene_12,survey_cannabis_16,survey_cocaine_16,survey_alcohol_16,survey_oxycodone_16,survey_methadone_16,survey_amphetamine_16,survey_methamphetamine_16,survey_opiates_16,survey_benzodiazepines_16,survey_propoxyphene_16,survey_cannabis_20,survey_cocaine_20,survey_alcohol_20,survey_oxycodone_20,survey_methadone_20,survey_amphetamine_20,survey_methamphetamine_20,survey_opiates_20,survey_benzodiazepines_20,survey_propoxyphene_20,survey_cannabis_24,survey_cocaine_24,survey_alcohol_24,survey_oxycodone_24,survey_methadone_24,survey_amphetamine_24,survey_methamphetamine_24,survey_opiates_24,survey_benzodiazepines_24,survey_propoxyphene_24,Sex,Ethnicity,race_amer_ind,race_Asian,race_Black,race_pacific_isl,race_White,medication,total_visits,TNT,CNT,responder
0,2,0.0,8.0,0.0,48.0,0.0,48.0,0.0,60.0,0.0,48.0,0.0,84.0,0.0,84.0,0.0,72.0,0.0,36.0,0.0,72.0,0.0,76.0,0.0,96.0,0.0,88.0,0.0,40.0,0.0,64.0,0.0,144.0,0.0,64.0,0.0,48.0,0.0,40.0,0.0,40.0,0.0,40.0,0.0,72.0,0.0,60.0,0.0,72.0,0.0,68.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,2.0,26,6,4,0
1,3,30.0,0.0,170.0,0.0,310.0,0.0,420.0,0.0,360.0,0.0,480.0,0.0,310.0,0.0,455.0,0.0,455.0,0.0,345.0,0.0,600.0,0.0,380.0,0.0,560.0,0.0,720.0,0.0,600.0,0.0,360.0,0.0,640.0,0.0,600.0,0.0,600.0,0.0,800.0,0.0,600.0,0.0,670.0,0.0,630.0,0.0,510.0,0.0,540.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,23.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,9.0,18.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,8.0,4.0,0.0,0.0,0.0,0.0,28.0,1.0,0.0,0.0,5.0,4.0,0.0,0.0,0.0,1.0,26.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,28.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,26,0,0,0
2,4,0.0,16.0,0.0,152.0,0.0,192.0,0.0,160.0,0.0,64.0,0.0,256.0,0.0,256.0,0.0,160.0,0.0,128.0,0.0,192.0,0.0,128.0,0.0,128.0,0.0,288.0,0.0,128.0,0.0,160.0,0.0,96.0,0.0,224.0,0.0,128.0,0.0,192.0,0.0,192.0,0.0,256.0,0.0,32.0,0.0,160.0,0.0,128.0,0.0,32.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,,,,,,,,,,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,30.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,8.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,1.0,2.0,26,4,1,0
3,24,90.0,0.0,730.0,0.0,1070.0,0.0,600.0,0.0,1080.0,0.0,840.0,0.0,840.0,0.0,860.0,0.0,780.0,0.0,910.0,0.0,780.0,0.0,1300.0,0.0,650.0,0.0,910.0,0.0,910.0,0.0,1170.0,0.0,910.0,0.0,540.0,0.0,1260.0,0.0,1120.0,0.0,840.0,0.0,490.0,0.0,1540.0,0.0,140.0,0.0,1540.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,,,,,,,,,,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,22.0,0.0,0.0,30.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,26,22,13,0
4,27,0.0,8.0,0.0,40.0,0.0,72.0,0.0,40.0,0.0,40.0,0.0,48.0,0.0,40.0,0.0,40.0,0.0,16.0,0.0,56.0,0.0,8.0,0.0,48.0,0.0,24.0,0.0,32.0,0.0,40.0,0.0,40.0,0.0,32.0,0.0,32.0,0.0,24.0,0.0,0.0,0.0,4.0,0.0,40.0,0.0,8.0,0.0,60.0,0.0,24.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,26,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
348,1898,0.0,12.0,0.0,88.0,0.0,112.0,0.0,80.0,0.0,176.0,0.0,64.0,0.0,64.0,0.0,144.0,0.0,60.0,0.0,120.0,0.0,140.0,0.0,120.0,0.0,120.0,0.0,140.0,0.0,100.0,0.0,160.0,0.0,120.0,0.0,140.0,0.0,84.0,0.0,192.0,0.0,144.0,0.0,120.0,0.0,192.0,0.0,96.0,0.0,192.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,-5.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,26,9,3,0
349,1905,140.0,0.0,360.0,0.0,360.0,0.0,270.0,0.0,490.0,0.0,550.0,0.0,560.0,0.0,560.0,0.0,610.0,0.0,630.0,0.0,630.0,0.0,660.0,0.0,700.0,0.0,720.0,0.0,770.0,0.0,770.0,0.0,770.0,0.0,770.0,0.0,820.0,0.0,840.0,0.0,720.0,0.0,840.0,0.0,840.0,0.0,670.0,0.0,4465.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,4.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,26,9,5,0
350,1908,40.0,0.0,255.0,0.0,275.0,0.0,275.0,0.0,355.0,0.0,60.0,0.0,180.0,0.0,180.0,0.0,210.0,0.0,300.0,0.0,300.0,0.0,240.0,0.0,520.0,0.0,70.0,0.0,450.0,0.0,320.0,0.0,480.0,0.0,480.0,0.0,320.0,0.0,740.0,0.0,540.0,0.0,540.0,0.0,450.0,0.0,630.0,0.0,990.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,26,6,3,0
351,1909,30.0,0.0,490.0,0.0,350.0,0.0,300.0,0.0,350.0,0.0,350.0,0.0,350.0,0.0,700.0,0.0,350.0,0.0,350.0,0.0,50.0,0.0,50.0,0.0,1950.0,0.0,50.0,0.0,50.0,0.0,50.0,0.0,700.0,0.0,50.0,0.0,50.0,0.0,350.0,0.0,1300.0,0.0,50.0,0.0,50.0,0.0,50.0,0.0,690.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,6.0,2.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,26,1,1,0


In [28]:
new_df.loc[new_df.total_visits==new_df.total_visits.max()].responder.value_counts()

responder
0    270
1     82
Name: count, dtype: int64

### Tables Successfully Merged

In [29]:
# drop the patdeid column
# new_df = new_df.drop(columns='patdeid')

In [30]:
# export new df to csv
new_df.to_csv('../notebooks/data/merged_data.csv', index=False)

In [31]:
pd.set_option('display.max_rows', None)

new_df.isnull().sum()

patdeid                       0
meds_methadone_0              0
meds_buprenorphine_0          0
meds_methadone_1              1
meds_buprenorphine_1          1
meds_methadone_2              0
meds_buprenorphine_2          0
meds_methadone_3              0
meds_buprenorphine_3          0
meds_methadone_4              0
meds_buprenorphine_4          0
meds_methadone_5              0
meds_buprenorphine_5          0
meds_methadone_6              0
meds_buprenorphine_6          0
meds_methadone_7              0
meds_buprenorphine_7          0
meds_methadone_8              0
meds_buprenorphine_8          0
meds_methadone_9              0
meds_buprenorphine_9          0
meds_methadone_10             0
meds_buprenorphine_10         0
meds_methadone_11             0
meds_buprenorphine_11         0
meds_methadone_12             0
meds_buprenorphine_12         0
meds_methadone_13             0
meds_buprenorphine_13         0
meds_methadone_14             0
meds_buprenorphine_14         0
meds_met

In [32]:
# inpute nulls as follws
for col in new_df.columns:
    if 'survey' in col: # for surveys inpute nulls with 0.0, indicates no survey collected
        new_df[col] = new_df[col].fillna(0.0)
    elif 'test' in col: # for tests inpute nulls with 1.0, missed test considered positive
        new_df[col] = new_df[col].fillna(1.0)
    elif 'meds' in col: # for meds inpute nulls with 0.0, indicates no medication dispensed
        new_df[col] = new_df[col].fillna(0.0)


In [33]:
new_df.shape

(353, 358)

In [38]:
new_df[:5]

Unnamed: 0,patdeid,meds_methadone_0,meds_buprenorphine_0,meds_methadone_1,meds_buprenorphine_1,meds_methadone_2,meds_buprenorphine_2,meds_methadone_3,meds_buprenorphine_3,meds_methadone_4,meds_buprenorphine_4,meds_methadone_5,meds_buprenorphine_5,meds_methadone_6,meds_buprenorphine_6,meds_methadone_7,meds_buprenorphine_7,meds_methadone_8,meds_buprenorphine_8,meds_methadone_9,meds_buprenorphine_9,meds_methadone_10,meds_buprenorphine_10,meds_methadone_11,meds_buprenorphine_11,meds_methadone_12,meds_buprenorphine_12,meds_methadone_13,meds_buprenorphine_13,meds_methadone_14,meds_buprenorphine_14,meds_methadone_15,meds_buprenorphine_15,meds_methadone_16,meds_buprenorphine_16,meds_methadone_17,meds_buprenorphine_17,meds_methadone_18,meds_buprenorphine_18,meds_methadone_19,meds_buprenorphine_19,meds_methadone_20,meds_buprenorphine_20,meds_methadone_21,meds_buprenorphine_21,meds_methadone_22,meds_buprenorphine_22,meds_methadone_23,meds_buprenorphine_23,meds_methadone_24,meds_buprenorphine_24,test_Propoxyphene_0,test_Amphetamines_0,test_Cannabinoids_0,test_Benzodiazepines_0,test_Methadone_0,test_Oxycodone_0,test_Cocaine_0,test_Methamphetamine_0,test_Opiate300_0,test_Propoxyphene_1,test_Amphetamines_1,test_Cannabinoids_1,test_Benzodiazepines_1,test_Methadone_1,test_Oxycodone_1,test_Cocaine_1,test_Methamphetamine_1,test_Opiate300_1,test_Propoxyphene_2,test_Amphetamines_2,test_Cannabinoids_2,test_Benzodiazepines_2,test_Methadone_2,test_Oxycodone_2,test_Cocaine_2,test_Methamphetamine_2,test_Opiate300_2,test_Propoxyphene_3,test_Amphetamines_3,test_Cannabinoids_3,test_Benzodiazepines_3,test_Methadone_3,test_Oxycodone_3,test_Cocaine_3,test_Methamphetamine_3,test_Opiate300_3,test_Propoxyphene_4,test_Amphetamines_4,test_Cannabinoids_4,test_Benzodiazepines_4,test_Methadone_4,test_Oxycodone_4,test_Cocaine_4,test_Methamphetamine_4,test_Opiate300_4,test_Propoxyphene_5,test_Amphetamines_5,test_Cannabinoids_5,test_Benzodiazepines_5,test_Methadone_5,test_Oxycodone_5,test_Cocaine_5,test_Methamphetamine_5,test_Opiate300_5,test_Propoxyphene_6,test_Amphetamines_6,test_Cannabinoids_6,test_Benzodiazepines_6,test_Methadone_6,test_Oxycodone_6,test_Cocaine_6,test_Methamphetamine_6,test_Opiate300_6,test_Propoxyphene_7,test_Amphetamines_7,test_Cannabinoids_7,test_Benzodiazepines_7,test_Methadone_7,test_Oxycodone_7,test_Cocaine_7,test_Methamphetamine_7,test_Opiate300_7,test_Propoxyphene_8,test_Amphetamines_8,test_Cannabinoids_8,test_Benzodiazepines_8,test_Methadone_8,test_Oxycodone_8,test_Cocaine_8,test_Methamphetamine_8,test_Opiate300_8,test_Propoxyphene_9,test_Amphetamines_9,test_Cannabinoids_9,test_Benzodiazepines_9,test_Methadone_9,test_Oxycodone_9,test_Cocaine_9,test_Methamphetamine_9,test_Opiate300_9,test_Propoxyphene_10,test_Amphetamines_10,test_Cannabinoids_10,test_Benzodiazepines_10,test_Methadone_10,test_Oxycodone_10,test_Cocaine_10,test_Methamphetamine_10,test_Opiate300_10,test_Propoxyphene_11,test_Amphetamines_11,test_Cannabinoids_11,test_Benzodiazepines_11,test_Methadone_11,test_Oxycodone_11,test_Cocaine_11,test_Methamphetamine_11,test_Opiate300_11,test_Propoxyphene_12,test_Amphetamines_12,test_Cannabinoids_12,test_Benzodiazepines_12,test_Methadone_12,test_Oxycodone_12,test_Cocaine_12,test_Methamphetamine_12,test_Opiate300_12,test_Propoxyphene_13,test_Amphetamines_13,test_Cannabinoids_13,test_Benzodiazepines_13,test_Methadone_13,test_Oxycodone_13,test_Cocaine_13,test_Methamphetamine_13,test_Opiate300_13,test_Propoxyphene_14,test_Amphetamines_14,test_Cannabinoids_14,test_Benzodiazepines_14,test_Methadone_14,test_Oxycodone_14,test_Cocaine_14,test_Methamphetamine_14,test_Opiate300_14,test_Propoxyphene_15,test_Amphetamines_15,test_Cannabinoids_15,test_Benzodiazepines_15,test_Methadone_15,test_Oxycodone_15,test_Cocaine_15,test_Methamphetamine_15,test_Opiate300_15,test_Propoxyphene_16,test_Amphetamines_16,test_Cannabinoids_16,test_Benzodiazepines_16,test_Methadone_16,test_Oxycodone_16,test_Cocaine_16,test_Methamphetamine_16,test_Opiate300_16,test_Propoxyphene_17,test_Amphetamines_17,test_Cannabinoids_17,test_Benzodiazepines_17,test_Methadone_17,test_Oxycodone_17,test_Cocaine_17,test_Methamphetamine_17,test_Opiate300_17,test_Propoxyphene_18,test_Amphetamines_18,test_Cannabinoids_18,test_Benzodiazepines_18,test_Methadone_18,test_Oxycodone_18,test_Cocaine_18,test_Methamphetamine_18,test_Opiate300_18,test_Propoxyphene_19,test_Amphetamines_19,test_Cannabinoids_19,test_Benzodiazepines_19,test_Methadone_19,test_Oxycodone_19,test_Cocaine_19,test_Methamphetamine_19,test_Opiate300_19,test_Propoxyphene_20,test_Amphetamines_20,test_Cannabinoids_20,test_Benzodiazepines_20,test_Methadone_20,test_Oxycodone_20,test_Cocaine_20,test_Methamphetamine_20,test_Opiate300_20,test_Propoxyphene_21,test_Amphetamines_21,test_Cannabinoids_21,test_Benzodiazepines_21,test_Methadone_21,test_Oxycodone_21,test_Cocaine_21,test_Methamphetamine_21,test_Opiate300_21,test_Propoxyphene_22,test_Amphetamines_22,test_Cannabinoids_22,test_Benzodiazepines_22,test_Methadone_22,test_Oxycodone_22,test_Cocaine_22,test_Methamphetamine_22,test_Opiate300_22,test_Propoxyphene_23,test_Amphetamines_23,test_Cannabinoids_23,test_Benzodiazepines_23,test_Methadone_23,test_Oxycodone_23,test_Cocaine_23,test_Methamphetamine_23,test_Opiate300_23,test_Propoxyphene_24,test_Amphetamines_24,test_Cannabinoids_24,test_Benzodiazepines_24,test_Methadone_24,test_Oxycodone_24,test_Cocaine_24,test_Methamphetamine_24,test_Opiate300_24,survey_cannabis_0,survey_cocaine_0,survey_alcohol_0,survey_oxycodone_0,survey_methadone_0,survey_amphetamine_0,survey_methamphetamine_0,survey_opiates_0,survey_benzodiazepines_0,survey_propoxyphene_0,survey_cannabis_4,survey_cocaine_4,survey_alcohol_4,survey_oxycodone_4,survey_methadone_4,survey_amphetamine_4,survey_methamphetamine_4,survey_opiates_4,survey_benzodiazepines_4,survey_propoxyphene_4,survey_cannabis_8,survey_cocaine_8,survey_alcohol_8,survey_oxycodone_8,survey_methadone_8,survey_amphetamine_8,survey_methamphetamine_8,survey_opiates_8,survey_benzodiazepines_8,survey_propoxyphene_8,survey_cannabis_12,survey_cocaine_12,survey_alcohol_12,survey_oxycodone_12,survey_methadone_12,survey_amphetamine_12,survey_methamphetamine_12,survey_opiates_12,survey_benzodiazepines_12,survey_propoxyphene_12,survey_cannabis_16,survey_cocaine_16,survey_alcohol_16,survey_oxycodone_16,survey_methadone_16,survey_amphetamine_16,survey_methamphetamine_16,survey_opiates_16,survey_benzodiazepines_16,survey_propoxyphene_16,survey_cannabis_20,survey_cocaine_20,survey_alcohol_20,survey_oxycodone_20,survey_methadone_20,survey_amphetamine_20,survey_methamphetamine_20,survey_opiates_20,survey_benzodiazepines_20,survey_propoxyphene_20,survey_cannabis_24,survey_cocaine_24,survey_alcohol_24,survey_oxycodone_24,survey_methadone_24,survey_amphetamine_24,survey_methamphetamine_24,survey_opiates_24,survey_benzodiazepines_24,survey_propoxyphene_24,Sex,Ethnicity,race_amer_ind,race_Asian,race_Black,race_pacific_isl,race_White,medication,total_visits,TNT,CNT,responder
0,2,0.0,8.0,0.0,48.0,0.0,48.0,0.0,60.0,0.0,48.0,0.0,84.0,0.0,84.0,0.0,72.0,0.0,36.0,0.0,72.0,0.0,76.0,0.0,96.0,0.0,88.0,0.0,40.0,0.0,64.0,0.0,144.0,0.0,64.0,0.0,48.0,0.0,40.0,0.0,40.0,0.0,40.0,0.0,72.0,0.0,60.0,0.0,72.0,0.0,68.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,2.0,26,6,4,0
1,3,30.0,0.0,170.0,0.0,310.0,0.0,420.0,0.0,360.0,0.0,480.0,0.0,310.0,0.0,455.0,0.0,455.0,0.0,345.0,0.0,600.0,0.0,380.0,0.0,560.0,0.0,720.0,0.0,600.0,0.0,360.0,0.0,640.0,0.0,600.0,0.0,600.0,0.0,800.0,0.0,600.0,0.0,670.0,0.0,630.0,0.0,510.0,0.0,540.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,23.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,9.0,18.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,8.0,4.0,0.0,0.0,0.0,0.0,28.0,1.0,0.0,0.0,5.0,4.0,0.0,0.0,0.0,1.0,26.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,28.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,26,0,0,0
2,4,0.0,16.0,0.0,152.0,0.0,192.0,0.0,160.0,0.0,64.0,0.0,256.0,0.0,256.0,0.0,160.0,0.0,128.0,0.0,192.0,0.0,128.0,0.0,128.0,0.0,288.0,0.0,128.0,0.0,160.0,0.0,96.0,0.0,224.0,0.0,128.0,0.0,192.0,0.0,192.0,0.0,256.0,0.0,32.0,0.0,160.0,0.0,128.0,0.0,32.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,30.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,8.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,1.0,2.0,26,4,1,0
3,24,90.0,0.0,730.0,0.0,1070.0,0.0,600.0,0.0,1080.0,0.0,840.0,0.0,840.0,0.0,860.0,0.0,780.0,0.0,910.0,0.0,780.0,0.0,1300.0,0.0,650.0,0.0,910.0,0.0,910.0,0.0,1170.0,0.0,910.0,0.0,540.0,0.0,1260.0,0.0,1120.0,0.0,840.0,0.0,490.0,0.0,1540.0,0.0,140.0,0.0,1540.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,22.0,0.0,0.0,30.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,26,22,13,0
4,27,0.0,8.0,0.0,40.0,0.0,72.0,0.0,40.0,0.0,40.0,0.0,48.0,0.0,40.0,0.0,40.0,0.0,16.0,0.0,56.0,0.0,8.0,0.0,48.0,0.0,24.0,0.0,32.0,0.0,40.0,0.0,40.0,0.0,32.0,0.0,32.0,0.0,24.0,0.0,0.0,0.0,4.0,0.0,40.0,0.0,8.0,0.0,60.0,0.0,24.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,26,0,0,0


In [34]:
# export new df to csv
new_df.to_csv('../notebooks/data/merged_inputed_data.csv', index=False)