## How to clean Concomitant Meds

This notebook demonstrates how to load and clean the Concomitant Medication table from the PPMI (Parkinson's Progression Markers Initiative) dataset.

In [None]:
import pandas as pd
from pie.data_loader import DataLoader
from pie.data_preprocessor import DataPreprocessor

In [None]:
# Read the PPMI data from disk, then drill down through the nested result to
# the Concomitant Medication table. Showing its dimensions and first rows
# confirms the table was actually found.
data = DataLoader.load("../PPMI", "PPMI")
med_hist_tables = data["clinical"]["med_hist"]
cmeds_df = med_hist_tables["Concomitant_Medication"]
print(cmeds_df.shape)
cmeds_df.head()

In [None]:
# Baseline: report the dtype of the start/stop date columns in the raw table,
# one per line, so they can be compared with the cleaned table later on.
print(cmeds_df["STARTDT"].dtype, cmeds_df["STOPDT"].dtype, sep="\n")

In [None]:
# Baseline for the indication-code column (CMINDC) in the raw table: how many
# entries are missing, and how often each present code occurs.
# (The author noted 45267 rows at the time of writing, with fewer codes than rows.)
raw_codes = cmeds_df["CMINDC"]
raw_null_count = raw_codes.isnull().sum()
print(f"There are {raw_null_count} nulls")
raw_codes.value_counts()

In [None]:
# Run the project's concomitant-medication cleaning step on the raw table and
# bind the result to a new name (clean_df) so it can be compared with cmeds_df.
# Per this notebook's narrative, the step cleans up the dates and maps the
# indication codes and reason text; the implementation is not shown here.
# NOTE(review): confirm clean_concomitant_meds returns a new frame rather than
# mutating cmeds_df in place -- otherwise the "before" cells above go stale.
clean_df = DataPreprocessor.clean_concomitant_meds(cmeds_df)

In [None]:
# Same dtype report as before, now on the cleaned table. The expected outcome
# is that both date columns show up as datetimes.
print(clean_df["STARTDT"].dtype, clean_df["STOPDT"].dtype, sep="\n")

In [None]:
# Repeat the indication-code check on the cleaned table. The expected outcome
# is that every code has been mapped (no nulls left) and the values are ints.
cleaned_codes = clean_df["CMINDC"]
cleaned_null_count = cleaned_codes.isnull().sum()
print(f"There are {cleaned_null_count} nulls")
cleaned_codes.value_counts()