# MDR Analysis Functions

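This notebook contains functions that create a new boolean flag variable marking rows where any column in a specified range of existing variables holds an ICD code that starts with one of a given list of truncated codes (prefix matching). A second variant first strips periods from the ICD codes in the searched columns (e.g. `518.81` becomes `51881`) before matching.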

In [1]:
import pandas as pd

## Sample Data

In [2]:
# Toy diagnosis table: six records, three diagnosis columns (DX1-DX3),
# each holding ICD codes as strings without periods.
data = dict(
    DX1=['51881', '51882', '51884', '7991', 'M170', 'M171'],
    DX2=['M1710', 'M1711', 'M1712', '51881', '51882', '51884'],
    DX3=['7991', 'M170', 'M171', 'M1710', 'M1711', 'M1712'],
)

# Build the demo frame from the column mapping and display it
df = pd.DataFrame.from_dict(data)
df

## Function to Create Variable Based on ICD Code Substrings

In [3]:
def create_var_based_on_subcodes(df, codes, var_range, newvar):
    """
    Flag rows where any searched column holds a value starting with a given code.

    A row is flagged True when, in at least one column of ``var_range``, the
    value starts with at least one entry of ``codes`` (prefix match via
    ``str.startswith``).

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data. It is modified in
        place: ``newvar`` is added (or overwritten) as a column.
    codes (list): A list of truncated ICD codes to match as prefixes.
    var_range (list): A list of existing string columns to search through.
    newvar (str): The name of the new variable to be created.

    Returns:
    pd.DataFrame: The same DataFrame object with the boolean column
    ``newvar`` added. The column is all False when ``codes`` or
    ``var_range`` is empty.
    """
    # Accumulate matches in a local boolean mask so the result is always a
    # bool column (even when no comparison runs) and the frame is written once.
    matched = pd.Series(False, index=df.index, dtype=bool)
    for var in var_range:
        column = df[var]
        for code in codes:
            # na=False: a missing value never counts as a match.
            matched = matched | column.str.startswith(code, na=False)
    df[newvar] = matched
    return df

## Function to Create Variable Based on ICD Code Substrings and Clean Periods

In [4]:
def create_var_based_on_subcodes_with_period(df, codes, var_range, newvar):
    """
    Strip periods from the searched columns, then flag rows by code prefix.

    Every column in ``var_range`` has its periods removed (literal ``.``,
    not a regex) and is written back to ``df``. A row is then flagged True
    when, in at least one of those columns, the cleaned value starts with at
    least one entry of ``codes``. Periods are removed from ``codes`` as well,
    so a dotted prefix such as ``"518.8"`` matches the cleaned value
    ``"51881"``.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data. It is modified in
        place: the columns in ``var_range`` are overwritten with their
        period-free values and ``newvar`` is added (or overwritten).
    codes (list): A list of truncated ICD codes to match as prefixes, with
        or without periods.
    var_range (list): A list of existing string columns to clean and search.
    newvar (str): The name of the new variable to be created.

    Returns:
    pd.DataFrame: The same DataFrame object with the boolean column
    ``newvar`` added. The column is all False when ``codes`` or
    ``var_range`` is empty.
    """
    # Normalise the prefixes the same way as the data; otherwise a dotted
    # code could never match a column whose periods have been removed.
    clean_codes = [code.replace('.', '') for code in codes]

    # Accumulate matches in a local boolean mask so the result is always a
    # bool column (even when no comparison runs) and the flag is written once.
    matched = pd.Series(False, index=df.index, dtype=bool)
    for var in var_range:
        # Persist the cleaned codes back into the frame, as before.
        df[var] = df[var].str.replace('.', '', regex=False)
        column = df[var]
        for code in clean_codes:
            # na=False: a missing value never counts as a match.
            matched = matched | column.str.startswith(code, na=False)
    df[newvar] = matched
    return df

## Example Usage

In [5]:
# Demo: flag rows carrying any of these respiratory code prefixes in DX1-DX3
codes = ["51881", "51882", "51884", "7991"]
var_range = ["DX1", "DX2", "DX3"]

# Plain prefix match first, then the variant that strips periods beforehand;
# each call adds its own flag column to the same frame.
df = df.pipe(create_var_based_on_subcodes, codes, var_range, "pc_respiratory_dx")
df = df.pipe(
    create_var_based_on_subcodes_with_period,
    codes,
    var_range,
    "pc_respiratory_dx_cleaned",
)

df

     DX1    DX2    DX3  pc_respiratory_dx  pc_respiratory_dx_cleaned
0  51881  M1710   7991               True                       True
1  51882  M1711   M170               True                       True
2  51884  M1712   M171               True                       True
3   7991  51881  M1710               True                       True
4   M170  51882  M1711               True                       True
5   M171  51884  M1712               True                       True