In [5]:
import pandas as pd
import altair as alt
import warnings
warnings.filterwarnings('ignore')

# Load the raw table from disk.
genetic = pd.read_csv('test_genetic_disorders.csv')

# Column names used by visuals 1 and 2, grouped by role.
# -- quantitative measures
blood_cell_col = 'Blood cell count (mcL)'
white_cell_col = 'White Blood cell count (thousand per microliter)'
# -- categorical attributes
gender_col = 'Gender'
status_col = 'Status'
blood_test_col = 'Blood test result'
birth_defects_col = 'Birth defects'
inherit_mother_col = "Genes in mother's side"
inherit_father_col = 'Inherited from father'

# Every column this analysis needs, quantitative first, then categorical.
all_cols = [
    blood_cell_col,
    white_cell_col,
    gender_col,
    status_col,
    blood_test_col,
    birth_defects_col,
    inherit_mother_col,
    inherit_father_col,
]

# Keep only the analysis columns, turn the -99 missing-value indicator into
# NA in every column, then drop each row that has at least one missing value.
genetic_clean = (
    genetic[all_cols]
    .replace(-99, pd.NA)
    .dropna()
)

def get_inheritance_source(row):
    """Label a row by which parental inheritance columns are marked 'Yes'.

    Args:
        row: A record indexable by column name (for example one row of a
            DataFrame) that contains the ``inherit_mother_col`` and
            ``inherit_father_col`` entries.

    Returns:
        'Both' when both columns equal 'Yes', 'Maternal' when only the
        mother's-side column does, 'Paternal' when only the father's-side
        column does, and 'Neither' otherwise.
    """
    from_mother = row[inherit_mother_col] == 'Yes'
    from_father = row[inherit_father_col] == 'Yes'

    if from_mother:
        return 'Both' if from_father else 'Maternal'
    return 'Paternal' if from_father else 'Neither'

# Derive one inheritance label per row and attach it as a new column.
genetic_clean = genetic_clean.assign(
    Inheritance_Source=genetic_clean.apply(get_inheritance_source, axis=1)
)
