In [8]:
import pandas as pd
import statistics
import numpy as np

# Descriptive statistics (max, min, range, mean, mode, sample variance and
# sample standard deviation) for each column listed in `target_col`.
data = pd.read_csv("modified_healthcare_dataset.csv")

target_col = ['Age','Billing Amount','Length of Stay']

for dt in target_col:
    # Coerce to a numeric dtype; entries that cannot be parsed become NaN.
    data[dt] = pd.to_numeric(data[dt], errors='coerce')

    # Drop NaN for this column only. Filtering `data` itself inside the loop
    # would also remove those rows from the columns processed afterwards and
    # skew their statistics.
    values = data[dt].dropna()

    if values.empty:
        # statistics.mode() raises on empty input, so report and move on.
        print(f"No numeric values found in {dt.title()}\n")
        continue

    max_val = values.max()
    min_val = values.min()
    range_val = max_val - min_val
    mean = round(values.mean(),2)
    # NOTE(review): for a continuous column such as 'Billing Amount' most
    # values are likely unique, so the mode carries little meaning - confirm
    # whether it should be reported for that column at all.
    mode = round(statistics.mode(values),2)
    # Sample statistics (ddof=1). The standard deviation is computed from the
    # data directly rather than from the already-rounded variance, so rounding
    # error is not compounded.
    var = round(values.var(ddof=1),2)
    std = round(values.std(ddof=1),2)

    print(f"Max of {dt.title()} : {max_val}")
    print(f"Min of {dt.title()} : {min_val}")
    print(f"Range of {dt.title()} : {range_val}")
    print(f"Mean of {dt.title()} : {mean}")
    print(f"Mode of {dt.title()} : {mode}")
    print(f"Variance of {dt.title()} : {var}")
    print(f"Standard Deviance of {dt.title()} : {std}\n")

Max of Age : 90
Min of Age : 5
Range of Age : 85
Mean of Age : 48.0
Mode of Age : 60
Variance of Age : 445.46
Standard Deviance of Age : 21.11

Max of Billing Amount : 99997.79797710298
Min of Billing Amount : 500.22098943266377
Range of Billing Amount : 99497.57698767031
Mean of Billing Amount : 21835.04
Mode of Billing Amount : 2212.27
Variance of Billing Amount : 555752976.28
Standard Deviance of Billing Amount : 23574.41

Max of Length Of Stay : 89
Min of Length Of Stay : 1
Range of Length Of Stay : 88
Mean of Length Of Stay : 17.84
Mode of Length Of Stay : 3
Variance of Length Of Stay : 410.97
Standard Deviance of Length Of Stay : 20.27



In [5]:
import pandas as pd
import statistics
import numpy as np

# Average Length of Stay, Billing Amount, and Age per Medical Condition,
# together with the number of patients (non-null 'Name' entries) per condition.
data = pd.read_csv("modified_healthcare_dataset.csv")

avg_columns = ["Length of Stay","Billing Amount","Age"]

# Coerce the averaged columns to numeric on a copy so that unparseable entries
# become NaN (skipped by mean()); `data` itself is left exactly as loaded.
numeric_data = data.assign(
    **{col: pd.to_numeric(data[col], errors='coerce') for col in avg_columns}
)

# Output column -> (source column, aggregation). Keyword order fixes the
# column order of the resulting frame.
aggregations = {'Total Patient': ('Name', 'count')}
for col in avg_columns:
    aggregations[f'Average {col}'] = (col, 'mean')

# A single grouped pass instead of re-filtering the frame once per condition.
# sort=False keeps the conditions in order of first appearance, so after
# reset_index() each row carries the same positional index as the order of
# 'Medical Condition'.unique(). Rows whose condition is missing are not grouped.
res_df = (
    numeric_data
    .groupby('Medical Condition', sort=False)
    .agg(**aggregations)
    .reset_index()
)

average_labels = [f'Average {col}' for col in avg_columns]
res_df[average_labels] = res_df[average_labels].round(2)

# Sort
res_df = res_df.sort_values(by='Total Patient', ascending=False)
res_df

Unnamed: 0,Medical Condition,Total Patient,Average Length of Stay,Average Billing Amount,Average Age
1,Flu,7046,2.5,2744.15,27.38
6,Diabetes,7005,8.06,12503.19,54.96
7,Obesity,6994,5.97,10055.56,45.01
2,Cancer,6940,36.54,64537.09,64.92
3,Asthma,6908,3.5,5025.35,24.93
4,Heart Disease,6900,26.86,44913.43,64.92
5,Alzheimer’s,6861,54.42,32543.54,74.97
0,Infections,6846,5.52,2747.57,27.23
