In [24]:
import pandas as pd
import numpy as np

# <('.'<) ^('.'^) (>'.')>
# Part 1: Data Loading and Cleaning
# (>'.')> (^'.')^ <('.'<)

# Load the dataset (path is relative to the notebook's working directory)
df = pd.read_excel("Inpatient Data.xlsx")

# Standardize column names so they match the names used by the rest of the
# analysis. The keys must stay verbatim: 'Discarge_status' is presumably the
# spelling used in the spreadsheet header -- confirm before "correcting" it,
# because rename() silently skips keys that do not match any column.
# Reassigning the result (instead of inplace=True) keeps the step a plain
# expression that behaves the same every time the cell is re-run.
df = df.rename(columns={
    'Patient_ID': 'PatientID',
    'Discarge_status': 'DischargeStatus',
    'sex': 'Sex',
    'age': 'Age',
    'ComorbidityCount': 'ChronicConditionsCount',
    'FamilySupport': 'FamilySupportScore',
    'InsurancePolicy_ID': 'InsurancePolicyID'
})

# Show first 10 rows
print("First 10 rows:")
print(df.head(10))

# Summarize dataset (structure, missing values, stats).
# info() prints directly and reports the non-null count per column, which is
# where missing values show up; describe() covers the numeric columns.
print("\nDataset Info:")
df.info()

print("\nSummary Statistics:")
print(df.describe())

# Fill missing values in Age with median.
# The median is taken over all loaded rows, i.e. before the AdmissionSource
# filter below removes any of them.
median_age = df['Age'].median()
df['Age'] = df['Age'].fillna(median_age)

# Drop rows where AdmissionSource is missing (the original index labels of
# the surviving rows are kept as-is)
df = df.dropna(subset=['AdmissionSource'])

# <('.'<) ^('.'^) (>'.')>
# Part 2: Statistical Analysis
# (>'.')> (^'.')^ <('.'<)

# Average age of patients
avg_age = df['Age'].mean()
print("\nAverage Age of Patients:", avg_age)

# Discharge status counts for each care level.
# The result is a Series indexed by (CareLevel, DischargeStatus); it is
# printed on its own line so the index header is not pushed out of alignment
# by the label.
discharge_status = df.groupby('CareLevel')['DischargeStatus'].value_counts()
print("\nDischarge Status by Care Level:", discharge_status, sep="\n")

# Average hospital bill by outcome (1 = discharged, 0 = expired)
avg_bill_outcome = df.groupby('DischargeStatus')['HospitalBill'].mean()
print("\nAverage Hospital Bill by Outcome:", avg_bill_outcome, sep="\n")

# Correlation between hospital bill and age (using numpy).
# np.corrcoef propagates NaN, so a single missing value in either column
# would turn the whole coefficient into NaN. Restrict the calculation to rows
# where both values are present; when nothing is missing this is the same
# computation as before.
bill_age = df[['HospitalBill', 'Age']].dropna()
corr = np.corrcoef(bill_age['HospitalBill'], bill_age['Age'])[0, 1]
print("\nCorrelation between Hospital Bill and Age:", corr)
# Observed on this dataset: ~0.09, i.e. little linear correlation between
# Hospital Bill and Age.

First 10 rows:
   PatientID  DischargeStatus  CareLevel  \
0          1                0          3   
1          2                1          1   
2          3                1          3   
3          4                1          1   
4          5                0          3   
5          6                0          3   
6          7                0          1   
7          8                0          3   
8          9                1          3   
9         10                1          2   

                                                name     Sex   Age  \
0                            Braund, Mr. Owen Harris    male  22.0   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0   
2                             Heikkinen, Miss. Laina  female  26.0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0   
4                           Allen, Mr. William Henry    male  35.0   
5                                   Moran, Mr. James    male   NaN   
6         