In [19]:
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from xgboost import XGBRegressor

In [20]:
# Location of the input CSV. Defaults to the original local path so existing
# runs are unchanged; set the DEMENTIA_DATA_PATH environment variable to run
# the notebook on another machine without editing this cell.
DATA_PATH = os.environ.get("DEMENTIA_DATA_PATH", "/Users/john/Downloads/fdata.csv")
dementia = pd.read_csv(DATA_PATH)

In [21]:
# Take the leading four characters of each timestamp string as the year and
# store them as integer columns (ADMIT from ADMITTIME, DOBYR from DOB).
for year_col, source_col in (('ADMIT', 'ADMITTIME'), ('DOBYR', 'DOB')):
    dementia[year_col] = dementia[source_col].str[:4].astype(int)

# AGE is the difference of calendar years only (month and day are ignored),
# so it can be one higher than the number of completed years.
dementia['AGE'] = dementia['ADMIT'].sub(dementia['DOBYR'])

Random Forest Regressor

In [22]:
# Random-forest fit of length_of_stay for every (age band x disease-burden
# band) among rows with Dementia == 1 and HOSPITAL_EXPIRE_FLAG == 0.
#
# NOTE(review): the model is fitted with length_of_stay as BOTH its only
# feature and its target (X == y). The reported MSE therefore measures how
# well the forest reproduces its own input on held-out values; it is not a
# predictive error. Swap X for real covariates before interpreting it.
# NOTE(review): band edges are inclusive on both sides, so a row whose AGE is
# exactly 75 or 85 (or whose disease_burden is 8 or 13) lands in two bands.
# NOTE(review): unlike the Dementia == 0 cells of this notebook, no
# length_of_stay > 0 filter is applied here - confirm that is intended.
age_categories = [(65, 75), (75, 85), (85, float('inf'))]
disease_burdens = [(1, 8), (8, 13), (13, 39)]

# train_test_split(test_size=0.2) needs at least one row on each side of the
# split; with a single observation it raises ValueError (empty train set).
MIN_GROUP_SIZE = 2

data = dementia[(dementia['Dementia'] == 1) & (dementia['HOSPITAL_EXPIRE_FLAG'] == 0)]

results = {}    # group label -> fitted model
summaries = {}  # group label -> MSE on the held-out 20%

for age_lower, age_upper in age_categories:
    # Series.between is inclusive on both ends, same as the >= / <= pair.
    age_data = data[data['AGE'].between(age_lower, age_upper)]

    for lower_bound, upper_bound in disease_burdens:
        in_band = age_data['disease_burden'].between(lower_bound, upper_bound)
        category_data = age_data.loc[in_band, 'length_of_stay'].dropna()

        # Skip groups that are empty or too small to split.
        if len(category_data) < MIN_GROUP_SIZE:
            continue

        train_data, test_data = train_test_split(category_data, test_size=0.2, random_state=42)

        # scikit-learn expects a 2-D feature matrix: one column, one row per stay.
        train_data = train_data.values.reshape(-1, 1)
        test_data = test_data.values.reshape(-1, 1)

        model = RandomForestRegressor(random_state=42)
        model.fit(train_data, train_data.ravel())

        predictions = model.predict(test_data)
        mse = mean_squared_error(test_data.ravel(), predictions)

        key = f'Age {age_lower}-{age_upper} Disease Burden {lower_bound}-{upper_bound}'
        results[key] = model
        summaries[key] = mse

print("With Dementia and Released from Hospital")
print("Simplified Summary of Hospital Stay by Age and Disease Burden\n")
for key, mse in summaries.items():
    print(f"{key}:")
    print(f"  - Mean Squared Error on Test Data: {mse:.2f}\n")


With Dementia and Released from Hospital
Simplified Summary of Hospital Stay by Age and Disease Burden

Age 65-75 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.06

Age 65-75 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.13

Age 65-75 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 4.56

Age 75-85 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.04

Age 75-85 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.04

Age 75-85 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 68.76

Age 85-inf Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.77

Age 85-inf Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.68

Age 85-inf Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.34



In [23]:
# Random-forest fit of length_of_stay for every (age band x disease-burden
# band) among rows with Dementia == 1 and HOSPITAL_EXPIRE_FLAG == 1.
#
# NOTE(review): the model is fitted with length_of_stay as BOTH its only
# feature and its target (X == y). The reported MSE therefore measures how
# well the forest reproduces its own input on held-out values; it is not a
# predictive error. Swap X for real covariates before interpreting it.
# NOTE(review): band edges are inclusive on both sides, so a row whose AGE is
# exactly 75 or 85 (or whose disease_burden is 8 or 13) lands in two bands.
# NOTE(review): unlike the Dementia == 0 cells of this notebook, no
# length_of_stay > 0 filter is applied here - confirm that is intended.
age_categories = [(65, 75), (75, 85), (85, float('inf'))]
disease_burdens = [(1, 8), (8, 13), (13, 39)]

# train_test_split(test_size=0.2) needs at least one row on each side of the
# split; with a single observation it raises ValueError (empty train set).
MIN_GROUP_SIZE = 2

data = dementia[(dementia['Dementia'] == 1) & (dementia['HOSPITAL_EXPIRE_FLAG'] == 1)]

results = {}    # group label -> fitted model
summaries = {}  # group label -> MSE on the held-out 20%

for age_lower, age_upper in age_categories:
    # Series.between is inclusive on both ends, same as the >= / <= pair.
    age_data = data[data['AGE'].between(age_lower, age_upper)]

    for lower_bound, upper_bound in disease_burdens:
        in_band = age_data['disease_burden'].between(lower_bound, upper_bound)
        category_data = age_data.loc[in_band, 'length_of_stay'].dropna()

        # Skip groups that are empty or too small to split.
        if len(category_data) < MIN_GROUP_SIZE:
            continue

        train_data, test_data = train_test_split(category_data, test_size=0.2, random_state=42)

        # scikit-learn expects a 2-D feature matrix: one column, one row per stay.
        train_data = train_data.values.reshape(-1, 1)
        test_data = test_data.values.reshape(-1, 1)

        model = RandomForestRegressor(random_state=42)
        model.fit(train_data, train_data.ravel())

        predictions = model.predict(test_data)
        mse = mean_squared_error(test_data.ravel(), predictions)

        key = f'Age {age_lower}-{age_upper} Disease Burden {lower_bound}-{upper_bound}'
        results[key] = model
        summaries[key] = mse

print("With Dementia and Died In Hospital")
print("Simplified Summary of Hospital Stay by Age and Disease Burden\n")
for key, mse in summaries.items():
    print(f"{key}:")
    print(f"  - Mean Squared Error on Test Data: {mse:.2f}\n")


With Dementia and Died In Hospital
Simplified Summary of Hospital Stay by Age and Disease Burden

Age 65-75 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 164.89

Age 65-75 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 2.47

Age 75-85 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 15.51

Age 75-85 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.62

Age 75-85 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.06

Age 85-inf Disease Burden 1-8:
  - Mean Squared Error on Test Data: 79.62

Age 85-inf Disease Burden 8-13:
  - Mean Squared Error on Test Data: 4.80

Age 85-inf Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.02



In [24]:
# Random-forest fit of length_of_stay for every (age band x disease-burden
# band) among rows with Dementia == 0, HOSPITAL_EXPIRE_FLAG == 0 and a
# positive length_of_stay.
#
# NOTE(review): the model is fitted with length_of_stay as BOTH its only
# feature and its target (X == y). The reported MSE therefore measures how
# well the forest reproduces its own input on held-out values; it is not a
# predictive error. Swap X for real covariates before interpreting it.
# NOTE(review): band edges are inclusive on both sides, so a row whose AGE is
# exactly 75 or 85 (or whose disease_burden is 8 or 13) lands in two bands.
age_categories = [(65, 75), (75, 85), (85, float('inf'))]
disease_burdens = [(1, 8), (8, 13), (13, 39)]

# train_test_split(test_size=0.2) needs at least one row on each side of the
# split; with a single observation it raises ValueError (empty train set).
MIN_GROUP_SIZE = 2

data = dementia[dementia['Dementia'] == 0]
data = data[data['HOSPITAL_EXPIRE_FLAG'] == 0]
data = data[data['length_of_stay'] > 0]

results = {}    # group label -> fitted model
summaries = {}  # group label -> MSE on the held-out 20%

for age_lower, age_upper in age_categories:
    # Series.between is inclusive on both ends, same as the >= / <= pair.
    age_data = data[data['AGE'].between(age_lower, age_upper)]

    for lower_bound, upper_bound in disease_burdens:
        in_band = age_data['disease_burden'].between(lower_bound, upper_bound)
        category_data = age_data.loc[in_band, 'length_of_stay'].dropna()

        # Skip groups that are empty or too small to split.
        if len(category_data) < MIN_GROUP_SIZE:
            continue

        train_data, test_data = train_test_split(category_data, test_size=0.2, random_state=42)

        # scikit-learn expects a 2-D feature matrix: one column, one row per stay.
        train_data = train_data.values.reshape(-1, 1)
        test_data = test_data.values.reshape(-1, 1)

        model = RandomForestRegressor(random_state=42)
        model.fit(train_data, train_data.ravel())

        predictions = model.predict(test_data)
        mse = mean_squared_error(test_data.ravel(), predictions)

        key = f'Age {age_lower}-{age_upper} Disease Burden {lower_bound}-{upper_bound}'
        results[key] = model
        summaries[key] = mse

print("Without Dementia and Released from Hospital")
print("Simplified Summary of Hospital Stay by Age and Disease Burden\n")
for key, mse in summaries.items():
    print(f"{key}:")
    print(f"  - Mean Squared Error on Test Data: {mse:.2f}\n")


Without Dementia and Released from Hospital
Simplified Summary of Hospital Stay by Age and Disease Burden

Age 65-75 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.00

Age 65-75 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.01

Age 65-75 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 2.60

Age 75-85 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.06

Age 75-85 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.02

Age 75-85 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.23

Age 85-inf Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.00

Age 85-inf Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.06

Age 85-inf Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.04



In [25]:
# Random-forest fit of length_of_stay for every (age band x disease-burden
# band) among rows with Dementia == 0, HOSPITAL_EXPIRE_FLAG == 1 and a
# positive length_of_stay.
#
# NOTE(review): the model is fitted with length_of_stay as BOTH its only
# feature and its target (X == y). The reported MSE therefore measures how
# well the forest reproduces its own input on held-out values; it is not a
# predictive error. Swap X for real covariates before interpreting it.
# NOTE(review): band edges are inclusive on both sides, so a row whose AGE is
# exactly 75 or 85 (or whose disease_burden is 8 or 13) lands in two bands.
age_categories = [(65, 75), (75, 85), (85, float('inf'))]
disease_burdens = [(1, 8), (8, 13), (13, 39)]

# train_test_split(test_size=0.2) needs at least one row on each side of the
# split; with a single observation it raises ValueError (empty train set).
MIN_GROUP_SIZE = 2

data = dementia[dementia['Dementia'] == 0]
data = data[data['HOSPITAL_EXPIRE_FLAG'] == 1]
data = data[data['length_of_stay'] > 0]

results = {}    # group label -> fitted model
summaries = {}  # group label -> MSE on the held-out 20%

for age_lower, age_upper in age_categories:
    # Series.between is inclusive on both ends, same as the >= / <= pair.
    age_data = data[data['AGE'].between(age_lower, age_upper)]

    for lower_bound, upper_bound in disease_burdens:
        in_band = age_data['disease_burden'].between(lower_bound, upper_bound)
        category_data = age_data.loc[in_band, 'length_of_stay'].dropna()

        # Skip groups that are empty or too small to split.
        if len(category_data) < MIN_GROUP_SIZE:
            continue

        train_data, test_data = train_test_split(category_data, test_size=0.2, random_state=42)

        # scikit-learn expects a 2-D feature matrix: one column, one row per stay.
        train_data = train_data.values.reshape(-1, 1)
        test_data = test_data.values.reshape(-1, 1)

        model = RandomForestRegressor(random_state=42)
        model.fit(train_data, train_data.ravel())

        predictions = model.predict(test_data)
        mse = mean_squared_error(test_data.ravel(), predictions)

        key = f'Age {age_lower}-{age_upper} Disease Burden {lower_bound}-{upper_bound}'
        results[key] = model
        summaries[key] = mse

print("Without Dementia and Died In Hospital")
print("Simplified Summary of Hospital Stay by Age and Disease Burden\n")
for key, mse in summaries.items():
    print(f"{key}:")
    print(f"  - Mean Squared Error on Test Data: {mse:.2f}\n")


Without Dementia and Died In Hospital
Simplified Summary of Hospital Stay by Age and Disease Burden

Age 65-75 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.26

Age 65-75 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.31

Age 65-75 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.47

Age 75-85 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 10.68

Age 75-85 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.02

Age 75-85 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 159.52

Age 85-inf Disease Burden 1-8:
  - Mean Squared Error on Test Data: 6.82

Age 85-inf Disease Burden 8-13:
  - Mean Squared Error on Test Data: 1.67

Age 85-inf Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.01



Gradient Boosting Regressor

In [26]:
# Gradient-boosting fit of length_of_stay for every (age band x
# disease-burden band) among rows with Dementia == 1 and
# HOSPITAL_EXPIRE_FLAG == 0.
#
# NOTE(review): the model is fitted with length_of_stay as BOTH its only
# feature and its target (X == y). The reported MSE therefore measures how
# well the booster reproduces its own input on held-out values; it is not a
# predictive error. Swap X for real covariates before interpreting it.
# NOTE(review): band edges are inclusive on both sides, so a row whose AGE is
# exactly 75 or 85 (or whose disease_burden is 8 or 13) lands in two bands.
# NOTE(review): unlike the Dementia == 0 cells of this notebook, no
# length_of_stay > 0 filter is applied here - confirm that is intended.
age_categories = [(65, 75), (75, 85), (85, float('inf'))]
disease_burdens = [(1, 8), (8, 13), (13, 39)]

# train_test_split(test_size=0.2) needs at least one row on each side of the
# split; with a single observation it raises ValueError (empty train set).
MIN_GROUP_SIZE = 2

data = dementia[(dementia['Dementia'] == 1) & (dementia['HOSPITAL_EXPIRE_FLAG'] == 0)]

results = {}    # group label -> fitted model
summaries = {}  # group label -> MSE on the held-out 20%

for age_lower, age_upper in age_categories:
    # Series.between is inclusive on both ends, same as the >= / <= pair.
    age_data = data[data['AGE'].between(age_lower, age_upper)]

    for lower_bound, upper_bound in disease_burdens:
        in_band = age_data['disease_burden'].between(lower_bound, upper_bound)
        category_data = age_data.loc[in_band, 'length_of_stay'].dropna()

        # Skip groups that are empty or too small to split.
        if len(category_data) < MIN_GROUP_SIZE:
            continue

        train_data, test_data = train_test_split(category_data, test_size=0.2, random_state=42)

        # scikit-learn expects a 2-D feature matrix: one column, one row per stay.
        train_data = train_data.values.reshape(-1, 1)
        test_data = test_data.values.reshape(-1, 1)

        model = GradientBoostingRegressor(random_state=42)
        model.fit(train_data, train_data.ravel())

        predictions = model.predict(test_data)
        mse = mean_squared_error(test_data.ravel(), predictions)

        key = f'Age {age_lower}-{age_upper} Disease Burden {lower_bound}-{upper_bound}'
        results[key] = model
        summaries[key] = mse

print("With Dementia and Released from Hospital")
print("Simplified Summary of Hospital Stay by Age and Disease Burden\n")
for key, mse in summaries.items():
    print(f"{key}:")
    print(f"  - Mean Squared Error on Test Data: {mse:.2f}\n")


With Dementia and Released from Hospital
Simplified Summary of Hospital Stay by Age and Disease Burden

Age 65-75 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.06

Age 65-75 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.28

Age 65-75 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 3.61

Age 75-85 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.03

Age 75-85 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.05

Age 75-85 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 30.23

Age 85-inf Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.47

Age 85-inf Disease Burden 8-13:
  - Mean Squared Error on Test Data: 3.81

Age 85-inf Disease Burden 13-39:
  - Mean Squared Error on Test Data: 1.57



In [27]:
# Gradient-boosting fit of length_of_stay for every (age band x
# disease-burden band) among rows with Dementia == 1 and
# HOSPITAL_EXPIRE_FLAG == 1.
#
# NOTE(review): the model is fitted with length_of_stay as BOTH its only
# feature and its target (X == y). The reported MSE therefore measures how
# well the booster reproduces its own input on held-out values; it is not a
# predictive error. Swap X for real covariates before interpreting it.
# NOTE(review): band edges are inclusive on both sides, so a row whose AGE is
# exactly 75 or 85 (or whose disease_burden is 8 or 13) lands in two bands.
# NOTE(review): unlike the Dementia == 0 cells of this notebook, no
# length_of_stay > 0 filter is applied here - confirm that is intended.
age_categories = [(65, 75), (75, 85), (85, float('inf'))]
disease_burdens = [(1, 8), (8, 13), (13, 39)]

# train_test_split(test_size=0.2) needs at least one row on each side of the
# split; with a single observation it raises ValueError (empty train set).
MIN_GROUP_SIZE = 2

data = dementia[(dementia['Dementia'] == 1) & (dementia['HOSPITAL_EXPIRE_FLAG'] == 1)]

results = {}    # group label -> fitted model
summaries = {}  # group label -> MSE on the held-out 20%

for age_lower, age_upper in age_categories:
    # Series.between is inclusive on both ends, same as the >= / <= pair.
    age_data = data[data['AGE'].between(age_lower, age_upper)]

    for lower_bound, upper_bound in disease_burdens:
        in_band = age_data['disease_burden'].between(lower_bound, upper_bound)
        category_data = age_data.loc[in_band, 'length_of_stay'].dropna()

        # Skip groups that are empty or too small to split.
        if len(category_data) < MIN_GROUP_SIZE:
            continue

        train_data, test_data = train_test_split(category_data, test_size=0.2, random_state=42)

        # scikit-learn expects a 2-D feature matrix: one column, one row per stay.
        train_data = train_data.values.reshape(-1, 1)
        test_data = test_data.values.reshape(-1, 1)

        model = GradientBoostingRegressor(random_state=42)
        model.fit(train_data, train_data.ravel())

        predictions = model.predict(test_data)
        mse = mean_squared_error(test_data.ravel(), predictions)

        key = f'Age {age_lower}-{age_upper} Disease Burden {lower_bound}-{upper_bound}'
        results[key] = model
        summaries[key] = mse

print("With Dementia and Died In Hospital")
print("Simplified Summary of Hospital Stay by Age and Disease Burden\n")
for key, mse in summaries.items():
    print(f"{key}:")
    print(f"  - Mean Squared Error on Test Data: {mse:.2f}\n")


With Dementia and Died In Hospital
Simplified Summary of Hospital Stay by Age and Disease Burden

Age 65-75 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 127.50

Age 65-75 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 37.51

Age 75-85 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 12.00

Age 75-85 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 1.21

Age 75-85 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.11

Age 85-inf Disease Burden 1-8:
  - Mean Squared Error on Test Data: 77.29

Age 85-inf Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.75

Age 85-inf Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.13



In [28]:
# Gradient-boosting fit of length_of_stay for every (age band x
# disease-burden band) among rows with Dementia == 0,
# HOSPITAL_EXPIRE_FLAG == 0 and a positive length_of_stay.
#
# NOTE(review): the model is fitted with length_of_stay as BOTH its only
# feature and its target (X == y). The reported MSE therefore measures how
# well the booster reproduces its own input on held-out values; it is not a
# predictive error. Swap X for real covariates before interpreting it.
# NOTE(review): band edges are inclusive on both sides, so a row whose AGE is
# exactly 75 or 85 (or whose disease_burden is 8 or 13) lands in two bands.
age_categories = [(65, 75), (75, 85), (85, float('inf'))]
disease_burdens = [(1, 8), (8, 13), (13, 39)]

# train_test_split(test_size=0.2) needs at least one row on each side of the
# split; with a single observation it raises ValueError (empty train set).
MIN_GROUP_SIZE = 2

data = dementia[dementia['Dementia'] == 0]
data = data[data['HOSPITAL_EXPIRE_FLAG'] == 0]
data = data[data['length_of_stay'] > 0]

results = {}    # group label -> fitted model
summaries = {}  # group label -> MSE on the held-out 20%

for age_lower, age_upper in age_categories:
    # Series.between is inclusive on both ends, same as the >= / <= pair.
    age_data = data[data['AGE'].between(age_lower, age_upper)]

    for lower_bound, upper_bound in disease_burdens:
        in_band = age_data['disease_burden'].between(lower_bound, upper_bound)
        category_data = age_data.loc[in_band, 'length_of_stay'].dropna()

        # Skip groups that are empty or too small to split.
        if len(category_data) < MIN_GROUP_SIZE:
            continue

        train_data, test_data = train_test_split(category_data, test_size=0.2, random_state=42)

        # scikit-learn expects a 2-D feature matrix: one column, one row per stay.
        train_data = train_data.values.reshape(-1, 1)
        test_data = test_data.values.reshape(-1, 1)

        model = GradientBoostingRegressor(random_state=42)
        model.fit(train_data, train_data.ravel())

        predictions = model.predict(test_data)
        mse = mean_squared_error(test_data.ravel(), predictions)

        key = f'Age {age_lower}-{age_upper} Disease Burden {lower_bound}-{upper_bound}'
        results[key] = model
        summaries[key] = mse

print("Without Dementia and Released from Hospital")
print("Simplified Summary of Hospital Stay by Age and Disease Burden\n")
for key, mse in summaries.items():
    print(f"{key}:")
    print(f"  - Mean Squared Error on Test Data: {mse:.2f}\n")


Without Dementia and Released from Hospital
Simplified Summary of Hospital Stay by Age and Disease Burden

Age 65-75 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.00

Age 65-75 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.02

Age 65-75 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.05

Age 75-85 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.00

Age 75-85 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.03

Age 75-85 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.02

Age 85-inf Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.00

Age 85-inf Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.08

Age 85-inf Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.05



In [29]:
# Gradient-boosting fit of length_of_stay for every (age band x
# disease-burden band) among rows with Dementia == 0,
# HOSPITAL_EXPIRE_FLAG == 1 and a positive length_of_stay.
#
# NOTE(review): the model is fitted with length_of_stay as BOTH its only
# feature and its target (X == y). The reported MSE therefore measures how
# well the booster reproduces its own input on held-out values; it is not a
# predictive error. Swap X for real covariates before interpreting it.
# NOTE(review): band edges are inclusive on both sides, so a row whose AGE is
# exactly 75 or 85 (or whose disease_burden is 8 or 13) lands in two bands.
age_categories = [(65, 75), (75, 85), (85, float('inf'))]
disease_burdens = [(1, 8), (8, 13), (13, 39)]

# train_test_split(test_size=0.2) needs at least one row on each side of the
# split; with a single observation it raises ValueError (empty train set).
MIN_GROUP_SIZE = 2

data = dementia[dementia['Dementia'] == 0]
data = data[data['HOSPITAL_EXPIRE_FLAG'] == 1]
data = data[data['length_of_stay'] > 0]

results = {}    # group label -> fitted model
summaries = {}  # group label -> MSE on the held-out 20%

for age_lower, age_upper in age_categories:
    # Series.between is inclusive on both ends, same as the >= / <= pair.
    age_data = data[data['AGE'].between(age_lower, age_upper)]

    for lower_bound, upper_bound in disease_burdens:
        in_band = age_data['disease_burden'].between(lower_bound, upper_bound)
        category_data = age_data.loc[in_band, 'length_of_stay'].dropna()

        # Skip groups that are empty or too small to split.
        if len(category_data) < MIN_GROUP_SIZE:
            continue

        train_data, test_data = train_test_split(category_data, test_size=0.2, random_state=42)

        # scikit-learn expects a 2-D feature matrix: one column, one row per stay.
        train_data = train_data.values.reshape(-1, 1)
        test_data = test_data.values.reshape(-1, 1)

        model = GradientBoostingRegressor(random_state=42)
        model.fit(train_data, train_data.ravel())

        predictions = model.predict(test_data)
        mse = mean_squared_error(test_data.ravel(), predictions)

        key = f'Age {age_lower}-{age_upper} Disease Burden {lower_bound}-{upper_bound}'
        results[key] = model
        summaries[key] = mse

print("Without Dementia and Died In Hospital")
print("Simplified Summary of Hospital Stay by Age and Disease Burden\n")
for key, mse in summaries.items():
    print(f"{key}:")
    print(f"  - Mean Squared Error on Test Data: {mse:.2f}\n")


Without Dementia and Died In Hospital
Simplified Summary of Hospital Stay by Age and Disease Burden

Age 65-75 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 0.17

Age 65-75 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.38

Age 65-75 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.30

Age 75-85 Disease Burden 1-8:
  - Mean Squared Error on Test Data: 8.49

Age 75-85 Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.21

Age 75-85 Disease Burden 13-39:
  - Mean Squared Error on Test Data: 114.19

Age 85-inf Disease Burden 1-8:
  - Mean Squared Error on Test Data: 5.80

Age 85-inf Disease Burden 8-13:
  - Mean Squared Error on Test Data: 0.65

Age 85-inf Disease Burden 13-39:
  - Mean Squared Error on Test Data: 0.01

