In [1]:
import pandas as pd
import pandas as pd
from typing import Dict, Tuple
from sklearn.preprocessing import KBinsDiscretizer
from joblib import Parallel, delayed

## Importing the Diabetes Dataset

In [2]:
# Load the raw diabetes CSV (path is relative to the notebook's working directory)
# into `df` for the exploratory cells that follow.
df = pd.read_csv("diabetes.csv")

In [3]:
# Preview the first rows to check the column names and value formats.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Per-column summary statistics.
# NOTE(review): the recorded output shows a minimum of 0 for Glucose, BloodPressure,
# SkinThickness, Insulin and BMI - presumably zeros stand in for missing
# measurements; confirm against the dataset documentation before relying on them.
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [5]:
class DiabetesMDP:
    """Tabular MDP estimated from a diabetes CSV and solved by value iteration.

    * A state is the tuple of per-feature quantile-bin indices of a row.
    * An action is one of three equal-width ``Glucose`` ranges, labelled
      'Diet', 'Oral Medication' and 'Insulin'.
    * A transition is counted from every row to the row that follows it in
      the file; the reward is +1 when the source row's outcome is 0, else -1.

    NOTE(review): treating consecutive CSV rows as successive time steps is a
    modelling assumption of this class - confirm the rows really are ordered
    observations before interpreting the resulting policy.
    """

    def __init__(self, data_path: str, gamma: float):
        """Load the CSV at ``data_path`` and build all MDP tables.

        Args:
            data_path: Path of the CSV file; it must contain the columns listed
                in ``state_columns`` plus the ``Outcome`` column.
            gamma: Discount factor applied to the value of the next state.
        """
        self.data = pd.read_csv(data_path)
        self.gamma = gamma  # Discount factor for future rewards
        self.state_columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
                              'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
        self.reward_column = 'Outcome'  # Binary outcome, representing positive or negative health

        self.preprocess_data()  # convert the data to bins
        self.build_mdp_components()  # define rewards for each transition and transition probabilities

    def preprocess_data(self):
        """Add the ``<col>_binned``, ``state`` and ``action`` columns to ``self.data``.

        Each state column is discretised independently with a 5-bin quantile
        ``KBinsDiscretizer``; the fitted discretisers are kept in
        ``self.discretizers`` so bin indices can later be mapped back to ranges.
        """
        # Discretize continuous variables
        self.discretizers = {}
        for col in self.state_columns:
            kbd = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile')
            self.data[f'{col}_binned'] = kbd.fit_transform(self.data[[col]])
            self.discretizers[col] = kbd

        # Create a combined state column: one hashable tuple of bin indices per row
        self.data['state'] = self.data[[f'{col}_binned' for col in self.state_columns]].apply(tuple, axis=1)

        # Define actions (simplified treatment approaches) from three equal-width Glucose ranges
        self.data['action'] = pd.cut(self.data['Glucose'], bins=3, labels=['Diet', 'Oral Medication', 'Insulin'])

    def build_mdp_components(self):
        """Estimate transition probabilities and rewards from consecutive rows.

        Populates:
            self.states / self.actions: distinct values observed in the data.
            self.transition_prob: ``{(s, a, s_next): P(s_next | s, a)}``.
            self.rewards: ``{(s, a, s_next): +1 or -1}``.
            self._successors: ``{(s, a): [s_next, ...]}`` - the successors that
                were actually observed, so later sums can skip the (vast
                majority of) zero-probability entries.
        """
        self.states = self.data['state'].unique()
        self.actions = self.data['action'].unique()

        self.transition_prob = {}
        self.rewards = {}
        self._successors = {}
        pair_totals = {}  # (s, a) -> number of observed transitions leaving that pair

        # Pull the columns out once instead of materialising a row Series per access.
        state_seq = self.data['state'].tolist()
        action_seq = self.data['action'].tolist()
        outcome_seq = self.data[self.reward_column].tolist()

        # Zipping against state_seq[1:] stops one short, so the last row is
        # never used as a transition source (it has no following row).
        for s, a, outcome, s_next in zip(state_seq, action_seq, outcome_seq, state_seq[1:]):
            key = (s, a, s_next)
            if key not in self.transition_prob:
                self._successors.setdefault((s, a), []).append(s_next)
            self.transition_prob[key] = self.transition_prob.get(key, 0) + 1
            # NOTE(review): if the same (s, a, s_next) occurs with different
            # outcomes, the last occurrence wins - confirm this is intended
            # rather than an average.
            self.rewards[key] = 1 if outcome == 0 else -1
            pair_totals[(s, a)] = pair_totals.get((s, a), 0) + 1

        # Normalize counts into probabilities; the per-pair totals were already
        # accumulated above, so no scan over the full state set is needed.
        for key in self.transition_prob:
            self.transition_prob[key] /= pair_totals[key[:2]]

        # Keep successors in the same order as self.states so floating-point
        # sums are accumulated in the same order as a scan over all states.
        order = {s: i for i, s in enumerate(self.states)}
        for successors in self._successors.values():
            successors.sort(key=order.__getitem__)

    def _action_value(self, s: tuple, a, V: Dict[tuple, float]) -> float:
        """Expected one-step return of taking action ``a`` in state ``s`` under ``V``."""
        return sum(
            (self.transition_prob.get((s, a, s_next), 0)
             * (self.rewards.get((s, a, s_next), 0) + self.gamma * V[s_next])
             for s_next in self._successors.get((s, a), ())),
            0.0,
        )

    def compute_state_value(self, s: tuple, V: Dict[tuple, float]) -> float:
        """Bellman optimality backup for state ``s``.

        Args:
            s: State tuple to back up.
            V: Current value estimate for every state.

        Returns:
            The largest expected one-step return over all actions; an action
            with no observed transition from ``s`` contributes 0.
        """
        return max(self._action_value(s, a, V) for a in self.actions)

    def value_iteration(self, theta: float = 0.0001, max_iterations: int = 2) -> Tuple[Dict[tuple, float], Dict[tuple, str]]:
        """Run value iteration and extract the greedy policy.

        Args:
            theta: Stop early once the largest value change in a sweep is
                below this threshold.
            max_iterations: Upper bound on the number of sweeps. The default
                of 2 stops long before convergence in general; pass a larger
                value for a converged solution.

        Returns:
            ``(V, policy)`` where ``V`` maps each state to its value and
            ``policy`` maps each state to the action with the highest expected
            return under ``V`` (first such action in ``self.actions`` on ties).
        """
        V = {s: 0 for s in self.states}

        for _ in range(max_iterations):
            # A backup only touches the few observed successors of a state, so
            # a plain loop is cheaper than shipping one task per state (and a
            # pickled copy of this object) to worker processes.
            new_V = {s: self.compute_state_value(s, V) for s in self.states}

            delta = max((abs(new_V[s] - V[s]) for s in self.states), default=0.0)
            V = new_V

            if delta < theta:
                break

        policy = {}
        for s in self.states:
            policy[s] = max(self.actions, key=lambda a: self._action_value(s, a, V))

        return V, policy

    def interpret_policy(self, policy: Dict[tuple, str]) -> pd.DataFrame:
        """Flatten a policy into a DataFrame.

        Returns:
            One row per state with a ``<col>_binned`` column for each state
            feature and an ``optimal_action`` column.
        """
        binned_names = [f'{col}_binned' for col in self.state_columns]
        rows = []
        for state, action in policy.items():
            row = dict(zip(binned_names, state))
            row['optimal_action'] = action
            rows.append(row)
        return pd.DataFrame(rows)

    def get_state_description(self, state: tuple) -> Dict[str, str]:
        """Translate a tuple of bin indices into human-readable value ranges.

        Args:
            state: Bin indices ordered like ``self.state_columns``.

        Returns:
            ``{feature: text}`` where the text is ``"< upper"`` for the first
            bin, ``"> lower"`` for the last bin, ``"lower - upper"`` for inner
            bins, and ``"Unknown"`` for an index outside the fitted bins.
        """
        description = {}
        for i, col in enumerate(self.state_columns):
            bin_edges = self.discretizers[col].bin_edges_[0]
            n_bins = len(bin_edges) - 1  # may be fewer than requested if the fitted discretizer kept fewer edges
            bin_value = int(state[i])  # Bin indices are stored as floats
            if bin_value < 0 or bin_value >= n_bins:
                description[col] = "Unknown"
            elif bin_value == 0:
                description[col] = f"< {bin_edges[1]:.2f}"
            elif bin_value == n_bins - 1:
                description[col] = f"> {bin_edges[-2]:.2f}"
            else:
                description[col] = f"{bin_edges[bin_value]:.2f} - {bin_edges[bin_value+1]:.2f}"
        return description

In [11]:
# Build the MDP: the constructor reads the CSV itself (independently of `df`),
# bins the features, and estimates transition probabilities and rewards.
# gamma=0.9 is the discount factor applied to next-state values.
mdp = DiabetesMDP(data_path='diabetes.csv', gamma=0.9)



In [12]:
# Inspect the frame after preprocessing: the original columns plus the
# `<feature>_binned`, `state` and `action` columns added by the class.
# NOTE(review): this displays the whole frame; `mdp.data.head()` would be enough.
mdp.data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Pregnancies_binned,Glucose_binned,BloodPressure_binned,SkinThickness_binned,Insulin_binned,BMI_binned,DiabetesPedigreeFunction_binned,Age_binned,state,action
0,6,148,72,35,0,33.6,0.627,50,1,3.0,4.0,2.0,3.0,0.0,2.0,3.0,4.0,"(3.0, 4.0, 2.0, 3.0, 0.0, 2.0, 3.0, 4.0)",Insulin
1,1,85,66,29,0,26.6,0.351,31,0,1.0,0.0,1.0,2.0,0.0,1.0,2.0,2.0,"(1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 2.0)",Oral Medication
2,8,183,64,0,0,23.3,0.672,32,1,4.0,4.0,1.0,0.0,0.0,0.0,3.0,2.0,"(4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 3.0, 2.0)",Insulin
3,1,89,66,23,94,28.1,0.167,21,0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,"(1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0)",Oral Medication
4,0,137,40,35,168,43.1,2.288,33,1,0.0,3.0,0.0,3.0,2.0,4.0,4.0,3.0,"(0.0, 3.0, 0.0, 3.0, 2.0, 4.0, 4.0, 3.0)",Insulin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0,4.0,1.0,3.0,3.0,2.0,2.0,0.0,4.0,"(4.0, 1.0, 3.0, 3.0, 2.0, 2.0, 0.0, 4.0)",Oral Medication
764,2,122,70,27,0,36.8,0.340,27,0,2.0,2.0,2.0,2.0,0.0,3.0,2.0,2.0,"(2.0, 2.0, 2.0, 2.0, 0.0, 3.0, 2.0, 2.0)",Oral Medication
765,5,121,72,23,112,26.2,0.245,30,0,3.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,"(3.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0)",Oral Medication
766,1,126,60,0,0,30.1,0.349,47,1,1.0,3.0,1.0,0.0,0.0,2.0,2.0,4.0,"(1.0, 3.0, 1.0, 0.0, 0.0, 2.0, 2.0, 4.0)",Oral Medication


In [13]:
# Solve the MDP with the method's defaults (theta=0.0001, max_iterations=2),
# i.e. at most two sweeps, then flatten the policy into one row per state.
optimal_values, optimal_policy = mdp.value_iteration()
policy_df = mdp.interpret_policy(optimal_policy)

In [15]:
# Show the first few states together with the action chosen for each.
print("Sample of Optimal Policy:")
policy_df.head()

Sample of Optimal Policy:


Unnamed: 0,Pregnancies_binned,Glucose_binned,BloodPressure_binned,SkinThickness_binned,Insulin_binned,BMI_binned,DiabetesPedigreeFunction_binned,Age_binned,optimal_action
0,3.0,4.0,2.0,3.0,0.0,2.0,3.0,4.0,Oral Medication
1,1.0,0.0,1.0,2.0,0.0,1.0,2.0,2.0,Oral Medication
2,4.0,4.0,1.0,0.0,0.0,0.0,3.0,2.0,Oral Medication
3,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,Oral Medication
4,0.0,3.0,0.0,3.0,2.0,4.0,4.0,3.0,Oral Medication


In [17]:
# Count how many states are assigned each action, most frequent first.
print("\nMost common optimal action:")
policy_df['optimal_action'].value_counts().head()


Most common optimal action:


optimal_action
Oral Medication    528
Insulin            221
Diet                12
Name: count, dtype: int64

In [25]:
# Restrict to states in the top Glucose bin (index 4 of the five quantile bins)
# and keep only the single most frequent action among them.
print("\nOptimal action for high glucose levels:")
high_glucose = policy_df[policy_df['Glucose_binned'] == 4]['optimal_action'].value_counts().head(1)
print(high_glucose)


Optimal action for high glucose levels:
optimal_action
Insulin    88
Name: count, dtype: int64


In [22]:
# Interpret a specific state: take the first policy row, translate its bin
# indices back into value ranges, and remember the action chosen for it.
sample_state = tuple(policy_df.iloc[0][:-1].tolist())  # Drop the trailing 'optimal_action' column; the rest is the state tuple
state_description = mdp.get_state_description(sample_state)
optimal_action = policy_df.iloc[0]['optimal_action']  # Also read by the later "Optimal Action" cell

print("\nSample State Interpretation:\n")
for feature, value in state_description.items():
    print(f"{feature}: {value}")


Sample State Interpretation:

Pregnancies: 4.00 - 7.00
Glucose: > 147.00
BloodPressure: 68.00 - 74.00
SkinThickness: > 35.00
Insulin: < 72.20
BMI: 30.10 - 33.70
DiabetesPedigreeFunction: 0.45 - 0.69
Age: > 42.60


In [20]:
# `optimal_action` is assigned in the sample-state cell above; run that cell first.
print(f"Optimal Action: {optimal_action}")

Optimal Action: Oral Medication
