In [1]:
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
# Deserialize the trained model artifact from disk.
# NOTE: pickle.load runs arbitrary code stored in the file, so only point this
# at model files produced by a trusted training run.
model_path = 'models/random_forest_model.pkl'
with open(model_path, 'rb') as model_file:
    rf_model = pickle.load(model_file)


In [3]:
# Score a small hand-written sample with the loaded model.
#
# Steps: build the sample frame, standardise the continuous columns with the
# training-set statistics, align the columns with what the model expects,
# predict, and print the scaled features next to the predictions.

# Continuous features that get standardised before prediction.
columns_to_scale = ['age', 'bmi', 'HbA1c_level', 'blood_glucose_level']

# Per-column mean / standard deviation used for standardisation.
# NOTE(review): these look like rounded placeholder values rather than the
# statistics of the actual training set. Predictions are only meaningful if
# they match what was used at training time -- ideally persist the fitted
# scaler next to the model and load it here instead. TODO confirm.
training_data_statistics = {
    'age': {'mean': 45, 'std': 10},
    'bmi': {'mean': 25, 'std': 5},
    'HbA1c_level': {'mean': 6.0, 'std': 1.0},
    'blood_glucose_level': {'mean': 120, 'std': 15}
}

# Create a small made-up dataset with the same columns.
# NOTE(review): several hand-entered encodings disagree with each other or
# with their own comments -- e.g. row 2 (age 25) has age_group 0 ("Child")
# but Male_Adult = 1, row 1 has age_group 2 but Male_Adult = 1, and
# bmi_category is 3 for a BMI of 22.5 although 2 is documented as
# "Normal weight". Verify against the training-time encoders.
sample_data = {
    'age': [35, 60, 25],
    'hypertension': [0, 1, 0],
    'heart_disease': [0, 1, 0],
    'smoking_history': [2, 0, 1],  # Encoded: 2 = never, 0 = No Info, etc.
    'bmi': [22.5, 30.2, 18.7],
    'HbA1c_level': [5.8, 7.2, 6.1],
    'blood_glucose_level': [110, 140, 85],
    'age_group': [1, 2, 0],  # Encoded: 0 = Child, 1 = Adult, etc.
    'gender_Female': [1, 0, 0],
    'gender_Male': [0, 1, 1],
    'gender_Other': [0, 0, 0],
    'Male_Child': [0, 0, 0],
    'Male_Adult': [0, 1, 1],
    'Male_Old': [0, 0, 0],
    'Female_Child': [0, 0, 0],
    'Female_Adult': [1, 0, 0],
    'Female_Old': [0, 0, 0],
    'Other_Child': [0, 0, 0],
    'Other_Adult': [0, 0, 0],
    'Other_Old': [0, 0, 0],
    'age_squared': [1225, 3600, 625],  # raw (unscaled) age squared
    'bmi_category': [3, 2, 1],  # Encoded: 1 = Underweight, 2 = Normal weight, etc.
    'HbA1c_above_6_5': [0, 1, 0],
    'blood_glucose_above_126': [0, 1, 0]
}

# Keep the raw values in their own frame so the unscaled inputs stay available.
sample_raw_df = pd.DataFrame(sample_data)

# Standardise with explicit arithmetic instead of assigning mean_/scale_ on an
# unfitted StandardScaler: that trick depends on sklearn internals and leaves
# the estimator without n_features_in_/feature_names_in_. The arithmetic is
# the same (x - mean) / std, and pandas aligns the statistics by column name.
scaling_stats = pd.DataFrame(training_data_statistics)[columns_to_scale]
feature_means = scaling_stats.loc['mean']
feature_stds = scaling_stats.loc['std']
if (feature_stds == 0).any():
    raise ValueError("training_data_statistics contains a zero 'std'; cannot standardise")

scaled_values = (sample_raw_df[columns_to_scale] - feature_means) / feature_stds
sample_features = sample_raw_df.assign(
    **{col: scaled_values[col] for col in columns_to_scale}
)

# If the model recorded its training column names, validate the sample against
# them and reorder, so predict() never sees a missing, extra or shuffled column.
expected_features = getattr(rf_model, 'feature_names_in_', None)
if expected_features is not None:
    expected_features = list(expected_features)
    missing = [name for name in expected_features if name not in sample_features.columns]
    unexpected = [name for name in sample_features.columns if name not in expected_features]
    if missing or unexpected:
        raise ValueError(
            f"sample columns do not match the model: missing={missing}, unexpected={unexpected}"
        )
    sample_features = sample_features[expected_features]

# Predict using the trained model
predictions = rf_model.predict(sample_features)

# Attach the predictions to a copy so the feature frame stays reusable.
sample_df = sample_features.assign(predicted_diabetes=predictions)

# to_string() renders every column; a plain print() elides the middle of a
# frame this wide with "...".
print(sample_df.to_string())


   age  hypertension  heart_disease  smoking_history   bmi  HbA1c_level  \
0 -1.0             0              0                2 -0.50         -0.2   
1  1.5             1              1                0  1.04          1.2   
2 -2.0             0              0                1 -1.26          0.1   

   blood_glucose_level  age_group  gender_Female  gender_Male  ...  \
0            -0.666667          1              1            0  ...   
1             1.333333          2              0            1  ...   
2            -2.333333          0              0            1  ...   

   Female_Adult  Female_Old  Other_Child  Other_Adult  Other_Old  age_squared  \
0             1           0            0            0          0         1225   
1             0           0            0            0          0         3600   
2             0           0            0            0          0          625   

   bmi_category  HbA1c_above_6_5  blood_glucose_above_126  predicted_diabetes  
0            

