In [None]:
import os

import joblib
import numpy as np
import pandas as pd
from google.colab import drive
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Step 1: Generate Realistic Synthetic Data
# Row count for the synthetic dataset; the generator functions below read
# this module-level value when they are called.
num_samples = 20_000

# Seed NumPy's global random state so repeated runs draw the same values.
np.random.seed(42)

def generate_realistic_heart_rate(size=None):
    """Draw synthetic average heart-rate values from a normal distribution.

    Values are sampled from N(loc=75, scale=5) using NumPy's global random
    state, so the result depends on any prior ``np.random.seed`` call.

    Args:
        size: Number of samples to draw. When omitted (``None``) the
            module-level ``num_samples`` is used, which keeps the original
            zero-argument call working unchanged.

    Returns:
        numpy.ndarray of floats; shape ``(size,)`` for an integer ``size``.
    """
    if size is None:
        # Fall back to the notebook-wide sample count (looked up at call time).
        size = num_samples
    return np.random.normal(loc=75, scale=5, size=size)

def generate_realistic_hrv_mean(size=None):
    """Draw synthetic mean-HRV values from a normal distribution.

    Values are sampled from N(loc=50, scale=15) using NumPy's global random
    state, so the result depends on any prior ``np.random.seed`` call.

    Args:
        size: Number of samples to draw. When omitted (``None``) the
            module-level ``num_samples`` is used, which keeps the original
            zero-argument call working unchanged.

    Returns:
        numpy.ndarray of floats; shape ``(size,)`` for an integer ``size``.
    """
    if size is None:
        # Fall back to the notebook-wide sample count (looked up at call time).
        size = num_samples
    return np.random.normal(loc=50, scale=15, size=size)

def generate_realistic_bp_systolic(size=None):
    """Draw synthetic systolic blood-pressure values from a normal distribution.

    Values are sampled from N(loc=120, scale=10) using NumPy's global random
    state, so the result depends on any prior ``np.random.seed`` call.

    Args:
        size: Number of samples to draw. When omitted (``None``) the
            module-level ``num_samples`` is used, which keeps the original
            zero-argument call working unchanged.

    Returns:
        numpy.ndarray of floats; shape ``(size,)`` for an integer ``size``.
    """
    if size is None:
        # Fall back to the notebook-wide sample count (looked up at call time).
        size = num_samples
    return np.random.normal(loc=120, scale=10, size=size)

def generate_realistic_bp_diastolic(size=None):
    """Draw synthetic diastolic blood-pressure values from a normal distribution.

    Values are sampled from N(loc=80, scale=5) using NumPy's global random
    state, so the result depends on any prior ``np.random.seed`` call.

    Args:
        size: Number of samples to draw. When omitted (``None``) the
            module-level ``num_samples`` is used, which keeps the original
            zero-argument call working unchanged.

    Returns:
        numpy.ndarray of floats; shape ``(size,)`` for an integer ``size``.
    """
    if size is None:
        # Fall back to the notebook-wide sample count (looked up at call time).
        size = num_samples
    return np.random.normal(loc=80, scale=5, size=size)

def generate_target(size=None):
    """Draw synthetic binary labels (0 or 1) independently at random.

    Each label is 0 with probability 0.6 and 1 with probability 0.4, drawn
    from NumPy's global random state. The labels are sampled without
    reference to any feature values.

    Args:
        size: Number of labels to draw. When omitted (``None``) the
            module-level ``num_samples`` is used, which keeps the original
            zero-argument call working unchanged.

    Returns:
        numpy.ndarray of integers in {0, 1}; shape ``(size,)`` for an
        integer ``size``.
    """
    if size is None:
        # Fall back to the notebook-wide sample count (looked up at call time).
        size = num_samples
    # Increased high-risk samples to 40%
    return np.random.choice([0, 1], size=size, p=[0.6, 0.4])

# Draw every column once. The tuple is evaluated left to right, so the
# global RNG is consumed in the order: heart rate, HRV, systolic,
# diastolic, target.
avg_heart_rate, hrv_mean, bp_systolic, bp_diastolic, target = (
    generate_realistic_heart_rate(),
    generate_realistic_hrv_mean(),
    generate_realistic_bp_systolic(),
    generate_realistic_bp_diastolic(),
    generate_target(),
)

# Assemble the four feature columns plus the label into one frame.
data = pd.DataFrame(dict(
    average_heart_rate=avg_heart_rate,
    hrv_mean=hrv_mean,
    blood_pressure_systolic=bp_systolic,
    blood_pressure_diastolic=bp_diastolic,
    target=target,
))

# Save synthetic data to CSV (optional); written without the index column.
data.to_csv('synthetic_gbm_data.csv', index=False)

# Step 2: Authenticate and Mount Google Drive
drive.mount('/content/drive')

# Step 3: Preprocess Data
# The label is the 'target' column; every other column is a feature.
y = data['target']
X = data.drop(columns='target')

# Split data: hold out 20% of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 4: Train the Model
# scale_pos_weight=2 gives more weight to high-risk (class 1) samples.
model = XGBClassifier(scale_pos_weight=2)
model.fit(X_train, y_train)

# Evaluate model
# y_pred holds the model's default predictions; the metrics printed below
# are computed from the lowered-threshold predictions instead.
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]
# Lowered threshold for high-risk classification: label 1 when P > 0.3.
y_pred_adjusted = (y_pred_proba > 0.3).astype(int)

accuracy = accuracy_score(y_test, y_pred_adjusted)
print(f'Accuracy: {accuracy}')

# Print detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred_adjusted))

# Step 5: Save the Model to Google Drive
# Save into the "project models" folder: that is the location the inference
# cell of this notebook loads gbm_model.pkl from, so a fresh
# Restart & Run All reads back the model trained above rather than a stale
# or missing file.
model_filename = '/content/drive/MyDrive/project models/gbm_model.pkl'
# joblib.dump does not create missing parent folders, so make sure it exists.
os.makedirs(os.path.dirname(model_filename), exist_ok=True)
joblib.dump(model, model_filename)
print(f"Model saved to {model_filename}")

Mounted at /content/drive
Accuracy: 0.41925

Classification Report:
              precision    recall  f1-score   support

           0       0.61      0.07      0.13      2386
           1       0.40      0.93      0.56      1614

    accuracy                           0.42      4000
   macro avg       0.51      0.50      0.35      4000
weighted avg       0.53      0.42      0.31      4000

Model saved to /content/drive/MyDrive/gbm_model.pkl


In [None]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.0-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting nvidia-nccl-cu12 (from xgboost)
  Downloading nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.0 kB)
Downloading xgboost-3.0.0-py3-none-manylinux_2_28_x86_64.whl (253.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.9/253.9 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (201.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m201.3/201.3 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nvidia-nccl-cu12, xgboost
Successfully installed nvidia-nccl-cu12-2.26.2 xgboost-3.0.0


In [None]:
import pandas as pd
import numpy as np
import joblib
from google.colab import drive

# Mount Google Drive at /content/drive so files under MyDrive are reachable
drive.mount('/content/drive')

# Load the saved model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only point this at a model file you created or otherwise trust.
model_filename = '/content/drive/MyDrive/project models/gbm_model.pkl'
loaded_model = joblib.load(model_filename)
print(f"Model loaded from {model_filename}")

# Generate new data for inference (you can replace this with real data)
# Default number of rows produced by generate_inference_data().
num_samples = 5

def generate_inference_data(size=None):
    """Build a DataFrame of randomly generated feature rows for inference.

    Each of the four feature columns is drawn independently from a normal
    distribution using NumPy's global random state:
    average_heart_rate ~ N(75, 5), hrv_mean ~ N(50, 15),
    blood_pressure_systolic ~ N(120, 10), blood_pressure_diastolic ~ N(80, 5).

    Args:
        size: Number of rows to generate. When omitted (``None``) the
            module-level ``num_samples`` is used, which keeps the original
            zero-argument call working unchanged.

    Returns:
        pandas.DataFrame with the four feature columns and ``size`` rows.
    """
    if size is None:
        # Fall back to the module-level row count (looked up at call time).
        size = num_samples
    return pd.DataFrame({
        'average_heart_rate': np.random.normal(loc=75, scale=5, size=size),
        'hrv_mean': np.random.normal(loc=50, scale=15, size=size),
        'blood_pressure_systolic': np.random.normal(loc=120, scale=10, size=size),
        'blood_pressure_diastolic': np.random.normal(loc=80, scale=5, size=size)
    })

new_data = generate_inference_data()

# Make predictions
# Both calls run before the result columns are attached, so the model is
# given only the feature columns.
probabilities = loaded_model.predict_proba(new_data)[:, 1]
predictions = loaded_model.predict(new_data)

# Add predictions to the DataFrame: predicted class first, then the
# class-1 probability.
new_data = new_data.assign(
    predicted_risk=predictions,
    risk_probability=probabilities,
)

# Print results
print("\nInference Results:")
print(new_data)
print("\nPredicted Risk: 0 = Low Risk, 1 = High Risk")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model loaded from /content/drive/MyDrive/project models/gbm_model.pkl

Inference Results:
   average_heart_rate   hrv_mean  blood_pressure_systolic  \
0           73.176956  34.729675               115.652572   
1           67.670995  44.131139               118.964901   
2           78.892273  53.551864               118.200728   
3           74.457990  31.924377               106.539584   
4           74.716125  47.482336               128.486420   

   blood_pressure_diastolic  predicted_risk  risk_probability  
0                 67.676162               1          0.669645  
1                 81.070482               0          0.452906  
2                 85.199407               0          0.327507  
3                 79.579816               1          0.677214  
4                 73.109857               1          0.717186  

Predicted Risk: 0 = Low Risk,