In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
from sklearn.ensemble import VotingClassifier

# Load the dataset
data = pd.read_csv("/content/Hypertension-risk-model-main.csv")

# Drop columns with high correlation or not needed
data.dropna(inplace=True)
data = data.drop(columns=['cigsPerDay', 'glucose', 'diabetes'])

# Separate features and target
X = data.drop(columns=['Risk'])
y = data['Risk']

# One-hot encode categorical variables (if any)
X = pd.get_dummies(X, drop_first=True)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the classifiers
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
svm_model = SVC(kernel='linear', probability=True, random_state=42)

# Create an ensemble model using VotingClassifier
voting_model = VotingClassifier(estimators=[
    ('random_forest', rf_model),
    ('svm', svm_model)
], voting='soft')  # 'soft' to consider predicted probabilities for averaging

# Train the ensemble model
voting_model.fit(X_train, y_train)

# Make predictions using the ensemble model
voting_predictions = voting_model.predict(X_test)

# Evaluate the ensemble model
print("Ensemble Model Accuracy:", accuracy_score(y_test, voting_predictions))
print(classification_report(y_test, voting_predictions))


Ensemble Model Accuracy: 0.8894806924101198
              precision    recall  f1-score   support

           0       0.92      0.92      0.92       516
           1       0.82      0.82      0.82       235

    accuracy                           0.89       751
   macro avg       0.87      0.87      0.87       751
weighted avg       0.89      0.89      0.89       751

