In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Train a random-forest classifier on the cardiovascular disease dataset and
# persist both the fitted model and the scaler used to preprocess its inputs.

# Load dataset
df = pd.read_csv('Cardiovascular_Disease_Dataset.csv')

# Drop 'patientid': it is a row identifier, not a predictive feature
df = df.drop(columns=['patientid'])

# Define Features and Target
X = df.drop(columns=['target'])  # every remaining column except the label
y = df['target']

# Split BEFORE scaling so that no statistic computed from the test rows can
# leak into preprocessing. The split depends only on the number of rows and
# random_state, so the partition is the same as when scaling came first.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below modify only the
# split frames (never X) and do not raise pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize numerical columns: fit on the training partition only, then
# apply the same (train-derived) mean/std to the held-out partition.
num_cols = ['age', 'restingBP', 'serumcholestrol', 'maxheartrate', 'oldpeak']
scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

# Train Model (Random Forest); random_state fixed for reproducibility
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate Model on the held-out 20% of rows
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Save Model and Scaler. The model was trained on scaled num_cols, so the
# saved scaler must be applied to those columns of any new input before
# calling model.predict.
joblib.dump(model, 'cvd_model.pkl')
joblib.dump(scaler, 'scaler.pkl')


Model Accuracy: 0.99


['scaler.pkl']