In [56]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = "heart_disease.csv"
df = pd.read_csv(file_path)

# Check for missing values
print("Missing values in the dataset:")
print(df.isnull().sum())

# Handle missing values using SimpleImputer
imputer = SimpleImputer(strategy="mean")  # Replace NaNs with mean for numerical columns

# Encode categorical variables
df['prevalentStroke'] = df['prevalentStroke'].astype('category').cat.codes
df['Heart_ stroke'] = df['Heart_ stroke'].astype('category').cat.codes
df['Gender'] = df['Gender'].astype('category').cat.codes

# Remove the 'education' column
if 'education' in df.columns:
    df = df.drop(columns=['education'])

# Impute missing values for numerical features
df.iloc[:, :] = imputer.fit_transform(df)

# Define features (X) and target (y)
X = df.drop(columns=['Heart_ stroke']) 
y = df['Heart_ stroke']  

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the SVM model
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)

# Make predictions
y_pred = svm_model.predict(X_test)

# Evaluate the model
print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Missing values in the dataset:
Gender               0
age                  0
education          105
currentSmoker        0
cigsPerDay          29
BPMeds              53
prevalentStroke      0
prevalentHyp         0
diabetes             0
totChol             50
sysBP                0
diaBP                0
BMI                 19
heartRate            1
glucose            388
Heart_ stroke        0
dtype: int64

Accuracy: 0.8537735849056604

Classification Report:
              precision    recall  f1-score   support

           0       0.85      1.00      0.92       724
           1       0.00      0.00      0.00       124

    accuracy                           0.85       848
   macro avg       0.43      0.50      0.46       848
weighted avg       0.73      0.85      0.79       848



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
