# Importing the Required Libraries 

In [1]:
import pandas as pd
import numpy as np
import warnings
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Silence every warning for the rest of the session so cell outputs stay clean.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by pandas / scikit-learn; consider narrowing it to specific
# categories once the notebook is stable.
warnings.filterwarnings('ignore')

# Data Collection

In [2]:
# Read the raw dataset; the relative path is resolved against the kernel's
# current working directory.
csv_path = 'heart.csv'
df = pd.read_csv(csv_path)

# Data Preprocessing

In [3]:
# Integer-encode the categorical feature columns in place.
# (presumably these columns hold string categories in the CSV; verify with df.dtypes)
#
# Each column gets its own LabelEncoder, stored in `label_encoders`, so every
# category -> integer mapping stays available afterwards (e.g. for
# `inverse_transform` or for encoding new records). Refitting one shared
# encoder would keep only the mapping of the last column.
categorical_columns = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
# `le` remains bound to the encoder fitted on the last column ('ST_Slope').

# Splitting the Data into Input and Output

In [4]:
# Separate the label column from the predictor columns.
target_column = 'HeartDisease'
y = df[target_column]                  # what the model must predict
x = df.drop(columns=[target_column])   # every remaining column is a feature

# Splitting the Data into Training set and Testing set

In [5]:
# Hold out 20% of the rows for the final evaluation; the fixed seed makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=12,
)

# Initializing the Gaussian Naive Bayes Model

In [6]:
# Unfitted Gaussian Naive Bayes classifier built with its default arguments;
# it is the base estimator handed to GridSearchCV further down.
NB = GaussianNB()

# Defining the Parameter Grid

In [7]:
# Candidate values for GaussianNB's `var_smoothing`: 100 points spaced evenly
# on a log10 scale, running from 1e0 down to 1e-9.
smoothing_candidates = np.logspace(start=0, stop=-9, num=100)
param_grid = {'var_smoothing': smoothing_candidates}

# Initializing GridSearchCV

In [8]:
# Exhaustive search over `param_grid`, scoring each candidate with 5-fold
# cross-validation on whatever data is later passed to `.fit`.
grid_search = GridSearchCV(
    estimator=NB,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,  # run the fits in parallel on all available cores
    verbose=2,  # print progress messages while fitting
)

# Fit the Model

In [9]:
# Run the cross-validated search on the training split only; the test split
# is not used until the final evaluation cell.
grid_search.fit(x_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


# Getting the Best Parameters and Best Score

In [10]:
# Pull the winning hyper-parameters and their cross-validation score out of
# the fitted search object, then report both.
best_params, best_score = grid_search.best_params_, grid_search.best_score_

print(f"Best Parameters: {best_params}")
print(f"Best Cross-Validation Score: {best_score}")

Best Parameters: {'var_smoothing': 5.336699231206313e-06}
Best Cross-Validation Score: 0.8664150591743546


# Evaluating the Model on the Test Set

In [11]:
# Predict labels for the held-out rows with the fitted search object and
# compare them against the true labels.
y_pred = grid_search.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Test Set Accuracy: {accuracy}")

Test Set Accuracy: 0.8260869565217391
