In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Done by Joseph Middleton Jr
# Due 4/28/24
# On Heart attack dataset: https://www.kaggle.com/datasets/sukhmandeepsinghbrar/heart-attack-dataset
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

# Example dataset
data = pd.read_csv('Medicaldataset1.csv')
df = pd.DataFrame(data)

# Encoding categorical data
label_encoder = LabelEncoder()
for col in df.columns:
    df[col] = label_encoder.fit_transform(df[col])

# Splitting dataset into features and target variable
X = df.drop('Result', axis=1)
y = df['Result']

# print(y)

# Splitting dataset into training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Initializing the Gaussian Naive Bayes classifier
model = GaussianNB()

# Training the model
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Specific day's conditions for prediction
new_data1 = pd.DataFrame({
    'Age': [20],
    'Gender': [1],  # Male
    'Heart rate': [94],
    'Systolic blood pressure': [98],
    'Diastolic blood pressure': [46],
    'Blood sugar': [296],
    'CK-MB': [6.75],
    'Troponin': [1.06]
})

new_data2 = pd.DataFrame({
    'Age': [52],
    'Gender': [0],  # Female
    'Heart rate': [61],
    'Systolic blood pressure': [112],
    'Diastolic blood pressure': [58],
    'Blood sugar': [87],
    'CK-MB': [1.83],
    'Troponin': [0.004]
})

# Making prediction for the specific conditions
new_prediction1 = model.predict(new_data1)

# Decoding the prediction back to the categorical label
decoded_prediction1 = 'negative' if new_prediction1[0] == 1 else 'positive'

# Printing the result
print(f"Prediction for Test (Age: 20, Gender: Male (1), Heart rate: 94, S Blood pressure: 98, D Blood pressure: 46, Blood Sugar: 296, CK-MB: 6.75, Troponin: 1.06) = {decoded_prediction1}")

# Making prediction for the specific conditions
new_prediction2 = model.predict(new_data2)

# Decoding the prediction back to the categorical label
decoded_prediction2 = 'negative' if new_prediction2[0] == 1 else 'positive'

# Printing the result
print(f"Prediction for Test (Age: 52, Gender: Female (0), Heart rate: 61, S Blood pressure: 112, D Blood pressure: 58, Blood Sugar: 87, CK-MB: 1.83, Troponin: 0.004) = {decoded_prediction2}")

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# A measurement of the average size of mistakes in a collection of predictions
mae = mean_absolute_error(y_test, y_pred)
# A measurement of how close a regression line is to a sed of data points
mse = mean_squared_error(y_test, y_pred)
# R Squared Value tells us how well the model explains/predicts the data's outcomes
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared score: {r2}")

Prediction for Test (Age: 20, Gender: Male (1), Heart rate: 94, S Blood pressure: 98, D Blood pressure: 46, Blood Sugar: 296, CK-MB: 6.75, Troponin: 1.06) = positive
Prediction for Test (Age: 52, Gender: Male (0), Heart rate: 61, S Blood pressure: 112, D Blood pressure: 58, Blood Sugar: 87, CK-MB: 1.83, Troponin: 0.004) = positive
Accuracy: 92.12%
Mean Absolute Error (MAE): 0.07878787878787878
Mean Squared Error (MSE): 0.07878787878787878
R-squared score: 0.6681621287128713
