In [21]:
import pandas as pd
from sklearn.naive_bayes import CategoricalNB
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import numpy as np

In [22]:
# Read the "Enjoy Sports" CSV and discard the "Day" column, which is
# treated as irrelevant for classification.
file_path = 'Enjoy sports.csv'
df_sports = pd.read_csv(file_path).drop(columns=['Day'])



In [23]:
# Encode the categorical columns as dummy/indicator variables;
# drop_first=True removes the first level of each encoded column.
df_sports_encoded = pd.get_dummies(df_sports, drop_first=True)

# Target (y) is the 'Decision_Yes' indicator; features (X) are every other column.
y_sports = df_sports_encoded['Decision_Yes']
X_sports = df_sports_encoded.drop(columns=['Decision_Yes'])

# No train/test split is performed in this cell.




In [24]:
# Categorical Naive Bayes estimator used for both cross-validation and the final fit.
nb_categorical = CategoricalNB()

# Shuffled, seeded 5-fold stratified cross-validation; one accuracy score per fold.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cross_val_scores = cross_val_score(
    nb_categorical,
    X_sports,
    y_sports,
    cv=kf,
    scoring='accuracy',
)

# Fit on every row, then predict those same rows.
# NOTE(review): the confusion matrix, accuracy and report below are therefore
# in-sample figures (the model has already seen these rows); compare them with
# cross_val_scores before drawing conclusions about generalization.
y_pred_sports = nb_categorical.fit(X_sports, y_sports).predict(X_sports)

# Classification metrics for the predictions above.
conf_matrix = confusion_matrix(y_sports, y_pred_sports)
accuracy = accuracy_score(y_sports, y_pred_sports)
classification_rep = classification_report(y_sports, y_pred_sports)



In [25]:
# Report the per-fold and mean cross-validation accuracy first ...
print("Cross-Validation Accuracy Scores:", cross_val_scores)
print("Average Cross-Validation Accuracy:", np.mean(cross_val_scores))

# ... then the metrics computed from the full-dataset fit. sep="\n" places each
# multi-line value on the line after its heading.
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nAccuracy:", accuracy)
print("\nClassification Report:", classification_rep, sep="\n")

Cross-Validation Accuracy Scores: [0.66666667 0.66666667 0.         0.66666667 0.5       ]
Average Cross-Validation Accuracy: 0.5

Confusion Matrix:
[[3 2]
 [0 9]]

Accuracy: 0.8571428571428571

Classification Report:
              precision    recall  f1-score   support

       False       1.00      0.60      0.75         5
        True       0.82      1.00      0.90         9

    accuracy                           0.86        14
   macro avg       0.91      0.80      0.82        14
weighted avg       0.88      0.86      0.85        14

