In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB, CategoricalNB, GaussianNB, MultinomialNB
from sklearn.preprocessing import LabelEncoder


# Step 1: Load dataset from CSV
df = pd.read_csv("play_tennis.csv")

# Step 2: Drop the "day" column (not a feature)
df = df.drop(columns=['day'])

# Step 3: Encode every categorical column as integer codes.
# One fitted encoder per column is kept so codes can be decoded again later
# (and so new samples can be encoded the same way).
encoders = {}
for col in df.columns:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])

# Step 4: Split data into features and target
X = df.drop(columns=['play'])  # Features
y = df['play']  # Target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 5: Train Naive Bayes model
# The features are integer category codes (0..k-1), so CategoricalNB is the
# matching variant. BernoulliNB binarizes each feature at 0.0 by default, which
# merges every code > 0 into a single value: a three-level column such as
# outlook would keep only "code 0" vs "anything else".
# min_categories comes from the encoders (fitted on the full column), so a
# category that happens to be absent from the training split is still known
# to the model at prediction time.
n_categories = [len(encoders[col].classes_) for col in X.columns]
model = CategoricalNB(min_categories=n_categories)
model.fit(X_train, y_train)

# Step 6: Make predictions
y_pred = model.predict(X_test)

# Step 7: Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Step 8: Print the test set with actual & predicted results
print("\nTest Set and Predictions:")
test_results = X_test.copy()  # Copy so X_test itself keeps its integer codes

# Decode labels straight back to their original strings; both arrays are in
# the same row order as X_test, so positional assignment lines up.
test_results['Actual Play'] = encoders['play'].inverse_transform(y_test)
test_results['Predicted Play'] = encoders['play'].inverse_transform(y_pred)

# Convert feature values back to original categories
for col in X_test.columns:
    test_results[col] = encoders[col].inverse_transform(test_results[col])

# Print the test results
print(test_results)



Model Accuracy: 60.00%

Test Set and Predictions:
     outlook  temp humidity    wind Actual Play Predicted Play
9       Rain  Mild   Normal    Weak         Yes            Yes
11  Overcast  Mild     High  Strong         Yes             No
0      Sunny   Hot     High    Weak          No             No
12  Overcast   Hot   Normal    Weak         Yes            Yes
5       Rain  Cool   Normal  Strong          No            Yes


In [3]:
# Step 9: Predict for a new sample
# Build the raw (string-valued) sample and put its columns in the same order
# as the training features.
feature_order = list(X.columns)
sample_raw = pd.DataFrame(
    [['Sunny', 'Cool', 'High', 'Strong']],
    columns=['outlook', 'temp', 'humidity', 'wind'],
)[feature_order]

# Encode each feature with the encoder that was fitted for that column on the
# training data, producing the integer-coded frame the model expects.
new_sample = pd.DataFrame(
    {col: encoders[col].transform(sample_raw[col]) for col in feature_order},
    index=sample_raw.index,
)

# Predict the class code, then decode it back to the original label
prediction = model.predict(new_sample)
prediction_label = encoders['play'].inverse_transform(prediction)[0]
print(f"\nPrediction for new sample (Sunny, Cool, High, Strong): {prediction_label}")



Prediction for new sample (Sunny, Cool, High, Strong): Yes
