In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Toy dataset: two categorical features (Weather, Temperature) and a binary
# target (Play) used to train and evaluate a Naive Bayes classifier.
data = {
    'Weather': ['Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy',
                'Overcast', 'Sunny', 'Sunny', 'Rainy', 'Sunny'],
    'Temperature': ['Hot', 'Cool', 'Hot', 'Cool', 'Mild',
                    'Hot', 'Cool', 'Mild', 'Cool', 'Hot'],
    'Play': ['No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No']
}
df = pd.DataFrame(data)

# Fit one LabelEncoder per column so every column keeps its own
# string <-> integer mapping (usable later via inverse_transform).
# A single shared encoder only remembers the last column it was fitted on.
encoders = {column: LabelEncoder() for column in df.columns}
for column, encoder in encoders.items():
    df[column] = encoder.fit_transform(df[column])
# Backward compatibility: the original `le` ended up fitted on 'Play'.
le = encoders['Play']

features = df[['Weather', 'Temperature']]
labels = df['Play']

# stratify=labels keeps both classes represented in the train and test parts.
# NOTE(review): with 10 rows and test_size=0.2 the test set holds only 2 rows,
# so the accuracy below can only be 0.0, 0.5 or 1.0 -- treat it as a demo.
features_train, features_test, label_train, label_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels)

# NOTE(review): GaussianNB models every feature as a continuous Gaussian
# variable, so the label codes are treated as ordered numbers. scikit-learn's
# CategoricalNB is designed for encoded categorical features; consider it.
model = GaussianNB()
model.fit(features_train, label_train)
predicted = model.predict(features_test)

print("Prediction:", predicted)
# labels=[0, 1] forces a 2x2 matrix even if a class is absent from the test set.
cm = confusion_matrix(label_test, predicted, labels=[0, 1])
print("Confusion Matrix:\n", cm)

accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)

# Rows are true labels, columns are predicted labels, so the flattened 2x2
# matrix reads: true negative, false positive, false negative, true positive.
TN, FP, FN, TP = cm.ravel()

# Recompute accuracy by hand from the confusion matrix as a cross-check;
# the guard avoids a division by zero if no samples were evaluated.
total = TP + TN + FP + FN
if total > 0:
    calculated_accuracy = (TP + TN) / total
    print(f"Calculated Accuracy: {calculated_accuracy:.2f} or {calculated_accuracy * 100:.2f}%")


Prediction: [0 0]
Confusion Matrix:
 [[1 0]
 [1 0]]
Accuracy: 0.5
Calculated Accuracy: 0.50 or 50.00%


In [7]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB

# Dataset creation: 14 customers described by four categorical attributes and
# whether each one bought a computer.
data = {
    'age': ['youth', 'youth', 'middle_aged', 'senior', 'senior', 'senior',
            'middle_aged', 'youth', 'youth', 'senior', 'youth', 'middle_aged', 'middle_aged', 'senior'],
    'income': ['high', 'high', 'high', 'medium', 'low', 'low',
               'low', 'medium', 'low', 'medium', 'low', 'medium', 'high', 'medium'],
    'student': ['no', 'no', 'no', 'no', 'yes', 'yes',
                'yes', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no'],
    'credit_rating': ['fair', 'excellent', 'fair', 'fair', 'fair', 'excellent',
                      'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'excellent'],
    'buys_computer': ['no', 'no', 'yes', 'yes', 'yes', 'no',
                      'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
}

# Convert dataset to DataFrame
df = pd.DataFrame(data)

feature_columns = ['age', 'income', 'student', 'credit_rating']

# Encoding categorical variables.
# Capture each feature's categorical dtype once so that the training data and
# any later query share exactly the same label -> integer-code mapping.
feature_dtypes = {
    column: df[column].astype('category').dtype for column in feature_columns
}
for column, dtype in feature_dtypes.items():
    df[column] = df[column].astype(dtype).cat.codes
df['buys_computer'] = df['buys_computer'].map({'no': 0, 'yes': 1})

# Splitting features and target
X = df[feature_columns]
y = df['buys_computer']

# Model training
# NOTE(review): GaussianNB treats the integer codes as continuous values;
# scikit-learn's CategoricalNB is the variant meant for categorical features.
model = GaussianNB()
model.fit(X, y)

# Predicting for a new data point.
# The query is written with readable labels and encoded through the training
# dtypes instead of hand-typed codes, which silently depend on pandas' sorted
# category order.
# NOTE(review): these labels are what the original hard-coded codes
# [2, 1, 1, 0] decode to -- confirm this is the intended query.
query = {'age': 'youth', 'income': 'low', 'student': 'yes', 'credit_rating': 'excellent'}
prediction_input = pd.DataFrame({
    column: pd.Series([query[column]]).astype(dtype).cat.codes
    for column, dtype in feature_dtypes.items()
})

# Labels that never appeared in the training data encode to -1; fail loudly
# rather than feeding a meaningless code to the model.
unknown_columns = [
    column for column in feature_columns if (prediction_input[column] < 0).any()
]
if unknown_columns:
    raise ValueError(f"Query contains labels unseen in training for: {unknown_columns}")

prediction = model.predict(prediction_input)

# Result interpretation
result = "Yes" if prediction[0] == 1 else "No"
print(f"Prediction: Will the person buy a computer? {result}")


Prediction: Will the person buy a computer? Yes
