## 7. Write a program to implement the Naive Bayes classifier for a given dataset. Compute the accuracy of the classifier.

### From Scratch 1

In [30]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Weather table with 14 observations, written one observation per row.
# The last field of every row is the target column "Play".
df = pd.DataFrame(
    [
        ("Sunny", "Hot", "High", "Weak", "No"),
        ("Sunny", "Hot", "High", "Strong", "No"),
        ("Overcast", "Hot", "High", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Strong", "No"),
        ("Overcast", "Cool", "Normal", "Strong", "Yes"),
        ("Sunny", "Mild", "High", "Weak", "No"),
        ("Sunny", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Mild", "Normal", "Weak", "Yes"),
        ("Sunny", "Mild", "Normal", "Strong", "Yes"),
        ("Overcast", "Mild", "High", "Strong", "Yes"),
        ("Overcast", "Hot", "Normal", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Strong", "No"),
    ],
    columns=["Outlook", "Temp", "Humidity", "Wind", "Play"],
)

# Column-oriented view of the same table: {column name: list of values}.
data = df.to_dict(orient="list")

# Features are every column except the target; the target is "Play".
x, y = df.drop(columns=["Play"]), df["Play"]

# Hold out 30% of the rows for evaluation; random_state is fixed so the same
# split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features, built from frequency counts.

    Parameters
    ----------
    alpha : int or float, default 0
        Additive (Laplace) smoothing strength. ``0`` keeps the plain
        relative-frequency estimates; a positive value gives every
        (feature value, label) pair a non-zero likelihood.

    Attributes set by ``fit``
    -------------------------
    labels : distinct class labels found in ``y``.
    features : list of the training column names.
    total : number of training rows.
    label_count : pandas Series mapping each label to its number of rows.
    feature_probability : nested dict ``{feature: {value: {label: count}}}``.
        Despite the name it stores raw co-occurrence counts; they are turned
        into likelihoods at prediction time.
    """

    def __init__(self, alpha=0):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.alpha = alpha

    def fit(self, x, y):
        """Count label frequencies and (feature value, label) co-occurrences.

        Parameters
        ----------
        x : pandas.DataFrame
            Categorical feature columns, one row per sample.
        y : pandas.Series
            Class label of each row of ``x``, in the same positional order.
        """
        self.labels = y.unique()
        self.features = list(x.columns)
        self.total = len(y)
        self.label_count = y.value_counts()
        self.feature_probability = {}

        for feature in self.features:
            counts = {}
            # Walk the column and the labels in lockstep instead of building a
            # full row Series for every single cell via x.iloc[i][feature].
            for feature_value, label in zip(x[feature], y):
                per_label = counts.setdefault(feature_value, {})
                per_label[label] = per_label.get(label, 0) + 1
            self.feature_probability[feature] = counts

    def _likelihood(self, feature, feature_value, label):
        """Return the (optionally smoothed) estimate of P(feature=value | label)."""
        seen_values = self.feature_probability[feature]
        count = seen_values.get(feature_value, {}).get(label, 0)
        # With alpha == 0 this reduces to count / label_count, the unsmoothed estimate.
        return (count + self.alpha) / (self.label_count[label] + self.alpha * len(seen_values))

    def predict(self, x):
        """Predict a label for every row of ``x``.

        Parameters
        ----------
        x : pandas.DataFrame
            Must contain every column used in ``fit``; extra columns are ignored.

        Returns
        -------
        list
            One predicted label per row. If every label scores zero for a row
            (for example the row holds a value never seen in training and
            ``alpha`` is 0), the most frequent training label is returned
            rather than ``None``.
        """
        # Used only when no label reaches a positive score; on a tie in the
        # counts, the label that comes first in self.labels wins.
        fallback = max(self.labels, key=lambda lbl: self.label_count[lbl], default=None)

        result = []
        for _, row in x.iterrows():
            max_value = 0
            max_label = fallback
            for label in self.labels:
                # Start from the prior P(label) and multiply in each feature likelihood.
                value = self.label_count[label] / self.total
                for feature in self.features:
                    value *= self._likelihood(feature, row[feature], label)
                    if value == 0:
                        # A zero factor can never be recovered; skip the rest.
                        break
                if value > max_value:
                    max_value = value
                    max_label = label
            result.append(max_label)
        return result

# Fit the from-scratch classifier on the training split and predict the held-out rows.
nb = NaiveBayesClassifier()
nb.fit(x_train, y_train)
y_pred = nb.predict(x_test)

# Scale the score by 100 so it is printed as a percentage.
accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Accuracy of Naive Bayes classifier: {accuracy:.2f}%")

Accuracy of Naive Bayes classifier: 80.00%


### From Scratch 2

In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
import numpy as np
from sklearn.metrics import accuracy_score

# Weather table with 14 observations, written one observation per row.
# The last field of every row is the target column "Play".
df = pd.DataFrame(
    [
        ("Sunny", "Hot", "High", "Weak", "No"),
        ("Sunny", "Hot", "High", "Strong", "No"),
        ("Overcast", "Hot", "High", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Strong", "No"),
        ("Overcast", "Cool", "Normal", "Strong", "Yes"),
        ("Sunny", "Mild", "High", "Weak", "No"),
        ("Sunny", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Mild", "Normal", "Weak", "Yes"),
        ("Sunny", "Mild", "Normal", "Strong", "Yes"),
        ("Overcast", "Mild", "High", "Strong", "Yes"),
        ("Overcast", "Hot", "Normal", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Strong", "No"),
    ],
    columns=["Outlook", "Temp", "Humidity", "Wind", "Play"],
)

# Column-oriented view of the same table: {column name: list of values}.
data = df.to_dict(orient="list")

# Encode every column (features and target) as integer codes. A separate
# LabelEncoder is kept per column so each mapping can be inverted later via
# encoders[column].inverse_transform(...); refitting one shared encoder would
# keep only the classes of the column encoded last.
encoders = {}
for column in df.columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    encoders[column] = le
# After the loop `le` is the encoder of the last column that was processed.

# Features are every column except the target "Play".
X = df.drop(columns=["Play"])
y = df["Play"]

# Hold out 30% of the rows for evaluation; random_state is fixed so the same
# split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

class NaiveBayesClassifier:
    """Categorical Naive Bayes classifier with add-one (Laplace) smoothing.

    Attributes set by ``fit``
    -------------------------
    classes : sorted array of the distinct class labels (from ``np.unique``).
    class_counts : ``{class: number of training rows}``.
    feature_counts : ``{class: {feature: {value: count}}}``.
    feature_values : ``{feature: set of values seen in training, over all classes}``.
    total_samples : number of training rows.
    """

    def fit(self, X, y):
        """Count class frequencies and per-class feature-value frequencies.

        Parameters
        ----------
        X : pandas.DataFrame
            Categorical feature columns, one row per sample.
        y : pandas.Series
            Class label of each row of ``X``, in the same positional order.
        """
        self.classes = np.unique(y)
        self.class_counts = defaultdict(int)
        self.feature_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feature_values = defaultdict(set)
        self.total_samples = len(y)

        for label in y:
            self.class_counts[label] += 1

        # Walk each column together with the labels rather than materialising
        # a row Series per cell through X.iloc[i][feature].
        for feature in X.columns:
            for feature_val, label in zip(X[feature], y):
                self.feature_counts[label][feature][feature_val] += 1
                self.feature_values[feature].add(feature_val)

    def calculate_prior(self, cls):
        """Return P(cls): the fraction of training rows labelled ``cls``."""
        # .get avoids inserting a zero entry for a class that was never seen.
        return self.class_counts.get(cls, 0) / self.total_samples

    def calculate_likelihood(self, cls, feature, feature_val):
        """Return the add-one smoothed estimate of P(feature == feature_val | cls).

        The denominator adds the number of distinct values the feature takes
        across the whole training set, so the estimates over those values sum
        to 1 for every class. Counting only the values observed within ``cls``
        would inflate the likelihoods of classes that happened to see fewer
        distinct values.
        """
        # Plain .get lookups keep the nested defaultdicts from growing while predicting.
        per_feature = self.feature_counts.get(cls, {}).get(feature, {})
        feature_count = per_feature.get(feature_val, 0)
        n_values = len(self.feature_values.get(feature, ()))
        return (feature_count + 1) / (self.class_counts.get(cls, 0) + n_values)

    def predict(self, X):
        """Predict the most probable class for every row of ``X``.

        Returns
        -------
        list
            One class label per row. When several classes tie, the one that
            comes first in ``self.classes`` is chosen.
        """
        predictions = []
        for _, row in X.iterrows():
            class_probs = {}
            for cls in self.classes:
                # Prior times the product of the per-feature likelihoods.
                class_prob = self.calculate_prior(cls)
                for feature, feature_val in row.items():
                    class_prob *= self.calculate_likelihood(cls, feature, feature_val)
                class_probs[cls] = class_prob
            predictions.append(max(class_probs, key=class_probs.get))
        return predictions

# Fit the from-scratch classifier on the training split and predict the held-out rows.
model = NaiveBayesClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Scale the score by 100 so it is printed as a percentage.
accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Accuracy of Naive Bayes classifier: {accuracy:.2f}%")

Accuracy of Naive Bayes classifier: 60.00%


### Using Library

In [32]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

# Weather table with 14 observations, written one observation per row.
# The last field of every row is the target column "Play".
df = pd.DataFrame(
    [
        ("Sunny", "Hot", "High", "Weak", "No"),
        ("Sunny", "Hot", "High", "Strong", "No"),
        ("Overcast", "Hot", "High", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Strong", "No"),
        ("Overcast", "Cool", "Normal", "Strong", "Yes"),
        ("Sunny", "Mild", "High", "Weak", "No"),
        ("Sunny", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Mild", "Normal", "Weak", "Yes"),
        ("Sunny", "Mild", "Normal", "Strong", "Yes"),
        ("Overcast", "Mild", "High", "Strong", "Yes"),
        ("Overcast", "Hot", "Normal", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Strong", "No"),
    ],
    columns=["Outlook", "Temp", "Humidity", "Wind", "Play"],
)

# Column-oriented view of the same table: {column name: list of values}.
data = df.to_dict(orient="list")

# Encode every column (features and target) as integer codes. A separate
# LabelEncoder is kept per column so each mapping can be inverted later via
# encoders[column].inverse_transform(...); refitting one shared encoder would
# keep only the classes of the column encoded last.
encoders = {}
for column in df.columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    encoders[column] = le
# After the loop `le` is the encoder of the last column that was processed.

# Features are every column except the target "Play".
X = df.drop(columns=["Play"])
y = df["Play"]

# Hold out 30% of the rows for evaluation; random_state is fixed so the same
# split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# The features are nominal categories that were only label-encoded to
# integers, so they are modelled with a categorical Naive Bayes. GaussianNB
# would treat the arbitrary integer codes as continuous, normally distributed
# measurements, which gives the code order a meaning it does not have.
# min_categories states how many categories each feature has in the full
# table, so a category that is absent from the training split is still
# accepted (and smoothed) at prediction time.
model = CategoricalNB(min_categories=X.nunique().to_numpy())
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# accuracy is kept as a fraction; it is scaled to a percentage only for display.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Naive Bayes classifier: {accuracy * 100:.2f}%")

Accuracy of Naive Bayes classifier: 60.00%
