In [1]:
import pandas as pd
import random
from collections import defaultdict

# --- Generate Dataset ---
# Synthetic toy data: every feature value and every label is drawn independently
# and uniformly at random, so the labels are not derived from the features.
features = {
    "renk": ["kirmizi", "kahverengi", "sari"],
    "sapka": ["duz", "yuvarlak"],
    "sap": ["kahverengi", "beyaz", "sari"]
}
labels = ["P", "E"]

# A dedicated, seeded generator makes Restart & Run All reproduce the exact same
# rows (and therefore the same predictions) without touching the global RNG state.
SEED = 42
N_ROWS = 30
rng = random.Random(SEED)

# Built with a comprehension so the throwaway loop variable does not leak into
# the notebook namespace (a module-level `_` would shadow IPython's last-output `_`).
rows = [
    {
        "renk": rng.choice(features["renk"]),
        "sapka": rng.choice(features["sapka"]),
        "sap": rng.choice(features["sap"]),
        "label": rng.choice(labels),
    }
    for _ in range(N_ROWS)
]

df = pd.DataFrame(rows)

# --- Separate features (X) and label (y) ---
X = df.drop(columns=["label"])  # only renk, sapka, sap
y = df["label"]                 # only P or E

print("Features (X):")
print(X.head())
print("\nLabels (y):")
print(y.head())

# --- Custom Naive Bayes ---
class CustomNaiveBayes:
    """Naive Bayes classifier for categorical features with add-one (Laplace) smoothing.

    Attributes (rebuilt from scratch by every call to ``fit``):
        class_counts: label -> number of training rows carrying that label.
        feature_counts: column -> value -> label -> number of training rows
            where that column had that value and the row had that label.
        feature_values: column -> set of distinct values seen for that column.
        total: number of training rows.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Put the model back into its untrained state (all counts empty)."""
        self.class_counts = defaultdict(int)
        self.feature_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feature_values = defaultdict(set)
        self.total = 0

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """Count label and (column, value, label) frequencies from the training data.

        Rows of ``X`` and entries of ``y`` are paired by position. Any counts
        from a previous ``fit`` are discarded first, so calling ``fit`` again
        retrains the model instead of mixing old counts with a new ``total``.

        Args:
            X: one row per sample, one column per categorical feature.
            y: one label per sample, in the same order as the rows of ``X``.

        Raises:
            ValueError: if ``X`` and ``y`` do not have the same number of rows.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )

        self._reset()
        self.total = len(y)

        targets = list(y)
        for c in targets:
            self.class_counts[c] += 1

        # Walk each column once instead of materialising a row Series per cell
        # access (X.iloc[i][col]); the resulting counts are the same.
        for col in X.columns:
            counts_for_col = self.feature_counts[col]
            values_for_col = self.feature_values[col]
            for val, c in zip(X[col], targets):
                counts_for_col[val][c] += 1
                values_for_col.add(val)

    def predict(self, x: dict):
        """Score every known class for one sample and return the best one.

        For each class the score is the prior ``class_count / total`` multiplied
        by ``(count + 1) / (class_count + k)`` for every ``(column, value)`` in
        ``x``, where ``k`` is the number of distinct training values of that
        column. The scores are not normalised, so they need not sum to 1.
        A column that was never seen in training has ``k == 0`` and therefore
        contributes a factor of ``1 / class_count``.

        Args:
            x: mapping of column name -> categorical value for a single sample.

        Returns:
            ``(label, scores)`` where ``scores`` maps each class to its score
            and ``label`` is the class with the highest score.

        Raises:
            ValueError: if the model has not been fitted on any data.
        """
        if not self.class_counts:
            raise ValueError(
                "CustomNaiveBayes.predict() called before fit(): no training data"
            )

        probs = {}
        for c, total_c in self.class_counts.items():
            # prior probability
            prob = total_c / self.total
            # multiply by the smoothed conditional probabilities
            for col, val in x.items():
                # .get() rather than indexing: indexing the nested defaultdicts
                # would insert empty entries for unseen columns/values and so
                # mutate the trained model on every query.
                count = self.feature_counts.get(col, {}).get(val, {}).get(c, 0)
                k = len(self.feature_values.get(col, ()))  # categories for Laplace smoothing
                prob *= (count + 1) / (total_c + k)
            probs[c] = prob
        return max(probs, key=probs.get), probs

# --- Train ---
# Build a fresh classifier and count frequencies over the generated dataset.
model = CustomNaiveBayes()
model.fit(X, y)

# --- Predict new mushroom ---
# One hand-written sample, keyed by the same column names used in X.
new_mushroom = {"renk":"kirmizi","sapka":"yuvarlak","sap":"beyaz"}
pred, probs = model.predict(x=new_mushroom)

# Report the query, the winning label and the per-class scores, one per line.
for caption, value in (
    ("\nNew mushroom:", new_mushroom),
    ("Predicted label:", pred),
    ("Class probabilities:", probs),
):
    print(caption, value)


Features (X):
         renk     sapka         sap
0     kirmizi       duz  kahverengi
1        sari       duz  kahverengi
2  kahverengi       duz       beyaz
3        sari  yuvarlak        sari
4        sari  yuvarlak       beyaz

Labels (y):
0    P
1    E
2    E
3    E
4    P
Name: label, dtype: object

New mushroom: {'renk': 'kirmizi', 'sapka': 'yuvarlak', 'sap': 'beyaz'}
Predicted label: P
Class probabilities: {'P': 0.04238754325259515, 'E': 0.01575869498307171}


In [3]:
# Print the first five rows one at a time; each positional lookup X.iloc[pos]
# is printed as its own Series (the columns become the index of that Series).
for i, a in enumerate(X.iloc[pos] for pos in range(5)):
    print(a)

renk        kirmizi
sapka           duz
sap      kahverengi
Name: 0, dtype: object
renk           sari
sapka           duz
sap      kahverengi
Name: 1, dtype: object
renk     kahverengi
sapka           duz
sap           beyaz
Name: 2, dtype: object
renk         sari
sapka    yuvarlak
sap          sari
Name: 3, dtype: object
renk         sari
sapka    yuvarlak
sap         beyaz
Name: 4, dtype: object
