In [2]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Load dataset
from sklearn.datasets import load_iris
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.3, random_state=42)

In [3]:
data

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [4]:

# Train Naive Bayes model
model = GaussianNB()
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", cm)


Accuracy: 0.9777777777777777
Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]


In [25]:
import pandas as pd
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder

# Training data
data = {
    'Size': ['Small', 'Large', 'Medium', 'Small', 'Large', 'Medium', 'Small', 'Large'],
    'Weight': ['Light', 'Heavy', 'Light', 'Heavy', 'Heavy', 'Light', 'Light', 'Heavy'],
    'Texture': ['Smooth', 'Rough', 'Rough', 'Smooth', 'Smooth', 'Rough', 'Rough', 'Rough'],
    'Color': ['Red', 'Blue', 'Green', 'Red', 'Blue', 'Green', 'Red', 'Blue']
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Encode categorical variables to numeric values
label_encoders = {}
for column in df.columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Separate features and target
X = df[['Size', 'Weight', 'Texture']]
y = df['Color']

# Create and train the Naive Bayes model
model = CategoricalNB()
model.fit(X, y)

# New marble attributes to predict
new_marble = pd.DataFrame({
    'Size': ['Medium'],
    'Weight': ['Light'],
    'Texture': ['Smooth']
})

# Encode the new marble's attributes
for column in new_marble.columns:
    new_marble[column] = label_encoders[column].transform(new_marble[column])

# Predict the color
predicted_color_encoded = model.predict(new_marble)
print(model)
print(predicted_color_encoded)
predicted_color = label_encoders['Color'].inverse_transform(predicted_color_encoded)

print("Predicted Color of the new marble:", predicted_color[0])


CategoricalNB()
[1]
Predicted Color of the new marble: Green


In [None]:
pip install pgmpy

In [8]:
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Define the structure of the Bayesian Network
model = BayesianNetwork([('Disease', 'Fever'), ('Disease', 'Cough'), ('Fever', 'Fatigue')])

# Define the CPDs (Conditional Probability Distributions)
cpd_disease = TabularCPD(variable='Disease', variable_card=2, values=[[0.6], [0.4]])
cpd_fever = TabularCPD(variable='Fever', variable_card=2, 
                       values=[[0.8, 0.2], [0.2, 0.8]], 
                       evidence=['Disease'], evidence_card=[2])
cpd_cough = TabularCPD(variable='Cough', variable_card=2, 
                       values=[[0.7, 0.3], [0.3, 0.7]], 
                       evidence=['Disease'], evidence_card=[2])
cpd_fatigue = TabularCPD(variable='Fatigue', variable_card=2, 
                         values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], 
                         evidence=['Fever', 'Disease'], evidence_card=[2, 2])

# Add CPDs to the model
model.add_cpds(cpd_disease, cpd_fever, cpd_cough, cpd_fatigue)

# Perform inference
infer = VariableElimination(model)
prob_disease_given_evidence = infer.map_query(variables=['Disease'], evidence={'Fever': 1, 'Cough': 1})
print(prob_disease_given_evidence)


ModuleNotFoundError: No module named 'pgmpy'

In [10]:
# Given probabilities
P_R = 0.2  # P(R=Yes)
P_not_R = 0.8  # P(R=No)

P_S_given_R = 0.01  # P(S=Yes | R=Yes)
P_not_S_given_R = 0.99  # P(S=No | R=Yes)
P_S_given_not_R = 0.4  # P(S=Yes | R=No)
P_not_S_given_not_R = 0.6  # P(S=No | R=No)

P_W_given_R_S = 0.99  # P(W=Yes | R=Yes, S=Yes)
P_not_W_given_R_S = 0.01  # P(W=No | R=Yes, S=Yes)
P_W_given_R_not_S = 0.8  # P(W=Yes | R=Yes, S=No)
P_not_W_given_R_not_S = 0.2  # P(W=No | R=Yes, S=No)
P_W_given_not_R_S = 0.9  # P(W=Yes | R=No, S=Yes)
P_not_W_given_not_R_S = 0.1  # P(W=No | R=No, S=Yes)
P_W_given_not_R_not_S = 0.0  # P(W=Yes | R=No, S=No)
P_not_W_given_not_R_not_S = 1.0  # P(W=No | R=No, S=No)

# Joint probability for Wet Grass being Yes
P_W_and_R_and_S = P_R * P_S_given_R * P_W_given_R_S
P_W_and_R_and_not_S = P_R * P_not_S_given_R * P_W_given_R_not_S
P_W_and_not_R_and_S = P_not_R * P_S_given_not_R * P_W_given_not_R_S
P_W_and_not_R_and_not_S = P_not_R * P_not_S_given_not_R * P_W_given_not_R_not_S

# Total probability of W = Yes (using the law of total probability)
P_W = (P_W_and_R_and_S +
       P_W_and_R_and_not_S +
       P_W_and_not_R_and_S +
       P_W_and_not_R_and_not_S)

# Calculate P(R=Yes | W=Yes) using Bayes' theorem
P_R_given_W = (P_W_and_R_and_S + P_W_and_R_and_not_S) / P_W

# Output the result
print("P(Rain | Wet Grass) =", P_R_given_W)


P(Rain | Wet Grass) = 0.35768767563227616


In [17]:
import numpy as np
import bayespy as bp

# Define the probabilities
P_R = 0.2  # P(Rain=Yes)
P_S_given_R = np.array([[0.6, 0.4], [0.99, 0.01]])  # P(Sprinkler=No,Yes | Rain=No) and P(Sprinkler=No,Yes | Rain=Yes)
P_W_given_RS = np.array([[[1.0, 0.0], [0.1, 0.9]],  # P(Wet=No,Yes | Rain=No, Sprinkler=No)
                         [[0.2, 0.8], [0.01, 0.99]]])  # P(Wet=No,Yes | Rain=Yes, Sprinkler=Yes)

# Bayesian Network model
R = bp.nodes.Bernoulli(P_R)  # Rain node

# Sprinkler node conditioned on Rain
S = bp.nodes.Categorical(bp.nodes.Gate(R, P_S_given_R))

# Wet Grass node conditioned on Rain and Sprinkler
W = bp.nodes.Categorical(bp.nodes.Gate([R, S], P_W_given_RS))

# Observe evidence: grass is wet
W.observe(1)  # W=Yes

# Perform inference
Q = bp.inference.VB(R, S, W)
Q.update(repeat=100)

# Print results
print("P(Rain=Yes | Wet Grass=Yes) =", Q.R.get_moments()[0])
print("P(Sprinkler=Yes | Wet Grass=Yes) =", Q.S.get_moments()[0][1])


ValueError: X must be a node or moments should be provided