# Practice: Solving Probability Problems

In [1]:
import numpy as np

def create_training_data():
    """Return the hard-coded training set as a 2D NumPy string array.

    Each row is one sample: four categorical feature values followed by the
    class label ("yes" / "no") in the last column.

    Returns:
        numpy.ndarray: Array of shape (10, 5) containing the samples below.
    """
    # One tuple per sample; the final entry of each tuple is the class label.
    samples = (
        ("Sunny", "Hot", "High", "Weak", "no"),
        ("Sunny", "Hot", "High", "Strong", "no"),
        ("Overcast", "Hot", "High", "Weak", "yes"),
        ("Rain", "Mild", "High", "Weak", "yes"),
        ("Rain", "Cool", "Normal", "Weak", "yes"),
        ("Rain", "Cool", "Normal", "Strong", "no"),
        ("Overcast", "Cool", "Normal", "Strong", "yes"),
        ("Overcast", "Mild", "High", "Weak", "no"),
        ("Sunny", "Cool", "Normal", "Weak", "yes"),
        ("Rain", "Mild", "Normal", "Weak", "yes"),
    )
    return np.array(samples)

# Build the dataset once; the cells below reuse `train_data` as their input.
train_data = create_training_data()
print(train_data)


[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


In [39]:
def compute_prior_probabilities(train_data: np.ndarray):
    """Estimate the prior probability of every class label.

    Parameters:
        train_data (numpy.ndarray): 2D array whose last column holds the class labels.

    Returns:
        numpy.ndarray: Relative frequency of each distinct label, in the order
        of the unique labels produced by ``np.unique``.
    """
    labels = train_data[:, -1]
    # Only the per-label counts are needed; the label names are not returned.
    _, label_counts = np.unique(labels, return_counts=True)
    return label_counts / len(labels)

In [40]:
# compute_prior_probabilities orders its result by np.unique of the labels,
# so index 0 is the "no" class and index 1 is the "yes" class.
prior_probablity = compute_prior_probabilities(train_data)
print("P(‘Play Tennis’ = No)", prior_probablity[0])
print("P(‘Play Tennis’ = Yes)", prior_probablity[1])

P(‘Play Tennis’ = No) 0.4
P(‘Play Tennis’ = Yes) 0.6


In [42]:
import numpy as np

def compute_conditional_probabilities(train_data):
    """
    Estimate P(feature = value | class) for every feature column.

    Parameters:
        train_data (numpy.ndarray): 2D array; every column except the last is
            a feature, and the last column holds the class labels.

    Returns:
        conditional_probs (list of np.ndarray):
            One table per feature with shape (n_classes, n_feature_values).
            Rows follow the order of ``np.unique`` over the class labels,
            columns follow the order of ``np.unique`` over that feature.
        feature_vals (list of np.ndarray):
            The unique values of each feature, matching the table columns.
    """
    labels = train_data[:, -1]
    classes = np.unique(labels)
    # One boolean row mask per class, built once and reused for every feature.
    class_masks = [labels == cls for cls in classes]

    feature_vals = []
    conditional_probs = []

    # Transposing the feature part lets us walk the data column by column.
    for column in train_data[:, :-1].T:
        values = np.unique(column)
        feature_vals.append(values)

        table = np.zeros((len(classes), len(values)))
        for row, mask in enumerate(class_masks):
            in_class = column[mask]
            # Fraction of this class's samples that take each feature value.
            table[row] = [
                np.sum(in_class == value) / len(in_class) for value in values
            ]

        conditional_probs.append(table)

    return conditional_probs, feature_vals


In [43]:
# Only the per-feature value lists are needed here; the probability tables
# returned alongside them are discarded.
_, feature_values = compute_conditional_probabilities(train_data)
# Loop rather than one hard-coded print per feature, so the cell keeps working
# if the dataset gains or loses feature columns.
for feature_number, values in enumerate(feature_values, start=1):
    print(f"x{feature_number} = ", values)

x1 =  ['Overcast' 'Rain' 'Sunny']
x2 =  ['Cool' 'Hot' 'Mild']
x3 =  ['High' 'Normal']
x4 =  ['Strong' 'Weak']


In [44]:
def train_naive_bayes(train_data):
    """Compute the quantities a Naive Bayes classifier needs from the training data.

    Parameters:
        train_data (numpy.ndarray): 2D array where the last column contains class labels.

    Returns:
        tuple: ``(priors, likelihoods, feature_values)`` — the result of
        ``compute_prior_probabilities`` followed by the two results of
        ``compute_conditional_probabilities`` (per-feature probability tables
        and the unique values of each feature).
    """
    likelihoods, feature_values = compute_conditional_probabilities(train_data)
    priors = compute_prior_probabilities(train_data)
    return priors, likelihoods, feature_values

In [45]:
# Train on the full dataset. Note: `feature_names` actually holds the unique
# values of each feature, as returned by compute_conditional_probabilities.
prior_probs, conditional_probs, feature_names = train_naive_bayes(train_data)

In [None]:
def predict_tennis(
    X, prior_probabilities, conditional_probabilities, feature_names
):
    class_labels = train_data[:, -1]
    class_names = np.unique(class_labels)