# **Homework 4 Practicum**
# Version 1.0 (Nov 1, 2024)

<font color='blue'> TODO:</font> Name (JHED)

Instructions:
This notebook has two parts:

Part 1: Implement belief propagation (sum-product message passing).

Part 2: Implement KMeans Clustering.

Please note that this practicum only requires code implementation; there are none of the usual written questions.

Please <font color='blue'>make a copy of this notebook in your own drive</font> before you make any edits. You can do so through File -> Save a copy in Drive

NOTE: Submit the notebook on Gradescope. You can run the autograder as many times as needed.



In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import silhouette_score
import random

# Seed both Python's built-in RNG and NumPy's global RNG with a fixed value
# so that repeated runs of the notebook draw the same random numbers.
random.seed(42)
np.random.seed(42)




# **PART I: Sum Product Message Passing**

Things to do in this part:
1. Implement the `send_messages` method of the `BeliefPropagation` class.


In this section, please implement belief propagation for trees.





In [None]:

class Node:
    """
    A single vertex of the tree that belief propagation runs on.

    Attributes:
        name (str): Label used when the node is printed.
        children (list): Child nodes, in the order they were attached.
        parent (Node): The node this one was attached to, or None.
        potential (np.ndarray): The node's potential; [1.0, 1.0] when none is given.
        messages (dict): Scratch storage for messages received from other nodes.
    """
    def __init__(self, name, potential=None):
        # Fall back to a uniform two-state potential when the caller gives none.
        if potential is None:
            potential = np.array([1.0, 1.0])

        self.name = name
        self.children = []
        self.parent = None
        self.potential = potential
        self.messages = {}  # Optional: use to store messages from other nodes

    def __str__(self):
        """
        Formats the node as "<name>: <potential>".

        Returns:
            str: The node's name followed by its potential.
        """
        return f"{self.name}: {self.potential}"

    def add_child(self, child_node):
        """
        Attaches child_node beneath this node and records this node as its parent.

        Parameters:
            child_node (Node): The node to attach.
        """
        self.children.append(child_node)
        child_node.parent = self


In [None]:
class BeliefPropagation:
    """
    Driver for sum-product message passing over a tree of nodes.

    Attributes:
        root (Node): The node whose potential belief_propagation reports.
    """
    def __init__(self, root):
        """
        Stores the root of the tree to run on.

        Parameters:
            root (Node): The root node of the tree.
        """
        self.root = root

    def send_messages(self, node):
        """
        Message-passing step; left as an exercise to implement.

        belief_propagation calls this once, with the root node, and only
        when the root has at least one child.

        Parameters:
            node (Node): The node from which messages are sent.
        """
        # TODO ...WRITE YOUR CODE HERE...
        ...

    def belief_propagation(self):
        """
        Runs message passing from the root and reports the root's potential.

        A root without children has nothing to exchange, so send_messages is
        skipped and the root's potential is returned as it is.

        Returns:
            np.ndarray: The root's potential after message passing. This is
            meant to be the root marginal once send_messages is implemented.
        """
        if self.root.children:
            self.send_messages(self.root)
        return self.root.potential


In [None]:
def test_bp():
    """
    Builds a five-node tree, runs belief propagation on it, and prints the
    root's result together with its sum.
    """
    # Nodes: the root uses the default potential, the others are explicit.
    root = Node("Root", None)
    child1 = Node("Child1", potential=np.array([0.3, 0.7]))
    child2 = Node("Child2", potential=np.array([0.6, 0.4]))
    leaf1 = Node("Leaf1", potential=np.array([0.8, 0.2]))
    leaf2 = Node("Leaf2", potential=np.array([0.8, 0.2]))

    # Edges as (parent, child) pairs: both leaves hang off child1.
    edges = (
        (root, child1),
        (root, child2),
        (child1, leaf1),
        (child1, leaf2),
    )
    for parent, child in edges:
        parent.add_child(child)

    # Run belief propagation from the root and report what it returns.
    root_marginal = BeliefPropagation(root).belief_propagation()
    print("Marginal at the root:", root_marginal)
    print("Sum of the marginal:", np.sum(root_marginal))

# Run the belief-propagation example when this cell executes.
test_bp()


# **PART II: KMeans Clustering**
Things to do in this part:
1. Implement the `fit` and `predict` methods of the `KMeans` class.



### Synthetic Dataset Generation


In [None]:
# Create a synthetic dataset with specific number of clusters
def create_dataset(num_clusters=4, num_samples=500):
    """
    Generates a blob dataset and splits it 70/30 into train and test sets.

    Both the blob generation and the split use random_state=42, so the same
    arguments always produce the same data.

    Parameters:
        num_clusters (int): Number of blob centers to generate. Defaults to 4.
        num_samples (int): Total number of points to generate. Defaults to 500.

    Returns:
        tuple: (X_train, X_test, y_train, y_test).
    """
    points, labels = make_blobs(
        n_samples=num_samples,
        centers=num_clusters,
        cluster_std=1.0,
        random_state=42,
    )
    # Hold out 30% of the points as the test set.
    train_points, test_points, train_labels, test_labels = train_test_split(
        points,
        labels,
        test_size=0.3,
        random_state=42,
    )
    return train_points, test_points, train_labels, test_labels



In [None]:
class KMeans:
    """
    KMeans clustering algorithm implementation.

    Parameters:
        n_clusters (int): The number of clusters to form. Defaults to 4.
        max_iters (int): The maximum number of iterations for the algorithm. Defaults to 1000.

    Attributes:
        n_clusters (int): The number of clusters.
        max_iters (int): The maximum number of iterations.
        centroids (np.ndarray): The cluster centroids; None until fit sets them.
    """
    def __init__(self, n_clusters=4, max_iters=1000):
        """
        Initializes the KMeans object.

        Parameters:
            n_clusters (int): The number of clusters to form. Defaults to 4.
            max_iters (int): The maximum number of iterations for the algorithm. Defaults to 1000.
        """
        self.n_clusters = n_clusters
        self.max_iters = max_iters
        self.centroids = None

    def fit(self, X):
        """
        Fits the KMeans model to the data.

        visualize reads self.centroids as a 2-D array, so this method is
        expected to set it.

        Parameters:
            X (np.ndarray): The input data.
        """
        # TODO ...WRITE YOUR CODE HERE...
        ...

    def predict(self, X):
        """
        Predicts the cluster assignments for new data points.

        Parameters:
            X (np.ndarray): The new data points.

        Returns:
            np.ndarray: The cluster assignments for each data point. accuracy
            accepts either a 1-D array of cluster indices or a 2-D
            (points x clusters) array that it reduces with argmax.
        """
        # TODO ...WRITE YOUR CODE HERE...
        ...

    def accuracy(self, X, true_labels):
        """
        Calculates the silhouette score of the clustering.

        The silhouette score is a measure of how similar an object is to its own cluster
        compared to other clusters. A high silhouette score indicates that the objects
        are well matched to their own cluster and poorly matched to neighboring clusters.
        The score ranges from -1 to 1, where a score close to 1 indicates strong
        clustering.

        Despite the method name, this is not a comparison against ground truth:
        the score is computed from X and the predicted clusters only.

        Parameters:
            X (np.ndarray): The data points.
            true_labels (np.ndarray): The true cluster labels. Currently unused;
                kept so existing callers do not break.

        Returns:
            float: The silhouette score.
        """
        cluster_assignments = np.asarray(self.predict(X))
        if cluster_assignments.ndim == 1:
            # predict already returned one cluster index per point; taking
            # argmax over axis 1 of a 1-D array would raise an AxisError.
            labels = cluster_assignments
        else:
            # Per-cluster scores / one-hot rows: pick the best cluster per point.
            labels = np.argmax(cluster_assignments, axis=1)
        return silhouette_score(X, labels)

    def visualize(self, X, title="K-means Clustering"):
        """
        Plots the data points together with the fitted centroids.

        Only the first two feature columns of X and of the centroids are drawn.
        Requires self.centroids to have been set (it is None before fitting).

        Parameters:
            X (np.ndarray): The data points.
            title (str): The title of the plot. Defaults to "K-means Clustering".
        """
        plt.figure(figsize=(10, 6))
        plt.scatter(X[:, 0], X[:, 1], c='lightgray', marker='o', label='Data points')
        plt.scatter(self.centroids[:, 0], self.centroids[:, 1], c='red', marker='x', s=200, label='Centroids')
        plt.title(title)
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.legend()
        plt.show()

In [None]:
# Simple test: generate data, then instantiate and fit KMeans
def test_kmeans(num_clusters=4, visualize=True):
    """
    Fits KMeans on a synthetic dataset and scores it on both splits.

    Parameters:
        num_clusters (int): Number of blobs to generate and clusters to fit. Defaults to 4.
        visualize (bool): When True, plot both splits and print both scores. Defaults to True.

    Returns:
        tuple: (train score, test score) as returned by KMeans.accuracy.
    """
    X_train, X_test, y_train, y_test = create_dataset(num_clusters)

    kmeans = KMeans(num_clusters)
    kmeans.fit(X_train)

    # Silhouette scores for the training and test splits, in that order.
    train_accuracy = kmeans.accuracy(X_train, y_train)
    test_accuracy = kmeans.accuracy(X_test, y_test)

    if visualize:
        # One plot per split, training first; the scores are printed only in this mode.
        plots = (
            (X_train, "K-means Clustering (Training Set)"),
            (X_test, "K-means Clustering (Test Set)"),
        )
        for split, plot_title in plots:
            kmeans.visualize(split, title=plot_title)
        print(f'Silhouette Score (Training Set): {train_accuracy:.2f}')
        print(f'Silhouette Score (Test Set): {test_accuracy:.2f}')

    return train_accuracy, test_accuracy
# Run the K-means check with its default arguments (4 clusters, plots shown).
test_kmeans()

## Feedback

Please provide us with some feedback on how long each section, or this homework overall, took you. Any other feedback is also welcome.

## Submit
Great work! You're all done.

Make sure to submit this Python notebook. See the homework writeup for directions.