**NAMA : DWIFI PARIZZA IBRAHIM**

**NPM  : G1A021092**

**KELAS: INFORMATIKA B**

**MK   : Machine Learning**

**Kode 1: Implementasi Robinson-Foulds Distance**

In [None]:
import numpy as np

class TreeNode:
    """A labelled node of a rooted tree with an ordered list of children.

    Args:
        label: Label stored on the node.
        children: Optional iterable of child nodes to attach at construction
            time. The items are copied into a new list. When omitted, the
            node starts with no children.
    """

    def __init__(self, label, children=None):
        self.label = label
        # Always build a fresh list so that no two nodes share one children
        # list and a list passed in by the caller is never aliased.
        self.children = [] if children is None else list(children)

    def __repr__(self):
        return f"{type(self).__name__}({self.label!r}, children={self.children!r})"

def robinson_foulds_distance(tree1, tree2):
    """Return half the number of leaf labels found in only one of the trees.

    The leaf labels of both trees are collected and the symmetric difference
    of the two label collections is taken; the result is the size of that
    difference divided by 2.0.

    Note: only the leaf labels are compared. The way the leaves are grouped
    under internal nodes does not influence the value.

    Args:
        tree1: Root node of the first tree.
        tree2: Root node of the second tree.

    Returns:
        The distance as a float.
    """
    unshared_labels = symmetric_set_difference(
        get_leaf_labels(tree1),
        get_leaf_labels(tree2),
    )
    return len(unshared_labels) / 2.0

def get_leaf_labels(tree):
    """Return the labels of all leaves below ``tree`` in left-to-right order.

    A node without children is itself a leaf, so its own label is returned
    as a one-element list.

    Args:
        tree: Node exposing ``label`` and ``children`` attributes.

    Returns:
        A list of leaf labels.
    """
    if tree.children:
        # Flatten the per-child results while keeping the child order.
        return [
            label
            for child in tree.children
            for label in get_leaf_labels(child)
        ]
    return [tree.label]

def symmetric_set_difference(set1, set2):
    """Return the items that occur in exactly one of the two collections.

    Both arguments are converted to sets first, so duplicates are ignored.

    Args:
        set1: First iterable of hashable items.
        set2: Second iterable of hashable items.

    Returns:
        A list of the items; its order is the iteration order of the
        resulting set.
    """
    first, second = set(set1), set(set2)
    return list(first ^ second)

# Example trees for the demonstration: both roots are labelled "A" but the
# leaf sets are disjoint, so all four leaf labels are in the symmetric
# difference.
tree1 = TreeNode("A")
tree1.children = [TreeNode("B"), TreeNode("C")]

tree2 = TreeNode("A")
tree2.children = [TreeNode("D"), TreeNode("E")]

# `distance` only exists as a local inside robinson_foulds_distance, so it has
# to be computed here before it can be printed.
distance = robinson_foulds_distance(tree1, tree2)
print(f"\nRobinson-Foulds Distance: {distance}")


Robinson-Foulds Distance: 2.0


**Kode 2: Pengelompokan K-Means**

In [None]:
import numpy as np
from sklearn.cluster import KMeans

class TreeNode:
    """A labelled node of a rooted tree with an ordered list of children.

    Args:
        label: Label stored on the node.
        children: Optional iterable of child nodes to attach at construction
            time. The items are copied into a new list. When omitted, the
            node starts with no children.
    """

    def __init__(self, label, children=None):
        self.label = label
        # Always build a fresh list so that no two nodes share one children
        # list and a list passed in by the caller is never aliased.
        self.children = [] if children is None else list(children)

    def __repr__(self):
        return f"{type(self).__name__}({self.label!r}, children={self.children!r})"

def compute_distance_matrix(trees):
    """Build the pairwise distance matrix for a sequence of trees.

    ``robinson_foulds_distance`` is evaluated once for every unordered pair
    of distinct trees and the value is written to both mirrored cells. The
    diagonal keeps the zeros the matrix is initialised with.

    Args:
        trees: Sequence of tree root nodes.

    Returns:
        A ``len(trees)`` x ``len(trees)`` NumPy array of floats.
    """
    size = len(trees)
    matrix = np.zeros((size, size))

    for row, left_tree in enumerate(trees):
        # Only the upper triangle is computed; the lower one is mirrored.
        for col in range(row + 1, size):
            pair_distance = robinson_foulds_distance(left_tree, trees[col])
            matrix[row, col] = matrix[col, row] = pair_distance

    return matrix

def kmeans_clustering(distance_matrix, num_clusters):
    """Cluster the rows of ``distance_matrix`` with scikit-learn's K-Means.

    The matrix is handed to ``KMeans.fit_predict`` unchanged, i.e. each row
    is used as the feature vector of one sample. No ``random_state`` is
    passed to ``KMeans``.

    Args:
        distance_matrix: 2-D array with one row per item to cluster.
        num_clusters: Value passed to ``KMeans`` as ``n_clusters``.

    Returns:
        The result of ``fit_predict`` (one cluster label per row).
    """
    model = KMeans(n_clusters=num_clusters)
    return model.fit_predict(distance_matrix)

# Four small example trees, each a root with two leaf children.
tree1 = TreeNode("A")
tree1.children = [TreeNode(label) for label in ("B", "C")]

tree2 = TreeNode("A")
tree2.children = [TreeNode(label) for label in ("D", "E")]

tree3 = TreeNode("X")
tree3.children = [TreeNode(label) for label in ("Y", "Z")]

tree4 = TreeNode("X")
tree4.children = [TreeNode(label) for label in ("W", "Y")]

# The trees to be clustered, in the order used for the "Tree N" numbering.
tree_list = [tree1, tree2, tree3, tree4]

# Pairwise distances between all trees.
distance_matrix = compute_distance_matrix(tree_list)

# Number of groups to split the trees into (adjust as needed).
num_clusters = 2

# One cluster label per tree, produced by K-Means.
clusters = kmeans_clustering(distance_matrix, num_clusters)

# Report the members of every cluster using 1-based numbering.
for i in range(num_clusters):
    cluster_indices = np.where(clusters == i)[0]
    print(f"Cluster {i + 1}:")
    for idx in cluster_indices:
        print(f"Tree {idx + 1}")
    print("\n")


Cluster 1:
Tree 1
Tree 2


Cluster 2:
Tree 3
Tree 4






**Kode 3: Algoritma Konsensus**

In [None]:
import numpy as np
from sklearn.cluster import KMeans

class TreeNode:
    """A labelled node of a rooted tree with an ordered list of children.

    Args:
        label: Label stored on the node.
        children: Optional iterable of child nodes to attach at construction
            time. The items are copied into a new list. When omitted, the
            node starts with no children.
    """

    def __init__(self, label, children=None):
        self.label = label
        # Always build a fresh list so that no two nodes share one children
        # list and a list passed in by the caller is never aliased.
        self.children = [] if children is None else list(children)

    def __repr__(self):
        return f"{type(self).__name__}({self.label!r}, children={self.children!r})"

def robinson_foulds_distance(tree1, tree2):
    """Return half the number of leaf labels found in only one of the trees.

    The value is computed from the symmetric difference of the two trees'
    leaf-label collections, divided by 2.0.

    Note: only the leaf labels are compared. The arrangement of the leaves
    under internal nodes does not influence the value.

    Args:
        tree1: Root node of the first tree.
        tree2: Root node of the second tree.

    Returns:
        The distance as a float.
    """
    first_labels = get_leaf_labels(tree1)
    second_labels = get_leaf_labels(tree2)
    mismatched = symmetric_set_difference(first_labels, second_labels)
    return len(mismatched) / 2.0

def get_leaf_labels(tree):
    """Return the labels of all leaves below ``tree`` in left-to-right order.

    A node without children is itself a leaf, so its own label is returned
    as a one-element list.

    Args:
        tree: Node exposing ``label`` and ``children`` attributes.

    Returns:
        A list of leaf labels.
    """
    if tree.children:
        # Flatten the per-child results while keeping the child order.
        return [
            label
            for child in tree.children
            for label in get_leaf_labels(child)
        ]
    return [tree.label]

def symmetric_set_difference(set1, set2):
    """Return the items that occur in exactly one of the two collections.

    Both arguments are converted to sets first, so duplicates are ignored.

    Args:
        set1: First iterable of hashable items.
        set2: Second iterable of hashable items.

    Returns:
        A list of the items; its order is the iteration order of the
        resulting set.
    """
    first, second = set(set1), set(set2)
    return list(first ^ second)

def compute_distance_matrix(trees):
    """Build the pairwise distance matrix for a sequence of trees.

    ``robinson_foulds_distance`` is evaluated once for every unordered pair
    of distinct trees and the value is written to both mirrored cells. The
    diagonal keeps the zeros the matrix is initialised with.

    Args:
        trees: Sequence of tree root nodes.

    Returns:
        A ``len(trees)`` x ``len(trees)`` NumPy array of floats.
    """
    size = len(trees)
    matrix = np.zeros((size, size))

    for row, left_tree in enumerate(trees):
        # Only the upper triangle is computed; the lower one is mirrored.
        for col in range(row + 1, size):
            pair_distance = robinson_foulds_distance(left_tree, trees[col])
            matrix[row, col] = matrix[col, row] = pair_distance

    return matrix

def kmeans_clustering(distance_matrix, num_clusters):
    """Cluster the rows of ``distance_matrix`` with scikit-learn's K-Means.

    The matrix is handed to ``KMeans.fit_predict`` unchanged, i.e. each row
    is used as the feature vector of one sample. No ``random_state`` is
    passed to ``KMeans``.

    Args:
        distance_matrix: 2-D array with one row per item to cluster.
        num_clusters: Value passed to ``KMeans`` as ``n_clusters``.

    Returns:
        The result of ``fit_predict`` (one cluster label per row).
    """
    return KMeans(n_clusters=num_clusters).fit_predict(distance_matrix)

def consensus_tree(cluster_trees):
    """Build a (simple) consensus tree for one cluster of trees.

    The nodes selected by ``majority_vote_nodes`` are attached directly as
    the children of a new root node labelled "Consensus".

    Args:
        cluster_trees: The root nodes of the trees in the cluster.

    Returns:
        The new "Consensus" root node.
    """
    voted_nodes = majority_vote_nodes(cluster_trees)
    root = TreeNode("Consensus")
    root.children = voted_nodes
    return root

def majority_vote_nodes(trees):
    """Return one node for each label that occurs most often in ``trees``.

    Every node of every tree casts one vote for its label. The labels that
    reach the highest vote count win, and for each winning label the first
    node encountered with that label is returned.

    Args:
        trees: Iterable of tree root nodes; labels must be hashable.

    Returns:
        A list of nodes in first-seen order of their labels. The list is
        empty when ``trees`` is empty.
    """
    # Votes are tallied per label, not per node object: TreeNode defines no
    # __eq__/__hash__, so distinct node objects never compare equal and
    # counting the objects themselves would give every node exactly one vote.
    label_counts = {}
    first_node_for_label = {}
    for tree in trees:
        for node in get_all_nodes(tree):
            label_counts[node.label] = label_counts.get(node.label, 0) + 1
            first_node_for_label.setdefault(node.label, node)

    if not label_counts:
        return []

    # Computed once instead of once per candidate.
    top_count = max(label_counts.values())
    return [
        first_node_for_label[label]
        for label, count in label_counts.items()
        if count == top_count
    ]

def get_all_nodes(tree):
    """Return ``tree`` and all of its descendants in pre-order.

    Args:
        tree: Node exposing a ``children`` attribute.

    Returns:
        A list starting with ``tree`` itself, followed by the nodes of each
        child subtree in child order.
    """
    descendants = [
        node
        for child in tree.children
        for node in get_all_nodes(child)
    ]
    return [tree, *descendants]

def is_binary_tree(tree):
    """Check whether every node of ``tree`` has at most two children.

    Args:
        tree: Root node exposing a ``children`` attribute, or ``None`` for
            an empty tree.

    Returns:
        ``True`` if no node in the tree has more than two children (an
        empty tree counts as binary), ``False`` otherwise.
    """
    if tree is None:
        return True

    # Iterative depth-first walk; stop at the first node that has too many
    # children.
    pending = [tree]
    while pending:
        node = pending.pop()
        if len(node.children) > 2:
            return False
        pending.extend(node.children)
    return True

def display_tree(node, level=0, prefix="Root: "):
    """Print ``node`` and its subtree, one node per line.

    Each line is indented by four spaces per level and consists of the
    prefix followed by the node's label. Descendants are printed with the
    prefix "Child: ".

    Args:
        node: Node to print; nothing is printed when it is ``None``.
        level: Depth of ``node``, used for the indentation.
        prefix: Text printed in front of the label of ``node``.
    """
    if node is None:
        return

    indent = " " * (level * 4)
    print(indent + prefix + node.label)

    child_level = level + 1
    for child in node.children:
        display_tree(child, child_level, "Child: ")

# Four small example trees, each a root with two leaf children.
tree1 = TreeNode("A")
tree1.children = [TreeNode(label) for label in ("B", "C")]

tree2 = TreeNode("A")
tree2.children = [TreeNode(label) for label in ("D", "E")]

tree3 = TreeNode("X")
tree3.children = [TreeNode(label) for label in ("Y", "Z")]

tree4 = TreeNode("X")
tree4.children = [TreeNode(label) for label in ("W", "Y")]

# The trees to be clustered.
tree_list = [tree1, tree2, tree3, tree4]

# Pairwise distances between all trees.
distance_matrix = compute_distance_matrix(tree_list)

# Number of groups to split the trees into (adjust as needed).
num_clusters = 2

# One cluster label per tree, produced by K-Means.
clusters = kmeans_clustering(distance_matrix, num_clusters)

# For every cluster: build its consensus tree and print it.
for i in range(num_clusters):
    print(f"Cluster {i + 1}:")

    # Positions of the trees that were assigned to the current cluster.
    cluster_indices = np.where(clusters == i)[0]
    cluster_trees = [tree_list[idx] for idx in cluster_indices]

    consensus_tree_root = consensus_tree(cluster_trees)

    print("Consensus Tree:")
    display_tree(consensus_tree_root)
    print("Is Binary Tree:", is_binary_tree(consensus_tree_root))
    print("\n")


Cluster 1:
Consensus Tree:
Root: Consensus
    Child: Z
    Child: Y
    Child: W
    Child: X
        Child: Y
        Child: Z
    Child: X
        Child: W
        Child: Y
    Child: Y
Is Binary Tree: True


Cluster 2:
Consensus Tree:
Root: Consensus
    Child: A
        Child: B
        Child: C
    Child: A
        Child: D
        Child: E
    Child: D
    Child: C
    Child: E
    Child: B
Is Binary Tree: True




