<a href="https://colab.research.google.com/github/Shreyacy/ML-LABB/blob/main/ml_lab_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
import numpy as np
from collections import Counter


file_path = "/content/WEATHER.csv"
df = pd.read_csv(file_path)

print("Dataset Loaded Successfully:\n")
print(df.head())
def entropy(data):
    """Calculate entropy of the target column."""
    labels = data.iloc[:, -1]
    label_counts = Counter(labels)
    total_instances = len(data)

    entropy_value = -sum((count / total_instances) * np.log2(count / total_instances) for count in label_counts.values())
    return entropy_value
dataset_entropy = entropy(df)
print(f"\nEntropy of the dataset: {dataset_entropy:.4f}")

def entropy_of_attribute(data, attribute):
    """Calculate entropy for an attribute based on the target column."""
    attribute_values = data[attribute].unique()
    entropy_values = {}

    for value in attribute_values:
        subset = data[data[attribute] == value]
        entropy_values[value] = entropy(subset)

    return entropy_values


ignore_columns = ["Day"]  # Ignore specific columns if needed
attribute_entropies = {}

for attr in df.columns[:-1]:  # Exclude target column
    if attr in ignore_columns:
        continue

    attribute_entropies[attr] = entropy_of_attribute(df, attr)
    print(f"\nAttribute: {attr}")
    for val, ent in attribute_entropies[attr].items():
        print(f" - {val}: {ent:.4f}")

# Step 6: Function to calculate information gain
def information_gain(data, attribute):
    """Calculate information gain for a given attribute."""
    total_entropy = entropy(data)

    attribute_values = data[attribute].unique()
    total_instances = len(data)

    weighted_entropy = 0
    for value in attribute_values:
        subset = data[data[attribute] == value]
        prob = len(subset) / total_instances
        weighted_entropy += prob * entropy(subset)

    gain = total_entropy - weighted_entropy
    return gain

# Step 7: Compute information gain for each attribute
attributes = df.columns[:-1]  # Exclude target column
gains = {attr: information_gain(df, attr) for attr in attributes if attr not in ignore_columns}

print("\nInformation Gain for Each Attribute:")
for attr, gain in gains.items():
    print(f"{attr}: {gain:.4f}")

# Step 8: Function to build the ID3 decision tree
def id3(data, features, target):
    """Recursively builds a decision tree using ID3."""
    # If all target values are the same, return the value (pure leaf node)
    if len(np.unique(data[target])) == 1:
        return np.unique(data[target])[0]

    # If no features left, return the most common target value
    if len(features) == 0:
        return data[target].mode()[0]

    # Select the best feature using information gain
    best_feature = max(features, key=lambda attr: information_gain(data, attr))
    tree = {best_feature: {}}

    # Create a subtree for each unique value of the best feature
    for value in np.unique(data[best_feature]):
        subset = data[data[best_feature] == value]
        subtree = id3(subset, [f for f in features if f != best_feature], target)
        tree[best_feature][value] = subtree

    return tree

# Step 9: Train the ID3 decision tree
features = list(df.columns[:-1])  # Exclude target column
target = df.columns[-1]  # Target column
decision_tree = id3(df, features, target)

# Step 10: Print the Decision Tree in a structured way
def print_tree(tree, depth=0):
    """Recursively prints the decision tree row-wise with proper indentation."""
    if not isinstance(tree, dict):
        print("  " * depth + f"⮕ Decision: {tree}")
        return

    for attribute, branches in tree.items():
        print("  " * depth + f"[{attribute}]")
        for value, subtree in branches.items():
            print("  " * (depth + 1) + f"→ {value}:")
            print_tree(subtree, depth + 2)

print("\n=== Decision Tree (Row-wise) ===")
print_tree(decision_tree)


Dataset Loaded Successfully:

   DAY   OUTLOOK TEMPERTATURE HUMIDITY    WIND DECISION
0    1     SUNNY          HOT     HIGH    WEAK       NO
1    2     SUNNY         HOIT     HIGH  STRONG       NO
2    3  OVERCAST          HOT     HIGH    WEAK      YES
3    4      RAIN         MILD     HIGH    WEAK      YES
4    5      RAIN         COOL   NORMAL    WEAK      YES

Entropy of the dataset: 0.9403

Attribute: DAY
 - 1: -0.0000
 - 2: -0.0000
 - 3: -0.0000
 - 4: -0.0000
 - 5: -0.0000
 - 6: -0.0000
 - 7: -0.0000
 - 8: -0.0000
 - 9: -0.0000
 - 10: -0.0000
 - 11: -0.0000
 - 12: -0.0000
 - 13: -0.0000
 - 14: -0.0000

Attribute: OUTLOOK
 - SUNNY: 0.9710
 - OVERCAST: -0.0000
 - RAIN: 0.9710

Attribute: TEMPERTATURE
 - HOT: 0.9183
 - HOIT: -0.0000
 - MILD: 0.9183
 - COOL: 0.8113

Attribute: HUMIDITY
 - HIGH: 0.9852
 - NORMAL: 0.5917

Attribute: WIND
 - WEAK: 0.8113
 - STRONG: 1.0000

Information Gain for Each Attribute:
DAY: 0.9403
OUTLOOK: 0.2467
TEMPERTATURE: 0.1182
HUMIDITY: 0.1518
WIND: 0.0481