In [46]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import copy
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

# Load the loan table; the path is relative to the notebook's working directory.
dataset = pd.read_csv('loan.csv')

# Plain list-of-rows copy of the table, taken before any later cell re-encodes
# the DataFrame columns; the hand-written ID3 code works on these rows.
original_data = dataset.to_numpy().tolist()

# The last column is the class label; every column before it is a candidate
# split attribute.
target_attribute = dataset.columns[-1]
attributes = list(dataset.columns[:-1])

class Node:
    """One node of the ID3 decision tree.

    Attributes (all start as ``None``):
        value: attribute name for an internal node, class label for a leaf.
        decision: value of the parent's attribute that leads to this node.
        childs: list of child nodes, or ``None`` while the node is a leaf.
    """

    def __init__(self):
        # Every field starts unset; the tree builder fills them in afterwards.
        self.value = self.decision = self.childs = None


def entropy(data):
    """Return the base-2 Shannon entropy of the class labels in ``data``.

    Args:
        data: sequence of rows; the last element of each row is its label.

    Returns:
        The entropy in bits. When no label counts are produced the
        accumulator is returned unchanged, i.e. ``0``.
    """
    labels = [record[-1] for record in data]
    n_labels = len(labels)
    label_counts = pd.Series(labels).value_counts()

    # Relative frequency of every distinct label, in value_counts() order.
    probabilities = (occurrences / n_labels for occurrences in label_counts)

    bits = 0
    for p in probabilities:
        bits -= p * math.log2(p)
    return bits


def information_gain(data, attribute_index):
    """Return the information gain of splitting ``data`` on one column.

    Args:
        data: sequence of rows; the last element of each row is its label.
        attribute_index: position of the column to split on.

    Returns:
        Entropy of ``data`` minus the size-weighted entropy of the subsets
        obtained by grouping rows on the chosen column.
    """
    column = [record[attribute_index] for record in data]
    n_rows = len(column)

    # Size-weighted entropy of each partition, visited in value_counts() order.
    remainder = 0
    for level, level_count in pd.Series(column).value_counts().items():
        matching = [record for record in data if record[attribute_index] == level]
        remainder += (level_count / n_rows) * entropy(matching)

    return entropy(data) - remainder


def build_tree(data, available_attributes, min_gain=1e-12):
    """Recursively build an ID3 decision tree.

    Column positions are resolved through the module-level ``dataset``
    DataFrame (``dataset.columns.get_loc``), so ``data`` rows must use the
    same column order as ``dataset``.

    Args:
        data: non-empty sequence of rows; the last element of each row is
            its class label.
        available_attributes: names of the columns that may still be used
            for splitting on this branch.
        min_gain: a best information gain at or below this threshold is
            treated as "no useful split" and yields a majority-class leaf.
            The small default absorbs floating-point noise around zero.

    Returns:
        A ``Node``. Leaves carry a class label in ``value`` and keep
        ``childs`` as ``None``; internal nodes carry the split attribute
        name in ``value`` and one child per observed attribute value, with
        that value stored in the child's ``decision``.

    Raises:
        ValueError: if ``data`` is empty.
    """
    if len(data) == 0:
        raise ValueError("build_tree requires at least one row of data")

    target_values = [row[-1] for row in data]

    def majority_leaf():
        # Leaf labelled with the most frequent class; Series.mode() returns
        # the modes sorted, so ties resolve to the first label in sort order.
        leaf = Node()
        leaf.value = pd.Series(target_values).mode()[0]
        return leaf

    # Pure node: every row has the same label.
    if len(set(target_values)) == 1:
        leaf = Node()
        leaf.value = target_values[0]
        return leaf

    # Nothing left to split on.
    if not available_attributes:
        return majority_leaf()

    # Pick the attribute with the highest information gain; on ties the
    # attribute listed first wins because the comparison is strict.
    best_attribute_index = -1
    max_gain = -1
    for attr_name in available_attributes:
        column_index = dataset.columns.get_loc(attr_name)
        gain = information_gain(data, column_index)
        if gain > max_gain:
            max_gain = gain
            best_attribute_index = column_index

    # No attribute separates the classes any further.
    if max_gain <= min_gain:
        return majority_leaf()

    best_attribute_name = dataset.columns[best_attribute_index]

    root = Node()
    root.value = best_attribute_name
    root.childs = []

    remaining_attributes = [
        attr for attr in available_attributes if attr != best_attribute_name
    ]

    # Partition the rows in a single pass. A dict keeps first-appearance
    # order, so the child order is the same on every run (iterating a set of
    # strings is not stable across interpreter sessions).
    partitions = {}
    for row in data:
        partitions.setdefault(row[best_attribute_index], []).append(row)

    for value, subset in partitions.items():
        child_node = build_tree(subset, remaining_attributes, min_gain)
        child_node.decision = value
        root.childs.append(child_node)

    return root

def print_tree(node, indent=""):
    """Print the tree rooted at ``node`` in pre-order, two spaces per level.

    For each node this prints its ``decision`` (when set) and its ``value``;
    a node with children is followed by a ``Children:`` line and then each
    child, indented one level deeper.

    Args:
        node: root of the (sub)tree to print.
        indent: prefix placed in front of every line printed for ``node``.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are popped, and therefore printed, in their original order.
    pending = [(node, indent)]
    while pending:
        current, pad = pending.pop()

        if current.decision is not None:
            print(f"{pad}Decision: {current.decision}")

        print(f"{pad}Attribute/Leaf: {current.value}")

        kids = current.childs
        if kids:
            print(f"{pad}Children:")
            deeper = pad + "  "
            pending.extend((kid, deeper) for kid in list(kids)[::-1])



# Build the ID3 tree from the raw rows and show its structure.
id3_tree_root = build_tree(original_data, attributes)
print("ID3 Decision Tree for loan.csv:")
print_tree(id3_tree_root)

# Feature columns handed to the scikit-learn models; the label is the last column.
categorical_cols = ['Age', 'Job_Type', 'Income_Level', 'Credit_History','Loan_Amount']
target_col = dataset.columns[-1]

# Replace each feature column with integer codes. This overwrites `dataset`
# in place; `original_data` was copied out beforehand and keeps the raw values.
# The fitted encoders are kept so codes can be mapped back to the raw values.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    dataset[col] = le.fit_transform(dataset[col])
    label_encoders[col] = le

le_target = LabelEncoder()
dataset[target_col] = le_target.fit_transform(dataset[target_col])

X = dataset[categorical_cols]
y = dataset[target_col]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
# zero_division=0 reports 0 for a class that is never predicted, without the
# UndefinedMetricWarning that the default setting emits.
class_report = classification_report(y_test, y_pred, zero_division=0)

print("\nRandom Forest Classifier Results:")
print(f"Accuracy: {accuracy}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
# The report is a pre-formatted multi-line string: print it so the table is
# rendered, rather than echoing its repr with literal "\n" sequences.
print(class_report)


ID3 Decision Tree for loan.csv:
Attribute/Leaf: Age
Children:
  Decision: Senior
  Attribute/Leaf: Income_Level
  Children:
    Decision: Low
    Attribute/Leaf: No
    Decision: Medium
    Attribute/Leaf: Yes
    Decision: High
    Attribute/Leaf: Yes
  Decision: Young
  Attribute/Leaf: Job_Type
  Children:
    Decision: Salaried
    Attribute/Leaf: Income_Level
    Children:
      Decision: Low
      Attribute/Leaf: No
      Decision: High
      Attribute/Leaf: Yes
    Decision: Self_Employed
    Attribute/Leaf: Yes
    Decision: UnEmployed
    Attribute/Leaf: Yes
  Decision: Middle_Aged
  Attribute/Leaf: No

Random Forest Classifier Results:
Accuracy: 0.6666666666666666

Confusion Matrix:
[[0 1]
 [0 2]]

Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


'              precision    recall  f1-score   support\n\n           0       0.00      0.00      0.00         1\n           1       0.67      1.00      0.80         2\n\n    accuracy                           0.67         3\n   macro avg       0.33      0.50      0.40         3\nweighted avg       0.44      0.67      0.53         3\n'

In [43]:

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree


# CART: a scikit-learn decision tree using the Gini criterion, fitted on the
# train split and evaluated on the test split produced earlier in the notebook.
cart_classifier = DecisionTreeClassifier(criterion='gini', random_state=42)

cart_classifier.fit(X_train, y_train)

y_pred_cart = cart_classifier.predict(X_test)


accuracy_cart = accuracy_score(y_test, y_pred_cart)
conf_matrix_cart = confusion_matrix(y_test, y_pred_cart)
# zero_division=0 reports 0 for a class that is never predicted, without the
# UndefinedMetricWarning that the default setting emits.
class_report_cart = classification_report(y_test, y_pred_cart, zero_division=0)

print("\nCART Classifier Results:")
print(f"Accuracy: {accuracy_cart}")
print("\nConfusion Matrix:")
print(conf_matrix_cart)
print("\nClassification Report:")
# The report is a pre-formatted multi-line string: print it so the table is
# rendered, rather than echoing its repr with literal "\n" sequences.
print(class_report_cart)




CART Classifier Results:
Accuracy: 1.0

Confusion Matrix:
[[1 0]
 [0 2]]

Classification Report:


'              precision    recall  f1-score   support\n\n           0       1.00      1.00      1.00         1\n           1       1.00      1.00      1.00         2\n\n    accuracy                           1.00         3\n   macro avg       1.00      1.00      1.00         3\nweighted avg       1.00      1.00      1.00         3\n'

In [45]:


def predict(tree, row, attributes):
    """Classify one row by walking the decision tree down to a leaf.

    Args:
        tree: root node; a node whose ``childs`` is ``None`` is a leaf and
            its ``value`` is the predicted class label.
        row: attribute values, positionally aligned with ``attributes``.
        attributes: attribute names; an internal node's ``value`` is looked
            up in this list to find the position to read from ``row``.

    Returns:
        The class label of the leaf that is reached. When the row holds a
        value that no child of a node was built for, the walk continues
        through that node's first child, so the result is still a class
        label and never an attribute name. Returns ``None`` only if an
        internal node has an empty child list.

    Raises:
        ValueError: if an internal node names an attribute that is not in
            ``attributes``.
    """
    node = tree
    while node.childs is not None:
        if not node.childs:
            # Internal node without branches: there is nothing to predict.
            return None

        observed = row[attributes.index(node.value)]

        # Follow the matching branch, or fall back to the first child when
        # this value was not seen while the tree was built.
        node = next(
            (child for child in node.childs if child.decision == observed),
            node.childs[0],
        )

    return node.value


def evaluate_id3(tree, data, attributes):
    """Score the ID3 tree on labelled rows.

    Args:
        tree: root node of the decision tree, passed through to ``predict``.
        data: sequence of rows; the last element of each row is the true
            label and the preceding elements are the attribute values.
        attributes: attribute names, passed through to ``predict``.

    Returns:
        A tuple ``(accuracy, conf_matrix, all_labels)``:
        ``accuracy`` is the fraction of rows predicted correctly (``0`` for
        empty ``data``); ``all_labels`` is the sorted list of every label
        seen among true and predicted values; ``conf_matrix`` is a list of
        lists where ``conf_matrix[i][j]`` counts rows whose true label is
        ``all_labels[i]`` and predicted label is ``all_labels[j]``.
    """
    true_labels = [row[-1] for row in data]
    # The label is stripped off so predict() only ever sees attribute values.
    predicted_labels = [predict(tree, row[:-1], attributes) for row in data]

    distinct_labels = set(true_labels) | set(predicted_labels)
    try:
        all_labels = sorted(distinct_labels)
    except TypeError:
        # Labels of mixed types (for example a None prediction next to string
        # labels) cannot be compared directly; order them by their text form
        # and place None last.
        all_labels = sorted(
            distinct_labels, key=lambda label: (label is None, str(label))
        )
    label_position = {label: i for i, label in enumerate(all_labels)}

    correct_predictions = sum(
        1 for true, pred in zip(true_labels, predicted_labels) if true == pred
    )
    accuracy = correct_predictions / len(true_labels) if len(true_labels) > 0 else 0

    n_classes = len(all_labels)
    conf_matrix = [[0] * n_classes for _ in range(n_classes)]
    # Every label is a key of label_position by construction.
    for true, pred in zip(true_labels, predicted_labels):
        conf_matrix[label_position[true]][label_position[pred]] += 1

    return accuracy, conf_matrix, all_labels


# Score the hand-built ID3 tree. NOTE(review): `original_data` is the same
# list the tree was built from, so this is training-set accuracy.
accuracy_id3, conf_matrix_id3, labels_id3 = evaluate_id3(
    id3_tree_root, original_data, attributes
)

# Header, accuracy and matrix caption emitted with a single print call.
print(f"\nID3 Classifier Results:\nAccuracy: {accuracy_id3}\n\nConfusion Matrix:")
print("Labels:", labels_id3)
# One matrix row per line, in the order given by labels_id3.
for row in conf_matrix_id3:
    print(row)



ID3 Classifier Results:
Accuracy: 1.0

Confusion Matrix:
Labels: ['No', 'Yes']
[5, 0]
[0, 9]
