In [2]:
import numpy as np
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
import networkx as nx

In [3]:
class Node:
    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None, gain_ratio=None):
        self.feature = feature
        self.threshold = threshold
        self.left = left
        self.right = right
        self.value = value
        self.gain_ratio = gain_ratio

In [4]:
class C45DecisionTree:
    def __init__(self, min_samples_split=2, max_depth=None):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.root = None
    def entropy(self,y):
        classes,counts= np.unique(y,return_counts = True)
        probs = counts/len(y)
        return -np.sum([p*np.log2(p) if p>0 else 0 for p in probs])
    def split_info(self, X, feature):
        unique_values, counts = np.unique(X[feature], return_counts=True)
        probabilities = counts / len(X)
        return -np.sum([p * np.log2(p) for p in probabilities if p > 0])
    def information_gain_ratio(self, X,y, feature):
        parent_entropy = self.entropy(y)
        weighted_entropy = 0
        values = np.unique(X[feature])
        for value in values:
            mask = X[feature] == value
            child = y[mask]
            weight = len(child)/len(y)
            weighted_entropy+=weight*self.entropy(child)
        split_info = self.split_info(X,feature)
        info_gain = parent_entropy-weighted_entropy
        if split_info==0:
            return 0
        return info_gain/split_info
    def build_tree(self,X,y):
        if len(np.unique(y))==1:
            return Node(value=y.iloc[0])
        gain_ratio = [(col , self.information_gain_ratio(X,y,col))for col in X.columns]
        best_feature = max(gain_ratio,key = lambda x:x[1])[0]

        node = Node(feature=best_feature)
        for value in [0,1]:
            mask = X[best_feature] == value
            if len(X[mask]) > 0:
                child = self.build_tree(X[mask].drop(best_feature,axis=1),y[mask])
                if value ==0 :
                    node.left = child
                else:
                    node.right = child
        return node
    def predict(self,node,x):
        if node.value is not None:
            return node.value
        return self.predict(node.left,x) if x[node.feature]==0 else self.predict(node.right,x)
    
        







In [14]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# # Example usage
# data = {
#     'Blood_Pressure': [1,0,1,0,1,0,1,0,1,0],  # High=1, Low=0
#     'Cholesterol': [1,0,1,0,1,0,1,0,1,0],     # High=1, Normal=0
#     'Diagnosis': [1,0,1,0,1,0,1,0,1,0]        # Sick=1, Healthy=0
# }
# df = pd.DataFrame(data)
df = pd.read_csv("/Users/ayushbhakat/Desktop/sem-5/ML/Endsem/decision.csv")
for col in df.columns:
    le = LabelEncoder()
    if col in ['Blood Pressure','Cholesterol','Diagnosis']:
        df[col] = le.fit_transform(df[col])
# print(df)
df['Age'] = (df['Age'] > 45).astype(int)
print(df)
X = df.drop('Diagnosis', axis=1)
y = df['Diagnosis']

model = C45DecisionTree()
tree = model.build_tree(X,y)
test_case = pd.Series({'Blood Pressure': 1, 'Cholesterol': 1})
print(test_case)
prediction = model.predict(tree, test_case)
print(f"Prediction: {'Sick' if prediction == 0 else 'Healthy'}")

   Patient ID  Age  Blood Pressure  Cholesterol  Diagnosis
0           1    0               0            0          1
1           2    0               1            1          0
2           3    1               0            0          1
3           4    0               1            1          0
4           5    1               0            0          1
5           6    1               1            1          0
6           7    0               0            0          1
7           8    0               1            1          0
8           9    1               0            0          1
9          10    0               1            1          0
Blood Pressure    1
Cholesterol       1
dtype: int64
Prediction: Sick
