<a href="https://colab.research.google.com/github/JIJASH/data_mining_and_warehousing/blob/main/lab_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lab III: ID3, Naive Bayes, Backpropagation

In [14]:
import csv, math, random

# Load data from CSV file
def load_csv(filename):
    """Read a CSV file with a header row into a list of row dictionaries.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, keyed by the header names.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(filename, 'r', newline='') as file:
        return list(csv.DictReader(file))

# Candidate locations of the dataset, tried in order until one of them loads
paths = ['/content/drive/MyDrive/data_mining_and_warehousing/laptop_buy_data.csv']


data = None
for path in paths:
    try:
        data = load_csv(path)
    except FileNotFoundError:
        # This candidate does not exist; move on to the next one
        continue
    else:
        print(f"Data loaded from {path}: {len(data)} records")
        break

if data is None:
    print("CSV file not found. Please upload laptop_buy_data.csv or check the path.")
    # No candidate could be read: fall back to a small built-in sample so the
    # rest of the notebook still runs. Each tuple is one record, in the
    # column order given to zip().
    data = [
        dict(zip(('Age', 'Income', 'Student', 'Credit_Rating', 'Class'), values))
        for values in [
            ('Youth', 'Low', 'Yes', 'Excellent', 'Buy'),
            ('Youth', 'Low', 'No', 'Excellent', 'No'),
            ('Middle_Aged', 'Low', 'No', 'Excellent', 'No'),
            ('Senior', 'Low', 'No', 'Fair', 'No'),
            ('Middle_Aged', 'Low', 'Yes', 'Excellent', 'No'),
            ('Middle_Aged', 'Low', 'Yes', 'Fair', 'Buy'),
            ('Youth', 'Medium', 'No', 'Fair', 'Buy'),
            ('Senior', 'High', 'Yes', 'Fair', 'No'),
            ('Senior', 'Low', 'No', 'Excellent', 'Buy'),
            ('Youth', 'High', 'Yes', 'Fair', 'Buy'),
        ]
    ]
    print(f"Using sample data: {len(data)} records")

Data loaded from /content/drive/MyDrive/data_mining_and_warehousing/laptop_buy_data.csv: 20 records


In [15]:
# 1. ID3 Algorithm
def entropy(data):
    """Return the Shannon entropy, in bits, of the 'Class' labels in ``data``.

    ``data`` is a sequence of row mappings, each carrying a 'Class' key.
    An empty sequence yields 0.
    """
    # Tally how often each class label occurs
    label_counts = {}
    for record in data:
        label = record['Class']
        label_counts[label] = label_counts.get(label, 0) + 1

    size = len(data)
    bits = 0
    for occurrences in label_counts.values():
        share = occurrences / size
        bits -= share * math.log2(share)
    return bits

def info_gain(data, attr):
    """Return the information gain obtained by splitting ``data`` on ``attr``.

    The gain is the entropy of the whole set minus the size-weighted
    entropy of the subsets that share one value of ``attr``.
    """
    size = len(data)
    remainder = 0
    for observed in set(record[attr] for record in data):
        branch = [record for record in data if record[attr] == observed]
        branch_weight = len(branch) / size
        remainder += branch_weight * entropy(branch)
    return entropy(data) - remainder

def build_tree(data, attrs):
    """Recursively build an ID3 decision tree.

    Args:
        data: Sequence of row mappings; each has a 'Class' key plus one key
            per attribute named in ``attrs``.
        attrs: Attribute names still available for splitting.

    Returns:
        A class label (leaf), or a nested dict of the form
        ``{attribute: {value: subtree, ...}}``.

    Raises:
        ValueError: If ``data`` and ``attrs`` are both empty.
    """
    classes = [row['Class'] for row in data]
    if len(set(classes)) == 1:
        return classes[0]
    if not attrs:
        # Majority vote. Counting in a dict keeps first-seen order, so a tie
        # is always resolved in favour of the label that appears first in
        # ``data`` instead of depending on set iteration order, which varies
        # between runs because of string hash randomization.
        counts = {}
        for label in classes:
            counts[label] = counts.get(label, 0) + 1
        return max(counts, key=counts.get)

    best_attr = max(attrs, key=lambda attr: info_gain(data, attr))
    remaining = [a for a in attrs if a != best_attr]

    # Partition the rows by their value of the chosen attribute in one pass
    partitions = {}
    for row in data:
        partitions.setdefault(row[best_attr], []).append(row)

    return {
        best_attr: {
            value: build_tree(subset, remaining)
            for value, subset in partitions.items()
        }
    }

def predict_id3(tree, instance):
    """Classify ``instance`` by walking ``tree`` from the root to a leaf.

    A leaf is a class-label string; an internal node is a dict with a single
    attribute key mapping each attribute value to a subtree. A value of
    ``instance`` that has no branch in the tree raises ``KeyError``.
    """
    node = tree
    while not isinstance(node, str):
        split_attr = list(node.keys())[0]
        branches = node[split_attr]
        node = branches[instance[split_attr]]
    return node

# Fit the ID3 tree, then score it on the same records it was trained on
id3_tree = build_tree(data, ['Age', 'Income', 'Student', 'Credit_Rating'])
correct = len([row for row in data if predict_id3(id3_tree, row) == row['Class']])
print(f"ID3 Accuracy: {correct}/{len(data)} = {correct/len(data):.2%}")

ID3 Accuracy: 18/20 = 90.00%


In [16]:
# 2. Naive Bayes Algorithm
class NaiveBayes:
    """Categorical Naive Bayes classifier with add-one (Laplace) smoothing.

    Rows are mappings holding a 'Class' label and one entry per feature.
    """

    # Feature columns used when the constructor is called without arguments
    DEFAULT_FEATURES = ('Age', 'Income', 'Student', 'Credit_Rating')

    def __init__(self, features=None):
        """Create an untrained classifier.

        Args:
            features: Iterable of feature names to use. Defaults to
                ``DEFAULT_FEATURES``.
        """
        self.features = list(self.DEFAULT_FEATURES if features is None else features)
        self.priors, self.likelihoods = {}, {}

    def train(self, data):
        """Estimate class priors and per-feature likelihoods from ``data``.

        Any previously learned state is discarded, so retraining never keeps
        priors for classes that are absent from the new data.
        """
        self.priors, self.likelihoods = {}, {}
        total = len(data)

        # Priors. Classes are recorded in first-seen order, which makes the
        # tie-break in predict() deterministic.
        class_counts = {}
        for row in data:
            class_counts[row['Class']] = class_counts.get(row['Class'], 0) + 1
        for c, count in class_counts.items():
            self.priors[c] = count / total

        # The set of observed values per feature does not depend on the
        # class, so it is computed once up front.
        feature_values = {
            feature: set(row[feature] for row in data)
            for feature in self.features
        }

        # Likelihoods, smoothed so that a value never seen together with a
        # class still gets a non-zero probability.
        for c in self.priors:
            class_data = [row for row in data if row['Class'] == c]
            self.likelihoods[c] = {}
            for feature in self.features:
                values = feature_values[feature]
                denominator = len(class_data) + len(values)
                self.likelihoods[c][feature] = {
                    value: (sum(1 for row in class_data if row[feature] == value) + 1) / denominator
                    for value in values
                }

    def predict(self, instance):
        """Return the class with the highest prior-times-likelihood score.

        Feature values that were not present in the training data contribute
        a factor of 1e-6. When scores tie, the class seen first during
        training wins. Raises ``ValueError`` if the model has no classes.
        """
        scores = {}
        for c in self.priors:
            score = self.priors[c]
            for feature in self.features:
                score *= self.likelihoods[c][feature].get(instance[feature], 1e-6)
            scores[c] = score
        return max(scores, key=scores.get)

# Fit Naive Bayes, then score it on the same records it was trained on
nb = NaiveBayes()
nb.train(data)
correct = len([row for row in data if nb.predict(row) == row['Class']])
print(f"Naive Bayes Accuracy: {correct}/{len(data)} = {correct/len(data):.2%}")

Naive Bayes Accuracy: 12/20 = 60.00%


In [17]:
# 3. Backpropagation Neural Network
class NeuralNetwork:
    """Feed-forward network with one tanh hidden layer and one tanh output.

    Training is online backpropagation: the weights are updated after every
    single example.
    """

    def __init__(self, n_inputs=2, n_hidden=3, lr=0.5):
        """Initialise all weights and biases uniformly in [-1, 1].

        Args:
            n_inputs: Number of input units.
            n_hidden: Number of hidden units.
            lr: Learning rate applied to every weight update.
        """
        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        # w1[j][i] connects input j to hidden unit i; w2[i][0] connects
        # hidden unit i to the single output unit.
        self.w1 = [[random.uniform(-1, 1) for _ in range(n_hidden)] for _ in range(n_inputs)]
        self.w2 = [[random.uniform(-1, 1)] for _ in range(n_hidden)]
        self.b1 = [random.uniform(-1, 1) for _ in range(n_hidden)]
        self.b2 = [random.uniform(-1, 1)]
        self.lr = lr

    def tanh(self, x):
        """Activation function."""
        return math.tanh(x)

    def tanh_derivative(self, x):
        """Derivative of tanh evaluated at pre-activation ``x``."""
        return 1 - math.tanh(x) ** 2

    def forward(self, inputs):
        """Run a forward pass and return the scalar output in [-1, 1].

        The pre-activations (z1, z2) and activations (a1, a2) are stored on
        the instance because backward() reads them.
        """
        self.z1 = [
            self.b1[i] + sum(inputs[j] * self.w1[j][i] for j in range(self.n_inputs))
            for i in range(self.n_hidden)
        ]
        self.a1 = [self.tanh(z) for z in self.z1]
        self.z2 = [self.b2[0] + sum(self.a1[i] * self.w2[i][0] for i in range(self.n_hidden))]
        self.a2 = [self.tanh(self.z2[0])]
        return self.a2[0]

    def backward(self, inputs, target):
        """Apply one gradient step for the example last passed to forward().

        Must be called after forward() with the same ``inputs``.
        """
        output_error = (target - self.a2[0]) * self.tanh_derivative(self.z2[0])
        # Hidden deltas are computed before w2 is modified so that they use
        # the weights that produced the current output.
        hidden_errors = [
            output_error * self.w2[i][0] * self.tanh_derivative(self.z1[i])
            for i in range(self.n_hidden)
        ]

        for i in range(self.n_hidden):
            self.w2[i][0] += self.lr * output_error * self.a1[i]
        self.b2[0] += self.lr * output_error

        for i in range(self.n_inputs):
            for j in range(self.n_hidden):
                self.w1[i][j] += self.lr * hidden_errors[j] * inputs[i]
        for i in range(self.n_hidden):
            self.b1[i] += self.lr * hidden_errors[i]

    def train(self, training_data, epochs=1000):
        """Train on ``training_data`` for ``epochs`` full passes.

        Args:
            training_data: Iterable of ``(inputs, target)`` pairs; it is
                iterated once per epoch.
            epochs: Number of passes over the data.
        """
        for _ in range(epochs):
            for inputs, target in training_data:
                self.forward(inputs)
                self.backward(inputs, target)

# XOR truth table in bipolar (-1/+1) encoding, matching the tanh output range
xor_data = [([-1, -1], -1), ([-1, 1], 1), ([1, -1], 1), ([1, 1], -1)]
nn = NeuralNetwork()
nn.train(xor_data, 2000)

# Evaluate on the training patterns; the sign of the output is the prediction
correct = 0
for inputs, target in xor_data:
    output = nn.forward(inputs)
    if output > 0:
        predicted = 1
    else:
        predicted = -1
    if predicted == target:
        correct += 1
    print(f"Input: {inputs}, Target: {target}, Output: {output:.3f}, Predicted: {predicted}")
print(f"Neural Network Accuracy: {correct}/{len(xor_data)} = {correct/len(xor_data):.2%}")

Input: [-1, -1], Target: -1, Output: -0.992, Predicted: -1
Input: [-1, 1], Target: 1, Output: 0.989, Predicted: 1
Input: [1, -1], Target: 1, Output: 0.988, Predicted: 1
Input: [1, 1], Target: -1, Output: -0.992, Predicted: -1
Neural Network Accuracy: 4/4 = 100.00%
