<a href="https://colab.research.google.com/github/Shena10/CSI_PY_Project1/blob/main/Copy_of_Homework1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title Default title text
# Import necessary libraries
import pandas as pd
from sklearn.datasets import load_iris

# Load the Iris dataset
iris = load_iris()

# Convert the dataset to a DataFrame for easier exploration:
# one column per measured feature, plus the class label in 'target'.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target

# Display the first few rows of the dataset
print(df.head())

# Check for missing values (prints the count of nulls per column)
print(df.isnull().sum())

# Split data into features (X) and target (y)
X = df.drop('target', axis=1)
y = df['target']

# Split the data into training and testing sets (70% training, 30% testing).
# random_state pins the shuffle so the same rows land in each split on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Sanity-check the resulting split sizes
print("Training set size:", len(X_train))
print("Testing set size:", len(X_test))

# Import Decision Tree and Perceptron classifiers
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import Perceptron

# Initialize the models.
# The tree is given an explicit seed; the perceptron is left at library defaults.
dt_classifier = DecisionTreeClassifier(random_state=42)
perceptron_classifier = Perceptron()

# Train the classifiers on the training split only
dt_classifier.fit(X_train, y_train)
perceptron_classifier.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred_dt = dt_classifier.predict(X_test)
y_pred_perceptron = perceptron_classifier.predict(X_test)

# Import metrics
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix

# Decision Tree evaluation.
# average='macro' computes precision per class and takes the unweighted mean,
# so every class counts equally regardless of how many test samples it has.
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='macro')
confusion_dt = confusion_matrix(y_test, y_pred_dt)

print("Decision Tree Accuracy:", accuracy_dt)
print("Decision Tree Precision:", precision_dt)
print("Decision Tree Confusion Matrix:\n", confusion_dt)

# Perceptron evaluation (same three metrics, so the two models compare directly)
accuracy_perceptron = accuracy_score(y_test, y_pred_perceptron)
precision_perceptron = precision_score(y_test, y_pred_perceptron, average='macro')
confusion_perceptron = confusion_matrix(y_test, y_pred_perceptron)

print("Perceptron Accuracy:", accuracy_perceptron)
print("Perceptron Precision:", precision_perceptron)
print("Perceptron Confusion Matrix:\n", confusion_perceptron)

import numpy as np

# Function to calculate Gini impurity
def gini_impurity(labels):
    """Return the Gini impurity of a collection of class labels.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the fraction of
    ``labels`` belonging to class ``k``. It is 0 for a pure set and grows as
    the classes become more evenly mixed.

    Args:
        labels: Sized sequence or 1-D array of class labels.

    Returns:
        The impurity as a float; 0.0 when ``labels`` is empty.
    """
    n_labels = len(labels)
    if n_labels == 0:
        # A node without samples has nothing to be impure about. Without this
        # guard the formula below degenerates to 1 - 0 = 1.0, which exceeds
        # the largest impurity any real label set can reach.
        return 0.0
    _, counts = np.unique(labels, return_counts=True)
    probabilities = counts / n_labels
    return 1 - np.sum(probabilities ** 2)

# Function to split dataset
def split_dataset(X, y, feature_index, threshold):
    """Partition samples by comparing one feature column against a threshold.

    Args:
        X: 2-D NumPy array of samples (rows) by features (columns).
        y: 1-D NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` to test.
        threshold: Rows with a value ``<= threshold`` go left, rows with a
            value ``> threshold`` go right.

    Returns:
        Tuple ``(X_left, X_right, y_left, y_right)``.
    """
    feature_values = X[:, feature_index]
    # Two independent comparisons (rather than one mask and its negation) so a
    # row that satisfies neither test, e.g. a NaN, is left out of both sides.
    goes_left = feature_values <= threshold
    goes_right = feature_values > threshold
    return X[goes_left], X[goes_right], y[goes_left], y[goes_right]

# Function to find the best split
def best_split(X, y):
    """Find the feature/threshold pair with the lowest weighted Gini impurity.

    Every distinct value of every feature is tried as a threshold, using the
    same rule as ``split_dataset`` (``<= threshold`` goes left, ``> threshold``
    goes right). Candidates that leave one side empty are ignored: they do
    not separate anything, and accepting one would hand an empty child to the
    caller.

    Args:
        X: 2-D NumPy array of samples (rows) by features (columns).
        y: 1-D NumPy array of labels aligned with the rows of ``X``.

    Returns:
        Tuple ``(feature_index, threshold)`` of the best split, or
        ``(None, None)`` when no threshold separates the samples (no rows, no
        columns, or every feature is constant).
    """
    best_gini = float('inf')
    # Pre-bound so the function has a defined result when no candidate wins.
    best_feature, best_threshold = None, None
    n_samples = len(y)
    for feature_index in range(X.shape[1]):
        column = X[:, feature_index]
        for threshold in np.unique(column):
            # Only the labels are needed to score a split, so the rows of X
            # are not copied for every candidate.
            y_left = y[column <= threshold]
            y_right = y[column > threshold]
            if len(y_left) == 0 or len(y_right) == 0:
                continue
            gini = (len(y_left) / n_samples) * gini_impurity(y_left) + (len(y_right) / n_samples) * gini_impurity(y_right)
            # Strict comparison: on ties the earliest candidate is kept.
            if gini < best_gini:
                best_gini, best_feature, best_threshold = gini, feature_index, threshold
    return best_feature, best_threshold

# A very basic recursive tree algorithm (no pruning)
def decision_tree(X, y, depth=0, max_depth=3):
    """Recursively grow a classification tree (no pruning).

    Args:
        X: 2-D NumPy array of samples (rows) by features (columns).
        y: 1-D NumPy array of non-negative integer class labels (they are
            counted with ``np.bincount``), aligned with the rows of ``X``.
        depth: Depth of the node being built; callers normally leave it at 0.
        max_depth: Depth at which growth stops and a leaf is emitted.

    Returns:
        Either a leaf (the majority class label of ``y``) or a dict with keys
        ``'feature_index'``, ``'threshold'``, ``'left'`` and ``'right'``, where
        the two children are themselves trees of the same form.

    Raises:
        ValueError: If ``y`` is empty.
    """
    if len(y) == 0:
        raise ValueError("decision_tree requires at least one training sample")
    majority_class = np.argmax(np.bincount(y))
    # >= rather than == so a starting depth beyond max_depth still terminates.
    if len(np.unique(y)) == 1 or depth >= max_depth:
        return majority_class
    feature_index, threshold = best_split(X, y)
    if feature_index is None:
        # Defensive: treat a "no split available" answer as a leaf.
        return majority_class
    X_left, X_right, y_left, y_right = split_dataset(X, y, feature_index, threshold)
    if len(y_left) == 0 or len(y_right) == 0:
        # A split that sends every sample to one side separates nothing, and
        # recursing into the empty side would fail. Stop here instead.
        return majority_class
    left_subtree = decision_tree(X_left, y_left, depth + 1, max_depth)
    right_subtree = decision_tree(X_right, y_right, depth + 1, max_depth)
    return {'feature_index': feature_index, 'threshold': threshold, 'left': left_subtree, 'right': right_subtree}

# Example usage (for small datasets):
# Grow the from-scratch tree on the training split. `.values` passes plain
# NumPy arrays, which the helpers index positionally (e.g. X[:, feature_index]).
tree = decision_tree(X_train.values, y_train.values)

class PerceptronScratch:
    """Perceptron classifier trained with the classic error-driven rule.

    Labels drawn only from {0, 1} train a single binary unit. Any other label
    set is handled one-vs-rest: one binary unit per distinct class, each
    trained with the same rule against "this class vs. everything else", and
    prediction returns the class whose unit gives the largest linear score.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        """Store hyper-parameters; the model itself is created by ``fit``.

        Args:
            lr: Step size applied to every weight/bias correction.
            n_iters: Maximum number of passes over the training data.
        """
        self.lr = lr
        self.n_iters = n_iters
        # Learned parameters, populated by fit():
        #   binary fit      -> weights (n_features,),           bias scalar
        #   one-vs-rest fit -> weights (n_classes, n_features), bias (n_classes,)
        self.weights = None
        self.bias = None
        # Sorted distinct labels after a one-vs-rest fit; None after a binary fit.
        self.classes_ = None

    def fit(self, X, y):
        """Learn weights and bias from training data.

        Args:
            X: 2-D array-like of samples (rows) by features (columns).
            y: 1-D array-like of class labels aligned with the rows of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_features = X.shape[1]

        labels = np.unique(y)
        if set(labels.tolist()) <= {0, 1}:
            # Plain binary perceptron: the labels are the 0/1 targets.
            self.classes_ = None
            targets = y
            self.weights = np.zeros(n_features)
            self.bias = 0
        else:
            # One-vs-rest: row i of `targets` is the one-hot encoding of y[i],
            # i.e. the 0/1 target of every per-class unit for that sample.
            self.classes_ = labels
            targets = (y[:, None] == labels).astype(int)
            self.weights = np.zeros((len(labels), n_features))
            self.bias = np.zeros(len(labels))

        for _ in range(self.n_iters):
            corrected = False
            for idx, x_i in enumerate(X):
                linear_output = np.dot(self.weights, x_i) + self.bias
                y_predicted = self._activation_function(linear_output)

                # Zero where the unit was right, +/- lr where it was wrong.
                update = self.lr * (targets[idx] - y_predicted)
                # outer() yields update * x_i for the binary unit and one
                # such row per class in the one-vs-rest case.
                self.weights += np.multiply.outer(update, x_i)
                self.bias += update
                if np.any(update != 0):
                    corrected = True
            if not corrected:
                # A full pass without corrections means later passes would
                # not change anything either.
                break

    def predict(self, X):
        """Predict labels for ``X``.

        Returns:
            An array of 0/1 after a binary fit, or an array of the original
            class labels after a one-vs-rest fit.
        """
        linear_output = np.dot(X, self.weights.T) + self.bias
        if self.classes_ is None:
            return self._activation_function(linear_output)
        # Pick, per sample, the class whose unit scored highest.
        return self.classes_[np.argmax(linear_output, axis=-1)]

    def _activation_function(self, x):
        """Unit step: 1 where ``x >= 0``, otherwise 0."""
        return np.where(x >= 0, 1, 0)

# Initialize and train the from-scratch perceptron with its default
# hyper-parameters, then predict on the held-out test set.
# `.values` passes plain NumPy arrays so rows and labels are indexed positionally.
perceptron_scratch = PerceptronScratch()
perceptron_scratch.fit(X_train.values, y_train.values)
y_pred_scratch = perceptron_scratch.predict(X_test.values)

class PerceptronScratch:
    """Perceptron classifier trained with the classic error-driven rule.

    Labels drawn only from {0, 1} train a single binary unit. Any other label
    set is handled one-vs-rest: one binary unit per distinct class, each
    trained with the same rule against "this class vs. everything else", and
    prediction returns the class whose unit gives the largest linear score.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        """Store hyper-parameters; the model itself is created by ``fit``.

        Args:
            lr: Step size applied to every weight/bias correction.
            n_iters: Maximum number of passes over the training data.
        """
        self.lr = lr
        self.n_iters = n_iters
        # Learned parameters, populated by fit():
        #   binary fit      -> weights (n_features,),           bias scalar
        #   one-vs-rest fit -> weights (n_classes, n_features), bias (n_classes,)
        self.weights = None
        self.bias = None
        # Sorted distinct labels after a one-vs-rest fit; None after a binary fit.
        self.classes_ = None

    def fit(self, X, y):
        """Learn weights and bias from training data.

        Args:
            X: 2-D array-like of samples (rows) by features (columns).
            y: 1-D array-like of class labels aligned with the rows of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_features = X.shape[1]

        labels = np.unique(y)
        if set(labels.tolist()) <= {0, 1}:
            # Plain binary perceptron: the labels are the 0/1 targets.
            self.classes_ = None
            targets = y
            self.weights = np.zeros(n_features)
            self.bias = 0
        else:
            # One-vs-rest: row i of `targets` is the one-hot encoding of y[i],
            # i.e. the 0/1 target of every per-class unit for that sample.
            self.classes_ = labels
            targets = (y[:, None] == labels).astype(int)
            self.weights = np.zeros((len(labels), n_features))
            self.bias = np.zeros(len(labels))

        for _ in range(self.n_iters):
            corrected = False
            for idx, x_i in enumerate(X):
                linear_output = np.dot(self.weights, x_i) + self.bias
                y_predicted = self._activation_function(linear_output)

                # Zero where the unit was right, +/- lr where it was wrong.
                update = self.lr * (targets[idx] - y_predicted)
                # outer() yields update * x_i for the binary unit and one
                # such row per class in the one-vs-rest case.
                self.weights += np.multiply.outer(update, x_i)
                self.bias += update
                if np.any(update != 0):
                    corrected = True
            if not corrected:
                # A full pass without corrections means later passes would
                # not change anything either.
                break

    def predict(self, X):
        """Predict labels for ``X``.

        Returns:
            An array of 0/1 after a binary fit, or an array of the original
            class labels after a one-vs-rest fit.
        """
        linear_output = np.dot(X, self.weights.T) + self.bias
        if self.classes_ is None:
            return self._activation_function(linear_output)
        # Pick, per sample, the class whose unit scored highest.
        return self.classes_[np.argmax(linear_output, axis=-1)]

    def _activation_function(self, x):
        """Unit step: 1 where ``x >= 0``, otherwise 0."""
        return np.where(x >= 0, 1, 0)

# Initialize and train the from-scratch perceptron with its default
# hyper-parameters, then predict on the held-out test set.
perceptron_scratch = PerceptronScratch()
perceptron_scratch.fit(X_train.values, y_train.values)
y_pred_scratch = perceptron_scratch.predict(X_test.values)

# Evaluate the Perceptron implemented from scratch
# (the from-scratch decision tree is not scored by these statements).
accuracy_scratch = accuracy_score(y_test, y_pred_scratch)
print("Perceptron (from scratch) Accuracy:", accuracy_scratch)

def predict_tree(tree, X):
    """Classify one sample by walking a nested-dict tree down to a leaf.

    Args:
        tree: Either a leaf value, or a dict with keys ``'feature_index'``,
            ``'threshold'``, ``'left'`` and ``'right'``.
        X: A single sample, indexable by feature position.

    Returns:
        The leaf value reached by following ``<= threshold`` to the left and
        everything else to the right.
    """
    node = tree
    # Internal nodes are dicts; anything else is a leaf carrying the answer.
    while isinstance(node, dict):
        tested_feature = node['feature_index']
        cutoff = node['threshold']
        branch = 'left' if X[tested_feature] <= cutoff else 'right'
        node = node[branch]
    return node
# Predict on the test set using the decision tree from scratch.
# predict_tree classifies one sample at a time, hence the per-row loop.
y_pred_tree = [predict_tree(tree, x) for x in X_test.values]

# Evaluate the Decision Tree implemented from scratch
accuracy_tree = accuracy_score(y_test, y_pred_tree)
print("Decision Tree (from scratch) Accuracy:", accuracy_tree)

# A very basic recursive tree algorithm (no pruning)
def decision_tree(X, y, depth=0, max_depth=3):
    """Recursively grow a classification tree (no pruning).

    Args:
        X: 2-D NumPy array of samples (rows) by features (columns).
        y: 1-D NumPy array of non-negative integer class labels (they are
            counted with ``np.bincount``), aligned with the rows of ``X``.
        depth: Depth of the node being built; callers normally leave it at 0.
        max_depth: Depth at which growth stops and a leaf is emitted.

    Returns:
        Either a leaf (the majority class label of ``y``) or a dict with keys
        ``'feature_index'``, ``'threshold'``, ``'left'`` and ``'right'``, where
        the two children are themselves trees of the same form.

    Raises:
        ValueError: If ``y`` is empty.
    """
    if len(y) == 0:
        raise ValueError("decision_tree requires at least one training sample")
    majority_class = np.argmax(np.bincount(y))
    # >= rather than == so a starting depth beyond max_depth still terminates.
    if len(np.unique(y)) == 1 or depth >= max_depth:
        return majority_class
    feature_index, threshold = best_split(X, y)
    if feature_index is None:
        # Defensive: treat a "no split available" answer as a leaf.
        return majority_class
    X_left, X_right, y_left, y_right = split_dataset(X, y, feature_index, threshold)
    if len(y_left) == 0 or len(y_right) == 0:
        # A split that sends every sample to one side separates nothing, and
        # recursing into the empty side would fail. Stop here instead.
        return majority_class
    left_subtree = decision_tree(X_left, y_left, depth + 1, max_depth)
    right_subtree = decision_tree(X_right, y_right, depth + 1, max_depth)
    return {'feature_index': feature_index, 'threshold': threshold, 'left': left_subtree, 'right': right_subtree}

# Example usage (for small datasets):
# Grow the from-scratch tree on the training split. `.values` passes plain
# NumPy arrays, which the helpers index positionally (e.g. X[:, feature_index]).
tree = decision_tree(X_train.values, y_train.values)

# Prediction function for Decision Tree
def predict_tree(tree, X):
    """Classify one sample by walking a nested-dict tree down to a leaf.

    Args:
        tree: Either a leaf value, or a dict with keys ``'feature_index'``,
            ``'threshold'``, ``'left'`` and ``'right'``.
        X: A single sample, indexable by feature position.

    Returns:
        The leaf value reached by following ``<= threshold`` to the left and
        everything else to the right.
    """
    node = tree
    # Internal nodes are dicts; anything else is a leaf carrying the answer.
    while isinstance(node, dict):
        tested_feature = node['feature_index']
        cutoff = node['threshold']
        branch = 'left' if X[tested_feature] <= cutoff else 'right'
        node = node[branch]
    return node

# Predict on the test set using the decision tree from scratch.
# predict_tree classifies one sample at a time, hence the per-row loop.
y_pred_tree = [predict_tree(tree, x) for x in X_test.values]

# Evaluate the Decision Tree implemented from scratch
accuracy_tree = accuracy_score(y_test, y_pred_tree)
print("Decision Tree (from scratch) Accuracy:", accuracy_tree)










   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  
sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
target               0
dtype: int64
Training set size: 105
Testing set size: 45
Decision Tree Accuracy: 1.0
Decision Tree Precision: 1.0
Decision Tree Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Perceptron Accuracy: 0.8222222222222222
Perceptron Precision: 0.873015873015873
Perceptron Confusion Matrix:
 [[19  0  0]
 [ 0  5  8]
 [ 0  0 13]]
Percept