In [None]:
import numpy as np
import pandas as pd
from google.colab import drive

# **1. DATASET LOADING AND PREPARATION**

In [None]:
# Mount Google Drive at a fixed, absolute location
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Load the preprocessed dataset.
# The path is derived from the mount point so that loading does not depend on
# the kernel's current working directory (a relative 'drive/...' path resolves
# to the mounted Drive only while the working directory is /content).
DATA_PATH = f'{DRIVE_MOUNT_POINT}/My Drive/iris_processed_standardized.csv'
data = pd.read_csv(DATA_PATH)

# Extract features and target
features = ['sepal.length', 'sepal.width', 'petal.length', 'petal.width']
# 2-D array: one row per sample, one column per entry in `features`
X = data[features].values
# 1-D label array; downstream code uses these values as row indices into the
# weight matrix, so they are presumably integer-coded classes — TODO confirm
y = data['target'].values

def split_data(X, y, train_size, random_state=23):
    """
    Split the dataset into shuffled training and testing sets.

    Parameters
    ----------
    X : array-like
        Feature rows, indexed along the first axis. Converted with
        ``np.asarray`` so plain Python lists are accepted as well as arrays.
    y : array-like
        Labels aligned with the rows of ``X`` (same length).
    train_size : float
        Fraction of samples, in the closed interval [0, 1], assigned to the
        training set. The training count is ``int(len(X) * train_size)``,
        i.e. truncated toward zero; the remainder goes to the test set.
    random_state : int, default 23
        Seed used for the shuffle.

    Returns
    -------
    X_train, X_test, y_train, y_test : np.ndarray

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different lengths, or ``train_size`` is
        outside [0, 1].

    Notes
    -----
    This function reseeds NumPy's *global* random generator. Code that draws
    from ``np.random`` after this call is therefore affected by
    ``random_state`` too.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same number of samples (got {len(X)} and {len(y)})"
        )
    if not 0 <= train_size <= 1:
        raise ValueError(f"train_size must be within [0, 1] (got {train_size})")

    # Set random seed for reproducibility (global generator, see Notes)
    np.random.seed(random_state)

    # Generate shuffled indices
    indices = np.random.permutation(len(X))

    # Calculate split point
    split_point = int(len(X) * train_size)

    # Split the data
    train_indices = indices[:split_point]
    test_indices = indices[split_point:]

    X_train = X[train_indices]
    y_train = y[train_indices]
    X_test = X[test_indices]
    y_test = y[test_indices]

    print(f"\nTraining set size: {len(X_train)}")
    print(f"Testing set size: {len(X_test)}")

    return X_train, X_test, y_train, y_test

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **2. MULTI-CLASS PERCEPTRON IMPLEMENTATION**

In [None]:

class MultiClassPerceptron:
    """
    Multi-class perceptron keeping one weight row per class.

    The weight matrix has shape ``(n_classes, n_features + 1)``; the last
    column multiplies the constant bias input appended by ``add_bias``.
    A sample is assigned to the class whose weight row gives the highest score.
    """

    def __init__(self, n_classes, n_features, learning_rate=0.01, random_weights=False):
        """
        Initialize Multi-Class Perceptron classifier

        Parameters
        ----------
        n_classes : int
            Number of classes (rows of the weight matrix).
        n_features : int
            Number of input features, excluding the bias term.
        learning_rate : float, default 0.01
            Step size applied to every weight update.
        random_weights : bool, default False
            If True, weights are drawn uniformly from [-0.5, 0.5) using
            NumPy's global random generator; otherwise they start at zero.
        """
        self.n_classes = n_classes
        self.n_features = n_features
        self.learning_rate = learning_rate

        # Initialize weights
        if random_weights:
            self.weights = np.random.uniform(-0.5, 0.5, (n_classes, n_features + 1))
        else:
            self.weights = np.zeros((n_classes, n_features + 1))

        # Per-epoch training records; only created here, so repeated calls to
        # fit() keep appending to the same list.
        self.history = []

    def add_bias(self, X):
        """Add bias term to features (a column of ones appended on the right)"""
        return np.hstack((X, np.ones((X.shape[0], 1))))

    def predict(self, X):
        """Predict class labels for samples in X (index of the best-scoring class per row)"""
        X_bias = self.add_bias(X)
        scores = np.dot(X_bias, self.weights.T)
        return np.argmax(scores, axis=1)

    def fit(self, X, y, max_epochs=1000):
        """
        Train the perceptron until convergence

        Each epoch visits the samples in a freshly shuffled order. For every
        misclassified sample the true class's weights are moved toward the
        sample and the wrongly predicted class's weights are moved away.
        Training stops after the first epoch with no misclassification, or
        after ``max_epochs`` epochs, whichever comes first.

        Parameters
        ----------
        X : np.ndarray, shape (n_samples, n_features)
            Training features.
        y : sequence of int, length n_samples
            Class labels, used directly as row indices into the weights.
        max_epochs : int, default 1000
            Upper bound on the number of passes over the data.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the training set is empty, ``y`` does not match ``X`` in
            length, or ``max_epochs`` is smaller than 1.
        """
        X_bias = self.add_bias(X)
        n_samples = X_bias.shape[0]

        if n_samples == 0:
            raise ValueError("Cannot fit on an empty training set")
        if len(y) != n_samples:
            raise ValueError(
                f"X and y must have the same number of samples (got {n_samples} and {len(y)})"
            )
        if max_epochs < 1:
            raise ValueError(f"max_epochs must be at least 1 (got {max_epochs})")

        epoch = 0
        converged = False

        while epoch < max_epochs:
            n_misclassified = 0
            epoch += 1

            # Shuffle the training data
            permutation = np.random.permutation(n_samples)
            for i in permutation:
                # Get true and predicted classes
                true_class = y[i]
                scores = np.dot(X_bias[i], self.weights.T)
                predicted_class = np.argmax(scores)

                # Update weights if prediction is wrong
                if predicted_class != true_class:
                    n_misclassified += 1
                    self.weights[true_class] += self.learning_rate * X_bias[i]
                    self.weights[predicted_class] -= self.learning_rate * X_bias[i]

            # Store training progress. The accuracy is the share of samples
            # classified correctly *while* the weights were being updated
            # during this epoch, not a re-evaluation with the final weights.
            accuracy = 1 - (n_misclassified / n_samples)
            self.history.append({
                'epoch': epoch,
                'accuracy': accuracy,
                'misclassified': n_misclassified
            })

            if n_misclassified == 0:
                converged = True
                break

        # Warn only when the cap was hit without a clean epoch; converging
        # exactly on the last allowed epoch is not a failure.
        if not converged:
            print(f"Warning: Maximum epochs ({max_epochs}) reached")

        print(f"Training completed in {epoch} epochs")
        return self

# **3. EVALUATION METRICS**

In [None]:
def calculate_metrics(y_true, y_pred, class_names=None):
    """
    Calculate accuracy and confusion matrix

    Prints the labelled confusion matrix and the accuracy, then returns both.

    Parameters
    ----------
    y_true : array-like of int
        True class indices.
    y_pred : array-like of int
        Predicted class indices, same shape as ``y_true``.
    class_names : sequence of str, optional
        Display name for each class index. Defaults to the three iris
        species ``['setosa', 'versicolor', 'virginica']``.

    Returns
    -------
    accuracy : float
        Fraction of positions where ``y_true`` equals ``y_pred``.
    conf_matrix : np.ndarray of int, shape (n_classes, n_classes)
        ``conf_matrix[t, p]`` counts samples of true class ``t`` predicted
        as class ``p``.

    Raises
    ------
    ValueError
        If the inputs differ in shape or contain a label outside
        ``0 .. len(class_names) - 1``.
    """
    # Define class names
    if class_names is None:
        class_names = ['setosa', 'versicolor', 'virginica']
    class_names = list(class_names)

    # Work on arrays: comparing two plain lists with == yields a single bool,
    # which would make the accuracy either 0 or 1.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape (got {y_true.shape} and {y_pred.shape})"
        )

    n_classes = len(class_names)
    all_labels = np.concatenate([y_true.ravel(), y_pred.ravel()])
    # Negative labels would otherwise wrap around and be counted in the wrong cell
    if all_labels.size and (all_labels.min() < 0 or all_labels.max() >= n_classes):
        raise ValueError(f"Labels must lie in 0..{n_classes - 1}")

    # Calculate accuracy
    accuracy = np.mean(y_true == y_pred)

    # Calculate confusion matrix
    conf_matrix = np.zeros((n_classes, n_classes), dtype=int)
    for true_label, pred_label in zip(y_true, y_pred):
        conf_matrix[true_label, pred_label] += 1

    # Create formatted confusion matrix
    conf_df = pd.DataFrame(
        conf_matrix,
        index=[f'True {name}' for name in class_names],
        columns=[f'Pred {name}' for name in class_names]
    )

    print("\nConfusion Matrix:")
    print(conf_df)
    print("\nAccuracy: {:.2%}".format(accuracy))

    return accuracy, conf_matrix

# **4. EXPERIMENT RUNNER**

In [None]:
def run_experiments():
    """
    Train and evaluate a perceptron for every combination of train fraction,
    learning rate and weight initialisation, and collect one record per run.

    Reads the module-level ``X`` and ``y``. Each record is a dict with the
    keys 'split_ratio', 'learning_rate', 'random_weights', 'epochs',
    'final_accuracy' and 'history'. Returns the list of records.
    """
    # Experiment grid
    train_fractions = [0.8, 0.7, 0.6]
    step_sizes = [0.01, 0.1]
    init_modes = [False, True]  # False -> zero weights, True -> random weights

    collected = []
    class_count = len(np.unique(y))
    feature_count = X.shape[1]

    banner = "=" * 80
    print("\n" + banner)
    print("RUNNING EXPERIMENTS")
    print(banner)

    for train_fraction in train_fractions:
        # One split per fraction, shared by every configuration below
        X_train, X_test, y_train, y_test = split_data(X, y, train_size=train_fraction)

        # Learning rate varies slowest, initialisation fastest
        configurations = [(step, mode) for step in step_sizes for mode in init_modes]
        for step, use_random in configurations:
            init_label = 'Random' if use_random else 'Zeros'
            print("\nConfiguration:")
            print(f"- Train-Test Split: {train_fraction*100:.0f}%-{(1-train_fraction)*100:.0f}%")
            print(f"- Learning Rate: {step}")
            print(f"- Weight Initialization: {init_label}")

            # Fit a fresh model for this configuration
            model = MultiClassPerceptron(
                n_classes=class_count,
                n_features=feature_count,
                learning_rate=step,
                random_weights=use_random
            )
            model.fit(X_train, y_train)

            # Score it on the held-out part
            test_predictions = model.predict(X_test)
            accuracy, conf_matrix = calculate_metrics(y_test, test_predictions)

            # One history entry is recorded per training epoch
            epochs_run = len(model.history)
            record = {
                'split_ratio': train_fraction,
                'learning_rate': step,
                'random_weights': use_random,
                'epochs': epochs_run,
                'final_accuracy': accuracy,
                'history': model.history
            }
            collected.append(record)

            print(f"Epochs to converge: {epochs_run}")
            print(f"Test Accuracy: {accuracy:.4f}")

    return collected

# **5. RESULTS ANALYSIS AND MAIN EXECUTION**

In [None]:
def _format_split(ratio):
    """Render a train fraction such as 0.8 as the label '80%-20%'."""
    return f"{ratio*100:.0f}%-{(1-ratio)*100:.0f}%"


def _summarize_by(results, key, label_for=lambda value: value):
    """Average epochs and accuracy of `results`, grouped by each distinct value of `key`."""
    summary = {}
    # dict.fromkeys keeps the distinct values in order of first appearance
    for value in dict.fromkeys(r[key] for r in results):
        group = [r for r in results if r[key] == value]
        avg_epochs = np.mean([r['epochs'] for r in group])
        avg_acc = np.mean([r['final_accuracy'] for r in group])
        summary[label_for(value)] = {
            'Avg Epochs': f"{avg_epochs:.1f}",
            'Avg Accuracy': f"{avg_acc:.2%}"
        }
    return pd.DataFrame(summary).T


def analyze_results(results):
    """
    Analyze and display experiment results in a structured format

    Prints a table of all runs, per-parameter averages (by split ratio,
    learning rate and weight initialisation) and the configuration with the
    highest test accuracy (the first one encountered on ties).

    The groups are taken from the values actually present in ``results``, so
    the summary stays correct whatever grid the experiments were run with.

    Parameters
    ----------
    results : list of dict
        One dict per run with the keys 'split_ratio', 'learning_rate',
        'random_weights', 'epochs' and 'final_accuracy'.

    Returns
    -------
    None
    """
    if len(results) == 0:
        # Nothing to average and no best configuration to pick
        print("\nNo experiment results to analyze.")
        return

    print("\n" + "="*80)
    print("EXPERIMENT RESULTS SUMMARY")
    print("="*80)

    # Convert results list to DataFrame for display
    df_results = pd.DataFrame([
        {
            'Split Ratio': _format_split(r['split_ratio']),
            'Learning Rate': r['learning_rate'],
            'Weight Init': 'Random' if r['random_weights'] else 'Zeros',
            'Epochs': r['epochs'],
            'Test Accuracy': f"{r['final_accuracy']:.2%}"
        } for r in results
    ])

    # Display full results table
    print("\nAll Experiments:")
    print(df_results.to_string(index=False))

    # Analyze effect of different parameters
    print("\n" + "="*80)
    print("PARAMETER ANALYSIS")
    print("="*80)

    # Group results by split ratio
    print("\n1. Effect of Dataset Splitting:")
    print(_summarize_by(results, 'split_ratio', _format_split))

    # Group results by learning rate
    print("\n2. Effect of Learning Rate:")
    print(_summarize_by(results, 'learning_rate'))

    # Group results by weight initialization
    print("\n3. Effect of Weight Initialization:")
    print(_summarize_by(results, 'random_weights',
                        lambda is_random: 'Random' if is_random else 'Zeros'))

    # Find best configuration
    best_result = max(results, key=lambda x: x['final_accuracy'])

    print("\n" + "="*80)
    print("BEST CONFIGURATION")
    print("="*80)
    print(f"Train-Test Split: {_format_split(best_result['split_ratio'])}")
    print(f"Learning Rate: {best_result['learning_rate']}")
    print(f"Weight Initialization: {'Random' if best_result['random_weights'] else 'Zeros'}")
    print(f"Epochs Required: {best_result['epochs']}")
    print(f"Testing Accuracy: {best_result['final_accuracy']:.2%}")

# Main execution
# NOTE(review): in a notebook kernel __name__ is presumably "__main__", so this
# guard is expected to pass whenever the cell is run — confirm if this code is
# ever imported as a module instead.
if __name__ == "__main__":
    # Run experiments
    # Despite the `_df` suffix, run_experiments() returns a plain list with one
    # dict per configuration, not a DataFrame.
    results_df = run_experiments()

    # Analyze results
    # Prints the summary tables and the best configuration; nothing is returned.
    analyze_results(results_df)


RUNNING EXPERIMENTS

Training set size: 120
Testing set size: 30

Configuration:
- Train-Test Split: 80%-20%
- Learning Rate: 0.01
- Weight Initialization: Zeros
Training completed in 133 epochs

Confusion Matrix:
                 Pred setosa  Pred versicolor  Pred virginica
True setosa               13                0               0
True versicolor            0                7               1
True virginica             0                0               9

Accuracy: 96.67%
Epochs to converge: 133
Test Accuracy: 0.9667

Configuration:
- Train-Test Split: 80%-20%
- Learning Rate: 0.01
- Weight Initialization: Random
Training completed in 24 epochs

Confusion Matrix:
                 Pred setosa  Pred versicolor  Pred virginica
True setosa               12                1               0
True versicolor            0                7               1
True virginica             0                0               9

Accuracy: 93.33%
Epochs to converge: 24
Test Accuracy: 0.9333

Configuratio