In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Weighted Weak Linear Classifier
class WeightedWeakLinearClassifier:
    """Weak learner: a weighted decision stump along one projection direction.

    Samples are projected onto the unit vector pointing from the weighted mean
    of the -1 class to the weighted mean of the +1 class. A single threshold
    and a polarity are then chosen on that 1-D projection so that the weighted
    misclassification error on the training data is minimal.
    """

    def __init__(self):
        # Cut point on the projection axis; None until fit() has run.
        self.threshold = None
        # Unit vector the samples are projected onto; None until fit() has run.
        self.orientation_vector = None
        # +1: projections above the threshold are labelled +1; -1: the reverse.
        self.polarity = None

    def fit(self, X, y, weights):
        """Choose the projection direction, threshold and polarity.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples.
        y : array-like, shape (n_samples,)
            Labels; only the values +1 and -1 are treated as classes.
        weights : array-like, shape (n_samples,)
            Non-negative sample weights.

        Raises
        ------
        ValueError
            If either class has no samples or zero total weight, because the
            weighted class means are undefined in that case.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        weights = np.asarray(weights, dtype=float)

        pos_mask = y == 1
        neg_mask = y == -1
        pos_weights = weights[pos_mask]
        neg_weights = weights[neg_mask]
        if pos_weights.sum() <= 0 or neg_weights.sum() <= 0:
            raise ValueError(
                "Both classes (+1 and -1) need at least one sample with positive weight."
            )

        # Weighted class means define the projection direction.
        pos_mean = np.average(X[pos_mask], axis=0, weights=pos_weights)
        neg_mean = np.average(X[neg_mask], axis=0, weights=neg_weights)
        direction = pos_mean - neg_mean
        norm = np.linalg.norm(direction)
        if norm > 0:
            direction = direction / norm
        else:
            # Coincident class means give no preferred direction; dividing by
            # the zero norm would fill the vector with NaN. Fall back to the
            # first coordinate axis so the stump stays well defined.
            direction = np.zeros(X.shape[1])
            direction[0] = 1.0
        self.orientation_vector = direction

        projections = np.dot(X, direction)
        order = np.argsort(projections)
        sorted_projections = projections[order]
        sorted_y = y[order]
        sorted_weights = weights[order]

        # Running weight of each class at or below every sorted position.
        pos_below = np.cumsum(np.where(sorted_y == 1, sorted_weights, 0.0))
        neg_below = np.cumsum(np.where(sorted_y == -1, sorted_weights, 0.0))
        pos_total = pos_below[-1]
        neg_total = neg_below[-1]

        # Only cut between *distinct* neighbouring projections: a midpoint of
        # two equal values would sit exactly on those samples.
        cuts = np.flatnonzero(sorted_projections[:-1] < sorted_projections[1:])
        if cuts.size == 0:
            # Every sample projects to the same value, so only a constant
            # prediction is possible: put the threshold below all samples and
            # predict the heavier class.
            self.threshold = float(sorted_projections[0]) - 1.0
            self.polarity = 1 if pos_total >= neg_total else -1
            return

        # Polarity +1 predicts -1 at/below the cut and +1 above it, so its
        # mistakes are the positives below plus the negatives above.
        # Polarity -1 is the mirror image.
        error_plus = pos_below[cuts] + (neg_total - neg_below[cuts])
        error_minus = neg_below[cuts] + (pos_total - pos_below[cuts])

        # argmin returns the first minimum, so ties prefer polarity +1 and the
        # lowest threshold.
        best = int(np.argmin(np.concatenate([error_plus, error_minus])))
        self.polarity = 1 if best < cuts.size else -1
        cut = cuts[best % cuts.size]
        self.threshold = float(
            (sorted_projections[cut] + sorted_projections[cut + 1]) / 2
        )

    def predict(self, X):
        """Return an array of +1.0 / -1.0 labels for the rows of ``X``.

        Samples whose projection lies exactly on the threshold are assigned to
        the "below" side, so the result never contains 0.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.orientation_vector is None or self.threshold is None:
            raise RuntimeError("fit() must be called before predict().")
        projections = np.dot(X, self.orientation_vector)
        return self.polarity * np.where(projections > self.threshold, 1.0, -1.0)



In [None]:
# AdaBoost
class AdaBoost:
    """Discrete AdaBoost over weak learners for labels in {-1, +1}.

    Parameters
    ----------
    n_learners : int
        Maximum number of boosting rounds.
    learner_factory : callable, optional
        Zero-argument callable returning a fresh weak learner that exposes
        ``fit(X, y, weights)`` and ``predict(X)``. When omitted,
        ``WeightedWeakLinearClassifier`` is used.
    """

    # Lower bound on the weighted error used in the alpha formula, so that a
    # perfect weak learner gets a large but finite vote instead of infinity.
    _MIN_ERROR = 1e-10

    def __init__(self, n_learners, learner_factory=None):
        self.n_learners = n_learners
        self.learner_factory = learner_factory
        self.learners = []
        self.alphas = []

    def fit(self, X, y):
        """Train up to ``n_learners`` weak learners on ``(X, y)``.

        Any learners kept from a previous call are discarded first. Boosting
        stops early when a weak learner is no better than chance (weighted
        error >= 0.5) or when it classifies every training sample correctly.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Start from a clean ensemble so repeated fit() calls do not accumulate.
        self.learners = []
        self.alphas = []

        make_learner = self.learner_factory
        if make_learner is None:
            make_learner = WeightedWeakLinearClassifier

        n_samples = len(y)
        weights = np.ones(n_samples) / n_samples
        for _ in range(self.n_learners):
            # Train weak learner on the current sample weights.
            learner = make_learner()
            learner.fit(X, y, weights)
            predictions = learner.predict(X)

            # Weighted training error of this round.
            error = np.sum(weights[predictions != y])
            if error >= 0.5:
                # A chance-level learner gets alpha <= 0 and contributes nothing.
                break

            # Clamp the error away from zero: log((1 - 0) / 0) would be inf and
            # the subsequent weight normalisation would produce NaN.
            safe_error = max(error, self._MIN_ERROR)
            alpha = 0.5 * np.log((1 - safe_error) / safe_error)

            self.learners.append(learner)
            self.alphas.append(alpha)

            if error <= 0:
                # Nothing is misclassified, so the weights would not change and
                # every further round would reproduce the same learner.
                break

            # Up-weight mistakes, down-weight correct samples, renormalise.
            weights = weights * np.exp(-alpha * y * predictions)
            weights /= np.sum(weights)

    def predict(self, X):
        """Return the alpha-weighted majority vote as +1.0 / -1.0 labels.

        A tied vote (including an ensemble with no learners) is resolved to
        +1.0, so the output never contains 0.
        """
        X = np.asarray(X)
        final_prediction = np.zeros(X.shape[0])
        for learner, alpha in zip(self.learners, self.alphas):
            final_prediction += alpha * learner.predict(X)
        return np.where(final_prediction >= 0, 1.0, -1.0)



In [None]:
# Visualization and Evaluation
def evaluate_adaboost(X_train, y_train, X_test, y_test, n_learners):
    """Fit an AdaBoost ensemble and report its train and test accuracy.

    A new ``AdaBoost(n_learners)`` is trained on ``(X_train, y_train)``.
    Returns ``(train_accuracy, test_accuracy)``, each the fraction of
    predictions that equal the corresponding labels.
    """
    model = AdaBoost(n_learners)
    model.fit(X_train, y_train)

    def _fraction_correct(features, labels):
        # Mean of the element-wise match between predictions and labels.
        return np.mean(model.predict(features) == labels)

    score_on_train = _fraction_correct(X_train, y_train)
    score_on_test = _fraction_correct(X_test, y_test)
    return score_on_train, score_on_test



In [None]:
# Load dataset
# Both files are parsed by np.loadtxt into numeric arrays.
train_data = np.loadtxt('/content/adaboost-train-24.txt')
test_data = np.loadtxt('/content/adaboost-test-24.txt')

# The last column is taken as the label; every column before it is a feature.
X_train = train_data[:, :-1]
y_train = train_data[:, -1]
X_test = test_data[:, :-1]
y_test = test_data[:, -1]



FileNotFoundError: /content/adaboost-train-24.txt not found.

In [None]:
# Evaluate the model and collect accuracy trends
n_learners = 50  # Maximum number of learners to analyze
train_accuracies = []
test_accuracies = []

# Boosting is greedy and deterministic here: each round depends only on the
# rounds before it, so the first n learners of the full ensemble are exactly
# the ensemble that AdaBoost(n_learners=n) would train on the same data.
# Fit once and score prefixes instead of retraining from scratch for every n.
adaboost = AdaBoost(n_learners=n_learners)
adaboost.fit(X_train, y_train)

for n in range(1, n_learners + 1):
    # Prefix ensemble that reuses the already-trained learners. Slicing past
    # the end is harmless if training stopped before n_learners rounds.
    staged = AdaBoost(n_learners=n)
    staged.learners = adaboost.learners[:n]
    staged.alphas = adaboost.alphas[:n]

    # Predictions for training and testing sets
    train_predictions = staged.predict(X_train)
    test_predictions = staged.predict(X_test)

    # Accuracies as percentages
    train_accuracy = np.mean(train_predictions == y_train) * 100
    test_accuracy = np.mean(test_predictions == y_test) * 100

    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)

# Print the trends in tabular format
print(f"{'Number of Learners':<20}{'Training Accuracy (%)':<25}{'Testing Accuracy (%)':<25}")
print("-" * 70)
for i in range(n_learners):
    print(f"{i+1:<20}{train_accuracies[i]:<25.2f}{test_accuracies[i]:<25.2f}")



In [None]:
# Plot accuracy
# The accuracies were stored as percentages, so the y-axis is labelled with
# its unit; a title lets the figure stand alone.
fig, ax = plt.subplots()
learner_counts = range(1, n_learners + 1)
ax.plot(learner_counts, train_accuracies, label="Train Accuracy")
ax.plot(learner_counts, test_accuracies, label="Test Accuracy")
ax.set_xlabel("Number of Weak Learners")
ax.set_ylabel("Accuracy (%)")
ax.set_title("AdaBoost Accuracy vs. Number of Weak Learners")
ax.legend()
plt.show()
