## AdaBoost Implementation

In [2]:
import numpy as np
%run LogisticRegression.ipynb
import import_ipynb
from LogisticRegression import logistic_regression, predict, normalize, sigmoid

importing Jupyter notebook from LogisticRegression.ipynb


In [None]:

# Tiny positive stand-in used in place of an exactly-zero quantity so that
# ratios and logarithms built from it stay finite (no division by zero).
EPS = 1e-12

In [None]:
def adaptive_boosting(X, y, num_of_estimators):
    """Fit an AdaBoost ensemble whose weak learner is logistic regression.

    Each round draws a bootstrap sample of the training set according to the
    current example weights, fits one weak hypothesis on it with
    ``logistic_regression``, and scores that hypothesis on the full training
    set with ``predict``. Correctly classified examples are then
    down-weighted so that later rounds focus on the hard examples.

    Progress (the number of models and the training accuracy of every round)
    is printed to stdout. Resampling uses NumPy's global random state, so
    results are only reproducible if the caller seeds it.

    Args:
        X: 2-D array of shape (num_samples, num_features).
        y: 2-D array with one row per sample; it is concatenated to ``X``
            column-wise and the last column is used as the label.
        num_of_estimators: Number of boosting rounds to attempt.

    Returns:
        A tuple ``(hypothesis, hypothesis_weights)``:
        ``hypothesis`` is the list of accepted weak-learner parameters as
        returned by ``logistic_regression``; ``hypothesis_weights`` is an
        array of shape ``(len(hypothesis), 1)`` holding
        ``log((1 - error) / error)`` for each accepted hypothesis. Rounds
        whose weighted error exceeds 0.5 are discarded, so fewer than
        ``num_of_estimators`` hypotheses (possibly none) may be returned.
    """
    num_samples, num_features = X.shape

    # w: one weight per training example, initially uniform (1/N).
    example_weights = np.full(num_samples, 1 / num_samples)
    # h: the accepted weak hypotheses.
    hypothesis = []
    # z: the voting weight of each accepted hypothesis.
    hypothesis_weights = []

    # Flatten once so comparisons below are element-wise regardless of
    # whether labels/predictions are column vectors or 1-D arrays.
    y_flat = np.ravel(y)

    print('fitting ' + str(num_of_estimators) + ' models')
    for _ in range(num_of_estimators):
        # Resample the training set with replacement; an example's chance of
        # being drawn is its current weight.
        examples = np.concatenate((X, y), axis=1)
        sampled_rows = np.random.choice(
            num_samples, size=num_samples, replace=True, p=example_weights
        )
        data = examples[sampled_rows]

        data_X = data[:, :num_features]
        data_y = data[:, -1:]

        # Getting a hypothesis from the weak learning algorithm.
        w = logistic_regression(
            data_X,
            data_y,
            epochs=1000,
            learning_rate=0.01,
            early_stopping_threshold=0,
        )

        # Evaluate the hypothesis on the full (un-resampled) training set.
        y_predicted = predict(X, w)
        misclassified = y_flat != np.ravel(y_predicted)
        correct = ~misclassified

        print("Accuracy: ")
        print(np.sum(correct) / num_samples)

        # Weighted error: total weight of the misclassified examples.
        error = np.sum(example_weights[misclassified])

        # A hypothesis worse than chance is discarded; the example weights
        # stay unchanged and the next round simply resamples.
        if error > 0.5:
            continue
        hypothesis.append(w)

        # A perfect hypothesis would give log(1 / 0); clamp to keep it finite.
        if error == 0:
            error = EPS

        # Shrink the weight of every correctly classified example, then
        # renormalize so the weights remain a probability distribution.
        example_weights[correct] *= error / (1 - error)
        example_weights /= np.sum(example_weights)

        # The lower the error, the larger this hypothesis' vote.
        hypothesis_weights.append(np.log((1 - error) / error))

    return hypothesis, np.array(hypothesis_weights).reshape(len(hypothesis), 1)

In [None]:
def weighted_majority(X, hypothesis, hypothesis_weights):
    """Predict 0/1 labels with a weighted vote over the boosted hypotheses.

    ``X`` is passed through ``normalize`` and extended with a trailing column
    of ones. Every hypothesis then casts a +1/-1 vote per sample, the votes
    are combined using ``hypothesis_weights``, and a non-negative combined
    score maps to label 1, a negative one to label 0.

    Args:
        X: 2-D array with one row per sample.
        hypothesis: Sequence of weak-learner parameter vectors, as returned
            by ``adaptive_boosting``.
        hypothesis_weights: Array of shape ``(len(hypothesis), 1)`` with the
            voting weight of each hypothesis.

    Returns:
        Integer array of shape ``(num_samples, 1)`` containing 0/1 labels.

    Raises:
        ValueError: If ``hypothesis`` is empty (e.g. every boosting round was
            rejected), since there is nothing to vote with.
    """
    num_samples = X.shape[0]

    # Fail with a clear message instead of a confusing reshape error later.
    if len(hypothesis) == 0:
        raise ValueError("weighted_majority requires at least one hypothesis")

    # Normalizing inputs X, then appending a column of ones.
    # presumably the ones column is the bias term matching how
    # logistic_regression lays out its weights - TODO confirm
    X = normalize(X)
    X = np.concatenate((X, np.ones((num_samples, 1))), axis=1)

    # One row of +1/-1 votes per hypothesis -> shape (num_hypotheses, N).
    # NOTE(review): (1 + s) / 2 >= 0.5 is only a meaningful threshold if
    # `sigmoid` returns values in (-1, 1) (tanh-like); with a (0, 1) logistic
    # every vote would be +1 - confirm against LogisticRegression.ipynb.
    votes = np.array([
        np.where(np.ravel((1 + sigmoid(np.dot(X, w))) / 2) >= 0.5, 1, -1)
        for w in hypothesis
    ])

    # Weighted sum of the votes for each sample; its sign decides the label.
    combined_scores = np.dot(votes.T, hypothesis_weights)
    predictions = np.where(np.ravel(combined_scores) >= 0, 1, 0)

    return predictions.reshape(num_samples, 1)

In [None]:
def preformance_matrix(y_true, y_predicted):
    """Compute binary-classification performance measures.

    The (misspelled) name is kept for compatibility with existing callers.

    Samples are counted into a confusion matrix with 1 as the positive and 0
    as the negative class; samples whose true label is neither 0 nor 1 are
    ignored. Any ratio whose denominator is zero (for example precision when
    nothing was predicted positive) is reported as 0.0 instead of raising
    ``ZeroDivisionError``.

    Args:
        y_true: Array-like of true labels, 1-D or a column vector.
        y_predicted: Array-like of predicted labels with the same number of
            elements as ``y_true``.

    Returns:
        Tuple of floats ``(accuracy, recall, specificity, precision,
        false_discovery_rate, f1_score)``.

    Raises:
        ValueError: If the two inputs do not hold the same number of labels.
    """
    def _ratio(numerator, denominator):
        # An empty denominator means the measure is undefined; report 0.0.
        return numerator / denominator if denominator else 0.0

    # Flatten so column vectors and 1-D arrays are compared element-wise.
    actual = np.ravel(y_true)
    predicted = np.ravel(y_predicted)
    if actual.size != predicted.size:
        raise ValueError("y_true and y_predicted must have the same length")

    agree = actual == predicted
    is_negative = actual == 0
    is_positive = actual == 1

    # Confusion matrix outcomes, as plain ints so the ratios are plain floats.
    TN = int(np.count_nonzero(is_negative & agree))
    FP = int(np.count_nonzero(is_negative & ~agree))
    TP = int(np.count_nonzero(is_positive & agree))
    FN = int(np.count_nonzero(is_positive & ~agree))

    # Performance measures.
    accuracy = _ratio(TP + TN, TP + FN + TN + FP)
    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    specificity = _ratio(TN, TN + FP)
    false_discovery_rate = _ratio(FP, TP + FP)
    f1_score = _ratio(2 * recall * precision, recall + precision)

    return (accuracy, recall, specificity, precision, false_discovery_rate, f1_score)

### Telco Customer Churn Dataset