## Problem 2

In [None]:
# Imports
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

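Let's start by defining a class for the ensemble: it fits each expert, weights it by the softmax of its accuracy on a held-out validation split, and combines the experts' predictions using those weights.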

In [72]:
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The largest entry is subtracted before exponentiating. Mathematically this
    leaves the result unchanged, but it keeps ``np.exp`` from overflowing to
    ``inf`` (which would turn the ratio into ``nan``) when inputs are large.

    Parameters
    ----------
    x : array-like of numbers
        Scores to convert into weights.

    Returns
    -------
    numpy.ndarray
        Non-negative values, same shape as ``x``, that sum to 1.
        An empty input yields an empty array.
    """
    scores = np.asarray(x, dtype=float)
    if scores.size == 0:
        # Nothing to normalise, and max() would raise on an empty array.
        return scores.copy()
    # Shift so the largest exponent is exp(0) == 1; exponentiate only once.
    exps = np.exp(scores - scores.max())
    return exps / exps.sum()

class WeightedAverageEnsemble:
    """Ensemble that combines expert classifiers weighted by validation accuracy.

    ``fit`` holds out part of the training data, fits every expert on the
    remainder, scores each expert on the held-out part and converts those
    accuracies into weights with ``softmax``. ``predict`` returns, for every
    sample, the label that collects the largest total expert weight. For 0/1
    labels this matches rounding the weighted average of the experts'
    predictions, but unlike the rounded average it can never return a label
    that no expert predicted.

    Parameters
    ----------
    experts : list
        Estimators exposing ``fit(X, y)`` and ``predict(X)``. They are fitted
        in place by ``fit``.
    validation_size : float or int, default=0.2
        Passed to ``train_test_split`` as ``test_size`` for the internal
        validation split.
    random_state : int or None, default=None
        Passed to ``train_test_split``. Set it to make the validation split,
        and therefore the expert weights, repeatable.
    """

    # Class-level defaults; every attribute is overwritten per instance.
    experts = None        # the expert estimators given to the constructor
    weights = None        # softmax of validation accuracies, set by fit()
    X_train = None        # part of the data the experts are fitted on
    X_validation = None   # held-out part used to score the experts
    y_validation = None
    y_train = None

    def __init__(self, experts : list, validation_size=0.2, random_state=None):
        self.experts = experts
        self.validation_size = validation_size
        self.random_state = random_state

    def fit(self, X_train, y_train):
        """Fit every expert and derive its weight from validation accuracy.

        Parameters
        ----------
        X_train, y_train
            Training features and labels; split internally into a fitting part
            and a validation part.

        Returns
        -------
        WeightedAverageEnsemble
            The fitted ensemble itself.

        Raises
        ------
        ValueError
            If the ensemble has no experts.
        """
        if self.experts is None or len(self.experts) == 0:
            raise ValueError("WeightedAverageEnsemble needs at least one expert to fit.")

        (self.X_train, self.X_validation,
         self.y_train, self.y_validation) = train_test_split(
            X_train,
            y_train,
            test_size=self.validation_size,
            random_state=self.random_state,
        )

        accuracies = []
        for model in self.experts:
            model.fit(self.X_train, self.y_train)
            model_preds = model.predict(self.X_validation)
            # accuracy_score takes (y_true, y_pred).
            accuracies.append(accuracy_score(self.y_validation, model_preds))
        self.weights = softmax(accuracies)
        return self

    def predict(self, X_test):
        """Predict one label per sample by weighted vote of the experts.

        Parameters
        ----------
        X_test
            Samples to classify; handed unchanged to each expert's ``predict``.

        Returns
        -------
        list
            Predicted labels, one per sample. On an exact tie the smallest
            label wins.

        Raises
        ------
        RuntimeError
            If called before ``fit`` has set the expert weights.
        """
        if self.weights is None:
            raise RuntimeError("WeightedAverageEnsemble.predict() called before fit().")

        weights = np.asarray(self.weights, dtype=float)
        # Shape (n_experts, n_samples): one row of predictions per expert.
        expert_predictions = np.asarray(
            [model.predict(X_test) for model in self.experts]
        )
        if expert_predictions.size == 0:
            return []

        labels = np.unique(expert_predictions)
        # votes[k, j] = total weight of the experts that predicted labels[k]
        # for sample j.
        votes = np.array(
            [np.dot(weights, expert_predictions == label) for label in labels]
        )
        return labels[votes.argmax(axis=0)].tolist()

Load the breast cancer dataset, split it into training and test sets, and standardize the features.

In [57]:
# Load the breast cancer data as a (features, labels) pair
X, y = load_breast_cancer(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split stable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=212
)

# Standardise using statistics learned from the training split only,
# then apply the same transformation to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [70]:
# Two decision trees that differ only in their split criterion. Both are
# seeded so that tree construction itself is deterministic; the validation
# split drawn inside fit() is not controlled from this cell.
system_1 = WeightedAverageEnsemble([
    DecisionTreeClassifier(criterion="entropy", random_state=212),
    DecisionTreeClassifier(criterion="gini", random_state=212),
])
system_1.fit(X_train, y_train)
preds = system_1.predict(X_test)
# accuracy_score takes (y_true, y_pred)
print(accuracy_score(y_test, preds))

0.9035087719298246


In [None]:

# NOTE(review): this cell originally built the ensemble from an empty expert
# list, which cannot produce predictions. The line-up below uses the model
# classes imported at the top of the notebook and the standardised features
# prepared earlier -- confirm it matches the intended experiment.
system_1 = WeightedAverageEnsemble([
    LogisticRegression(),
    DecisionTreeClassifier(random_state=212),
    AdaBoostClassifier(random_state=212),
])
system_1.fit(X_train_scaled, y_train)
preds = system_1.predict(X_test_scaled)
# accuracy_score takes (y_true, y_pred)
print(accuracy_score(y_test, preds))