In [None]:
from random import random
from statistics import mean, mode
from numpy.random import randint
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score

## Define dataset

In [None]:
# Imbalanced binary labels: 2 samples of class 0 and 8 samples of class 1.
class_0 = [0] * 2
class_1 = [1] * 8

# Full label vector: all class-0 labels followed by all class-1 labels.
y = class_0 + class_1

# Report each class's share of the dataset as a percentage.
print('Class 0: {:.1f}%'.format(len(class_0) / len(y) * 100))
print('Class 1: {:.1f}%'.format(len(class_1) / len(y) * 100))

## General Formula

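$ P(\hat{y} = y) = P(\hat{y} = 0) * P(y = 0) + P(\hat{y} = 1) * P(y = 1) $

This decomposition assumes the prediction $\hat{y}$ is independent of the true label $y$, so that $P(\hat{y} = c, y = c) = P(\hat{y} = c) * P(y = c)$ for each class $c$. That holds for every baseline below, because none of them looks at the sample it is predicting.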

## 1. Random-guess strategy

$$
\begin{aligned}
    P(\hat{y} = y) & = P(\hat{y} = 0) * P(y = 0) + P(\hat{y} = 1) * P(y = 1) \\
                   & = 0.5 * 0.2 + 0.5 * 0.8 \\
                   & = 0.1 + 0.4 \\
                   & = 0.5
\end{aligned}
$$

In [None]:
def random_guess():
    """Predict a label with a fair coin flip, ignoring the data entirely.

    Returns:
        int: 1 when a draw from ``random()`` falls below 0.5, otherwise 0.
    """
    return 1 if random() < 0.5 else 0

# One trial: draw an independent coin-flip prediction for each sample in y,
# then score the predictions against the true labels.
y_hat = [random_guess() for _ in range(len(y))]
acc = accuracy_score(y, y_hat)

print('y =', y)
print('y_hat =', y_hat)
print('acc =', acc)

In [None]:
# Repeat the random-guess trial 1000 times and average the accuracies.
accs = []

for _ in range(1000):
    # Fresh coin-flip predictions for every sample on each trial.
    y_hat = [random_guess() for _ in range(len(y))]
    acc = accuracy_score(y, y_hat)
    accs += [acc]

print('mean acc =', mean(accs))

In [None]:
# scikit-learn counterpart of the random-guess baseline, using the
# DummyClassifier 'uniform' strategy.
classifier = DummyClassifier(strategy='uniform')

# Placeholder features: one single-feature row per label. The values are
# arbitrary, but the estimator API documents X as (n_samples, n_features),
# so pass a 2-D structure rather than a flat list.
dummy_data = [[-1] for _ in y]
classifier.fit(dummy_data, y)

# Repeat the prediction 1000 times and average the accuracies.
accs = []

for _ in range(1000):
    y_hat = classifier.predict(dummy_data)
    acc = accuracy_score(y, y_hat)

    accs.append(acc)

print('mean acc =', mean(accs))

## 2. Random-select strategy

$$
\begin{aligned}
    P(\hat{y} = y) & = P(\hat{y} = 0) * P(y = 0) + P(\hat{y} = 1) * P(y = 1) \\
                   & = 0.2 * 0.2 + 0.8 * 0.8 \\
                   & = 0.04 + 0.64 \\
                   & = 0.68
\end{aligned}
$$

In [None]:
def random_select(labels=None):
    """Predict a label by drawing one uniformly at random from observed labels.

    Every element of ``labels`` is equally likely to be drawn, so each class
    is predicted with probability equal to its frequency in ``labels``.

    Args:
        labels: Sequence of labels to sample from. Defaults to the
            notebook-level ``y`` so existing ``random_select()`` calls keep
            working unchanged.

    Returns:
        The sampled element of ``labels``.
    """
    if labels is None:
        labels = y
    # randint(n) draws an index from [0, n), i.e. a valid position in labels.
    return labels[randint(len(labels))]

# One trial: for each sample in y, predict by drawing a random label with
# random_select(), then score the predictions against the true labels.
y_hat = [random_select() for _ in range(len(y))]
acc = accuracy_score(y, y_hat)

print('y =', y)
print('y_hat =', y_hat)
print('acc =', acc)

In [None]:
# Repeat the random-select trial 1000 times and average the accuracies.
accs = []

for _ in range(1000):
    # Fresh label draws for every sample on each trial.
    y_hat = [random_select() for _ in range(len(y))]
    acc = accuracy_score(y, y_hat)
    accs += [acc]

print('mean acc =', mean(accs))

In [None]:
# scikit-learn counterpart of the random-select baseline, using the
# DummyClassifier 'stratified' strategy.
classifier = DummyClassifier(strategy='stratified')

# Placeholder features: one single-feature row per label. The values are
# arbitrary, but the estimator API documents X as (n_samples, n_features),
# so pass a 2-D structure rather than a flat list.
dummy_data = [[-1] for _ in y]
classifier.fit(dummy_data, y)

# Repeat the prediction 1000 times and average the accuracies.
accs = []

for _ in range(1000):
    y_hat = classifier.predict(dummy_data)
    acc = accuracy_score(y, y_hat)

    accs.append(acc)

print('mean acc =', mean(accs))

## 3. Majority-select strategy

$$
\begin{aligned}
    P(\hat{y} = y) & = P(\hat{y} = 0) * P(y = 0) + P(\hat{y} = 1) * P(y = 1) \\
                   & = 0 * 0.2 + 1 * 0.8 \\
                   & = 0 + 0.8 \\
                   & = 0.8
\end{aligned}
$$

In [None]:
def majority_select(y):
    """Predict the most common label in ``y``.

    Args:
        y: Labels to take the mode of.

    Returns:
        The value ``statistics.mode`` reports for ``y``.
    """
    most_common = mode(y)
    return most_common

# The majority label is the same for every sample, so compute it once and
# repeat it len(y) times instead of re-running majority_select(y) per element.
y_hat = [majority_select(y)] * len(y)

print('y =', y)
print('y_hat =', y_hat)

acc = accuracy_score(y, y_hat)

print('acc =', acc)

In [None]:
# scikit-learn counterpart of the majority-select baseline, using the
# DummyClassifier 'most_frequent' strategy.
classifier = DummyClassifier(strategy='most_frequent')

# Placeholder features: one single-feature row per label. The values are
# arbitrary, but the estimator API documents X as (n_samples, n_features),
# so pass a 2-D structure rather than a flat list.
dummy_data = [[-1] for _ in y]
classifier.fit(dummy_data, y)

# Unlike the 'uniform' and 'stratified' cells, this one scores a single
# prediction rather than averaging 1000 of them.
y_hat = classifier.predict(dummy_data)
acc = accuracy_score(y, y_hat)

print('acc =', acc)