In [1]:
import numpy as np
import pandas as pd
import altair as alt
import concurrent.futures
from functools import wraps

# Hoeffding Inequality

## 1

<!--  -->

#### Todo
- create coin class (needs to be a fair coin)
    - ~~`method` flip (head/tail are binary use a bool)~~
    - ~~`method` frequency of heads (the fraction of heads obtained)~~
    - ~~`method` frequency of tails (the fraction of tails obtained)~~
- coin flipper class??
    - `method` run simulation
    - `method` pick min coin ()
    - ~~`method` to pick random coin(pick a random number from 1-1000) save that coin as the randomly chosen coin when flipping/running the experiment~~
    - ~~`property` c1 (c1 is the first coin flipped)~~
    - ~~`property` c_random~~
    - `property` c_min (c_min is the coin which had the minimum frequency of heads, pick the earlier one in case of a tie)
    - maybe combine the coins into an array of saved coins, where the index position maps to the saved coin type/reason
    - `property` probability percent???
- run whole experiment function???
    - returns a dictionary/array of the full distributions of v1, v_random, v_min


In [2]:
from enum import Enum
from typing import Literal
from random import randrange

class CoinSides(Enum):
    """The two faces of a coin, keyed to the integer values a flip result is compared against."""
    # A flip result equal to 0 is tallied as heads.
    HEAD = 0
    # A flip result equal to 1 is tallied as tails.
    Tail = 1


class FairCoin:
    """A fair coin that records the outcome of every flip.

    Flips are drawn from NumPy's global random state with a success
    probability of 0.5. A draw equal to ``CoinSides.HEAD.value`` is tallied as
    heads and a draw equal to ``CoinSides.Tail.value`` is tallied as tails.
    """

    def __init__(self) -> None:
        self._numHeads = 0
        self._numTails = 0
        # Probability passed to the binomial draw; 0.5 makes both sides equally likely.
        self._head_vs_tails_probability = 0.5

    def _processFlip(self, coinSide: Literal[0, 1]) -> None:
        """Increment the tally matching ``coinSide``; any other value is ignored."""
        if coinSide == CoinSides.HEAD.value:
            self._numHeads += 1
        elif coinSide == CoinSides.Tail.value:
            self._numTails += 1

    def _fractionOfFlips(self, count: int) -> float:
        """Return ``count`` as a fraction of all recorded flips (0.0 if there are none)."""
        totalCoinFlips = self._numHeads + self._numTails
        if totalCoinFlips == 0:
            # No flips recorded yet: report 0.0 instead of raising ZeroDivisionError.
            return 0.0
        return count / totalCoinFlips

    def flip(self) -> Literal[0, 1]:
        """Flip the coin once, record the outcome, and return it (0 or 1)."""
        numOfFlips = 1
        # A binomial draw with a single trial yields a scalar 0 or 1.
        results: Literal[0, 1] = np.random.binomial(numOfFlips, self._head_vs_tails_probability)
        self._processFlip(results)
        return results

    def headFrequency(self) -> float:
        """Return the fraction of recorded flips that were heads (0.0 before any flip)."""
        return self._fractionOfFlips(self._numHeads)

    def tailFrequency(self) -> float:
        """Return the fraction of recorded flips that were tails (0.0 before any flip)."""
        return self._fractionOfFlips(self._numTails)

class CoinFlipper:
    """Flips a batch of fair coins and remembers three coins of interest.

    After ``flipCoins`` the coins are available from ``getCoins`` under:

    - ``"firstCoin"``: the first coin flipped,
    - ``"randomCoin"``: a coin chosen uniformly at random from the batch,
    - ``"minCoin"``: the coin with the lowest frequency of heads (the earliest
      such coin when several tie).
    """

    def __init__(self) -> None:
        self._coins: dict[Literal["firstCoin", "randomCoin", "minCoin"], FairCoin | None] = self._defaultCoins()

    def getCoins(self):
        """Return the dictionary of saved coins (values are ``None`` until ``flipCoins`` runs)."""
        return self._coins

    def _defaultCoins(self) -> dict[Literal["firstCoin", "randomCoin", "minCoin"], FairCoin | None]:
        """Return a fresh dictionary with every saved-coin slot empty."""
        return {
            "firstCoin": None,
            "randomCoin": None,
            "minCoin": None,
        }

    def _randomCoinToPick(self, numOfCoins: int) -> int:
        '''
        numOfCoins: int
                number of coins to choose from. range is [0, numOfCoins)

        ### returns:
        int in [0, numOfCoins - 1]
                a random index that maps to the coin that should be used for self._coins["randomCoin"]

                Note a return value of 0 means the first coin and (numOfCoins - 1) means the last coin
        '''
        return randrange(0, numOfCoins)

    def flipCoins(self, sideFrequencyToTrack: CoinSides, numOfCoins: int, flipsPerCoin: int):
        """Create ``numOfCoins`` coins, flip each ``flipsPerCoin`` times, and save the coins of interest.

        sideFrequencyToTrack: accepted for interface compatibility; it is not
            consulted, and the minimum coin is always chosen by heads frequency.
        numOfCoins: how many coins to create; ``randrange`` raises ``ValueError``
            when this is not positive.
        flipsPerCoin: how many times each coin is flipped.
        """
        randomCoinNum = self._randomCoinToPick(numOfCoins)
        # Clear the slots in place so a second call on the same instance does not
        # keep the previous run's random coin or compare against its minimum coin.
        self._coins.update(self._defaultCoins())

        minCoin = None
        for coinNum in range(numOfCoins):
            coin = FairCoin()
            if coinNum == 0:
                self._coins["firstCoin"] = coin
            if coinNum == randomCoinNum:
                self._coins["randomCoin"] = coin

            for _ in range(flipsPerCoin):
                coin.flip()

            # Strict "<" keeps the earlier coin when two coins tie for the minimum.
            if minCoin is None or coin.headFrequency() < minCoin.headFrequency():
                minCoin = coin
                self._coins["minCoin"] = coin


In [3]:
def _coinFlipHelper(numOfCoins: int, flipsPerCoin: int, printI: int) -> CoinFlipper:
    """Run one experiment round and hand back the flipper that holds its saved coins.

    numOfCoins: number of coins flipped in this round.
    flipsPerCoin: number of flips given to each coin.
    printI: iteration number, used only for the progress message.
    """
    # Progress output, one pair of lines per round.
    print(f"iteration = {printI}")
    print("in helper")

    flipper = CoinFlipper()
    flipper.flipCoins(CoinSides.HEAD, numOfCoins, flipsPerCoin)
    return flipper

In [4]:
def _reseedNumpyInWorker() -> None:
    """Reseed NumPy's global random state inside a newly started worker process.

    Forked workers start with a copy of the parent's NumPy random state, so
    without this every worker would replay the same sequence of coin flips.
    Calling ``np.random.seed()`` with no argument draws fresh entropy from the OS.
    """
    np.random.seed()


def multiprocessorHoeffdingExperiment(iterations: int, numOfCoins: int, flipsPerCoin: int):
    """Run the coin experiment ``iterations`` times across a pool of worker processes.

    iterations: number of independent experiment rounds to run.
    numOfCoins: number of coins flipped in each round.
    flipsPerCoin: number of flips given to each coin.

    Returns a dictionary with keys ``"firstV"``, ``"randomV"`` and ``"minV"``,
    each holding one heads frequency per round for the first, random and minimum
    coin respectively. Rounds are collected in completion order, not submission order.
    """
    coinHeadsFrequencies: dict[Literal["firstV", "randomV", "minV"], list[float]] = {
        "firstV": [],
        "randomV": [],
        "minV": [],
    }

    with concurrent.futures.ProcessPoolExecutor(initializer=_reseedNumpyInWorker) as executor:
        print("in multi ProcessPoolExecutor")
        futures = [executor.submit(_coinFlipHelper, numOfCoins, flipsPerCoin, i) for i in range(iterations)]

        for future in concurrent.futures.as_completed(futures):
            # Fetch the round's coins once instead of calling result() per key.
            coins = future.result().getCoins()
            coinHeadsFrequencies["firstV"].append(coins["firstCoin"].headFrequency())
            coinHeadsFrequencies["randomV"].append(coins["randomCoin"].headFrequency())
            coinHeadsFrequencies["minV"].append(coins["minCoin"].headFrequency())
    return coinHeadsFrequencies

In [5]:
# def runHoeffdingInequalityExperiment(iterations: int, numOfCoins: int, flipsPerCoin: int):
#     coinHeadsFrequencies: dict[Literal["firstV", "randomV", "minV"], list[float]] = {
#         "firstV": [],
#         "randomV": [],
#         "minV": [],
#     }

#     for i in range(iterations):
#         print(f"iteration = {i}")
#         coinFlipper = CoinFlipper()
#         coinFlipper.flipCoins(CoinSides.HEAD, numOfCoins, flipsPerCoin)
#         coinHeadsFrequencies["firstV"].append(coinFlipper.getCoins()["firstCoin"].headFrequency())
#         coinHeadsFrequencies["randomV"].append(coinFlipper.getCoins()["randomCoin"].headFrequency())
#         coinHeadsFrequencies["minV"].append(coinFlipper.getCoins()["minCoin"].headFrequency())
#     return coinHeadsFrequencies


#### 1: my HW Answer = b (I was correct)

In [6]:
# vFullDistributions = runHoeffdingInequalityExperiment(100000, 1000, 10)
# vFullDistributions = multiprocessorHoeffdingExperiment(20000, 1000, 10)

# v1 = np.array(vFullDistributions["firstV"])
# vRand = np.array(vFullDistributions["randomV"])
# vMin = np.array(vFullDistributions["minV"])

# v1Avg = np.mean(v1)
# v1Avg = 0.498844
# vRandAvg = np.mean(vRand)
# vRandAvg = 0.50110599
# vMinAvg = np.mean(vMin)
# vMinAvg = 0.03776900 
# Closest to b) 0.01


Error: A process in the process pool was terminated abruptly while the future was running or pending.

The Python multiprocessing library did not work when this code was run from inside the notebook. To get it working, I had to run the code as a script guarded by `if __name__ == "__main__":`.

When run in the notebook without multiprocessing, this code took about 40 minutes; with multiprocessing it took only a few minutes.

#### 2: my HW Answer = d (I was correct)

*Mu was set to 50%, so each coin is fair. After running the experiment over many iterations, the distribution shows that v (the fraction of heads) approximates the overall probability of getting heads.*

n=1000
k= ??? 0.5
v = k/n ??? = 0.5/1000 = 0.0005
mu = 0.5

|v - mu| = |0.0005 - 0.5| = 0.4995

$$2e^{-2 \cdot 1000 \cdot \epsilon^2}$$

In [7]:
# df0 = pd.read_csv("./HW2Data/v1.csv", header=None, names=["v1"])
# df1 = pd.read_csv("./HW2Data/vRand.csv", header=None, names=["vRand"])
# df2 = pd.read_csv("./HW2Data/vMin.csv", header=None, names=["vMin"])

# finalDF = pd.concat([df0, df1, df2], axis=1, names=["v1", "vRand", "vMin"])
# finalDF.to_csv("./HW2Data/HW2_Vs.csv", index=False)

In [8]:
# Load the saved experiment results that the chart cell below plots.
# NOTE(review): presumably this file holds the v1 / vRand / vMin columns written by the
# commented-out export cell above — confirm against the CSV.
source = pd.read_csv("./HW2Data/HW2_Vs.csv")

In [29]:
# Switch Altair to its "json" data transformer before charting `source`.
# NOTE(review): presumably needed because the data exceeds Altair's default row limit
# for inline data — confirm.
alt.data_transformers.enable("json")

# base = alt.Chart(source)

# bar = base.mark_rect().encode(
#     x=alt.X("v", bin=True),
#     y="count()"
# )

# rule = base.mark_rule(color="red").encode(
#     x="mean(v):Q",
#     size=alt.value(4)
# )

# bar + rule

# Build one layered chart per column of `source` and lay the copies out side by side.
base = alt.Chart(source)

# Binned counts of the repeated column.
chart = base.mark_rect().encode(
    x=alt.X(alt.repeat("column"), bin=True),
    y="count()",
)

# Thick red rule drawn at the mean of the same repeated column.
meanLine = base.mark_rule(color="red").encode(
    x=alt.X(alt.repeat("column"), aggregate="mean", type='quantitative'),
    size=alt.value(4),
)

# The layered, repeated chart is the last expression so the cell displays it.
alt.layer(chart, meanLine).repeat(column=source.columns.tolist())