In [4]:
from user import User
from experiment import *
from hasher_implems import *



def init_experiment(hasher: Hasher):
    exp = Experiment()
    exp.hasher = hasher
    return exp

def check_repartition(experiment: Experiment, population: int):
    var1 = 0
    var2 = 0

    for i in range(1, population):
        user = User(i)
        var = experiment.assign(user)
        if var.name == "variation1":
            var1 += 1
        else:
            var2 += 1
    print(f"Users in variation 1: {var1} or {var1 / population * 100}%")
    print(f"Users in variation 2: {var2} or {var2 / population * 100}%")


In [7]:
expA = init_experiment(Md5Hasher())
check_repartition(expA, 10000)

Users in variation 1: 5051 or 50.51%
Users in variation 2: 4948 or 49.480000000000004%


In [6]:
expB = init_experiment(Sha256Hasher())
check_repartition(expB, 10000)

Users in variation 1: 4910 or 49.1%
Users in variation 2: 5089 or 50.89%


In [8]:
print("MD5 with bigger population:")
check_repartition(expA, 10000000)

print("SHA256 with bigger population:")
check_repartition(expB, 10000000)

MD5 with bigger population:
Users in variation 1: 4999401 or 49.99401%
Users in variation 2: 5000598 or 50.00598000000001%
SHA256 with bigger population:
Users in variation 1: 4999452 or 49.994519999999994%
Users in variation 2: 5000547 or 50.005469999999995%


In [9]:
def check_correlation(experimentA: Experiment, experimentB: Experiment, population: int):
    usersA1 = []

    varB1 = 0
    varB2 = 0

    for i in range(1, population):
        user = User(i)
        var = experimentA.assign(user)
        if var.name == "variation1":
            usersA1.append(user)

    for user in usersA1:
        var = experimentB.assign(user)
        if var.name == "variation1":
            varB1 += 1
        else:
            varB2 += 1

    print(f"Users in variation B1: {varB1} or {varB1 / len(usersA1) * 100}% of people in var A1, and {varB1 / population * 100}% ot total population")
    print(f"Users in variation B2: {varB2} or {varB2 / len(usersA1) * 100}% of people in var A1, and {varB2 / population * 100}% ot total population")


In [10]:
expA_md5 = init_experiment(Md5Hasher())
expB_md5 = init_experiment(Md5Hasher())
check_correlation(expA_md5, expB_md5, 1000000)

Users in variation B1: 249927 or 49.99269893404437% of people in var A1, and 24.9927% ot total population
Users in variation B2: 250000 or 50.00730106595563% of people in var A1, and 25.0% ot total population


In [12]:
expA_sha256 = init_experiment(Sha256Hasher())
expB_sha256 = init_experiment(Sha256Hasher())
check_correlation(expA_sha256, expB_sha256, 1000000)

Users in variation B1: 250083 or 50.08220769207662% of people in var A1, and 25.0083% ot total population
Users in variation B2: 249262 or 49.91779230792338% of people in var A1, and 24.9262% ot total population
