In [2]:
#as mentioned in the slides (slide09), the whole goal of a simulation is to compute the mean of the distro 
#we are gonna do the same here 

import numpy as np
import pandas as pd
from scipy.stats import uniform
import math
from datetime import datetime
import time 
import seaborn as sns
from scipy.stats import t
import matplotlib.pyplot as plt

#Input parameters
bins = [100, 200, 400, 600, 800, 1000, 2000, 4000, 6000, 8000, 10000, 20000, 40000, 60000, 80000, 100000, 200000, 400000, 600000, 800000, 1000000]
runs = 10
seed = 1997
#seed and input ->[] ->outputs        max_occurency 
#s1->[rvn1]->m1
#s2->[rvm2]->m2 so on... and s3 and m3 with the same shit 
#E[max_ouccur]= m1+m2+m3/3
#in here, since we have 3 different experiments, the degree of freedom is equal to 2 when we want to
#compute the I (confidence interval)

def RandomDropping(n, runs):
    results = np.zeros(runs, )
    for r in range(runs):
        bins = np.zeros(n,)
        for b in range(n):
            index = np.random.randint(low = 0, high = n, size = 1)
            bins[index] = bins[index] + 1
        results[r] = max(bins)

    avg = np.average(results)
    std = np.std(results, ddof=1)
    if std!=0:
        confInt = t.interval(0.95, runs-1, avg, std/math.sqrt(runs))

    else:
        confInt = (avg,avg)

    return avg, confInt

In [6]:
def RandomLoadBalancing(n, d, runs):
    results = np.zeros(runs, )
    for r in range(runs):
        bins = np.zeros(n,)
        for b in range(n):
            indexes = np.random.randint(low = 0, high = n, size = d)
            minB = bins[indexes[0]]
            minIdx = indexes[0]
            for i in range(d):
                if minB > bins[indexes[i]]:
                minIdx = indexes[i]
                minB = bins[indexes[i]]
            bins[minIdx] = bins[minIdx] + 1
        results[r] = max(bins)
  
  
    avg = np.average(results)
    std = np.std(results, ddof=1)
    if std!=0:
        confInt = t.interval(0.95, runs-1, avg, std/math.sqrt(runs))
    else:
        confInt = (avg,avg)
 
    return avg, confInt

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 24)

In [None]:
#Simulations for Random Dropping
avgs = []
confInts = []
np.random.seed(seed) 
for b in bins:
    avg, confInt= RandomDropping(b, runs)
    avgs.append(avg)
    confInts.append(confInt)
#Output parameters
avgs = np.array(avgs)
confInts = np.array(confInts)

#Simulations for Random Load Balancing with d = 2
avgs2 = []
confInts2 = []
np.random.seed(seed)
for b in bins:
  avg, confInt = RandomLoadBalancing(b, 2, runs)
  avgs2.append(avg)
  confInts2.append(confInt)
#Output parameters
avgs2 = np.array(avgs2)
confInts2 = np.array(confInts2)

#Simulations for Random Load Balancing with d = 4
avgs4 = []
confInts4 = []
np.random.seed(seed)
for b in bins:
  avg, confInt = RandomLoadBalancing(b, 4, runs)
  avgs4.append(avg)
  confInts4.append(confInt)
#Output parameters
avgs4 = np.array(avgs4)
confInts4 = np.array(confInts4)

#Plot Fig.1
fig, ax = plt.subplots(figsize=(20,10))

ax.plot(np.log10(bins), avgs, color = 'b', marker = 'o', label = 'Random Dropping')
ax.plot(np.log10(bins), avgs2, color = 'orange', marker = 'x', label = 'Random Load Balancing d = 2')
ax.plot(np.log10(bins), avgs4, color = 'g', marker = '^', label = 'Random Load Balancing d = 4')

ax.fill_between(np.log10(bins), confInts[:,0], confInts[:,1],alpha = 0.2, label = 'Random Dropping 95% Confidence Interval')
ax.fill_between(np.log10(bins), confInts2[:,0], confInts2[:,1],alpha = 0.2, label = 'Random Load Balancing d = 2 95% Confidence Interval')
ax.fill_between(np.log10(bins), confInts4[:,0], confInts4[:,1],alpha = 0.2, label = 'Random Load Balancing d = 4 95% Confidence Interval')
ax.legend()
plt.xlabel('log10(Bins)')
plt.title('Simulation results for randomized dropping policies')
plt.ylabel('Max bin occupancy')

def tRandomDropping(x):
  return 3*(np.log(x))/(np.log(np.log(x)))

#Plot Fig.2
fig, ax = plt.subplots(figsize=(20,10))
ax.plot(np.log10(bins), avgs, color = 'b', marker = 'o', label = 'Random Dropping')
ax.fill_between(np.log10(bins), confInts[:,0], confInts[:,1],alpha = 0.2, label = 'Random Dropping 95% Confidence Interval')
ax.plot(np.log10(bins), tRandomDropping(bins), color = 'r', label = 'Theoretical upper bound')
plt.xlabel('log10(Bins)')
plt.title('Simulation results vs theoretical results')
plt.ylabel('Max bin occupancy')
plt.legend()

#Random dropping witha Triangular distribution
def RandomDroppingWithT(n, runs):

  results = np.zeros(runs, )
  for r in range(runs):
    bins = np.zeros(n,)
    for b in range(n):
      index = int(np.random.triangular(0, int(n/2), n-1, size = 1))
      bins[index] = bins[index] + 1
    results[r] = max(bins)

  avg = np.average(results)
  std = np.std(results, ddof=1)
  if std!=0:
    confInt = t.interval(0.95, runs-1, avg, std/math.sqrt(runs))

  else:
    confInt = (avg,avg)

  return avg, confInt

#Simulations for Random dropping with Triangular distribution
avgsTr = []
confIntsTr = []
np.random.seed(seed)
for b in bins:
  avg, confInt = RandomDroppingWithT(b, runs)
  avgsTr.append(avg)
  confIntsTr.append(confInt)
#Output parameters
avgsTr = np.array(avgsTr)
confIntsTr = np.array(confIntsTr)

#Plot Fig.3
fig, ax = plt.subplots(figsize=(20,10))
ax.plot(np.log10(bins), avgsTr, color = 'b', marker = 'o', label = 'Random Dropping with a Triangular distribution')
ax.fill_between(np.log10(bins), confIntsTr[:,0], confIntsTr[:,1],alpha = 0.2, label = 'Random Dropping with a Triangular distribution 95% Confidence Interval')
ax.plot(np.log10(bins), avgs, color = 'r', marker = 'x', label = 'Random Dropping')
ax.fill_between(np.log10(bins), confInts[:,0], confInts[:,1],alpha = 0.2, label = 'Random Dropping 95% Confidence Interval', color = 'r')
ax.plot(np.log10(bins), tRandomDropping(bins), color = 'g', label = 'Theoretical upper bound')
ax.legend()
plt.xlabel('log10(Bins)')
plt.title('Triangular distribution')
plt.ylabel('Max bin occupancy')
