# Imports

In [None]:
import random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import gridspec

# Function definitions

In [None]:
def identical(n):
    """Identity scaling: hand back ``n`` itself, unmodified.

    Serves as the "divide by n" candidate when passed to ``plotStatistic``.
    """
    unchanged = n
    return unchanged

def square(n):
    """Return ``n`` raised to the second power (element-wise for arrays)."""
    return pow(n, 2)

def nln(n):
    """Return ``n * ln(n)`` using NumPy's natural logarithm.

    Works on scalars and element-wise on arrays.
    """
    log_n = np.log(n)
    return n * log_n

def lnDouble(n):
    """Return the iterated natural logarithm ``ln(ln(n))``.

    Works on scalars and element-wise on arrays.
    """
    inner_log = np.log(n)
    return np.log(inner_log)

def lnComplex(n):
    """Return ``ln(n) / ln(ln(n))``.

    The denominator is delegated to ``lnDouble``.
    """
    numerator = np.log(n)
    denominator = lnDouble(n)
    return numerator / denominator

def nlnDouble(n):
    """Return ``n * ln(ln(n))``, with the iterated log taken from ``lnDouble``."""
    iterated_log = lnDouble(n)
    return n * iterated_log



def plotStatistic(values: pd.Series, N: pd.Series, functions: list, function_names: list, statistic_name: str = "Statistic") -> None:
    """Plot a statistic against the number of urns, plus its scaled group means.

    The figure has two rows:

    * top row (full width): a scatter of every ``(N, values)`` observation in
      blue, with the mean of ``values`` for each distinct ``N`` overlaid in red;
    * bottom row: one panel per entry of ``functions`` showing
      ``mean(values | N = n) / f(n)`` for every distinct ``n``.

    Args:
        values: Observed statistic, one entry per experiment.
        N: Number of urns for each experiment. Matched to ``values`` by index
            label, so both should share an index (e.g. two columns of the
            same DataFrame).
        functions: Callables applied to the array of distinct ``N`` values;
            each result is used as the denominator of one bottom panel.
        function_names: Display names for ``functions``, in the same order.
        statistic_name: Label used in titles and on the y axis.

    Raises:
        ValueError: If ``values`` and ``N`` differ in shape, ``functions`` is
            empty, or ``functions`` and ``function_names`` differ in length.
        TypeError: If any element of ``functions`` is not callable.
    """
    # Explicit raises instead of asserts: asserts are stripped under `python -O`.
    if values.shape != N.shape:
        raise ValueError("Mismatched shapes!")
    if not all(callable(f) for f in functions):
        raise TypeError("All elements in functions must be callable!")
    if not functions:
        raise ValueError("functions must contain at least one callable!")
    if len(function_names) != len(functions):
        raise ValueError("functions and function_names must have the same length!")

    # Group once and reuse the means for both rows. sort=False keeps the
    # distinct N values in order of first appearance.
    mean_by_n = values.groupby(N, sort=False).mean()
    n_unique = mean_by_n.index.to_numpy()
    mean_vals = mean_by_n.to_numpy(dtype=float)

    nf = len(functions)
    fig = plt.figure(figsize=(15, 10))
    fig.subplots_adjust(hspace=0.2, wspace=0.15)

    # Top row: raw observations with the per-n means highlighted.
    top_ax = plt.subplot2grid((2, nf), (0, 0), colspan=nf, fig=fig)
    top_ax.set_title(f"{statistic_name} values distribution")
    top_ax.set_xlabel("n - number of urns")
    top_ax.set_ylabel(f"{statistic_name} value")
    top_ax.grid()
    top_ax.scatter(N, values, color='blue', s=5)
    top_ax.scatter(n_unique, mean_vals, color="red", s=20)

    # Bottom row: mean statistic divided by each candidate scaling function.
    for col, (scale, label) in enumerate(zip(functions, function_names)):
        ax = plt.subplot2grid((2, nf), (1, col), colspan=1, fig=fig)
        ax.set_title(f"mean({statistic_name}) / {label}")
        ax.set_xlabel("n - number of urns")
        ax.grid()

        denominators = np.asarray(scale(n_unique), dtype=float)
        # A scaling function can be 0 at some n (e.g. ln(1)); let the ratio
        # become inf/nan quietly and exclude it from the axis limits below.
        with np.errstate(divide="ignore", invalid="ignore"):
            ratios = mean_vals / denominators
        ax.scatter(n_unique, ratios, color="blue", s=10)

        finite = ratios[np.isfinite(ratios)]
        if finite.size:
            lowest, highest = finite.min(), finite.max()
            spread = highest - lowest
            # Pad by one full data range on each side. When all points
            # coincide the limits would be identical, so keep autoscaling.
            if spread > 0:
                ax.set_ylim(lowest - spread, highest + spread)

    plt.show()

# Reading data and plotting experiment results

In [None]:
# Load the experiment results table. The plotting cells below read its
# "n_urns", "Bn", "Un", "Ln", "Cn", "Dn" and "Dn-Cn" columns.
results = pd.read_csv("hw2_results.csv")
results  # bare expression; presumably here so the notebook displays the table

In [None]:
# Bn against n_urns, plus mean(Bn) divided by n and by sqrt(n).
plotStatistic(
    values=results["Bn"],
    N=results["n_urns"],
    functions=[identical, np.sqrt],
    function_names=["n", "sqrt(n)"],
    statistic_name="Bn",
)

In [None]:
# Un against n_urns, plus mean(Un) divided by n.
plotStatistic(
    values=results["Un"],
    N=results["n_urns"],
    functions=[identical],
    function_names=["n"],
    statistic_name="Un",
)

In [None]:
# Ln against n_urns, plus mean(Ln) divided by ln(n), ln(n)/ln(ln(n)) and ln(ln(n)).
plotStatistic(
    values=results["Ln"],
    N=results["n_urns"],
    functions=[np.log, lnComplex, lnDouble],
    function_names=["ln(n)", "(ln(n) / ln(ln(n)))", "ln(ln(n))"],
    statistic_name="Ln",
)

In [None]:
# Cn against n_urns, plus mean(Cn) divided by n, n*ln(n) and n^2.
plotStatistic(
    values=results["Cn"],
    N=results["n_urns"],
    functions=[identical, nln, square],
    function_names=["n", "(n * ln(n))", "n^2"],
    statistic_name="Cn",
)

In [None]:
# Dn against n_urns, plus mean(Dn) divided by n, n*ln(n) and n^2.
plotStatistic(
    values=results["Dn"],
    N=results["n_urns"],
    functions=[identical, nln, square],
    function_names=["n", "(n * ln(n))", "n^2"],
    statistic_name="Dn",
)

In [None]:
# Dn-Cn against n_urns, plus mean(Dn-Cn) divided by n, n*ln(n) and n*ln(ln(n)).
plotStatistic(
    values=results["Dn-Cn"],
    N=results["n_urns"],
    functions=[identical, nln, nlnDouble],
    function_names=["n", "(n * ln(n))", "(n * ln(ln(n)))"],
    statistic_name="Dn-Cn",
)