In [None]:
from os.path import join
import numpy as np
import pandas as pd
from sklearn.neighbors import KernelDensity
from sklearn.preprocessing import MinMaxScaler
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
def set_size(width, fraction=1):
    """Compute figure dimensions so the figure needs no rescaling in LaTeX.

    The width is scaled by ``fraction``, converted from points to inches
    (1 pt = 1/72.27 in), and the height is derived from the width using the
    golden ratio.

    Parameters
    ----------
    width: float
            Width in pts
    fraction: float
            Fraction of the width which the figure should occupy

    Returns
    -------
    fig_dim: tuple
            ``(width, height)`` of the figure in inches

    From: https://jwalton.info/Embed-Publication-Matplotlib-Latex/
    """
    # Conversion factor from points to inches.
    pt_to_inch = 1 / 72.27
    # Height/width ratio (golden ratio) used for an aesthetic figure shape.
    height_over_width = (5**.5 - 1) / 2

    # Scale the requested width first, then convert it to inches.
    scaled_width_pt = width * fraction
    width_in = scaled_width_pt * pt_to_inch
    height_in = width_in * height_over_width

    return (width_in, height_in)

In [None]:
# Load the synthetic training set and split column 'y' from the remaining columns.
data = pd.read_csv(join('..', 'data', 'synthetic', 'normal_train.csv'))
feature_columns = [col for col in data.columns if col != 'y']
X = data[feature_columns].to_numpy()
y = data['y'].to_numpy()

# Rule-of-thumb bandwidth: 1.06 * std(y) * n^(-1/5).
n_samples = len(y)
silverman_bandwidth = 1.06 * np.std(y) * np.power(n_samples, -1 / 5)
best_bandwidth = silverman_bandwidth

# Fit the kernel density estimate on y, passed as a single-column 2-D array.
kernel = KernelDensity(bandwidth=best_bandwidth)
kernel.fit(y.reshape(-1, 1))

# Evaluate the density on the sorted values so it can be drawn as a curve.
Y = np.sort(y)
# score_samples returns the log-likelihood of the samples; exponentiate below
# to obtain the density itself.
log_dens = kernel.score_samples(Y.reshape(-1, 1))

# Min-max scale the density values (MinMaxScaler works on 2-D input, hence the
# reshape) and flatten back to 1-D.
dens = MinMaxScaler().fit_transform(np.exp(log_dens).reshape(-1, 1)).flatten()

In [None]:
# Plot constants.
max_y = 4.8              # upper limit of the x-axis
text_width = 347.12354   # width handed to set_size

# Matplotlib settings applied globally before the figure is created.
nice_fonts = {
    # Use LaTeX to write all text
    "text.usetex": True,
    "font.family": "serif",
    # Use 10pt font in plots, to match 10pt font in document
    "axes.labelsize": 10,
    "font.size": 10,
    # Make the legend/label fonts a little smaller
    "legend.fontsize": 8,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
}
mpl.rcParams.update(nice_fonts)

# Figure sized to 75% of text_width.
fig, ax = plt.subplots(figsize=set_size(text_width, fraction=0.75))
ax.set_xlabel('$y$')
ax.set_xlim(0, max_y)

# Shaded area under the scaled density curve.
ax.fill_between(Y, dens, color='black', alpha=0.2, linestyle='-', label='$p\'(y)$')

def weight(a, dens, eps=1e-6):
    """Turn density values into weights with mean 1.

    Computes ``1 - a * dens`` element-wise, clips the result from below at
    ``eps`` and divides by the mean of the clipped values, so the returned
    weights average to 1.

    Parameters
    ----------
    a: float
            Scaling factor applied to the density; ``a = 0`` yields weights
            that are all 1.
    dens: array_like
            Density values (array or plain sequence).
    eps: float
            Lower bound applied before normalisation. Defaults to ``1e-6``,
            the value that was previously hard-coded.

    Returns
    -------
    w: numpy.ndarray
            Weights with the same shape as ``dens`` and mean 1.
    """
    # Accept plain Python sequences as well as arrays.
    dens = np.asarray(dens)
    # Named `raw` (not `weight`) so the local does not shadow this function.
    raw = -a*dens + 1
    clipped = np.maximum(eps, raw)
    return clipped/np.mean(clipped)

# Draw one weight curve per value of `a`. The line style is picked by index
# and wraps around if there are more values than styles.
linestyles = ['-', '--', '-.', ':']
for i, a in enumerate([0.0, 0.5, 1.0, 1.1]):
    w = weight(a, dens)
    ax.plot(
        Y, w,
        markersize=1,
        label='$f_w(%.1f,y)$' % a,
        # A float written as a string is read by matplotlib as a gray level.
        color=str(a/2),
        linestyle=linestyles[i % len(linestyles)],
    )

# Use the explicit Axes/Figure objects instead of the implicit pyplot
# "current figure", so the legend and the saved file always belong to `fig`.
ax.legend()
fig.savefig(join('..', 'plots', 'denseloss.pdf'), format='pdf', bbox_inches='tight')