In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

In [26]:
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy.optimize import curve_fit

In [3]:
# Load the analysis results into the working DataFrame used by every later cell.
# NOTE(review): the original trailing note read "testing.csv" - presumably an
# alternative input file; confirm before relying on it.
df = pd.read_csv("MNIST_analysis.csv")

In [4]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,Points,Value,Analysis,Category,Slice,Algorithm,Reference
0,360,0.467772,KL-Divergence,BATCH,0.2,S+TSNE,"B:200, D:200, Iter:700"
1,560,0.781004,KL-Divergence,BATCH,0.2,S+TSNE,"B:200, D:200, Iter:700"
2,760,0.969692,KL-Divergence,BATCH,0.2,S+TSNE,"B:200, D:200, Iter:700"
3,960,1.119137,KL-Divergence,BATCH,0.2,S+TSNE,"B:200, D:200, Iter:700"
4,1160,1.262866,KL-Divergence,BATCH,0.2,S+TSNE,"B:200, D:200, Iter:700"


In [5]:
# Keep only the rows whose "Analysis" is KL-Divergence and renumber them from 0.
is_kl_row = df["Analysis"] == "KL-Divergence"
df_kl = df.loc[is_kl_row].reset_index(drop=True)

In [6]:
# Split the KL-divergence rows into one DataFrame per distinct "Reference"
# value, stored under the key "Reference_<value>".
groups = df_kl["Reference"].unique()
new_dfs = {}
for group in groups:
    name = f"Reference_{group}"
    reference_mask = df_kl["Reference"] == group
    new_dfs[name] = df_kl[reference_mask]

In [None]:
def model_func(x, a, k, b):
    """Exponential model with offset: ``a * exp(-k * x) + b``.

    Args:
        x: Scalar or NumPy array of input positions.
        a: Amplitude multiplying the exponential term.
        k: Rate inside the exponent (applied as ``-k * x``).
        b: Constant offset added to the result.

    Returns:
        The model value(s), with the same shape as ``x``.
    """
    exponential_term = np.exp(-k * x)
    return a * exponential_term + b

def model_func2(x, a, b):
    """Logarithmic model with offset: ``a * log10(x + 1) + b``.

    The ``+ 1`` shift keeps the logarithm finite at ``x == 0``.

    Args:
        x: Scalar or NumPy array of input positions.
        a: Scale applied to the logarithm.
        b: Constant offset added to the result.

    Returns:
        The model value(s), with the same shape as ``x``.
    """
    shifted = x + 1
    return a * np.log10(shifted) + b

# A series is left out of the figures when the mean KL value changes by more
# than this amount between two consecutive "Points" levels.
MAX_ABS_RATE_OF_CHANGE = 1

# For every slice value: compute, per reference, the step-to-step change of the
# mean KL value, fit a straight line to it, and save one facet grid per slice.
for slice_data in df["Slice"].unique():
    # Rows with a missing slice value do not belong to any figure.
    if np.isnan(slice_data):
        continue

    rochanges = []  # unpadded rate-of-change array for each plotted reference
    names = []      # new_dfs key for each entry of rochanges (same order)
    models = []     # fitted LinearRegression for each entry of rochanges
    for name, current in new_dfs.items():
        in_slice = current.loc[current["Slice"] == slice_data]
        # Mean KL value per "Points" level; groupby sorts the levels ascending.
        kl_series_mean = in_slice.groupby("Points")["Value"].mean()
        # First difference k[n+1] - k[n] between consecutive levels.
        rate_of_change = np.diff(kl_series_mean.to_numpy())

        # A reference with fewer than two levels in this slice has no
        # differences; fitting a regression on zero samples would raise.
        if rate_of_change.size == 0:
            continue
        # Skip series containing a jump larger than the allowed threshold.
        if np.any(np.abs(rate_of_change) > MAX_ABS_RATE_OF_CHANGE):
            continue

        # Linear fit of the rate of change against the step index.
        # (Alternative kept for reference, a logarithmic fit:
        #  opt, pcov = curve_fit(model_func2, steps.ravel(), rate_of_change))
        steps = np.arange(rate_of_change.size).reshape(-1, 1)
        regr = LinearRegression()
        regr.fit(steps, rate_of_change)

        rochanges.append(rate_of_change)
        names.append(name)
        models.append(regr)

    # Nothing survived the filters for this slice: np.stack would raise on an
    # empty list, so report it and move on to the next slice.
    if not rochanges:
        print("Slice " + str(slice_data) + ": no series to plot, skipping")
        continue

    # redpoints[i] is the true length of series i; shorter series are
    # zero-padded on the right so that all rows fit into one rectangular frame.
    redpoints = [len(roc) for roc in rochanges]
    max_size = max(redpoints)
    padded = [
        np.pad(roc, (0, max_size - len(roc)), 'constant', constant_values=0)
        for roc in rochanges
    ]

    print(redpoints)
    print("MAX Actions: ", max_size)

    # Wide frame (one row per reference, one column per step) -> long format.
    df1 = pd.DataFrame(np.stack(padded), columns=list(range(max_size)))
    df1["Reference_Name"] = names
    df1_melted = pd.melt(
        df1,
        id_vars=["Reference_Name"],
        value_vars=df1.columns[:-1],
        var_name='Action',
        value_name="KLD Rate of Change",
    )

    # col_order pins the facet order to `names`, so the i-th axis below is
    # guaranteed to belong to names[i] / redpoints[i] / models[i].
    g = sns.FacetGrid(
        df1_melted,
        col="Reference_Name",
        col_order=names,
        col_wrap=5,
        sharey=False,
    )
    g.map_dataframe(sns.lineplot, x="Action", y="KLD Rate of Change", marker="o", dashes=False)
    g.set_titles('{col_name}')
    g.fig.subplots_adjust(top=0.9)
    g.fig.suptitle('Slice ' + str(slice_data) + " - Linear Fit")
    g.tight_layout()

    for ax, true_length, regr in zip(g.axes.flatten(), redpoints, models):
        # Red dashed line marks where the real data ends and padding begins.
        if true_length != max_size:
            ax.axvline(true_length, ls='--', c='red')

        # Overlay the fitted line over the real (unpadded) steps only.
        fit_x = np.arange(true_length)
        ax.plot(fit_x, regr.predict(fit_x.reshape(-1, 1)), "k.--")

    g.savefig('RateChangeSlice_LinearFit' + str(slice_data) + ".png", format="png")
    # Close this grid's figure explicitly so figures do not pile up in memory.
    plt.close(g.fig)