In [38]:
import os
import string

import numpy as np
import pandas as pd
from sklearn.utils import check_random_state

In [2]:
# Code from Paulo Rauber's example -- https://github.com/EduardoVernier/thesne/blob/master/examples/gaussians.py
def create_blobs(classes=10, dims=100, class_size=100, variance=0.1, steps=4,
                 advection_ratio=0.5, random_state=None):
    """Generate a sequence of Gaussian blobs that contract toward their means.

    Every class gets a one-hot mean vector placed on its own randomly chosen
    axis, and ``class_size`` points sampled from a Gaussian with covariance
    ``variance * I`` around that mean. Each subsequent step moves every point
    a fraction ``advection_ratio`` of the remaining distance to its class
    mean.

    Parameters
    ----------
    classes : int
        Number of blobs. Must satisfy ``1 <= classes <= dims`` because each
        class mean occupies a distinct axis.
    dims : int
        Dimensionality of the generated points.
    class_size : int
        Number of points sampled per class.
    variance : float
        Per-axis variance of each blob.
    steps : int
        Number of arrays to return, counting the initial sample. Values
        below 1 still yield the initial sample.
    advection_ratio : float
        Fraction of the distance to the class mean covered at each step.
    random_state : None, int or numpy.random.RandomState
        Passed to ``check_random_state`` to obtain the generator.

    Returns
    -------
    Xs : list of ndarray
        One array of shape ``(classes * class_size, dims)`` per step; rows
        are grouped by class, in class order.
    y : ndarray
        Class label (``0 .. classes - 1``) of every row.

    Raises
    ------
    ValueError
        If ``classes`` is smaller than 1 or larger than ``dims``.
    """
    if not 1 <= classes <= dims:
        raise ValueError(
            "classes must be between 1 and dims (%r), got %r" % (dims, classes))

    random_state = check_random_state(random_state)

    # Each class mean is a one-hot vector on a distinct, randomly drawn axis.
    axes = random_state.permutation(dims)[:classes]
    means = np.zeros((classes, dims))
    means[np.arange(classes), axes] = 1.0

    # The covariance is the same for every class, so build it only once.
    covariance = np.eye(dims) * variance
    # Classes are sampled in order, so results for a given seed are unchanged.
    X = np.concatenate([
        random_state.multivariate_normal(mean, covariance, class_size)
        for mean in means
    ])
    y = np.repeat(np.arange(classes), class_size)

    # Row-aligned copy of the means: targets[i] is the mean of row i's class.
    targets = np.repeat(means, class_size, axis=0)

    Xs = [X]
    for _ in range(steps - 1):
        previous = Xs[-1]
        # Builds a new array each step, so earlier steps are never modified.
        Xs.append(previous + advection_ratio * (targets - previous))

    return Xs, y

In [42]:
# Generation settings -- the same parameter values Rauber used
n_classes, class_size = 10, 200
n_dims, n_steps = 100, 10
seed = 0

# Xs holds one data matrix per time step; y holds the class label of each row
Xs, y = create_blobs(
    classes=n_classes,
    dims=n_dims,
    class_size=class_size,
    steps=n_steps,
    advection_ratio=0.1,
    random_state=seed,
)

In [34]:
# Create the output folder together with its parent if needed. Unlike a bare
# `mkdir`, this does not fail when `datasets/` is missing or when the folder
# already exists, so the cell can be re-run safely.
os.makedirs('datasets/gaussians', exist_ok=True)

In [59]:
# Row labels have the form <class letter>-<position within the class>,
# e.g. 'a-0' is the first observation of class a. Letters are taken from
# string.ascii_lowercase, so at most 26 classes can be labelled.
indices = ['{}-{}'.format(string.ascii_lowercase[label], row % class_size)
           for row, label in enumerate(y)]
# Feature column labels: f0, f1, ..., f<n_dims-1>
columns = ['f{}'.format(feature) for feature in range(n_dims)]

In [60]:
# Base name used as the prefix of every exported CSV file name
dataset_name = 'gaussians'

In [61]:
# Export every time step to its own file:
# datasets/gaussians/<dataset_name>-<t>.csv, with the row labels as the index
for t in range(n_steps):
    df = pd.DataFrame(data=Xs[t], columns=columns, index=indices)
    df.to_csv('datasets/gaussians/' + '-'.join([dataset_name, str(t)]) + '.csv')

In [62]:
# Show the DataFrame left over from the export loop (the last time step written)
df

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99
a0,0.052116,-0.003427,0.180828,0.079239,-0.044636,-0.083159,-0.043324,-0.090751,-0.082699,-0.016268,...,0.146688,0.230736,0.087502,0.279861,0.191623,0.074868,-0.107489,-0.198604,-0.071263,-0.065892
a1,-0.190633,-0.006673,-0.221905,-0.077327,-0.113712,0.182633,0.023951,-0.057778,0.222037,-0.281062,...,-0.115206,0.113887,-0.130206,-0.108596,0.235390,-0.056329,-0.133421,0.120567,-0.142018,-0.053482
a2,0.123646,0.087400,-0.089197,0.102852,0.151796,-0.218662,-0.097543,-0.171585,-0.022585,-0.170439,...,-0.034491,0.068653,0.092523,-0.129973,0.006674,-0.012776,-0.043077,0.072258,-0.181193,-0.174331
a3,-0.101252,0.096076,-0.023941,-0.154930,0.104986,-0.060521,0.001793,-0.196813,0.019846,0.065549,...,0.027001,-0.058205,0.029238,0.049697,0.036712,0.084677,-0.233490,0.034554,-0.280266,-0.067563
a4,0.147949,-0.141110,0.019012,-0.077429,-0.222578,-0.023034,0.010912,-0.196682,-0.086680,-0.263293,...,-0.037162,-0.166581,-0.261935,0.049120,0.086597,0.097105,-0.028597,0.105330,0.044874,0.088996
a5,0.002198,-0.280001,-0.181178,-0.165785,-0.003546,-0.210408,-0.141247,0.115289,-0.126008,0.008029,...,0.122431,0.004921,0.062295,0.028831,-0.104688,0.068212,-0.140254,-0.006427,0.067952,-0.021920
a6,-0.024933,0.048600,0.128036,-0.204894,0.069645,-0.045320,0.116341,-0.002364,0.017139,-0.164992,...,0.043615,0.070429,0.004266,0.000054,0.019750,-0.171279,-0.124602,-0.133462,0.091978,0.019862
a7,0.076436,0.104193,0.096811,0.013565,0.132058,-0.002698,-0.057097,0.066637,-0.135339,-0.088665,...,-0.005811,-0.091404,-0.169478,-0.035209,-0.229485,-0.123279,-0.025993,-0.149112,0.019539,-0.210072
a8,0.026772,0.161076,0.034573,-0.102921,-0.120059,0.033348,0.140822,-0.172539,0.011799,-0.005774,...,0.044368,-0.050659,0.188718,0.234534,0.058891,0.001508,0.148761,-0.070349,-0.260089,0.137797
a9,0.013729,0.063602,-0.025844,-0.119433,0.085662,0.124497,-0.024215,0.041898,-0.030312,0.195048,...,-0.185923,0.010701,0.115676,-0.129353,0.015641,-0.362841,0.258539,0.071140,-0.090046,-0.035022
