In [1]:
import os
import string

import numpy as np
import pandas as pd
from sklearn.utils import check_random_state

In [2]:
# Code from Paulo Rauber's example -- https://github.com/EduardoVernier/thesne/blob/master/examples/gaussians.py
def create_blobs(classes=10, dims=100, class_size=100, variance=0.1, steps=4,
                 advection_ratio=0.5, random_state=None):
    """Sample Gaussian blobs and a sequence of revisions that contract them.

    Every class is drawn from a Gaussian with covariance ``variance * I``
    whose mean is a unit vector along an axis chosen at random (one distinct
    axis per class). Each later revision is obtained from the previous one by
    moving every point a fraction ``advection_ratio`` of the way towards the
    mean of its class.

    Parameters
    ----------
    classes : int
        Number of blobs. Must not exceed ``dims`` because every class mean
        sits on its own axis.
    dims : int
        Dimensionality of the samples.
    class_size : int
        Number of samples drawn per class.
    variance : float
        Variance along every axis of each blob.
    steps : int
        Number of revisions to produce. The initial sample is always
        returned, so values below 1 still yield a single revision.
    advection_ratio : float
        Fraction of the remaining distance to the class mean covered at each
        step (0 leaves the points in place, 1 collapses them onto the mean).
    random_state : None, int or RandomState
        Forwarded to ``check_random_state`` to obtain the generator.

    Returns
    -------
    Xs : list of ndarray
        One ``(classes * class_size, dims)`` array per revision, samples
        ordered class by class.
    y : ndarray
        Class label (``0 .. classes - 1``) of each row.

    Raises
    ------
    ValueError
        If ``classes`` is larger than ``dims``.
    """
    if classes > dims:
        raise ValueError(
            'classes (%d) cannot exceed dims (%d): each class mean needs its '
            'own axis' % (classes, dims))

    random_state = check_random_state(random_state)

    # Pick one distinct axis per class and place the class mean on it.
    axes = random_state.permutation(dims)[:classes]
    means = np.zeros((classes, dims))
    means[np.arange(classes), axes] = 1.0

    # Draw the classes one after another (same draw order as a per-class
    # loop, so results for a given seed do not change).
    covariance = np.eye(dims) * variance
    X = np.concatenate([
        random_state.multivariate_normal(mean, covariance, class_size)
        for mean in means
    ])
    y = np.repeat(np.arange(classes), class_size)

    # Mean of the class each row belongs to, aligned with the rows of X.
    targets = means[y]

    Xs = [X]
    for _ in range(steps - 1):
        previous = Xs[-1]
        # Advect every point towards its class mean.
        Xs.append(previous + advection_ratio * (targets - previous))

    return Xs, y

In [3]:
# Generation settings -- the same values Rauber used in his example
n_classes, class_size = 10, 20
n_dims, n_steps = 20, 10
seed = 0

# Xs: one feature matrix per revision; y: class label of every row
Xs, y = create_blobs(
    classes=n_classes,
    dims=n_dims,
    class_size=class_size,
    steps=n_steps,
    advection_ratio=0.1,
    random_state=seed,
)

In [4]:
# Create the output directory together with any missing parent. exist_ok
# keeps the cell re-runnable: a plain `mkdir` reports an error once the
# directory exists and fails outright when `datasets/` is absent.
os.makedirs('datasets/minigaussians', exist_ok=True)

In [5]:
# Row labels are the class letter followed by the sample number within the
# class, e.g. 'a0' is the first observation of class 'a'.
# Letters come from the lowercase alphabet, so at most 26 classes fit.
indices = [
    string.ascii_lowercase[label] + str(position % class_size)
    for position, label in enumerate(y)
]

# Feature columns are named f0, f1, ..., f<n_dims-1>
columns = ['f' + str(k) for k in range(n_dims)]

In [6]:
# Prefix of the per-revision CSV file names (<dataset_name>-<t>.csv)
dataset_name = 'minigaussians'

In [7]:
# Write every revision to its own file: datasets/minigaussians/<dataset_name>-<t>.csv
for t in range(n_steps):
    # Rows carry the observation labels, columns the feature names
    df = pd.DataFrame(data=Xs[t], columns=columns, index=indices)
    df.to_csv(''.join(['datasets/minigaussians/', dataset_name, '-', str(t), '.csv']))

In [8]:
# Show the frame left over from the export loop, i.e. the last revision written
df

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19
a0,0.017647,0.178168,0.093237,0.014907,0.054379,0.040879,0.183044,-0.025135,0.038355,-0.104638,-0.312775,0.080077,0.105905,-0.090925,0.278075,-0.178179,0.005606,-0.022932,1.187786,0.180016
a1,0.018983,0.046330,-0.108765,-0.242674,-0.042624,0.019155,0.150727,0.147307,-0.047453,-0.037036,-0.128461,-0.173971,-0.209040,0.238996,-0.062439,-0.053670,-0.153484,0.095253,0.802276,-0.026063
a2,-0.109706,0.047401,-0.062580,-0.144643,-0.003453,0.052476,0.008149,0.037057,-0.077713,-0.044441,-0.082385,-0.044050,-0.099621,-0.211492,0.021737,-0.049223,-0.199721,0.056697,0.888844,0.006364
a3,0.089323,0.015802,0.139592,-0.151282,0.049292,-0.083898,-0.106684,-0.070917,-0.038169,0.006881,-0.142746,0.110363,0.057050,-0.188210,0.182330,0.232271,0.144416,-0.022043,0.868819,0.129184
a4,-0.049394,0.149766,0.025516,0.119651,0.043660,0.086564,0.001286,0.218793,0.015548,0.049249,0.230711,-0.165118,-0.155651,0.118764,-0.143723,0.238119,-0.050674,-0.091573,1.235586,0.181382
a5,0.228800,0.111002,-0.105511,0.234008,-0.032834,0.098311,0.116051,-0.018991,0.075233,0.112982,0.046117,-0.134691,0.036538,0.162500,-0.085094,-0.018332,-0.053312,0.226559,1.082365,0.049919
a6,-0.094325,0.066065,-0.082615,0.003900,-0.077899,0.082872,0.070640,-0.025519,0.048516,-0.133914,-0.182699,0.053831,0.020420,0.077800,0.291966,0.115711,-0.111833,0.136849,0.838784,-0.056550
a7,-0.008360,0.209907,-0.091242,-0.101250,-0.012062,-0.081285,0.138028,-0.132306,-0.140580,-0.053639,-0.061016,0.236393,0.116317,0.010726,-0.150132,0.103446,-0.122539,-0.189255,1.145549,0.038830
a8,0.112817,0.039048,0.104973,-0.079759,-0.126708,0.083504,-0.098428,-0.084479,-0.055809,0.002141,-0.043369,-0.168450,-0.078852,-0.272396,0.076599,-0.196273,-0.135301,0.006391,0.909394,0.189040
a9,-0.158392,0.032717,-0.004813,-0.143107,0.064108,-0.021017,0.094554,0.100890,0.265025,0.163742,-0.045230,-0.029327,0.134723,0.080278,0.078425,-0.198098,-0.002980,-0.090418,1.034294,-0.012025
