In [1]:
import torch as th
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from openTSNE import TSNE
from openTSNE.callbacks import ErrorApproximations

csvs_dir = '../csvs/'
file_path='D:\\steini\\chess\\lczero-training\\tsne\\outputs\\'

## Get all files from csvs folder

In [2]:
import os
import glob

# Get a list of files (file paths) in the given directory 
files_list = filter( os.path.isfile,
                        glob.glob(csvs_dir + '*.csv') )
# Sort list of files in directory by size 
files_list = sorted( files_list,
                        key =  lambda x: os.stat(x).st_size)
        
[print(f) for f in files_list]

../csvs\a0sf_value-dense2.csv
../csvs\a0sf_activation_2.csv
../csvs\a0sf_value-dense1.csv
../csvs\a0sf_moves_left-dense1.csv
../csvs\a0sf_activation_33.csv
../csvs\a0sf_flatten_1.csv
../csvs\a0sf_activation_32.csv
../csvs\a0sf_flatten.csv
../csvs\a0sf_apply_policy_map_2.csv
../csvs\lc0_inputs.csv
../csvs\a0sf_activation.csv
../csvs\a0sf_activation_1.csv
../csvs\a0sf_activation_3.csv
../csvs\a0sf_activation_31.csv
../csvs\a0sf_policy.csv


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

## Prepare function for tsne hyperparameter search and saving csv/png

In [3]:
def run_tsne(data, path, perplexity=30, lr='auto', n_iter=500):
    # tsne object
    reducer = TSNE(
    perplexity=perplexity,
    learning_rate=lr,
    n_iter=n_iter,
    n_jobs=6,
    metric='euclidean',
    random_state=42
    )
    
    # prepare paths for saving csv and png
    path = path.replace('.csv', '')
    csv_path = path + '_perpl' + str(perplexity) + '_lr' + str(reducer.learning_rate) + '_n_iter' + str(n_iter) + '.csv'
    png_path = csv_path.replace('.csv', '.png')
    
    # prepare (pca)
    reducer.prepare_initial(X=data, initialization='pca')
    
    # tsne fit transform
    tsne_embedding = reducer.fit(data, initialization='pca')
    
    tsne_df = pd.DataFrame(tsne_embedding)
    tsne_df.to_csv(csv_path)
    
    plt.figure(figsize=(16,12))
    plt.scatter(tsne_embedding[:,0], tsne_embedding[:,1])
    print('storing plot to', png_path)
    plt.savefig(png_path)
    plt.close()

## Iterate all files, create df, prepare umap, run hparam search, store csvs and pngs

In [None]:
for f in files_list:
    print('loading',f)
    df = pd.read_csv(f)
    # create tensor w/o idx col
    loaded_x = th.tensor(df.values[:,1:])
    f = f[7:]
    
    
    for perplexity in [30, np.sqrt(len(loaded_x))]:
        path = file_path+f
        path = path.replace('.csv', '')
        try:
            print('running tsne on', path)
            run_tsne(data=loaded_x, path=path, perplexity=perplexity, n_iter=1000)
        except Exception as e:
            print(f, 'failed:', e)

loading ../csvs\a0sf_value-dense2.csv
running tsne on D:\steini\chess\lczero-training\tsne\outputs\\a0sf_value-dense2
storing plot to D:\steini\chess\lczero-training\tsne\outputs\\a0sf_value-dense2_perpl30_lrauto_n_iter1000.png
running tsne on D:\steini\chess\lczero-training\tsne\outputs\\a0sf_value-dense2
storing plot to D:\steini\chess\lczero-training\tsne\outputs\\a0sf_value-dense2_perpl169.04141504376966_lrauto_n_iter1000.png
loading ../csvs\a0sf_activation_2.csv
running tsne on D:\steini\chess\lczero-training\tsne\outputs\\a0sf_activation_2
storing plot to D:\steini\chess\lczero-training\tsne\outputs\\a0sf_activation_2_perpl30_lrauto_n_iter1000.png
running tsne on D:\steini\chess\lczero-training\tsne\outputs\\a0sf_activation_2
storing plot to D:\steini\chess\lczero-training\tsne\outputs\\a0sf_activation_2_perpl169.04141504376966_lrauto_n_iter1000.png
loading ../csvs\a0sf_value-dense1.csv
running tsne on D:\steini\chess\lczero-training\tsne\outputs\\a0sf_value-dense1
storing plot t