In [None]:
import pandas as pd
import torch
import numpy as np
import random
import os
import torch.nn as nn
from sklearn.model_selection import train_test_split

from utils import set_seed
import numpy as np
from grid_search import BaseSearcher

In [None]:
seed = 2

# Export the cuBLAS workspace setting *before* seeding, so it is already in the
# environment by the time any CUDA/cuBLAS state gets created. PyTorch documents
# this variable as required for deterministic cuBLAS behaviour on CUDA >= 10.2.
# (set_seed comes from utils; whether it touches CUDA is not visible here.)
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
set_seed(seed)

# Prefer GPU index 2, as before. Constructing torch.device('cuda:2') never
# fails on its own; the failure only appears later when tensors are moved, so
# check up front that the device really exists and fall back otherwise.
preferred_gpu_index = 2
if torch.cuda.is_available() and torch.cuda.device_count() > preferred_gpu_index:
    device = torch.device('cuda', preferred_gpu_index)
elif torch.cuda.is_available():
    device = torch.device('cuda', 0)
    print('cuda:{} is not present; using {} instead.'.format(preferred_gpu_index, device))
else:
    # NOTE(review): assumes the downstream searcher can run on CPU - confirm.
    device = torch.device('cpu')
    print('CUDA is not available; using the CPU instead.')

# Load the complete knot table; the splits are derived from it further down.
file_name = '../data/knot.csv'
full_df = pd.read_csv(file_name)

# Maps each feature column's short name to a human-readable label. The key
# order matters: it defines the order of the feature columns (column_names).
display_name_from_short_name = {
    # Geometric invariants.
    'chern_simons': 'Chern-Simons',
    'cusp_volume': 'Cusp volume',
    'hyperbolic_adjoint_torsion_degree': 'Adjoint Torsion Degree',
    'hyperbolic_torsion_degree': 'Torsion Degree',
    'injectivity_radius': 'Injectivity radius',
    'longitudinal_translation': 'Longitudinal translation',
    # NOTE(review): the '_imag' key is labelled 'Re(...)' and the '_real' key
    # 'Im(...)', unlike the short_geodesic entries below - confirm whether
    # this swap is intentional before relying on these labels.
    'meridinal_translation_imag': 'Re(Meridional translation)',
    'meridinal_translation_real': 'Im(Meridional translation)',
    'short_geodesic_imag_part': 'Im(Short geodesic)',
    'short_geodesic_real_part': 'Re(Short geodesic)',
    # Symmetry indicator columns.
    'Symmetry_0': 'Symmetry: $0$',
    'Symmetry_D3': 'Symmetry: $D_3$',
    'Symmetry_D4': 'Symmetry: $D_4$',
    'Symmetry_D6': 'Symmetry: $D_6$',
    'Symmetry_D8': 'Symmetry: $D_8$',
    'Symmetry_Z/2 + Z/2': 'Symmetry: $\\frac{Z}{2} + \\frac{Z}{2}$',
    'volume': 'Volume',
}

# Feature columns, in dictionary insertion order.
column_names = [*display_name_from_short_name]

# Name of the label column.
target = 'signature'

In [None]:
# A single RandomState instance is threaded through both splits, so the second
# split continues the random stream left by the first one.
random_state = np.random.RandomState(seed)

# Stage 1: carve the training split off the full table (train_test_split's
# default proportions apply, since no size is given).
train_df, validation_and_test_df = train_test_split(
    full_df,
    random_state=random_state,
)

# Stage 2: halve the remainder into validation and test splits.
validation_df, test_df = train_test_split(
    validation_and_test_df,
    test_size=0.5,
    random_state=random_state,
)

# Signature bounds are measured on the training split only.
min_signature = train_df[target].min()
max_signature = train_df[target].max()

# One class per step of 2 between the bounds, inclusive of both ends.
# NOTE(review): presumably signatures only take values 2 apart - confirm.
classes = 1 + int((max_signature - min_signature) / 2)

In [None]:
def normalize_features(df, cols, add_target=True):
    """Standardise the selected columns of ``df`` using that frame's own statistics.

    Each column in ``cols`` is shifted by its mean and scaled by its standard
    deviation (pandas' default ``std()``), both computed on ``df`` itself.

    Args:
        df: Source DataFrame holding the feature columns (and the label column
            named by the module-level ``target`` when ``add_target`` is true).
        cols: Labels of the feature columns to standardise.
        add_target: When true, copy the unmodified ``target`` column from
            ``df`` into the result.

    Returns:
        A new DataFrame with the standardised features, plus the label column
        if requested.

    Raises:
        RuntimeError: If any selected column has a standard deviation of
            exactly zero; the per-column deviations are printed first.
    """
    selected = df[cols]
    spread = selected.std()

    # A zero deviation would mean dividing by zero below; fail loudly instead.
    has_constant_column = any(spread == 0)
    if has_constant_column:
        print(spread)
        raise RuntimeError(
            "A poor data stratification has led to no variation in one of the data "
            "splits for at least one feature (ie std=0). Restratify and try again.")

    centre = selected.mean()
    standardised = selected.sub(centre).div(spread)

    if not add_target:
        return standardised

    # Labels are carried over untouched; only the features are rescaled.
    standardised[target] = df[target]
    return standardised


def get_batch(df, cols, size=None):
    """Extract features and class-index labels from ``df``.

    Args:
        df: DataFrame containing the feature columns and the label column
            named by the module-level ``target``.
        cols: Labels of the feature columns to extract.
        size: If given, that many rows are drawn with ``df.sample``; if
            ``None``, every row of ``df`` is used.

    Returns:
        A pair ``(X, y)``: ``X`` is the NumPy array of the selected feature
        columns and ``y`` is a ``torch`` long tensor of class indices computed
        as ``(label - min_signature) / 2``.
    """
    if size is None:
        rows = df
    else:
        rows = df.sample(size)

    feature_matrix = rows[cols].to_numpy()
    signatures = torch.tensor(rows[target].to_numpy())

    # Shift by the training-set minimum and halve, so labels become indices
    # starting at 0; .long() then truncates to integers.
    offset = torch.ones(len(signatures)) * min_signature
    class_indices = ((signatures - offset) / 2).long()
    return feature_matrix, class_indices


# Standardise each split independently: every split is centred and scaled
# with its own mean and standard deviation.
normed_train_df, normed_validation_df, normed_test_df = (
    normalize_features(split_df, column_names)
    for split_df in (train_df, validation_df, test_df)
)

In [None]:
from data_process import KANDataset
from torch.utils.data import DataLoader
# Mini-batch size shared by all three loaders.
batch_size = 128

# Pull every row of each normalised split (no size argument, so no sampling).
train_X, train_y = get_batch(normed_train_df, column_names)
val_X, val_y = get_batch(normed_validation_df, column_names)
test_X, test_y = get_batch(normed_test_df, column_names)

# Features arrive as NumPy arrays; convert them to float32 tensors.
train_X = torch.tensor(train_X, dtype=torch.float32)
val_X = torch.tensor(val_X, dtype=torch.float32)
test_X = torch.tensor(test_X, dtype=torch.float32)

# Wrap each (features, labels) pair in the project's dataset class.
trainset = KANDataset(train_X, train_y)
valset = KANDataset(val_X, val_y)
testset = KANDataset(test_X, test_y)

# Only the training loader shuffles; validation and test keep row order.
train_loader = DataLoader(trainset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(valset, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(testset, shuffle=False, batch_size=batch_size)

In [None]:
# Candidate layer-width lists: 1 to 4 hidden layers, each of a single width
# drawn from (4, 8, 16, 32), ordered by depth first and then by width.
# NOTE(review): every list starts at width 2 and ends at width 1, while the
# feature list in this notebook has 17 columns and `classes` is computed but
# not used here - confirm how BaseSearcher interprets the first/last entries.
size_list = [
    [2] + [width] * depth + [1]
    for depth in (1, 2, 3, 4)
    for width in (4, 8, 16, 32)
]

# Candidate learning rates, largest first.
lr_list = [
    0.3, 0.2, 0.1,
    0.08, 0.05, 0.03, 0.01,
    0.008, 0.005, 0.001,
]

# Build the searcher on the chosen device; its artefacts go under save/knots/.
gs = BaseSearcher(device=device, save_dir='save/knots/')
gs.init_logs()

# Sweep every (architecture, learning-rate) combination over the three loaders.
# The meaning of the keyword options is defined by BaseSearcher, which is not
# visible in this notebook.
gs.grid_search(
    size_list,
    lr_list,
    train_loader,
    val_loader,
    test_loader,
    repu_order=3,
    optim='lbfgs',
    max_iter=500,
    epoch_list=[100, 100, 500],
    scheduler='cos',
)