# Generate SPOCK training data

In [1]:
import spock
import random
import numpy as np
import rebound
import pandas as pd
from spock import simsetup
from spock import FeatureClassifier

The initial conditions are stored as snapshots of a simulation archive, so we must first set the data path and load the labels for the corresponding systems.

In [2]:
# Directory holding the cleaned data, which was generated from the original
# SPOCK initial-conditions data. The initial conditions themselves are stored
# as a simulation archive; the labels are a CSV file in the same directory.
# The path is relative, so it is resolved against the notebook's working directory.
datapath = '../../cleanData/csvs/resonant/'
labels = pd.read_csv(datapath+'sim_labels.csv')

We can now generate the set of system indices based on the labels

In [3]:
# One index per row of the labels table; each index is later used as the
# snapshot number of the corresponding system.
systemNum = range(len(labels))

We can note the feature column names and instantiate the SPOCK feature classifier, which provides the feature generator.

In [4]:
# Feature column names, ending with the 'InitialStable' flag.
# NOTE(review): no cell shown in this notebook reads `col`; the DataFrame
# columns are taken from the generated feature dictionaries instead.
col = [
    'EMcrossnear', 'EMfracstdnear', 'EPstdnear', 'MMRstrengthnear',
    'EMcrossfar', 'EMfracstdfar', 'EPstdfar', 'MMRstrengthfar',
    'MEGNO', 'MEGNOstd', 'Tsec',
    'InitialStable',
]

In [5]:
# NOTE(review): this rebinds the name `spock` (bound to the imported package by
# `import spock` in the import cell) to a FeatureClassifier instance. getFeat
# relies on this instance when it calls spock.generate_features.
spock = FeatureClassifier()

We can then establish some helper functions that will allow us to map the spock.generate_features function over the different systems, one snapshot per system.

In [6]:
def getList(features):
    '''Flattens one generate_features result into a single row.

    The row holds the values of the first feature dictionary (features[0][0]),
    in dictionary order, followed by the second element of the result (features[1]).
    '''
    feature_dicts = features[0]
    initial_stable = features[1]
    row = list(feature_dicts[0].values())
    row.append(initial_stable)
    return row

In [7]:
def getFeat(num):
    '''Loads snapshot `num` of the initial-conditions archive and returns the features spock generates for it'''
    # the archive lives next to the labels file, under datapath
    archive = datapath+"clean_initial_conditions.bin"
    snapshot_sim = rebound.Simulation(archive, snapshot=num)
    generated = spock.generate_features(snapshot_sim)
    return generated

In [8]:
# Display the installed REBOUND version; the recorded cell output shows the
# version that was used for this run.
rebound.__version__

'4.3.2'

In [9]:
# Load snapshot 1000 of the initial-conditions archive on its own.
# NOTE(review): `sim` is not used by any later cell shown here; presumably a
# manual check that the archive loads. Confirm before removing.
sim = rebound.Simulation(datapath+"clean_initial_conditions.bin", snapshot=1000)

We can now map getFeat over the index of every system (one per row of the labels DataFrame); this loads each simulation and generates its SPOCK features.

In [9]:
import sys
from multiprocessing import Pool

# Fan getFeat out over every system index using a pool of worker processes.
if __name__ == "__main__":
    with Pool() as pool:
        features = pool.map(getFeat, systemNum)
        # map() has returned every result by this point; stop accepting work
        # and wait for the workers before leaving the context manager.
        pool.close()
        pool.join()

# Each entry of `features` is the generate_features result for one system.
# getList flattens an entry into one row; the column names are read from the
# first system's feature dictionary, with 'InitialStable' appended for the
# trailing value of each row.
formattedFeat = pd.DataFrame(
    np.array([getList(entry) for entry in features]),
    columns=[*features[0][0][0].keys(), 'InitialStable'],
)

  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  laplace_b(s+1,j-1,0,alpha)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  laplace_b(s+1,j-1,0,alpha)
  laplace_b(s+1,j-1,0,alpha)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  laplace_b(s+1,j-1,0,alpha)
  laplace_b(s+1,j-1,0,alpha)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  laplace_b(s+1,j-1,0,alpha)
  laplace_b(s+1,j-1,0,alpha)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  laplace_b(s+1,j-1,0,alpha)
  laplace_b(s+1,j-1,0,alpha)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s+j,j+1,alpha**2)/ factorial(j)
  return 2 * poch(s,j) * alpha**j * hyp2f1(s,s

We can then join the generated features with the corresponding labels

In [10]:
# Index-aligned join: row i of the generated features is paired with row i of the labels.
dataset = formattedFeat.join(labels)

We can then save the new training data spreadsheet.

In [None]:
# Write the joined features and labels next to the input data, under datapath.
# No extra arguments are passed, so pandas' default to_csv settings apply.
dataset.to_csv(datapath+'4-4-25-thetaSTD.csv')