# Generate SPOCK training data

In [1]:
import spock
import random
import numpy as np
import rebound
import pandas as pd
from spock import simsetup
from spock import FeatureClassifier

Load the dataset

In [2]:
# Directory holding the cleaned data. The files were generated from the original spock
# initial conditions data, filtered according to https://github.com/Ethadhani/SPOCKcleanData.git
datapath = '../../cleanData/csvs/random/'

# Read the initial conditions and discard their junk 'Unnamed: 0' column right away
initial = pd.read_csv(datapath + 'clean_initial_conditions.csv').drop(columns='Unnamed: 0')
labels = pd.read_csv(datapath + 'clean_labels.csv')

# Attach the labels to the initial conditions, matching rows on runstring.
# The 'Unnamed: 0' column still present after the join comes from labels and is dropped too.
Initialdataset = (
    initial.set_index('runstring')
    .join(labels.set_index('runstring'))
    .drop(columns='Unnamed: 0')
)

We can define a function that, given a row index and a table of initial conditions, returns the corresponding rebound simulation.

In [3]:
def get_sim(row, dataset):
    '''Build a rebound simulation from one row of an initial-conditions table.

        Arguments:
            row: index label of the system to build; it is looked up with dataset.loc[row]
            dataset: table containing the initial conditions. The selected row must provide,
                for each body p0, p1, p2 and p3, the seven columns
                'p<i>m', 'p<i>x', 'p<i>y', 'p<i>z', 'p<i>vx', 'p<i>vy', 'p<i>vz'

        return: a rebound simulation with G set to 4*pi**2 holding the four bodies, added in
            the order p0, p1, p2, p3. If the row cannot be read or the simulation cannot be
            built, an error message is printed and None is returned instead.'''
    try:
        data = dataset.loc[row]
        sim = rebound.Simulation()
        sim.G = 4*np.pi**2
        # Every body uses the same seven columns, distinguished only by the 'p<i>' prefix
        for body in ('p0', 'p1', 'p2', 'p3'):
            sim.add(m=data[body+'m'],
                    x=data[body+'x'], y=data[body+'y'], z=data[body+'z'],
                    vx=data[body+'vx'], vy=data[body+'vy'], vz=data[body+'vz'])
        return sim
    except Exception:
        # Catch Exception rather than using a bare except, so that KeyboardInterrupt and
        # SystemExit still propagate and a long run can actually be stopped.
        print("Error reading initial condition {0}".format(row))
        return None

We can now generate the set of system row indices

In [4]:
# One integer index per system: 0 up to (number of rows in Initialdataset) - 1
systemNum = range(len(Initialdataset))

We can note the feature column names and instantiate the SPOCK feature classifier.

In [6]:
# Column names for the generated feature table, in the order getList emits the values;
# the final entry names the extra value that getList appends after the feature values.
col = [
    'EMcrossnear', 'EMfracstdnear', 'EPstdnear', 'MMRstrengthnear',
    'EMcrossfar', 'EMfracstdfar', 'EPstdfar', 'MMRstrengthfar',
    'MEGNO', 'MEGNOstd',
    'ThetaSTD12', 'ThetaSTD23',
    'chiSec',
    'Zval12', 'Zval23',
    'threeBRfillfac', 'threeBRfillstd',
    'Tsec',
    'InitialStable',
]

In [7]:
# Instantiate the SPOCK feature classifier; its generate_features method is what getFeat calls.
# NOTE(review): this rebinds the name `spock`, which the first cell bound to the imported
# module, so re-running the import cell afterwards points `spock` back at the module.
# Consider a distinct variable name (and updating getFeat to match).
spock = FeatureClassifier()

We can then define some helper functions that let us map the spock.generate_features function over the different systems: each row number is turned into the corresponding simulation, from which the features are generated.

In [8]:
def getList(features):
    '''Flatten one generate_features result into a single list.

    Takes the values of the mapping found at features[0][0], in their stored order,
    and appends features[1] as the last element.'''
    row = list(features[0][0].values())
    row.append(features[1])
    return row

In [9]:
def getFeat(num):
    '''Return the spock generated features for the system stored at row index num.

    The simulation is built from the `initial` table with get_sim and handed
    directly to spock.generate_features.'''
    return spock.generate_features(get_sim(num, initial))

In [10]:
# Display the installed rebound version, as a record of what generated the features
rebound.__version__

'4.3.2'

We can now map getFeat over the rows of the `initial` DataFrame; this creates each simulation and generates the SPOCK features.

In [11]:
import sys
%time
from multiprocessing import Pool
if __name__ == "__main__":
    with Pool() as pool:
        features = pool.map(getFeat,systemNum)
        pool.close()
        pool.join()
#formats the data correctly
formattedFeat = pd.DataFrame(np.array(list(map(getList,features))), columns = col)


CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 6.68 µs


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  msk = np.array( list(map( lambda x: min_per_ratio < x[0]/float(x[1]) < max_per_ratio , res_ratios )) )
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, 

KeyboardInterrupt: 

We can then join the generated features with the corresponding labels

In [13]:
# Put the labels next to the generated features.
# NOTE(review): with no `on` key this join aligns the two frames on their index, i.e. by
# row position here rather than by runstring; this presumably relies on labels being in
# the same row order as the initial conditions - confirm.
dataset = formattedFeat.join(labels)

We can then save the new training data spreadsheet.

In [None]:
# Write the joined features and labels to 'tryRand.csv' inside the datapath directory
dataset.to_csv(datapath+'tryRand.csv')