# Generate synthetic data

Each robustness test we do needs different synthetic data. In this notebook I generate all of the synthetic datasets in one place so that they stay organised.

In [1]:
# Directory (relative path) that generate_dataset writes its pickle files into.
output_dir = "./massive_data/"

In [2]:
import os
import pickle
from datetime import datetime, date

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from astropy.table import Table
from tqdm.notebook import tqdm

import SyntheticData as sd
import Sampler



Open the catalogue and build a lower-memory version of it that keeps only the relevant columns.

In [3]:
# Full M-dwarf catalogue as read from disk.
mdwarfs = Table.read('./data/200pc_mdwarfs_reduced.fits', format='fits')

# Keys kept in each slimmed-down record. "mass" is filled from the catalogue's
# "mass_single" column; the other keys use the column of the same name.
relevant_list = ["parallax", "mass", "solution_type"]
source_columns = {key: ("mass_single" if key == "mass" else key) for key in relevant_list}

# One plain dict of floats per catalogue row.
# NOTE(review): no cell visible here reads all_mdwarfs (generate_dataset passes
# `mdwarfs` as the catalogue) -- confirm which of the two is meant to be used.
all_mdwarfs = []
for entry in tqdm(mdwarfs):
    all_mdwarfs.append({key: float(entry[column]) for key, column in source_columns.items()})

  0%|          | 0/225536 [00:00<?, ?it/s]

In [None]:
def generate_dataset(name, object_count=250000, binary_fraction=0.3, binarity_model=None, period_model=(4,1.3), mass_model=0.5, ecc_type="turnover"):
    """Generate one synthetic dataset and pickle it, with its settings, under ``output_dir``.

    The pickled object is a dict with two keys:

    * ``"meta"`` -- the creation timestamp plus every generation argument
      (``binarity_model`` is stored as its string representation only).
    * ``"data"`` -- whatever ``sd.create_synthetic_data`` returns.

    The file is written to ``<output_dir>/<today's date>-<name>.pkl``; the
    directory is created if it does not exist yet.

    Parameters
    ----------
    name : str
        Label used in the output file name.
    object_count, binary_fraction, binarity_model, period_model, mass_model, ecc_type
        Forwarded unchanged to ``sd.create_synthetic_data`` (together with the
        module-level ``mdwarfs`` catalogue) and recorded in the metadata.

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): no random seed is set in this function, so reruns presumably
    produce different data -- confirm whether SyntheticData seeds internally.
    """
    # Create the output directory up front so a missing folder fails (or is
    # fixed) before the expensive generation step rather than after it.
    os.makedirs(output_dir, exist_ok=True)

    outdata = {
        "meta": {
            "timestamp": datetime.now(),
            "object_count": object_count,
            "binary_fraction": binary_fraction,
            # Stored as text: only the string form of the model is kept.
            "binarity_model": f"{binarity_model}",
            "period_model": period_model,
            "mass_model": mass_model,
            "ecc_type": ecc_type,
        },
    }
    outdata["data"] = sd.create_synthetic_data(object_count=object_count, catalogue=mdwarfs, binary_fraction=binary_fraction, binarity_model=binarity_model,
                                               period_model=period_model, mass_model=mass_model, ecc_type=ecc_type)

    # os.path.join works whether or not output_dir ends with a separator, and
    # the context manager closes the file even if pickling raises.
    outpath = os.path.join(output_dir, f'{date.today()}-{name}.pkl')
    with open(outpath, "wb") as outfile:
        pickle.dump(outdata, outfile)

## Step 1: the reference dataset

The main dataset is the "conventional wisdom" reference; every other dataset is just a slight modification of it.

In [5]:
# Reference dataset: only the name is given, so every other setting is generate_dataset's default.
generate_dataset(name="reference")

Computing Binaries:   0%|          | 0/75189 [00:00<?, ?it/s]

## Variable binary fraction

In [6]:
# Binary fractions at the lower and upper reference masses, respectively.
REF_FRACTIONS = (0.25, 0.5)
# The two reference masses (presumably in solar masses -- confirm against the catalogue).
MASS_RANGE = (0.2, 0.4)


def bin_frac(m):
    """Return the binary fraction for mass ``m`` by linear interpolation.

    The line passes through (MASS_RANGE[0], REF_FRACTIONS[0]) and
    (MASS_RANGE[1], REF_FRACTIONS[1]). Masses outside MASS_RANGE are
    extrapolated along the same line; the result is not clamped.
    """
    mass_lo, mass_hi = MASS_RANGE
    frac_lo, frac_hi = REF_FRACTIONS
    # Position of m along the mass range: 0 at the lower bound, 1 at the upper.
    position = (m - mass_lo) / (mass_hi - mass_lo)
    return position * (frac_hi - frac_lo) + frac_lo
# The constant binary_fraction is set to None and the bin_frac callable is passed as
# binarity_model (presumably the callable then decides binarity per object -- confirm in SyntheticData).
generate_dataset(name="variable_binarity", binary_fraction=None, binarity_model=bin_frac)

Computing Binaries:   0%|          | 0/100068 [00:00<?, ?it/s]

## Period test

In [7]:
# Two alternatives to the default period_model of (4, 1.3), each on a reduced
# 50,000-object sample; everything else stays at the defaults.
for dataset_name, alt_period_model in (("periods_42", (4, 2)), ("periods_513", (5, 1.3))):
    generate_dataset(name=dataset_name, object_count=50000, period_model=alt_period_model)

Computing Binaries:   0%|          | 0/15140 [00:00<?, ?it/s]

Computing Binaries:   0%|          | 0/15096 [00:00<?, ?it/s]

## Mass ratio test

In [8]:
# Default settings except mass_model=0 (the default is 0.5), on a reduced 50,000-object sample.
generate_dataset(name="flat_q", object_count=50000, mass_model=0)

Computing Binaries:   0%|          | 0/14879 [00:00<?, ?it/s]

## Eccentricity test

In [9]:
# Two alternatives to the default ecc_type of "turnover", each on a reduced
# 50,000-object sample; the dataset name is the eccentricity type itself.
for ecc_choice in ("circular", "thermal"):
    generate_dataset(name=ecc_choice, object_count=50000, ecc_type=ecc_choice)

Computing Binaries:   0%|          | 0/14879 [00:00<?, ?it/s]

Computing Binaries:   0%|          | 0/15129 [00:00<?, ?it/s]