In this step the data is broken down into the subset that is planned to be analysed. The initial subset was the halo in the local stellar neighbourhood. The code here applies restrictions to obtain a relevant subset based on the literature.

In [1]:
#Load in relevant libraries
import math

import numpy as np

from astropy import units as u
from astropy import coordinates as coord
from astropy.coordinates import SkyCoord, ICRS, Galactocentric, Distance, LSR
from astropy.coordinates import FK5
from astropy.coordinates import CartesianRepresentation, CartesianDifferential
from astropy.table import Table, vstack, hstack
from astropy.table import Column
from astropy.io import ascii

import gala.potential as gp
import gala.dynamics as gd
from gala.units import galactic

from scipy import stats

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler, normalize


import time

from matplotlib import pyplot

import pandas as pd

In [2]:
#Load in the table with recorded radial velocities
t = Table.read('Gaia_Data.fits')

print("Total Number of Stars in Dataset: " + str(len(t)))

Total Number of Stars in Dataset: 7224631


In [3]:
#Reduce the dataset so that the percentage error of the parralax is less than 20%
# Note: This only needs to be done if you are caluclating distance values using parrallax
t = t[(t['parallax_error']/t['parallax']<0.2)]

print("Total Number of Stars in Dataset: " + str(len(t)))

#Convert the parralax measurements to distance measurements
a = t['parallax']
a = a.to(u.arcsec)

#Add the new distance measurements as a new column into the table
dist = ((1/a).value)*u.parsec
dist = Column(dist,name='dist')
t.add_column(dist)

  return getattr(self.data, op)(other)


Total Number of Stars in Dataset: 6447952


In [4]:
#Create a subset that only includes stars that are 1kpc from the sun
# Note: This step was done to create a subset within close range of our sun
t = t[(t['dist']<1000) & (t['dist']>0)]

print("Total Number of Stars in Dataset: " + str(len(t)))

Total Number of Stars in Dataset: 3105498


In [5]:
#We need to calculate values into recangular galactocentric positions and velocities

#First save the coordinates of each datapoint in the current dataset as a set
# of ICRS coordinates using astropy

c = ICRS(ra=t['ra'], 
         
         dec=t['dec'], 
         
         distance=t['dist'],
         
         pm_ra_cosdec=t['pmra'],
         
         pm_dec=t['pmdec'],
         
         radial_velocity=t['radial_velocity'])

#Saved Coordinates are transformed into Galactocentric coordinates
galc = c.transform_to(Galactocentric)

#The values of the coordinates are extracted and added to the table
x = galc.x
y = galc.y
z = galc.z

x = Column(x,name='x_val')
y = Column(y,name='y_val')
z = Column(z,name='z_val')

U = galc.v_x
V = galc.v_y
W = galc.v_z

U = Column(U,name='U')
V = Column(V,name='V')
W = Column(W,name='W')

t.add_column(x)
t.add_column(y)
t.add_column(z)

t.add_column(U)
t.add_column(V)
t.add_column(W)

#The total velocity of each star is calculated and added to the 
# table by first removing the local standard of rest
X = t['U']
Y = t['V']
Z = t['W']

Y = Y - 232

Vel = np.sqrt(X*X + Y*Y + Z*Z)
V_Tot = Column(Vel, name='V_LSR')

t.add_column(V_Tot)

print("Total Number of Stars in Dataset: " + str(len(t)))

Total Number of Stars in Dataset: 3105498


In [6]:
#The Halo selection criteria based on Koppelman et al. 2018 is applied here
# Note: If you don't want halo stars don't run this cell
t = t[(t['V_LSR']>210)]

print("Total Number of Stars in Dataset: " + str(len(t)))

Total Number of Stars in Dataset: 6789


In [7]:
#The subset is saved as a fits datafile ready to have actions calculated.
t.write('Step_1_Output.fits',overwrite=True)