# GAIA Stellar parameter analysis

In [12]:

import pandas as pd
# Read the exported query result (CSV) into a DataFrame and preview its first rows
csv_path = '1702044538106O-result.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,source_id,ra,dec,parallax,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,bp_rp,bp_g,g_rp,phot_variable_flag,non_single_star,teff_gspphot,logg_gspphot,mh_gspphot,distance_gspphot,azero_gspphot,ag_gspphot,ebpminrp_gspphot
0,6047500633522220032,248.369891,-24.997493,,20.85676,21.304745,19.86384,1.440905,0.447985,0.99292,NOT_AVAILABLE,0,,,,,,,
1,6047519733242383872,248.278516,-24.782523,,,,19.735653,,,,NOT_AVAILABLE,0,,,,,,,
2,6047525097655939968,248.566063,-24.936813,,20.753935,,,,,,NOT_AVAILABLE,0,,,,,,,
3,6047500083766289664,248.321241,-25.032108,0.685722,18.119667,18.569986,16.56635,2.003635,0.450319,1.553316,NOT_AVAILABLE,0,,,,,,,
4,6047500083766290304,248.320812,-25.031495,1.037481,19.907545,21.158869,18.785051,2.373817,1.251324,1.122494,NOT_AVAILABLE,0,,,,,,,


In [7]:
# (number of rows, number of columns) of the table as loaded
df.shape

(91389, 19)

## Data Cleaning

In [4]:
# Collect the name of every feature (column) present in the dataframe
features = list(df.columns)

# Count the missing values of all columns in one pass, then report them one per line
missing_per_feature = df.isna().sum()
for feature, n_missing in missing_per_feature.items():
    print(f'NaN total number of values in {feature}: {n_missing}')

NaN total number of values in source_id: 0
NaN total number of values in ra: 0
NaN total number of values in dec: 0
NaN total number of values in parallax: 16951
NaN total number of values in phot_g_mean_mag: 242
NaN total number of values in phot_bp_mean_mag: 2821
NaN total number of values in phot_rp_mean_mag: 2228
NaN total number of values in bp_rp: 2839
NaN total number of values in bp_g: 2821
NaN total number of values in g_rp: 2451
NaN total number of values in phot_variable_flag: 0
NaN total number of values in non_single_star: 0
NaN total number of values in teff_gspphot: 71836
NaN total number of values in logg_gspphot: 71836
NaN total number of values in mh_gspphot: 71836
NaN total number of values in distance_gspphot: 71836
NaN total number of values in azero_gspphot: 71836
NaN total number of values in ag_gspphot: 71836
NaN total number of values in ebpminrp_gspphot: 71836


### Parallax
We remove the sources with a negative parallax because a negative angle cannot be converted into a physical distance. A negative parallax might be due to an error in the measurement (for example, noise that is larger than the true parallax) or a misinterpretation of the data.

In [18]:
# Consider only strictly positive parallaxes, excluding negatives, zeros and NaNs.
# NaN never satisfies the comparison, so rows without a parallax are dropped too.
# Zero is excluded because the parallax is inverted later on to obtain a distance,
# and a zero parallax would produce an infinite distance.
data = df.loc[df['parallax'] > 0]
data.shape

(55543, 19)

In [19]:
# Column names are taken from the original table; the counts come from the filtered one
features = list(df.columns)

# Count the missing values of those columns in `data` in one pass and report them
missing_after_cut = data[features].isna().sum()
for feature, n_missing in missing_after_cut.items():
    print(f'NaN total number of values in {feature}: {n_missing}')

NaN total number of values in source_id: 0
NaN total number of values in ra: 0
NaN total number of values in dec: 0
NaN total number of values in parallax: 0
NaN total number of values in phot_g_mean_mag: 57
NaN total number of values in phot_bp_mean_mag: 871
NaN total number of values in phot_rp_mean_mag: 795
NaN total number of values in bp_rp: 872
NaN total number of values in bp_g: 871
NaN total number of values in g_rp: 845
NaN total number of values in phot_variable_flag: 0
NaN total number of values in non_single_star: 0
NaN total number of values in teff_gspphot: 37447
NaN total number of values in logg_gspphot: 37447
NaN total number of values in mh_gspphot: 37447
NaN total number of values in distance_gspphot: 37447
NaN total number of values in azero_gspphot: 37447
NaN total number of values in ag_gspphot: 37447
NaN total number of values in ebpminrp_gspphot: 37447


### Photometric Values

In [23]:
# Keep only the instances that have a value in all three photometric bands
# (phot g mean, phot bp mean and phot rp mean).
# Missing values are tested explicitly instead of with `> 0`: a magnitude of
# zero or below is a valid value and must not be mistaken for a missing
# measurement.
data = data.dropna(subset=['phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag'])
data.shape

(54671, 19)

In [24]:
# Preview the first 10 rows that remain after the filters applied so far
data.head(10)

Unnamed: 0,source_id,ra,dec,parallax,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,bp_rp,bp_g,g_rp,phot_variable_flag,non_single_star,teff_gspphot,logg_gspphot,mh_gspphot,distance_gspphot,azero_gspphot,ag_gspphot,ebpminrp_gspphot
3,6047500083766289664,248.321241,-25.032108,0.685722,18.119667,18.569986,16.56635,2.003635,0.450319,1.553316,NOT_AVAILABLE,0,,,,,,,
4,6047500083766290304,248.320812,-25.031495,1.037481,19.907545,21.158869,18.785051,2.373817,1.251324,1.122494,NOT_AVAILABLE,0,,,,,,,
11,6047500113831141376,248.294525,-25.033515,0.913684,20.399033,21.524858,19.403357,2.121502,1.125826,0.995676,NOT_AVAILABLE,0,,,,,,,
12,6047500113831457792,248.30795,-25.037774,3.27643,20.514648,21.35927,19.337183,2.022087,0.844622,1.177465,NOT_AVAILABLE,0,,,,,,,
13,6047500148190884480,248.31981,-25.024354,1.637632,19.96174,20.527998,19.009459,1.518539,0.566257,0.952282,NOT_AVAILABLE,0,,,,,,,
14,6047500148190887552,248.315151,-25.019749,0.662267,19.323332,20.714195,18.200197,2.513998,1.390863,1.123135,NOT_AVAILABLE,0,,,,,,,
15,6047500148191199232,248.316789,-25.022694,0.031527,20.052822,21.046618,19.07222,1.974398,0.993795,0.980602,NOT_AVAILABLE,0,,,,,,,
16,6047500148191199488,248.317547,-25.021558,0.280824,20.499277,21.823492,19.304098,2.519394,1.324215,1.195179,NOT_AVAILABLE,0,,,,,,,
17,6047500152485767936,248.322122,-25.026058,1.911935,17.311089,18.160269,16.301243,1.859026,0.84918,1.009846,NOT_AVAILABLE,0,5130.9883,4.7841,-1.6866,612.8504,2.1416,1.6223,0.8963
18,6047500152485768960,248.320462,-25.023991,1.227912,20.526451,21.369843,19.517536,1.852306,0.843391,1.008915,NOT_AVAILABLE,0,,,,,,,


### Variable and binary stars removal

In [34]:
# Number of sources per photometric variability flag, counted on the unfiltered table `df`
df['phot_variable_flag'].value_counts()

NOT_AVAILABLE    91076
VARIABLE           313
Name: phot_variable_flag, dtype: int64

In [33]:
# Number of sources per `non_single_star` value, counted on the unfiltered table `df`
df['non_single_star'].value_counts()

0    91362
1       14
2        9
4        4
Name: non_single_star, dtype: int64

In [35]:
# Remove the variable stars as well as the non-single stars.
# Variability is tested as "not flagged VARIABLE" rather than "flagged
# NOT_AVAILABLE", so that a source carrying any other non-variable flag value
# (e.g. 'CONSTANT' in the Gaia data model -- verify against the archive docs)
# is not discarded by accident.
is_not_variable = data['phot_variable_flag'] != 'VARIABLE'
is_single_star = data['non_single_star'] == 0
data = data.loc[is_not_variable & is_single_star]
data.shape

(54388, 19)

## Distance and Absolute Magnitude

To calculate the distance to a star from its parallax angle, which is measured in mas (milliarcseconds), we can use trigonometry. Using the small-angle approximation we have (with $p''$ the parallax expressed in arcseconds):

\begin{equation}
d = \frac{1}{p''}\ \mathrm{pc}
\end{equation}


In [41]:
# Calculate the distance in parsecs: d [pc] = 1 / p [arcsec].
# The parallax column is in milliarcseconds, so it is divided by 1000 first.
# `assign` returns a new frame with the extra column instead of writing into a
# filtered slice of another frame, which avoids pandas' SettingWithCopyWarning
# and gives the same result when the cell is re-run.
data = data.assign(distance=1 / (data['parallax'] / 1000))
data[['parallax', 'distance']].head()

Unnamed: 0,parallax,distance
3,0.685722,1458.315919
4,1.037481,963.873329
11,0.913684,1094.470552
12,3.27643,305.210201
13,1.637632,610.637822


To compute the absolute magnitude (Carroll & Ostlie, 'An Introduction to Modern Astrophysics'):

\begin{equation}
M = m - 5\log_{10} \left( \frac{d}{10\,\mathrm{pc}} \right)
\end{equation}

where:
- m is the apparent magnitude
- d is the distance in parsecs