# Creating the dataset from SPARC

In [6]:
import csv
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Loading the two samples

In [3]:
# Sample from table 2 : http://astroweb.cwru.edu/SPARC/ (for the velocity curves)
# glob() returns paths in a file-system-dependent order, so the list is sorted:
# galaxy indices into `data` (and the row order of anything built by looping
# over it) then stay the same from one run or machine to the next.

files = './raw_data/SPARC_rotational_curves/*.dat'
data = sorted(glob(files))
print("{} galaxies found".format(len(data)))

# Sample from table 1 : http://astroweb.cwru.edu/SPARC/ (for the luminosities [3.6] and effective radii)
# The rest of the notebook reads the 'Name', 'Type', 'Qual', 'L3_6' and 'Reff' columns.

df = pd.read_csv('./raw_data/SPARC_data_mass_models.csv')

175 galaxies found


### Useful functions

In [4]:
def finding_index(name):
    """Return the position in `data` of the rotation-curve file of galaxy `name`.

    The file is identified by its base name, ``<name>_rotmod.dat``, so the
    lookup does not depend on the directory prefix or on the path separator
    used in the entries of `data`.

    Parameters
    ----------
    name : str
        Galaxy name as it appears in the file name (e.g. 'CamB').

    Returns
    -------
    int
        Index of the first matching entry of `data`.

    Raises
    ------
    ValueError
        If no entry of `data` has that base name.
    """
    target = name + '_rotmod.dat'
    for position, path in enumerate(data):
        if os.path.basename(path) == target:
            return position
    raise ValueError("{!r} not found among the rotation-curve files".format(target))


# Index of CamB's rotation-curve file in `data`
i = finding_index('CamB')

def finding_name(galaxy):
    """Return the ``name`` attribute of the object passed in."""
    label = getattr(galaxy, 'name')
    return label

### Merging the two data samples and calculating stellar masses and dark matter fraction

In [18]:
# Weights applied to the disk and bulge terms when the baryonic velocity is
# built (values taken from http://astroweb.cwru.edu/SPARC/)
gammadisk, gammabul = 0.5, 0.7

# Stellar mass for every catalogue row, assuming M/L = 0.5 (Lelli 2016).
# The factor 10**9 presumably converts L3_6 from units of 1e9 Lsun to Lsun — confirm against the table header.
M = df['L3_6'].mul(0.5).mul(10**9)


class galaxy():
    """One galaxy of the rotation-curve sample, plus quantities derived from it.

    Instances read several module-level objects: `data` (list of
    rotation-curve file paths), `df` (catalogue table), `M` (stellar masses,
    one per row of `df`) and the weights `gammadisk` / `gammabul`.

    Attributes set by ``__init__``: ``index``, ``name``, and the file columns
    ``radius``, ``Vobs``, ``errV``, ``Vgas``, ``Vdisk``, ``Vbul``, ``SBdisk``,
    ``SBbul``. Attributes filled in later by the ``add_*`` methods (``None``
    until then): ``Vbar``, ``fDM``, ``stellarmass``, ``effRad``.
    """

    def __init__(self, i):
        """Load the rotation-curve file ``data[i]``.

        Parameters
        ----------
        i : int
            Position of the galaxy's file in the module-level list `data`.
        """
        self.index = i
        # File names look like "<name>_rotmod.dat": keep what precedes the first
        # underscore. basename() copes with either path separator.
        self.name = os.path.basename(data[i]).split('_')[0]

        # Parse the file once and slice its columns, instead of re-reading it
        # for every attribute.
        table = np.loadtxt(data[i])
        self.radius = table[:, 0]   # radius
        self.Vobs = table[:, 1]     # observed velocity
        self.errV = table[:, 2]     # error on the observed velocity
        self.Vgas = table[:, 3]     # gas velocity
        self.Vdisk = table[:, 4]    # disk velocity
        self.Vbul = table[:, 5]     # bulge velocity
        self.SBdisk = table[:, 6]   # disk surface brightness
        self.SBbul = table[:, 7]    # bulge surface brightness

        # Derived quantities, filled in by the add_* methods
        self.Vbar = None            # velocity of the baryons
        self.fDM = None             # dark matter fraction
        self.stellarmass = None     # stellar mass
        self.effRad = None          # effective radius

    def _catalog_rows(self):
        """Return the positions of the rows of `df` whose 'Name' equals this galaxy's name."""
        return np.where(df['Name'] == self.name)[0]

    def add_Vbar(self):
        """Compute the velocity of the baryons and store it in ``self.Vbar``.

        Each component enters as ``|V| * V`` rather than ``V**2`` so that its
        sign is kept in the sum; the disk and bulge terms are weighted by
        `gammadisk` and `gammabul`. Always returns 1.
        """
        self.Vbar = np.sqrt(
            abs(self.Vgas) * self.Vgas
            + gammadisk * abs(self.Vdisk) * self.Vdisk
            + gammabul * abs(self.Vbul) * self.Vbul
        )
        return 1

    def add_fDM(self):
        """Compute the dark matter fraction and store it in ``self.fDM``.

        ``fDM = (Vobs**2 - Vbar**2) / Vobs**2``, evaluated at every radius.
        ``Vbar`` is computed first if it has not been set yet. Always returns 1.
        """
        # `is None`, not `== None`: once Vbar holds an array, `== None` compares
        # element-wise and the `if` raises "truth value of an array is ambiguous".
        if self.Vbar is None:
            self.add_Vbar()
        self.fDM = (self.Vobs**2 - self.Vbar**2) / (self.Vobs**2)
        return 1

    def isintwo(self):
        """Tell whether the galaxy belongs to the selected sample.

        Returns 1 when the catalogue `df` contains exactly one row with this
        galaxy's name and that row has ``Qual == 1`` and ``Type < 10``;
        returns 0 otherwise.
        """
        rows = self._catalog_rows()
        if len(rows) != 1:
            return 0
        j = rows[0]
        if (df['Qual'].iloc[j] == 1) and (df['Type'].iloc[j] < 10):
            return 1
        return 0

    def add_stellarmass(self):
        """Store the galaxy's stellar mass (in solar mass) in ``self.stellarmass``.

        The value is taken from the pre-computed `M`, at the position of the
        first catalogue row matching the galaxy's name. If no row matches, the
        attribute is left unchanged. Always returns 1.
        """
        rows = self._catalog_rows()
        if len(rows) != 0:
            # rows[0] is a position, so index positionally rather than by label
            self.stellarmass = M.iloc[rows[0]]
        return 1

    def add_effRad(self):
        """Store the galaxy's effective radius (in kpc) in ``self.effRad``.

        The value is the 'Reff' entry of the first catalogue row matching the
        galaxy's name. If no row matches, the attribute is left unchanged.
        Always returns 1.
        """
        rows = self._catalog_rows()
        if len(rows) != 0:
            # rows[0] is a position, so index positionally rather than by label
            self.effRad = df['Reff'].iloc[rows[0]]
        return 1


# Testing
# The galaxy is looked up by name here instead of relying on whatever value the
# global `i` currently holds, so this cell gives the same result whenever it is run.

firstgal = galaxy(finding_index('CamB'))

firstgal.add_fDM()
firstgal.add_stellarmass()
firstgal.add_effRad()


print(firstgal.fDM, firstgal.stellarmass)

[0.3641071  0.4184481  0.43746365 0.39837031 0.38245181 0.39030169
 0.43111604 0.45193686] 70650500000.0


### Making the final CSV file

In [19]:
# Make sure the output directory exists; open() does not create it
os.makedirs('dataset', exist_ok=True)

# Open a CSV file for writing
with open('dataset/SPARC.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter = ',')

    # Write the name line and the unit line
    writer.writerow(["Name", "M*", "Reff", "fDM(Reff)", "sigma_fdm(Reff)", "fDM", "sigma_fdm", "Radius"])
    writer.writerow(["(SDSS)", "[Msun]", "[kpc]", "None", "None", "None", "None", "[kpc]"])

    # One row per galaxy that passes the isintwo() selection.
    # The loop uses its own names (`idx`, `gal`) so that running this cell does
    # not overwrite the globals `i` and `firstgal` used by other cells.
    for idx in range(len(data)):
        gal = galaxy(idx)
        if not gal.isintwo():
            continue

        gal.add_fDM()
        gal.add_stellarmass()
        gal.add_effRad()

        rad = gal.radius
        effRad = gal.effRad

        # Array-valued columns are stored as ", "-joined strings in a single CSV field
        rad_str = ", ".join(map(str, rad))
        fDM_str = ", ".join(map(str, gal.fDM))

        # fDM = 1 - Vbar^2 / Vobs^2; propagating the error on Vobs alone gives
        # sigma_fDM = |2 * Vbar^2 / Vobs^3 * errV|
        sigma_fdm = np.abs(2 * gal.Vbar**2 / gal.Vobs**3 * gal.errV)
        sigma_fdm_str = ", ".join(map(str, sigma_fdm))

        # Values at the effective radius, by linear interpolation along the curve
        # (np.interp expects `rad` to be increasing — assumed true of the input files)
        fDMeff = np.interp(effRad, rad, gal.fDM)
        Vbar_eff = np.interp(effRad, rad, gal.Vbar)
        Vobs_eff = np.interp(effRad, rad, gal.Vobs)
        errV_eff = np.interp(effRad, rad, gal.errV)
        sigma_fdmeff = np.abs(2 * Vbar_eff**2 / Vobs_eff**3 * errV_eff)

        # Write the data to the CSV file with commas as separators
        writer.writerow([gal.name, gal.stellarmass, effRad, fDMeff, sigma_fdmeff, fDM_str, sigma_fdm_str, rad_str])

# Read the CSV file into a DataFrame
sparc = pd.read_csv('dataset/SPARC.csv')
# Test: row 0 of the frame is the units line, so row 1 is the first galaxy written
print(sparc['fDM'].iloc[1])


0.3641071029577853, 0.41844809507631825, 0.43746364776234553, 0.3983703110860865, 0.38245180564413267, 0.39030169116612046, 0.4311160355548469, 0.4519368566792862
