In [1]:
import json
import utilfuncs
import dataloader as dl
import plotmaker

# Relative path of the JSON cache file; it is opened and parsed with json.load in the next cell.
cache_path = "Data/cache.json"

In [2]:
# Read the cache inside a context manager so the file handle is closed as soon
# as parsing finishes (a bare open() inside json.load() leaves it dangling).
with open(cache_path, "r") as cache_file:
    cache = json.load(cache_file)

gal_list = list(cache.keys())
# NOTE(review): the [1:] slice drops the first key — presumably the non-galaxy
# metadata entry. This relies on key order in the cache file; confirm.
print(f"List of galaxies: {gal_list[1:]}")

# utilfunc.load_data(cache_path, table_columns*, kw_arg(galaxy, paper, etc))
# TODO: A universal data-loading function
# TODO: Understand what each paper is doing

# Presentation for all the literature that I've covered. Understand what's been done 
#   and how the data has been collected. Goal: 20~30 min summary talk, tell audience 
#   which reference is the best to be used for what, and how much we should trust them.
#   Include a bit of cross-comparison, compare what each study has done for the same physical value

# List of questions:
# - Instruments for each paper
# - Spectral resolution
# - Fitting methodology (put these in the README).
# (There tend to be two modes: equivalent width, which is to fit absorption features individually,
# and spectrum synthesis, where they use a model spectrum to compare with the whole observed spectrum)
# - Check consistency between papers. 
# The idea is to check how robust the data is. Which one should be trusted more than the other?
# Either answer that question, or put forward the information that will help answering that question.

List of galaxies: ['Scl', 'For', 'LeoI', 'Sex', 'LeoII', 'CVnI', 'UMi', 'Dra', 'NGC2419', 'M68', 'M15', 'UMaII', 'M2', 'M53', 'LeoA', 'Aquarius', 'SagDIG', 'TriII']


In [3]:
# Watch out for the edge case: two stars being named the same way by two different papers
# Download Apogee?
# Expand the README page to include the sources
# Include a column to link the instrument

# A function that, given an element & dSph name, returns an astropy.table with star name, ONE RA & DEC for each star, and the column from either catalog
# If a star is in two references, return the values from both references, and make them into an astropy.table

# Add the following functions
# 1. A list of dSphs, a list of element abundances, and return a table containing name, RA, DEC, and the abundance of each element, and a column indicating source of data, and a column of galaxy
#    Generate unique ID for each star. One star may take multiple rows, but we know they're the same star given a unique ID.
#    Rows = number of measurements, columns = number of things being measured
# 2. 

In [4]:
# Set catalog_type to "VizieR" to distinguish between Vizier and manually imported data files

# TODO when refreshing the cache:
# - Saving a catalog type (i.e. VizieR)
# - Saving the vizier table name
# - Read database into an exportable table

In [1]:
import pandas as pd

# Top-level cache key that holds the metadata entry instead of a galaxy.
META_DATA_KEY = "Meta data"


def make_dataframe(data, include_meta=True):
    """Flatten cache-formatted data into pandas DataFrames.

    The measurement table gets one row per (galaxy, star, paper) entry. Each
    row holds that paper's column/value pairs plus a 'Paper' column naming the
    paper the values came from. ``data`` is never modified: every row is built
    from a copy of the cached per-paper dict.

    Args:
        data: Nested dict laid out like the cache:
            ``{galaxy: {star: {paper: {column: value}}}}``, optionally with a
            ``META_DATA_KEY`` entry whose 'Included titles' mapping describes
            each paper ('Columns', 'Members', and a 'Table info' dict with
            'Data source', 'Paper tag' and 'Tables used').
        include_meta: When True (default), also build and return the metadata
            table. When False, the metadata entry is not required to exist.

    Returns:
        ``(data_table, metadata_table)`` when ``include_meta`` is True,
        otherwise just ``data_table``. Both tables use object dtype so cell
        values are kept exactly as stored in the cache; columns missing for a
        given row are filled with NaN.

    Raises:
        KeyError: If ``include_meta`` is True and the metadata entry (or one
            of its expected fields) is missing from ``data``.
    """
    # Collect every row first and build the frame once; concatenating inside
    # the loop re-copies the whole table for each new row.
    records = []
    for gal_name, stars in data.items():
        if gal_name == META_DATA_KEY:
            continue  # the metadata entry is not a galaxy
        for star in stars.values():
            for paper, measurements in star.items():
                # Build a new dict so the caller's cache is left untouched.
                records.append({**measurements, 'Paper': paper})

    output = pd.DataFrame(records, dtype=object)

    if not include_meta:
        return output

    # One metadata row per paper listed under 'Included titles'.
    meta_records = []
    for cat_name, cat in data[META_DATA_KEY]['Included titles'].items():
        table_info = cat["Table info"]
        meta_records.append({
            "Paper name": cat_name,
            "ViziER tag": table_info['Paper tag'],
            "Columns": cat['Columns'],
            "Members": cat['Members'],
            "Data source": table_info['Data source'],
            "Tables included": table_info['Tables used'],
        })

    metadata_table = pd.DataFrame(meta_records, dtype=object)

    return output, metadata_table