# Make a CSV of the observations listed on the Gemini archive web page

In [61]:
# from requests import request
import pandas as pd

gemini_url = "https://archive.gemini.edu/searchform/"
program_ids = ["GN-2024B-Q-305", "GS-2024B-Q-308"]

# Scrape one observation table per program and stack them. With
# extract_links="all" every header and body cell comes back as a
# (text, link) tuple. ignore_index=True gives the combined frame a single
# 0..N-1 row index instead of repeating each table's own 0-based labels.
df = pd.concat(
    [
        pd.read_html(gemini_url + program_id, match="Proprietary", extract_links="all")[0]
        for program_id in program_ids
    ],
    axis=0,
    ignore_index=True,
)

# Column names from the web scrape are horrible: keep only the header text
# and strip the sort-arrow glyphs.
df = df.rename(columns={name: name[0].replace("↑↓", "") for name in df.columns})
df = df.drop(columns=["Download", "WaveBand", "QA"])

# The Filename cells also carry a relative link to the FITS header page
# (e.g. /fullheader/5876). Build the absolute URLs now, before the tuples
# are collapsed below, so no placeholder tuple is needed to protect them.
# A cell without a link yields None instead of raising on concatenation.
fits_base_url = gemini_url.replace("/searchform/", "")
fits_links = [fits_base_url + link if link else None for _, link in df["Filename"]]

# Cells come back as tuples; replace each with its first value (the text).
for col_name in df.columns:
    df[col_name] = [cell[0] for cell in df[col_name]]

# Added after the collapse so the column stays last and is not touched by it.
df["FITS link"] = fits_links

# Clean up the Data Label column by removing the "[A] " prefix.
df["Data Label"] = [label.replace("[A] ", "") for label in df["Data Label"]]

df

Unnamed: 0,Filename,Data Label,UT Date Time,Inst,Class,Type,Object,ExpT,FITS link
0,N20240814A0296b.fits,GN-2024B-Q-305-0-0,2024-08-14 06:22:02,ALOPEKE,science,OBJECT,TIC 384410712,0.06,https://archive.gemini.edu/fullheader/5876
1,N20240814A0296r.fits,GN-2024B-Q-305-0-0,2024-08-14 06:22:02,ALOPEKE,science,OBJECT,TIC 384410712,0.06,https://archive.gemini.edu/fullheader/5878
2,N20240814A0297b.fits,GN-2024B-Q-305-0-0,2024-08-14 06:23:04,ALOPEKE,science,OBJECT,TIC 384410712,0.06,https://archive.gemini.edu/fullheader/5886
3,N20240814A0297r.fits,GN-2024B-Q-305-0-0,2024-08-14 06:23:04,ALOPEKE,science,OBJECT,TIC 384410712,0.06,https://archive.gemini.edu/fullheader/5875
4,N20240814A0298r.fits,GN-2024B-Q-305-0-0,2024-08-14 06:24:07,ALOPEKE,science,OBJECT,TIC 384410712,0.06,https://archive.gemini.edu/fullheader/5869
...,...,...,...,...,...,...,...,...,...
61,S20240921Z0794b.fits,GS-2024B-Q-308-0-0,2024-09-21 08:15:10,ZORRO,science,OBJECT,TIC 168789840,0.06,https://archive.gemini.edu/fullheader/5714645
62,S20240921Z0795b.fits,GS-2024B-Q-308-0-0,2024-09-21 08:16:14,ZORRO,science,OBJECT,TIC 168789840,0.06,https://archive.gemini.edu/fullheader/5714647
63,S20240921Z0795r.fits,GS-2024B-Q-308-0-0,2024-09-21 08:16:14,ZORRO,science,OBJECT,TIC 168789840,0.06,https://archive.gemini.edu/fullheader/5714648
64,S20240921Z0796b.fits,GS-2024B-Q-308-0-0,2024-09-21 08:21:41,ZORRO,science,OBJECT,HR 1299,0.06,https://archive.gemini.edu/fullheader/5714651


In [54]:

# Scratch cell: display the current contents of `df`.
# NOTE(review): the saved output of this cell still shows raw (text, link)
# tuples and "[A] " prefixes, and its execution count (54) is lower than the
# clean-up cell's (61) — it looks stale; re-run after the clean-up or remove.
df


Unnamed: 0,Filename,Data Label,UT Date Time,Inst,Class,Type,Object,ExpT
0,"(N20240814A0296b.fits, /fullheader/5876)",[A] GN-2024B-Q-305-0-0,2024-08-14 06:22:02,ALOPEKE,science,OBJECT,TIC 384410712,0.06
1,"(N20240814A0296r.fits, /fullheader/5878)",[A] GN-2024B-Q-305-0-0,2024-08-14 06:22:02,ALOPEKE,science,OBJECT,TIC 384410712,0.06
2,"(N20240814A0297b.fits, /fullheader/5886)",[A] GN-2024B-Q-305-0-0,2024-08-14 06:23:04,ALOPEKE,science,OBJECT,TIC 384410712,0.06
3,"(N20240814A0297r.fits, /fullheader/5875)",[A] GN-2024B-Q-305-0-0,2024-08-14 06:23:04,ALOPEKE,science,OBJECT,TIC 384410712,0.06
4,"(N20240814A0298r.fits, /fullheader/5869)",[A] GN-2024B-Q-305-0-0,2024-08-14 06:24:07,ALOPEKE,science,OBJECT,TIC 384410712,0.06
...,...,...,...,...,...,...,...,...
61,"(S20240921Z0794b.fits, /fullheader/5714645)",[A] GS-2024B-Q-308-0-0,2024-09-21 08:15:10,ZORRO,science,OBJECT,TIC 168789840,0.06
62,"(S20240921Z0795b.fits, /fullheader/5714647)",[A] GS-2024B-Q-308-0-0,2024-09-21 08:16:14,ZORRO,science,OBJECT,TIC 168789840,0.06
63,"(S20240921Z0795r.fits, /fullheader/5714648)",[A] GS-2024B-Q-308-0-0,2024-09-21 08:16:14,ZORRO,science,OBJECT,TIC 168789840,0.06
64,"(S20240921Z0796b.fits, /fullheader/5714651)",[A] GS-2024B-Q-308-0-0,2024-09-21 08:21:41,ZORRO,science,OBJECT,HR 1299,0.06


In [56]:
# Preview the "Data Label" values with the "[A] " marker removed.
# The result is only displayed; `df` itself is not modified here.
df["Data Label"].map(lambda raw_label: raw_label.replace("[A] ", "")).tolist()

['GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-Q-305-0-0',
 'GN-2024B-

In [36]:
# Convert the scraped DataFrame to a table and look at the first entry of
# its "col1" column.
# NOTE(review): `Table` is not imported in any visible cell — presumably
# astropy.table.Table; confirm and add the import to the imports cell.
# NOTE(review): the saved output below is a (filename, link) tuple, so this
# appears to have been run against the raw scrape. Once the clean-up cell has
# renamed the columns, "col1" may no longer exist — verify the column name.
t = Table.from_pandas(df)
t["col1"][0]
# Original note: this only gets the text of the FITS file name, not the links
# that lead to the header info.
# NOTE(review): that note disagrees with the saved output, which does include
# the link — confirm which state of `df` it refers to.


('N20240814A0296b.fits', '/fullheader/5876')