In [1]:
import get_nirspec_mos_info
import pandas as pd
from get_nirspec_mos_info import extract_basic_info_from_GO,get_observation_status,\
    check_csv,extract_basic_info_from_GTO,extract_basic_info_from_DDT
import time

## JWST GO cycle 1-3

**Extract basic information of NIRSpec/MOS observations from JWST GO webpages**

Basic info (ID, Program Title, PI & Co-PIs, Exclusive Access Period (months), Prime/Parallel Time (hours), Instrument/Mode, Type, Topic, GO Cycle) will be saved to a CSV file.

In [2]:
# Approved-program listing pages for JWST GO Cycles 1-3, keyed by cycle label.
# The three pages differ only in the cycle number embedded in the final path segment.
urls = {
    f"Cycle {cycle}": (
        "https://www.stsci.edu/jwst/science-execution/approved-programs/"
        f"general-observers/cycle-{cycle}-go"
    )
    for cycle in (1, 2, 3)
}

In [3]:
# Gather the NIRSpec/MOS rows of every GO cycle into one list.
# The column names come from the first cycle processed and are extended with
# "Topic" and "GO Cycle" (presumably the extra fields extract_basic_info_from_GO
# adds to each row -- confirm against the helper).
all_data = []
headers = None

for cycle_name, url in urls.items():
    cycle_data, cycle_headers = extract_basic_info_from_GO(url, cycle_name)
    headers = (cycle_headers + ["Topic", "GO Cycle"]) if headers is None else headers
    all_data += cycle_data

# Persist the combined table, or report that nothing was extracted.
if not all_data:
    print("No NIRSpec/MOS info found.")
else:
    df = pd.DataFrame(all_data, columns=headers)
    df.to_csv('NIRSpec_MOS_pps_GO.csv', index=False)
    print("Filtered proposal info with topics and cycles has been written to NIRSpec_MOS_pps_GO.csv")


Extracting data for Cycle 1...
Extracting data for Cycle 2...
Extracting data for Cycle 3...
Filtered proposal data with topics and cycles has been written to NIRSpec_MOS_pps_GO.csv


**Extract observation status of each proposal**

In [2]:
# Load the GO proposal table from the CSV written by the basic-info extraction step.
df = pd.read_csv('NIRSpec_MOS_pps_GO.csv')

In [3]:
# Expand every GO proposal in `df` into one output row per status entry returned
# by get_observation_status, then write the combined table to CSV.
#
# Rows are laid out only AFTER the loop, once the complete header list is known.
# Building them inside the loop would leave rows created before a new status
# column was discovered shorter than later ones, relying on pandas to pad
# ragged rows and mixing None with '' as the "missing" marker.
final_headers = list(df.columns)  # proposal columns first; status columns appended as discovered
merged_records = []               # proposal fields + status entry, one dict per output row

# Iterate through each proposal and fetch its observation status
for _row_index, row in df.iterrows():
    proposal_id = row['ID']

    status_data, headers = get_observation_status(proposal_id)

    if status_data:
        # Update the final headers with any new ones found
        for header in headers:
            if header not in final_headers:
                final_headers.append(header)

        proposal_fields = dict(zip(df.columns, row.tolist()))
        for entry in status_data:
            # Status values win over proposal values when keys collide.
            merged = dict(proposal_fields)
            merged.update(entry)
            merged_records.append(merged)

    time.sleep(3)  # Add a delay to avoid overloading the server

# Every row now has exactly len(final_headers) cells; absent fields become ''.
all_status_data = [
    [record.get(header, '') for header in final_headers]
    for record in merged_records
]

# Create the final DataFrame and write to CSV
if all_status_data:
    df_status = pd.DataFrame(all_status_data, columns=final_headers)
    df_status.to_csv('NIRSpec_MOS_pps_GO_with_detailed_status.csv', index=False)
    print("Updated proposal info with detailed observation status has been written to NIRSpec_MOS_pps_GO_with_detailed_status.csv")
else:
    print("No NIRSpec MultiObject Spectroscopy info found.")


Fetching status for Proposal ID: 1433
Connection error for Proposal ID 1433: HTTPSConnectionPool(host='www.stsci.edu', port=443): Read timed out. (read timeout=60). Retrying...
Headers: ['Observation', 'Visit', 'Status', 'Targets', 'Template', 'Hours', 'Start UT', 'End UT', 'Repeat']
Number of rows found: 5
Row data: ['10', '1', 'Archived', 'MACS0647+7015', 'NIRCam Imaging', '3.31', 'Sep 23, 2022 11:42:17', 'Sep 23, 2022 15:05:56', '']
Row data: ['21', '1', 'Archived', 'MACS0647-MSA-TARGETS', 'NIRSpec MultiObject Spectroscopy', '3.82', 'Jan 8, 2023 06:23:57', 'Jan 8, 2023 09:17:00', '']
Row data: ['20', '1', 'Archived', 'MACS0647+7015', 'NIRCam Imaging', '1.12', 'Jan 8, 2023 16:57:45', 'Jan 8, 2023 18:27:01', '']
Row data: ['23', '1', 'Archived', 'MACS0647-MSA-OBS23', 'NIRSpec MultiObject Spectroscopy', '3.83', 'Feb 20, 2023 17:57:32', 'Feb 20, 2023 21:14:48', 'Repeat of observation 22 visit 1 in this program byWOPR88662']
Row data: ['22', '1', 'FailedArchived', 'MACS0647-MSA-TARGETS',

Verify that the observation status has been retrieved for every proposal with NIRSpec/MOS observations.

In [1]:
# Hand check_csv the GO proposal list and the GO status table so it can report
# on coverage (the recorded output of this cell is "All checked").
file1, file2 = (
    'NIRSpec_MOS_pps_GO.csv',
    'NIRSpec_MOS_pps_GO_with_detailed_status.csv',
)
check_csv(file1, file2)

All checked


## JWST GTO (all cycles)

**Extract basic information of NIRSpec/MOS observations from JWST GTO webpage**

Basic info (ID, Program Title, Principal Investigator, AR?, Instrument/Mode, Allocated Hours) will be saved to a CSV file.

In [5]:
# Pull the NIRSpec/MOS entries from the JWST GTO approved-programs page and
# store them as a CSV file.
url = "https://www.stsci.edu/jwst/science-execution/approved-programs/guaranteed-time-observations"

nirspec_data, nirspec_headers = extract_basic_info_from_GTO(url)

if not nirspec_data:
    print("No NIRSpec/MOS info found.")
else:
    df = pd.DataFrame(nirspec_data, columns=nirspec_headers)
    df.to_csv('NIRSpec_MOS_pps_GTO.csv', index=False)
    print("NIRSpec/MOS GTO proposal info has been written to NIRSpec_MOS_pps_GTO.csv")


Extracting info for GTO...
NIRSpec/MOS GTO proposal info has been written to NIRSpec_MOS_pps_GTO.csv


**Extract observation status of each proposal**

In [6]:
# Load the GTO proposal table, expand every proposal into one output row per
# status entry returned by get_observation_status, and write the result to CSV.
#
# Rows are laid out only AFTER the loop, once the complete header list is known.
# Building them inside the loop would leave rows created before a new status
# column was discovered shorter than later ones, relying on pandas to pad
# ragged rows and mixing None with '' as the "missing" marker.
df = pd.read_csv('NIRSpec_MOS_pps_GTO.csv')

final_headers = list(df.columns)  # proposal columns first; status columns appended as discovered
merged_records = []               # proposal fields + status entry, one dict per output row

for _row_index, row in df.iterrows():
    proposal_id = row['ID']
    status_data, headers = get_observation_status(proposal_id)

    if status_data:
        # Register any status column not seen so far, preserving discovery order.
        for header in headers:
            if header not in final_headers:
                final_headers.append(header)

        proposal_fields = dict(zip(df.columns, row.tolist()))
        for entry in status_data:
            # Status values win over proposal values when keys collide.
            merged = dict(proposal_fields)
            merged.update(entry)
            merged_records.append(merged)

    time.sleep(3)  # pause between requests to avoid overloading the server

# Every row now has exactly len(final_headers) cells; absent fields become ''.
all_status_data = [
    [record.get(header, '') for header in final_headers]
    for record in merged_records
]

if all_status_data:
    df_status = pd.DataFrame(all_status_data, columns=final_headers)
    df_status.to_csv('NIRSpec_MOS_pps_GTO_with_detailed_status.csv', index=False)
    print("Updated proposal info with detailed observation status has been written to NIRSpec_MOS_pps_GTO_with_detailed_status.csv")
else:
    print("No NIRSpec MultiObject Spectroscopy info found.")

Fetching status for Proposal ID: 4527
Headers: ['Observation', 'Visit', 'Status', 'Targets', 'Template', 'Hours', 'Plan Windows']
Number of rows found: 1
Row data: ['2', '1', 'Implementation', 'macs1149ncf-photutils-v2p0p2', 'NIRSpec MultiObject Spectroscopy', '5.96', 'May 15, 2025 - Jun 13, 2025 (2025.135 - 2025.164)']
Fetching status for Proposal ID: 4552
Headers: ['Observation', 'Visit', 'Status', 'Targets', 'Template', 'Hours', 'Plan Windows']
Number of rows found: 2
Row data: ['1', '1', 'Implementation', 'v16', 'NIRSpec MultiObject Spectroscopy', '6.38', 'May 11, 2025 - Jun 11, 2025 (2025.131 - 2025.162)']
Row data: ['2', '1', 'Implementation', 'v16', 'NIRSpec MultiObject Spectroscopy', '7.07', 'May 11, 2025 - Jun 11, 2025 (2025.131 - 2025.162)']
Fetching status for Proposal ID: 2758
Headers: ['Observation', 'Visit', 'Status', 'Targets', 'Template', 'Hours', 'Start UT', 'End UT']
Number of rows found: 1
Row data: ['21', '1', 'Archived', 'TM_M1149PAR_v10_fin', 'NIRSpec MultiObject 

In [7]:
# Hand check_csv the GTO proposal list and the GTO status table so it can report
# on coverage (the recorded output of this cell is "All checked").
file1, file2 = (
    'NIRSpec_MOS_pps_GTO.csv',
    'NIRSpec_MOS_pps_GTO_with_detailed_status.csv',
)

check_csv(file1, file2)

All checked


## JWST DDT (all cycles)

Basic info (PID, Title, PI, Instruments, Allocated Hours, Cycle) will be saved to a CSV file.



In [2]:
# Extract the NIRSpec entries from the JWST DDT approved-programs page and
# store them as a CSV file.
url = 'https://www.stsci.edu/jwst/science-execution/approved-programs/directors-discretionary-time'
ddt_csv_path = 'NIRSpec_pps_DDT.csv'  # the same file the status cell reads back

nirspec_data, nirspec_headers = extract_basic_info_from_DDT(url)

if nirspec_data:
    df = pd.DataFrame(nirspec_data, columns=nirspec_headers)
    df.to_csv(ddt_csv_path, index=False)
    # Report the path that was actually written; the message previously named
    # NIRSpec_MOS_pps_DDT.csv, a file this cell never creates.
    print(f"NIRSpec DDT proposal info has been written to {ddt_csv_path}")
else:
    print("No NIRSpec info found.")

Extracting info for DDT...
NIRSpec DDT proposal info has been written to NIRSpec_MOS_pps_DDT.csv


In [4]:
# Load the DDT proposal table, expand every proposal into one output row per
# status entry returned by get_observation_status, and write the result to CSV.
#
# Rows are laid out only AFTER the loop, once the complete header list is known.
# Building them inside the loop would leave rows created before a new status
# column was discovered shorter than later ones, relying on pandas to pad
# ragged rows and mixing None with '' as the "missing" marker.
df = pd.read_csv('NIRSpec_pps_DDT.csv')

final_headers = list(df.columns)  # proposal columns first; status columns appended as discovered
merged_records = []               # proposal fields + status entry, one dict per output row

for _row_index, row in df.iterrows():
    proposal_id = row['PID']  # the DDT table names its identifier column 'PID'
    status_data, headers = get_observation_status(proposal_id)

    if status_data:
        # Register any status column not seen so far, preserving discovery order.
        for header in headers:
            if header not in final_headers:
                final_headers.append(header)

        proposal_fields = dict(zip(df.columns, row.tolist()))
        for entry in status_data:
            # Status values win over proposal values when keys collide.
            merged = dict(proposal_fields)
            merged.update(entry)
            merged_records.append(merged)

    time.sleep(3)  # pause between requests to avoid overloading the server

# Every row now has exactly len(final_headers) cells; absent fields become ''.
all_status_data = [
    [record.get(header, '') for header in final_headers]
    for record in merged_records
]

if all_status_data:
    df_status = pd.DataFrame(all_status_data, columns=final_headers)
    df_status.to_csv('NIRSpec_MOS_pps_DDT_with_detailed_status.csv', index=False)
    print("Updated proposal info with detailed observation status has been written to NIRSpec_MOS_pps_DDT_with_detailed_status.csv")
else:
    print("No NIRSpec MultiObject Spectroscopy info found.")

Fetching status for Proposal ID: 6742
Headers: ['Observation', 'Visit', 'Status', 'Targets', 'Template', 'Hours', 'Plan Windows']
Number of rows found: 2
Row data: ['4', '1', 'Implementation', 'C2024E1', 'NIRSpec IFU Spectroscopy', '2.53', 'Apr 16, 2025 - Apr 20, 2025 (2025.106 - 2025.110)Apr 21, 2025 - Apr 29, 2025 (2025.111 - 2025.119)']
Row data: ['5', '1', 'Implementation', 'C2024E1-sky', 'NIRSpec IFU Spectroscopy', '1.95', 'Apr 16, 2025 - Apr 20, 2025 (2025.106 - 2025.110)Apr 22, 2025 - Apr 30, 2025 (2025.112 - 2025.120)']
Fetching status for Proposal ID: 6716
Headers: ['Observation', 'Visit', 'Status', 'Targets', 'Template', 'Hours', 'Plan Windows']
Number of rows found: 2
Row data: ['3', '1', 'Implementation', 'SN2024ggi', 'NIRSpec Fixed Slit Spectroscopy', '1.32', 'Dec 21, 2024 - Feb 4, 2025 (2024.356 - 2025.035)']
Row data: ['4', '1', 'Implementation', 'SN2024ggi', 'NIRSpec Fixed Slit Spectroscopy', '1.50', 'Apr 27, 2025 - May 15, 2025 (2025.117 - 2025.135)']
Fetching status f