# R2R Data Pull
Pulling data from the rvdata.us API. https://www.rvdata.us/about/technical-details/services/api
Code prototyped with Google Gemini.

In [3]:
import pandas as pd
import requests # This library is essential for making HTTP requests
from datetime import datetime as dt # Still useful if you need to manually construct datetimes

def get_cruise_data(cruise_id):
    """
    Fetches cruise data from the rvdata.us API and parses it into a pandas DataFrame.

    Args:
        cruise_id (str): The ID of the cruise to fetch (e.g., "RR2402").

    Returns:
        pandas.DataFrame: A DataFrame containing the cruise data, or an empty
                          DataFrame if data fetching fails or is empty.
    """
    # url = f"https://service.rvdata.us/api/cruise/cruise_id/{cruise_id}"
    url = "https://service.rvdata.us/api/cruise/vessel/Palmer"

    try:
        response = requests.get(url)
        response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
        data = response.json() # Parse the JSON response into a Python dictionary

        # Check if the status is OK and data exists
        if data.get("status") == 200 and data.get("data"):
            # The actual records are in the 'data' key, which is a list of dictionaries
            df = pd.json_normalize(data['data'])

            # --- Post-processing (as in the previous example) ---
            # Parse the 'keyword' column into a list
            if 'keyword' in df.columns:
                df['keyword_list'] = df['keyword'].apply(
                    lambda x: [item.strip() for item in x.split(',') if item.strip()] if pd.notna(x) else []
                )
                df = df.drop(columns=['keyword'])

            # Convert date columns to datetime objects
            date_columns = ['depart_date', 'arrive_date', 'release_date', 'release_date_sent', 'release_sent']
            for col in date_columns:
                if col in df.columns:
                    df[col] = pd.to_datetime(df[col], errors='coerce') # 'coerce' will turn unparseable dates into NaT

            # Convert specific numeric columns (like lat/lon min/max) that might be strings
            numeric_cols = ['longitude_min', 'longitude_max', 'latitude_min', 'latitude_max']
            for col in numeric_cols:
                if col in df.columns:
                    df[col] = pd.to_numeric(df[col], errors='coerce')

            return df
        else:
            print(f"API returned status: {data.get('status')}, message: {data.get('status_message', 'No message')}")
            return pd.DataFrame() # Return an empty DataFrame if no valid data
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        return pd.DataFrame()
    except requests.exceptions.ConnectionError as e:
        print(f"Connection error occurred: {e}")
        return pd.DataFrame()
    except requests.exceptions.Timeout as e:
        print(f"Timeout error occurred: {e}")
        return pd.DataFrame()
    except requests.exceptions.RequestException as e:
        print(f"An unexpected request error occurred: {e}")
        return pd.DataFrame()
    except json.JSONDecodeError as e:
        print(f"Failed to decode JSON response: {e}")
        return pd.DataFrame()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return pd.DataFrame()

# --- How to use it ---
cruise_id_to_fetch = "RR2402"
df_cruise = get_cruise_data(cruise_id_to_fetch)

if not df_cruise.empty:
    print(df_cruise.info())
    print("\nFirst 5 rows of the DataFrame:")
    print(df_cruise.head())

    print("\nKeywords for the first row:")
    if 'keyword_list' in df_cruise.columns:
        print(df_cruise['keyword_list'].iloc[0])
else:
    print(f"Could not retrieve data for cruise ID: {cruise_id_to_fetch}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262 entries, 0 to 261
Data columns (total 41 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   cruise_id               262 non-null    object        
 1   cruise_name             259 non-null    object        
 2   cruise_type             262 non-null    object        
 3   cruise_url              43 non-null     object        
 4   cruise_doi              259 non-null    object        
 5   is_registered           259 non-null    object        
 6   vessel_ices_code        262 non-null    object        
 7   vessel_id               262 non-null    object        
 8   vessel_name             262 non-null    object        
 9   vessel_shortname        262 non-null    object        
 10  operator_id             262 non-null    object        
 11  operator_name           262 non-null    object        
 12  scheduler_id            262 non-null    object    

In [None]:
https://service.rvdata.us/data/cruise/RR2402/fileset/615519

In [4]:
df_cruise

Unnamed: 0,cruise_id,cruise_name,cruise_type,cruise_url,cruise_doi,is_registered,vessel_ices_code,vessel_id,vessel_name,vessel_shortname,...,is_ingmrt,is_blanket_release,vessel_acronym,longitude_min,longitude_max,latitude_min,latitude_max,cruise_reports,redirect_to_mgds,keyword_list
0,NBP2304,Transit,transit,,10.7284/910224,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,,,,,,False,[usap marine managerpalmernbp2304acqsysacquisi...
1,NBP2303,Biodiversity and Systematics,op_science,,10.7284/910223,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,,,,,,False,"[mahon, andrewpalmernbp2303acqsysacquisition s..."
2,NBP2302,Bart Coring,op_science,,10.7284/909920,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,165.795051,-143.201263,-77.842218,-55.975644,,False,"[bart, philippalmernbp2302acqsysacquisition sy..."
3,NBP2301,Bart Seismic,op_science,,10.7284/909919,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,172.955906,-178.345358,-77.473966,-55.095390,,False,"[bart, philippalmernbp2301acqsysacquisition sy..."
4,NBP2205,Sanders/Bernard/Huckstadt,op_science,,10.7284/909918,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,-72.770944,-61.125255,-69.293656,-58.241163,https://service.rvdata.us/data/cruise/NBP2205/...,False,"[sanders, robertbernard, kimhuckstadt, luispal..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,NBP9206,Tropic Coupling Between Mesopelagic and Sea-Ic...,op_science,,10.7284/905386,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,-49.823300,-41.546700,-61.550000,-60.216700,,False,[tropic coupling between mesopelagic and sea-i...
258,NBP9205,Transit/Engineering Ice Trials,transit,,10.7284/905385,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,,,,,,False,[transit/engineering ice trialsnathaniel b. pa...
259,NBP9204,Research on Antarctic Ecological Rates,op_science,,10.7284/905384,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,-70.906200,-60.471600,-66.969600,-52.356400,,False,[research on antarctic ecological ratesnathani...
260,NBP9202,Weddell-1 US-Russia Drifting Ice Camp: Recover...,transit,,10.7284/905383,True,3206,Nathaniel B. Palmer,Nathaniel B. Palmer,Palmer,...,False,,NBP,-68.916000,-43.620200,-65.888700,-52.395000,,False,[weddell-1 us-russia drifting ice camp: recove...
