In [1]:
import pandas as pd

In [2]:
def initialize_qobs(
    file_path: str,
    column_index: int,
    station_id: int,
    start_date: str = '1980-01-01',
    end_date: str = '2015-12-31',
) -> pd.DataFrame:
    """
    Read one gauge CSV file and return its flow column as a daily,
    date-indexed DataFrame covering a fixed date range.

    The first CSV column is parsed as dates and becomes the index (named
    'date'). The selected data column is renamed to the station ID.

    Parameters:
    - file_path (str): The path to the CSV file.
    - column_index (int or None): Positional index of the data column to keep.
      If None, every column after the date column is kept and only the first
      of them is renamed.
    - station_id (int): The station ID, used (as a string) as the column name.
    - start_date (str): First day of the expected daily index.
    - end_date (str): Last day of the expected daily index.

    Returns:
    - df (pandas.DataFrame): Frame indexed by daily dates from start_date to
      end_date. Dates absent from the CSV are filled with -9999; values that
      are present but empty in the CSV are left as read.

    Raises:
    - FileNotFoundError: If file_path does not exist.
    - Exception: If pandas cannot read the file.
    """

    # Probe the path inside a context manager so the handle is closed again
    try:
        with open(file_path):
            pass
    except FileNotFoundError as e:
        raise FileNotFoundError(f"File '{file_path}' not found.") from e

    # Read the CSV file
    try:
        raw = pd.read_csv(file_path)
    except Exception as e:
        raise Exception(f"Error reading CSV file: {str(e)}") from e

    # Parse the date column into a real DatetimeIndex. Building the index
    # directly avoids relying on in-place .iloc assignment to change the
    # column dtype.
    dates = pd.DatetimeIndex(pd.to_datetime(raw.iloc[:, 0]), name='date')

    # Keep only the requested data column (or everything after the dates)
    if column_index is not None:
        flows = raw.iloc[:, [column_index]].copy()
    else:
        flows = raw.iloc[:, 1:].copy()
    flows.index = dates

    # Flows are kept in the units of the input file (no cfs -> cms conversion)

    # Name the first data column after the station
    flows = flows.rename(columns={flows.columns[0]: f'{station_id}'})

    # Force the full daily range; the range carries the 'date' name so the
    # index name survives the reindex
    expected_index = pd.date_range(start_date, end_date, name='date')
    if not flows.index.equals(expected_index):
        flows = flows.reindex(expected_index, fill_value=-9999)

    return flows


In [3]:
def addto_qobs(
    flow_file_path: str,
    column_index: int,
    station_id: int,
    df: pd.DataFrame,
    start_date: str = '1980-01-01',
    end_date: str = '2015-12-31',
) -> pd.DataFrame:
    """
    Read flow data from a CSV file and add it as a new column to the input DataFrame.

    The first CSV column is parsed as dates and matched against the index of
    `df`. Dates of `df` that are absent from the flow file receive -9999 in
    the new column, the same sentinel used for dates added by the final
    reindex.

    Parameters:
    - flow_file_path (str): The path to the CSV file containing flow data.
    - column_index (int or None): Positional index of the flow column in the
      CSV file. If None, every column after the date column is added and only
      the first of them is renamed.
    - station_id (int): The station ID, used (as a string) as the name of the
      new column.
    - df (pandas.DataFrame): The date-indexed DataFrame to extend. It is not
      modified; a new frame is returned.
    - start_date (str): First day of the expected daily index.
    - end_date (str): Last day of the expected daily index.

    Returns:
    - df (pandas.DataFrame): The input data plus the new flow column, indexed
      by daily dates from start_date to end_date.

    Raises:
    - FileNotFoundError: If flow_file_path does not exist.
    - Exception: If pandas cannot read the file.
    """

    # Probe the path inside a context manager so the handle is closed again
    try:
        with open(flow_file_path):
            pass
    except FileNotFoundError as e:
        raise FileNotFoundError(f"File '{flow_file_path}' not found.") from e

    # Read the flow data CSV file
    try:
        flow_raw = pd.read_csv(flow_file_path)
    except Exception as e:
        raise Exception(f"Error reading flow data CSV file: {str(e)}") from e

    # Parse the date column into a real DatetimeIndex, reusing df's index name
    flow_dates = pd.DatetimeIndex(
        pd.to_datetime(flow_raw.iloc[:, 0]), name=df.index.name
    )

    # Keep only the requested flow column (or everything after the dates)
    if column_index is not None:
        flow_df = flow_raw.iloc[:, [column_index]].copy()
    else:
        flow_df = flow_raw.iloc[:, 1:].copy()
    flow_df.index = flow_dates

    # Flows are kept in the units of the input file (no unit conversion)

    # Rename the flow data column based on the station ID
    flow_df = flow_df.rename(columns={flow_df.columns[0]: f'{station_id}'})

    # Align to df's dates before merging so that dates missing from the flow
    # file become -9999 instead of the NaN a plain left merge would produce
    flow_df = flow_df.reindex(df.index, fill_value=-9999)

    # Merge flow_df with df based on matching index (dates)
    merged = df.merge(flow_df, left_index=True, right_index=True, how='left')

    # Force the full daily range, keeping whatever name df's index had
    expected_index = pd.date_range(start_date, end_date, name=df.index.name)
    if not merged.index.equals(expected_index):
        merged = merged.reindex(expected_index, fill_value=-9999)

    return merged


In [4]:
# Output path of the streamflow data file written at the end of the notebook
data_file_path = '/home/paulc600/scratch/calprms/obsin/data/sf_data'

# Directory holding the per-gauge flow CSV files
GAUGE_DIR = '/home/paulc600/local/Natural_flows/nat_gauges'

# Positional index of the flow column inside every gauge CSV
FLOW_COLUMN_INDEX = 4

# (CSV file stem, station ID) pairs, in the column order of the final table.
# The order must match the station list in the header of the output file.
GAUGES = [
    ('SWCSB', 24),
    ('SMRBB', 13),
    ('SMRIB', 10),
    ('MRWIB', 195),
    ('NFKMR', 429),
    ('MREIB', 95),
    ('BSCMO', 133),
    ('CLCMO', 212),
    ('LDCIB', 143),
    ('BTCIB', 150),
    ('PPCMO', 205),
    ('FRRIB', 77),
    ('BCBMO', 115),
    ('RKCMO', 79),
]

# The first gauge creates the table; every further gauge adds one column
(first_stem, first_station_id), *other_gauges = GAUGES
qobs = initialize_qobs(f'{GAUGE_DIR}/{first_stem}.csv', FLOW_COLUMN_INDEX, first_station_id)
for gauge_stem, gauge_station_id in other_gauges:
    qobs = addto_qobs(f'{GAUGE_DIR}/{gauge_stem}.csv', FLOW_COLUMN_INDEX, gauge_station_id, qobs)

qobs

Unnamed: 0,24,13,10,195,429,95,133,212,143,150,205,77,115,79
1980-01-01,37.743924,115.593924,130.593924,35.0,0.0,35.0,0.00,0.00,0,0.000000,0.00,139.785333,5.7,1.34
1980-01-02,36.028291,113.878291,128.878291,35.0,0.0,35.0,0.00,0.00,0,0.000000,0.00,131.562667,5.4,1.25
1980-01-03,36.028291,111.878291,130.878291,35.0,0.0,35.0,0.00,0.00,0,0.000000,0.00,123.340000,5.0,1.15
1980-01-04,34.312658,109.162659,116.162659,32.0,0.0,32.0,0.00,0.00,0,0.000000,0.00,98.672000,4.7,0.86
1980-01-05,34.312658,104.162659,111.162659,28.0,0.0,28.0,0.00,0.00,0,0.000000,0.00,65.781333,4.4,0.47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-12-27,47.219559,289.219559,337.219559,28.5,0.0,28.5,2.27,1.38,0.204968035,3.446500,3.19,6.652540,2.2,1.46
2015-12-28,44.949388,274.949388,324.949388,29.1,0.0,29.1,2.57,1.49,0.230589039,3.877312,3.59,8.870053,2.9,1.96
2015-12-29,42.981906,264.981906,302.981906,29.1,0.0,29.1,2.57,1.49,0.230589039,3.877312,3.59,8.870053,2.9,1.96
2015-12-30,43.435941,252.435940,293.435941,28.4,0.0,28.4,2.57,1.49,0.230589039,3.877312,3.59,8.870053,2.9,1.96


In [20]:
# Make sure the row labels are pandas datetimes; the writer below reads
# .year / .month / .day from each label
qobs = qobs.set_axis(pd.to_datetime(qobs.index), axis='index')

In [21]:
# Write sf_data to a text file: a fixed comment header, the gauge count,
# a separator line, then one line per day
custom_header = """Created by paul NO INFILL
/////////////////////////////////////////////////////////////////////////
// Station IDs for runoff:
// ID
// Swiftcurrent Creek at Sherburne Reservoir
// St. Mary River near Babb, MT
// St. Mary River at International Boundary
// Milk River at Western Crossing of International Boundary
// North Fork Milk River above St Mary Canal near Browning
// Milk River at Eastern Crossing
// Big Sandy Creek at Mouth
// Clear Creek at Mouth
// Lodge Creek at International Boundary
// Battle Creek at International Boundary
// Peoples Creek at Mouth
// Frenchman River at International Boundary
// Beaver Creek Bowdoin
// Rock Creek at Mouth
/////////////////////////////////////////////////////////////////////////
// Unit: runoff = cfs
/////////////////////////////////////////////////////////////////////////
"""

with open(data_file_path, 'w') as sf_file:
    # Write header
    sf_file.write(custom_header)

    # Number of gauges, taken from the frame so the declared count cannot
    # drift from the number of value columns actually written
    sf_file.write(f'runoff {qobs.shape[1]}\n')
    sf_file.write('################################################\n')

    # Write data: year month day, three zeros, then one value per gauge.
    # name=None yields plain tuples, so column labels such as '24' never
    # have to be turned into namedtuple field names.
    for timestamp, flows in zip(qobs.index, qobs.itertuples(index=False, name=None)):
        fields = [timestamp.year, timestamp.month, timestamp.day, 0, 0, 0, *flows]
        sf_file.write(' '.join(map(str, fields)) + '\n')