In [24]:
import numpy as np
import pandas as pd
import os

### Libraries

In [25]:
def extract_FuelMoist(df):
    # fuel_moisture = df.iloc[:, -10:]
    # df_time = df[["ObsDate", "ObsTime"]]
    # fuel_M = pd.concat([df_time, fuel_moisture], axis = 1)
    df_reordered = df[['ObsDate', 'ObsTime', '1 Hour DFM', '10 Hour DFM', '100 Hour DFM', '1000 Hour DFM', 'GreenHerb', 'GreenShrub', 'Fuel Temperature']] #'SR_SOW', 'SR_WetFlag', 'SnowFlag']]
    return df_reordered

def find_first_valid_line(file_path, start_line=34):
    try:
        with open(file_path, 'r') as file:
            # Skip the first 'start_line' lines
            for line_num, line in enumerate(file, start=1):
                if line_num < start_line:
                    continue

                # Check if the line does not contain "NA"
                if "NA" not in line:
                    return line_num

        # If no valid line was found
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    
def Extract(file_path, return_val, year):
    #return_value is ERC, BI, SC or IC
    rows_to_skip = find_first_valid_line(file_path, start_line=34)
    df = pd.read_csv(
        file_path,
        delimiter='\s+',  # Assumes the columns are separated by whitespace
        skiprows=rows_to_skip-1,  # Skips the first 33 rows (0-indexed, so it will start reading data from line 34)
        nrows = 398 - rows_to_skip
    )



    # df.columns = new_column_names = [
    #     "Period Begins", "No. Years", "Mean", "Std Dev", "Critical Pcntile",
    #     "Highest Avg. Year", "Lowest Avg. Year", "| High, Year", "High Avg.", "High Std Dev", 
    #     "High Median", "Low, Year", "Low Avg.", "Low Std Dev", "Low Median", "Period Begins"
    # ]
    df.loc[-1] = df.columns 
    df.index = df.index + 1  # Shift the index by 1 to make space for the new row
    df = df.sort_index() 
    names = [
        "Period Begins", "No. Years", "Mean", "Std Dev", "Critical Pcntile",
        "Highest Avg." , "Highest Year", "Lowest Avg.", "Lowest Year", "| High", "Year", "High Avg.", "High Std Dev", 
        "High Median", "Low", "Year", "Low Avg.", "Low Std Dev", "Low Median", "Period Begins"
    ]
    len(df.columns), len(names)
    df.columns = names

    df = df[["Period Begins", "Mean"]]
    df.columns = ["Date", "D", return_val]
    df = df.drop("D", axis = 1)
    df['Date'] = df['Date'].apply(lambda x: x + "/" + year)
    return df


def Concat_One_Year(file_list, year):
    vars_list= ['ERC', 'BI', 'IC', 'SC']
    df_ls = []
    for i in range(4):
        file_path = file_list[i]
        val = vars_list[i]
        df = Extract(file_path, val, year)
        df_ls.append(df)
    df = pd.concat(df_ls, axis=1)
    return df.T.drop_duplicates().T

def help(year, file_paths):
    # Filter files containing '2022'
        files_2022 = [file for file in file_paths if year in file]

        # Sort the files based on the category order: ERC, BI, IC, SC
        category_order = ['ERC', 'BI', 'IC', 'SC']
        sorted_files_2022 = sorted(files_2022, key=lambda x: next(i for i, category in enumerate(category_order) if category in x))

        # Print the sorted list
        return sorted_files_2022
def output_pipe(years_ls, file_paths):
    df_list = []
    for year in years_ls:
        list_of_year_files = help(year, file_paths)
        df = Concat_One_Year(list_of_year_files, year)
        df_list.append(df)
    return pd.concat(df_list,axis = 0).dropna()





def df_to_csv(df, moisture_bool=False):
    cwd = os.getcwd()
    output_folder = os.path.join(cwd, "output")

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"Created folder: {output_folder}")

    if moisture_bool:
        csv_path = os.path.join(output_folder, "fuel_moisture.csv")
    else:
        csv_path = os.path.join(output_folder, "output.csv")


    df.to_csv(csv_path, index=False)
    print(f"DataFrame saved as CSV at: {csv_path}")
    

def main(RAWS_readings, output_file_list, years_ls, moisture_data = False ):
    x_train = pd.read_csv(RAWS_readings)
    columns = ["StationID", "ObsDate", "ObsTime","Type", "SOW", "Temp(F)",
    "RH",
    "24hr Precip",
    "Duration",
    "Wind Speed",
    "Direction",
    "Azimuth",
    "Max Temp",
    "Min Temp",
    "Max RH",
    "Min RH",
    "WetFlag",
    "SolarRad",
    "Gust Dir",
    "Gust Speed",
    "Hourly Precip",
    "OMC 10",
    "Season",
    "GreenHerb",
    "GreenShrub",
    "SR_SOW",
    "SR_WetFlag",
    "SnowFlag",
    "1 Hour DFM",
    "10 Hour DFM",
    "100 Hour DFM",
    "1000 Hour DFM",
    "Fuel Temperature"]
    x_train.columns = columns
    NFDRS_stats = output_pipe(years_ls, output_file_list)
    df_to_csv(NFDRS_stats)
    if moisture_data == True:
        fuel = extract_FuelMoist(x_train)
        df_to_csv(fuel, moisture_data)
        return NFDRS_stats, fuel
    return NFDRS_stats


    

        
    
    


  delimiter='\s+',  # Assumes the columns are separated by whitespace


In [9]:
# import tkinter as tk
# from tkinter import filedialog

# def upload_file():
#     # Open a file dialog to select a file
#     file_path = filedialog.askopenfilename(title="Select a file",
#                                            filetypes=(("Text files", "*.txt"),
#                                                       ("All files", "*.*")))
#     if file_path:
#         print(f"Selected File: {file_path}")
#         # You can now open and process the file as needed
#         with open(file_path, 'r') as file:
#             content = file.read()
#             print(content)

# # Create the main application window
# root = tk.Tk()
# root.title("File Uploader")

# # Create and configure the "Upload File" button
# upload_button = tk.Button(root, text="Upload File", command=upload_file)
# upload_button.pack(pady=20)

# # Run the application
# root.mainloop()

### Reading Input (Working Area)

In [34]:
x_train = pd.read_csv('/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/X_train.csv')
x_train.head(5)

  x_train = pd.read_csv('/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/X_train.csv')


Unnamed: 0,042009,09/23/2022,00:00,R,Unnamed: 5,63,71,0.00,0,4,...,349,0.00.1,.1,.2,0.1,16.38,19.68,20.00,20.00.1,16.38.1
0,42009,09/23/2022,01:00,R,,61,76,0.0,0,0,...,5,0.0,,,0,16.54,19.6,20.0,20.0,16.54
1,42009,09/23/2022,02:00,R,,60,80,0.0,0,0,...,3,0.0,,,0,17.68,19.56,20.0,20.0,17.68
2,42009,09/23/2022,03:00,R,,61,76,0.0,0,2,...,21,0.0,,,0,17.97,20.13,20.0,20.0,17.97
3,42009,09/23/2022,04:00,R,,60,78,0.0,0,1,...,36,0.0,,,0,17.78,20.67,20.01,20.0,17.78
4,42009,09/23/2022,05:00,R,,58,84,0.0,0,1,...,179,0.0,,,0,18.78,21.13,20.05,20.0,18.78


In [35]:
columns = [
    "StationID",
    "ObsDate",
    "ObsTime",
    "Type",
    "SOW",
    "Temp(F)",
    "RH",
    "24hr Precip",
    "Duration",
    "Wind Speed",
    "Direction",
    "Azimuth",
    "Max Temp",
    "Min Temp",
    "Max RH",
    "Min RH",
    "WetFlag",
    "SolarRad",
    "Gust Dir",
    "Gust Speed",
    "Hourly Precip",
    "OMC 10",
    "Season",
    "GreenHerb",
    "GreenShrub",
    "SR_SOW",
    "SR_WetFlag",
    "SnowFlag",
    "1 Hour DFM",
    "10 Hour DFM",
    "100 Hour DFM",
    "1000 Hour DFM",
    "Fuel Temperature"
]
x_train.columns = columns

In [36]:
# #StationID: Identifier for the weather station that is recording the data.
# ObsDate: The observation date and time in MM/DD/YY HH:mm format, representing the hour for which this data is relevant.
# Type: Data type (e.g., "R" could indicate that this is an "R Table" entry, which may represent hourly recorded data or fire danger outputs).
# SOW (State of Weather): Indicates the observed weather conditions at that hour, often using codes for cloud cover, precipitation, or other atmospheric states.
# Temp(F): Temperature in degrees Fahrenheit.
# RH (Relative Humidity): Percentage of air moisture, representing how much moisture is in the air compared to the maximum amount it could hold.
# 24hr Precip: Total precipitation over the last 24 hours, measured in inches.
# Duration: Duration of precipitation events during the past hour, usually in minutes.
# Wind Speed: Wind speed measured in miles per hour (mph) during the hour.
# Direction: Wind direction in degrees (0 to 360), where 0 = North, 90 = East, 180 = South, 270 = West.
# Azimuth: The angle or direction in which the wind or sun is coming from, measured in degrees from a fixed point (often related to compass direction).
# Max Temp: Maximum temperature recorded for that hour, in degrees Fahrenheit.
# Min Temp: Minimum temperature recorded for that hour, in degrees Fahrenheit.
# Max RH: Maximum relative humidity (%) recorded during that hour.
# Min RH: Minimum relative humidity (%) recorded during that hour.
# WetFlag: Indicator for whether wet conditions were observed (1 for wet, 0 for dry).
# SolarRad: Solar radiation measured in watts per square meter (W/m²), which can indicate cloud cover or sunlight intensity.
# Gust Dir: Direction of the wind gusts during that hour, measured in degrees (similar to Direction).
# Gust Speed: Maximum wind gust speed during that hour, measured in miles per hour (mph).
# Precipitation and Moisture Data:
# Hourly Precip: Amount of precipitation in inches during the specific hour.
# OMC 10: Observed moisture content for 10-hour fuels (small dead fuels like twigs), which dry out and respond quickly to weather changes. Measured as a percentage of weight.
# Season: Indicates the season in which the data is recorded (e.g., summer, winter). This may influence fire behavior and fuel moisture content.
# GreenHerb: Moisture content in live herbaceous (non-woody) plants.
# GreenShrub: Moisture content in live woody shrubs or similar vegetation.
# SR_SOW: Possibly a calculated state of weather value based on observed data, used in fire danger modeling.
# SR_WetFlag: Similar to WetFlag, this likely indicates whether a specific weather condition triggered a "wet" event in the model or system.
# SnowFlag: Indicator for snow presence (1 if snow was observed, 0 if no snow).
# Fire Danger and Fuel Moisture Data:
# 1 Hour DFM: Dead fuel moisture for 1-hour fuels, which are small dead fuels (like grass) that dry out quickly and respond to weather changes within one hour.
# 10 Hour DFM: Dead fuel moisture for 10-hour fuels, which are slightly larger fuels (like twigs) that respond to changes in humidity or weather within a 10-hour period.
# 100 Hour DFM: Dead fuel moisture for 100-hour fuels, larger dead fuels (like small branches), which take up to 100 hours to adjust to changes in weather.
# 1000 Hour DFM: Dead fuel moisture for 1000-hour fuels, which are large dead fuels (like logs) that dry out very slowly and respond to weather over long periods.
# Fuel Temperature: Temperature of the fuels (e.g., grasses, shrubs, dead wood), which can impact how easily they will ignite and burn. Measured in degrees Fahrenheit.

In [37]:
# fuel_moisture = df.iloc[:, -10:]
# df_time = df[["ObsDate", "ObsTime"]]
# fuel_M = df.concat([df_time, fuel_moisture], axis = 1)
# fuel_M.show()

In [38]:
x_train

Unnamed: 0,StationID,ObsDate,ObsTime,Type,SOW,Temp(F),RH,24hr Precip,Duration,Wind Speed,...,GreenHerb,GreenShrub,SR_SOW,SR_WetFlag,SnowFlag,1 Hour DFM,10 Hour DFM,100 Hour DFM,1000 Hour DFM,Fuel Temperature
0,42009,09/23/2022,01:00,R,,61,76,0.0,0,0,...,5,0.0,,,0,16.54,19.60,20.00,20.00,16.54
1,42009,09/23/2022,02:00,R,,60,80,0.0,0,0,...,3,0.0,,,0,17.68,19.56,20.00,20.00,17.68
2,42009,09/23/2022,03:00,R,,61,76,0.0,0,2,...,21,0.0,,,0,17.97,20.13,20.00,20.00,17.97
3,42009,09/23/2022,04:00,R,,60,78,0.0,0,1,...,36,0.0,,,0,17.78,20.67,20.01,20.00,17.78
4,42009,09/23/2022,05:00,R,,58,84,0.0,0,1,...,179,0.0,,,0,18.78,21.13,20.05,20.00,18.78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17232,42009,09/23/2024,13:00,R,1,88,27,0.0,0,5,...,225,0.0,1,0,0,8.38,15.42,17.74,19.15,8.38
17233,42009,09/23/2024,14:00,R,1,93,24,0.0,0,6,...,244,0.0,,,0,7.06,13.68,18.04,19.13,7.06
17234,42009,09/23/2024,15:00,R,0,95,20,0.0,0,8,...,221,0.0,,,0,6.05,12.25,17.54,19.12,6.05
17235,42009,09/23/2024,16:00,R,0,95,21,0.0,0,11,...,234,0.0,,,0,5.37,11.13,17.02,19.06,5.37


In [54]:
def extract_FuelMoist(df):
    # fuel_moisture = df.iloc[:, -10:]
    # df_time = df[["ObsDate", "ObsTime"]]
    # fuel_M = pd.concat([df_time, fuel_moisture], axis = 1)
    df_reordered = df[['ObsDate', 'ObsTime', '1 Hour DFM', '10 Hour DFM', '100 Hour DFM', '1000 Hour DFM', 'GreenHerb', 'GreenShrub', 'Fuel Temperature', 'SR_SOW', 'SR_WetFlag', 'SnowFlag']]
    return df_reordered


extract_FuelMoist(df)

NameError: name 'df' is not defined

### Output File Pipeline (Working Area)

In [9]:
# def Extract(file_path, return_val, year):
#     #return_value is ERC, BI, SC or IC
#     df = pd.read_csv(
#         file_path,
#         delimiter='\s+',  # Assumes the columns are separated by whitespace
#         skiprows=33,  # Skips the first 33 rows (0-indexed, so it will start reading data from line 34)
#         nrows = 364
#     )



#     # df.columns = new_column_names = [
#     #     "Period Begins", "No. Years", "Mean", "Std Dev", "Critical Pcntile",
#     #     "Highest Avg. Year", "Lowest Avg. Year", "| High, Year", "High Avg.", "High Std Dev", 
#     #     "High Median", "Low, Year", "Low Avg.", "Low Std Dev", "Low Median", "Period Begins"
#     # ]
#     df.loc[-1] = df.columns 
#     df.index = df.index + 1  # Shift the index by 1 to make space for the new row
#     df = df.sort_index() 
#     names = [
#         "Period Begins", "No. Years", "Mean", "Std Dev", "Critical Pcntile",
#         "Highest Avg." , "Highest Year", "Lowest Avg.", "Lowest Year", "| High", "Year", "High Avg.", "High Std Dev", 
#         "High Median", "Low", "Year", "Low Avg.", "Low Std Dev", "Low Median", "Period Begins"
#     ]
#     len(df.columns), len(names)
#     df.columns = names

#     df = df[["Period Begins", "Mean"]]
#     df.columns = ["Date", "D", return_val]
#     df = df.drop("D", axis = 1)
#     df['Date'] = df['Date'].apply(lambda x: x + "/" + year)
#     return df
# Extract("/Users/lukacheney/Documents/FireFamilyPlus/output/BI_SantaRosa2023.txt", "BI", "2023")

In [12]:
def find_first_valid_line(file_path, start_line=34):
    try:
        with open(file_path, 'r') as file:
            # Skip the first 'start_line' lines
            for line_num, line in enumerate(file, start=1):
                if line_num < start_line:
                    continue

                # Check if the line does not contain "NA"
                if "NA" not in line:
                    return line_num

        # If no valid line was found
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    
def Extract(file_path, return_val, year):
    #return_value is ERC, BI, SC or IC
    rows_to_skip = find_first_valid_line(file_path, start_line=34)
    df = pd.read_csv(
        file_path,
        delimiter='\s+',  # Assumes the columns are separated by whitespace
        skiprows=rows_to_skip-1,  # Skips the first 33 rows (0-indexed, so it will start reading data from line 34)
        nrows = 398 - rows_to_skip
    )



    # df.columns = new_column_names = [
    #     "Period Begins", "No. Years", "Mean", "Std Dev", "Critical Pcntile",
    #     "Highest Avg. Year", "Lowest Avg. Year", "| High, Year", "High Avg.", "High Std Dev", 
    #     "High Median", "Low, Year", "Low Avg.", "Low Std Dev", "Low Median", "Period Begins"
    # ]
    df.loc[-1] = df.columns 
    df.index = df.index + 1  # Shift the index by 1 to make space for the new row
    df = df.sort_index() 
    names = [
        "Period Begins", "No. Years", "Mean", "Std Dev", "Critical Pcntile",
        "Highest Avg." , "Highest Year", "Lowest Avg.", "Lowest Year", "| High", "Year", "High Avg.", "High Std Dev", 
        "High Median", "Low", "Year", "Low Avg.", "Low Std Dev", "Low Median", "Period Begins"
    ]
    len(df.columns), len(names)
    df.columns = names

    df = df[["Period Begins", "Mean"]]
    df.columns = ["Date", "D", return_val]
    df = df.drop("D", axis = 1)
    df['Date'] = df['Date'].apply(lambda x: x + "/" + year)
    return df


  delimiter='\s+',  # Assumes the columns are separated by whitespace


In [14]:
Extract("/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/BI_SantaRosa2022.txt", "BI", "2022")

Unnamed: 0,Date,BI
0,09/23/2022,140.0
1,09/24/2022,130.7
2,09/25/2022,118.6
3,09/26/2022,91.4
4,09/27/2022,78.0
...,...,...
95,12/27/2022,0.0
96,12/28/2022,48.9
97,12/29/2022,0.0
98,12/30/2022,0.0


In [44]:
ls = ["ERC", "BI", "SC", "IC"]
file_ls = [
    "/Users/lukacheney/Documents/FireFamilyPlus/output/ERC_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/BI_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/SC_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/IC_SantaRosa2023.txt"]


def Concat_One_Year(file_list, year):
    vars_list= ['ERC', 'BI', 'IC', 'SC']
    df_ls = []
    for i in range(4):
        file_path = file_list[i]
        val = vars_list[i]
        df = Extract(file_path, val, year)
        df_ls.append(df)
    df = pd.concat(df_ls, axis=1)
    return df.T.drop_duplicates().T


Concat_One_Year(file_ls, "2023")




Unnamed: 0,Date,ERC,BI,IC,SC
0,01/01/2023,42.3,127.4,81.2,10.1
1,01/02/2023,0.0,0.0,0.0,0.0
2,01/03/2023,2.2,10.4,6.7,0.0
3,01/04/2023,0.0,0.0,0.0,0.0
4,01/05/2023,0.0,0.0,0.0,0.0
...,...,...,...,...,...
360,12/27/2023,0.0,0.0,0.0,0.0
361,12/28/2023,0.0,0.0,0.0,0.0
362,12/29/2023,0.0,0.0,0.0,0.0
363,12/30/2023,3.4,14.3,8.7,0.1


In [35]:
def help(year, file_paths):
    # Filter files containing '2022'
        files_2022 = [file for file in file_paths if year in file]

        # Sort the files based on the category order: ERC, BI, IC, SC
        category_order = ['ERC', 'BI', 'IC', 'SC']
        sorted_files_2022 = sorted(files_2022, key=lambda x: next(i for i, category in enumerate(category_order) if category in x))

        # Print the sorted list
        return sorted_files_2022
def output_pipe(years_ls, file_paths):
    df_list = []
    for year in years_ls:
        list_of_year_files = help(year, file_paths)
        df = Concat_One_Year(list_of_year_files, year)
        df_list.append(df)
    return pd.concat(df_list,axis = 0).dropna()
        
    
    

In [36]:
years = ["2022","2023","2024"]
file_paths = [
    "/Users/lukacheney/Documents/FireFamilyPlus/output/BI_SantaRosa2022.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/BI_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/BI_SantaRosa2024.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/ERC_SantaRosa2022.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/ERC_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/ERC_SantaRosa2024.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/IC_SantaRosa2022.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/IC_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/IC_SantaRosa2024.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/SC_SantaRosa2022.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/SC_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/output/SC_SantaRosa2024.txt"
]
NFDRS_stats = output_pipe(years, file_paths)
NFDRS_stats

Unnamed: 0,Date,ERC,BI,IC,SC
0,09/23/2022,66.1,140.0,26.3,63.8
1,09/24/2022,73.0,130.7,27.5,49.7
2,09/25/2022,45.1,118.6,9.9,65.2
3,09/26/2022,35.1,91.4,4.9,47.6
4,09/27/2022,33.8,78.0,4.1,34.9
...,...,...,...,...,...
261,09/19/2024,41.1,115.5,6.2,67.5
262,09/20/2024,28.6,77.4,2.7,40.7
263,09/21/2024,34.5,85.8,3.8,42.2
264,09/22/2024,58.1,98.3,6.4,33.7


In [17]:
pd.merge(NFDRS_stats, x_train, left_on='Date', right_on='ObsDate', how='inner')

Unnamed: 0,Date,ERC,BI,IC,SC,StationID,ObsDate,ObsTime,Type,SOW,...,GreenHerb,GreenShrub,SR_SOW,SR_WetFlag,SnowFlag,1 Hour DFM,10 Hour DFM,100 Hour DFM,1000 Hour DFM,Fuel Temperature
0,09/23/2022,66.1,140.0,26.3,63.8,42009,09/23/2022,01:00,R,,...,5,0.0,,,0,16.54,19.60,20.00,20.00,16.54
1,09/23/2022,66.1,140.0,26.3,63.8,42009,09/23/2022,02:00,R,,...,3,0.0,,,0,17.68,19.56,20.00,20.00,17.68
2,09/23/2022,66.1,140.0,26.3,63.8,42009,09/23/2022,03:00,R,,...,21,0.0,,,0,17.97,20.13,20.00,20.00,17.97
3,09/23/2022,66.1,140.0,26.3,63.8,42009,09/23/2022,04:00,R,,...,36,0.0,,,0,17.78,20.67,20.01,20.00,17.78
4,09/23/2022,66.1,140.0,26.3,63.8,42009,09/23/2022,05:00,R,,...,179,0.0,,,0,18.78,21.13,20.05,20.00,18.78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17208,09/23/2024,72.9,136.0,23.2,54.4,42009,09/23/2024,13:00,R,1,...,225,0.0,1,0,0,8.38,15.42,17.74,19.15,8.38
17209,09/23/2024,72.9,136.0,23.2,54.4,42009,09/23/2024,14:00,R,1,...,244,0.0,,,0,7.06,13.68,18.04,19.13,7.06
17210,09/23/2024,72.9,136.0,23.2,54.4,42009,09/23/2024,15:00,R,0,...,221,0.0,,,0,6.05,12.25,17.54,19.12,6.05
17211,09/23/2024,72.9,136.0,23.2,54.4,42009,09/23/2024,16:00,R,0,...,234,0.0,,,0,5.37,11.13,17.02,19.06,5.37


### Main Function

#### Input RAW File Path as a string
#### Input the NFDRS Stats (ERC, BI, IC, SC) paths as a list of strings
#### Input the list of years as strings as well (for example ["2020","2021"])
#### If you want to get Fuel Moisture data back as well, you should set moisture_data to True

In [26]:
RAWS_file_path = "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/X_train.csv"
NFDRS_Stat_path_ls = [
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/BI_SantaRosa2022.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/BI_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/BI_SantaRosa2024.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/ERC_SantaRosa2022.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/ERC_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/ERC_SantaRosa2024.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/IC_SantaRosa2022.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/IC_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/IC_SantaRosa2024.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/SC_SantaRosa2022.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/SC_SantaRosa2023.txt",
    "/Users/lukacheney/Documents/FireFamilyPlus/NFDRS_Stats/SC_SantaRosa2024.txt"
]

years_list = ["2022","2023", "2024"]
moisture_data = False

In [27]:
NFDRS_stats = main(RAWS_file_path, NFDRS_Stat_path_ls, ["2022","2023", "2024"], moisture_data = moisture_data)

  x_train = pd.read_csv(RAWS_readings)


DataFrame saved as CSV at: /Users/lukacheney/Documents/FireFamilyPlus/output/output.csv


In [28]:
NFDRS_stats

Unnamed: 0,Date,ERC,BI,IC,SC
0,09/23/2022,66.1,140.0,26.3,63.8
1,09/24/2022,73.0,130.7,27.5,49.7
2,09/25/2022,45.1,118.6,9.9,65.2
3,09/26/2022,35.1,91.4,4.9,47.6
4,09/27/2022,33.8,78.0,4.1,34.9
...,...,...,...,...,...
261,09/19/2024,41.1,115.5,6.2,67.5
262,09/20/2024,28.6,77.4,2.7,40.7
263,09/21/2024,34.5,85.8,3.8,42.2
264,09/22/2024,58.1,98.3,6.4,33.7


In [29]:
NFDRS_stats, fuel_moisture = main(RAWS_file_path, NFDRS_Stat_path_ls, ["2022","2023", "2024"], moisture_data = True)

  x_train = pd.read_csv(RAWS_readings)


DataFrame saved as CSV at: /Users/lukacheney/Documents/FireFamilyPlus/output/output.csv
DataFrame saved as CSV at: /Users/lukacheney/Documents/FireFamilyPlus/output/fuel_moisture.csv


In [30]:
fuel_moisture

Unnamed: 0,ObsDate,ObsTime,1 Hour DFM,10 Hour DFM,100 Hour DFM,1000 Hour DFM,GreenHerb,GreenShrub,Fuel Temperature
0,09/23/2022,01:00,16.54,19.60,20.00,20.00,5,0.0,16.54
1,09/23/2022,02:00,17.68,19.56,20.00,20.00,3,0.0,17.68
2,09/23/2022,03:00,17.97,20.13,20.00,20.00,21,0.0,17.97
3,09/23/2022,04:00,17.78,20.67,20.01,20.00,36,0.0,17.78
4,09/23/2022,05:00,18.78,21.13,20.05,20.00,179,0.0,18.78
...,...,...,...,...,...,...,...,...,...
17232,09/23/2024,13:00,8.38,15.42,17.74,19.15,225,0.0,8.38
17233,09/23/2024,14:00,7.06,13.68,18.04,19.13,244,0.0,7.06
17234,09/23/2024,15:00,6.05,12.25,17.54,19.12,221,0.0,6.05
17235,09/23/2024,16:00,5.37,11.13,17.02,19.06,234,0.0,5.37


Do readings for two stations
Make visualizations on year basis for both
Make presentation for Dr Shiyh