Find the closest weather station for each fire station

In [None]:
import numpy as np
import pandas as pd

# Load CSV files
# Both paths are relative, so they are resolved against the current working directory.
# find_closest() below reads the columns fire_id, fire_name, lat, lon from fire_stations
# and weather_id, lat, lon from weather_stations.
fire_stations = pd.read_csv("all fire stations in area_ fire.csv")
weather_stations = pd.read_csv("ghcnd-stations.csv")

def dist(x, y):
    """Return the straight-line (Euclidean) distance between two 2-D points.

    Only elements 0 and 1 of ``x`` and ``y`` are read, so any indexable
    pair (tuple, list, array) works.
    """
    delta_first = x[0] - y[0]
    delta_second = x[1] - y[1]
    return np.sqrt(delta_first**2 + delta_second**2)

def find_closest(fire_stations: pd.DataFrame, weather_stations: pd.DataFrame) -> pd.DataFrame:
    """
    Find the closest weather station to each fire station.

    The distance is the same metric `dist` computes: the Euclidean distance
    between the (lat, lon) pairs, i.e. it is expressed in degrees.
    NOTE(review): degrees of longitude and latitude do not cover the same
    ground distance, so this is only an approximation of geographic
    proximity -- confirm that is acceptable for the analysis.

    Parameters:
        fire_stations (pd.DataFrame): DataFrame with columns ['fire_id', 'fire_name', 'lat', 'lon']
        weather_stations (pd.DataFrame): DataFrame with columns ['weather_id', 'lat', 'lon']

    Returns:
        pd.DataFrame: DataFrame with ['fire_id', 'fire_name', 'closest_weather_id', 'distance'],
        one row per fire station, in input order. When no weather station yields a
        comparable distance (empty weather table, or missing coordinates),
        'closest_weather_id' is None and 'distance' is inf.
    """
    # Pull the weather columns out once as plain arrays so each fire station
    # is compared against every weather station in a single vectorised step
    # instead of a Python-level loop over DataFrame rows.
    weather_ids = weather_stations["weather_id"].to_numpy()
    weather_lat = weather_stations["lat"].to_numpy(dtype=float)
    weather_lon = weather_stations["lon"].to_numpy(dtype=float)

    results = []

    # Iterating column-wise (rather than with iterrows) keeps each column's own dtype.
    fire_rows = zip(
        fire_stations["fire_id"],
        fire_stations["fire_name"],
        fire_stations["lat"],
        fire_stations["lon"],
    )
    for fire_id, fire_name, fire_lat, fire_lon in fire_rows:
        closest_weather = None
        min_dist = float("inf")

        if weather_ids.size:
            distances = np.sqrt((fire_lat - weather_lat) ** 2 + (fire_lon - weather_lon) ** 2)
            # A NaN distance (missing coordinate) must never win; argmin would
            # otherwise return the position of the first NaN.
            distances = np.where(np.isnan(distances), np.inf, distances)
            # argmin returns the first minimum, so ties go to the earliest station.
            best = int(np.argmin(distances))
            if distances[best] < min_dist:
                closest_weather = weather_ids[best]
                min_dist = float(distances[best])

        results.append([fire_id, fire_name, closest_weather, min_dist])

    # Convert results to DataFrame
    closest_df = pd.DataFrame(results, columns=["fire_id", "fire_name", "closest_weather_id", "distance"])
    return closest_df

# Run function
# closest_df gets one row per fire station: fire_id, fire_name, closest_weather_id, distance.
closest_df = find_closest(fire_stations, weather_stations)
# print() shows the plain-text repr of the DataFrame.
print(closest_df)



   fire_id                              fire_name closest_weather_id  distance
0      AEU            Amador - El Dorado CAL FIRE        US1CAAM0003  0.044820
1      ANF                Angeles National Forest        USR0000CCHI  0.069613
2      BRR  Bitter Creek National Wildlife Refuge        USC00046754  0.023635
3      BTU                         Butte CAL FIRE        USC00046685  0.000916
4      MCP       Camp Pendleton Marine Corps Base        USW00000369  0.027352
..     ...                                    ...                ...       ...
78     TCU          Tuolumne - Calaveras CAL FIRE        USC00046172  0.083267
79     VLJ                     Vallejo Fire Dept.        USC00045333  0.045435
80     AFV              Vandenberg Air Force Base        USW00093214  0.026880
81     VNC                         Ventura County        US1CAVT0031  0.067191
82     YNP                 Yosemite National Park        USC00049855  0.115259

[83 rows x 4 columns]


In [21]:
# Second name for the fire-station -> closest-weather-station table.
# This is an alias, not a copy: both names refer to the same DataFrame object.
fireweather_conv=closest_df

Fetch the weather data for each station ID in the list from the NOAA server (over HTTPS)

In [17]:
# Station ids to download. Despite the name this is a pandas Series (one entry per
# row of closest_df), so the same station id can appear more than once.
weatherstn_list = closest_df['closest_weather_id']


In [None]:
import os
import requests

# Base URL
base_url = "https://www.ncei.noaa.gov/pub/data/ghcn/daily/by_station/"

# Directory to save downloaded files
download_dir = "weather_ftpfetched"
os.makedirs(download_dir, exist_ok=True)

# Several fire stations can share the same closest weather station, and a fire
# station without a match has no id at all: drop missing ids and repeats
# (keeping first-seen order) so each file is requested exactly once.
station_ids = list(dict.fromkeys(sid for sid in weatherstn_list if pd.notna(sid)))

for station_id in station_ids:
    file_name = f"{station_id}.csv.gz"  # NOAA files are in .csv.gz format
    file_url = base_url + file_name
    local_file_path = os.path.join(download_dir, file_name)
    # Stream into a temporary name first so an interrupted transfer never
    # leaves a truncated file under the final .csv.gz name.
    partial_path = local_file_path + ".part"

    try:
        # timeout: without one, a stalled connection blocks the notebook forever.
        # The with-block releases the streamed connection when done.
        with requests.get(file_url, stream=True, timeout=60) as response:
            if response.status_code != 200:
                print(f"Failed to download: {file_name} (Status Code: {response.status_code})")
                continue

            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    file.write(chunk)

        os.replace(partial_path, local_file_path)
        print(f"Downloaded: {file_name}")
    except requests.RequestException as exc:
        # A network failure on one station should not abort the remaining downloads.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print(f"Failed to download: {file_name} ({exc})")



Convert the downloaded .csv.gz files to plain .csv files

In [None]:
import os
import gzip
import pandas as pd

# Define the source folder containing .csv.gz files
data_folder = "weather_ftpfetched"

# Define the destination folder for converted .csv files
converted_folder = "weathercsv_converted"

# Create the converted folder if it doesn't exist
os.makedirs(converted_folder, exist_ok=True)

# Get a list of all .csv.gz files in the data folder.
# os.listdir returns names in arbitrary order; sort so every run processes
# (and logs) the files in the same order.
all_files = sorted(f for f in os.listdir(data_folder) if f.endswith('.csv.gz'))

# Process each .csv.gz file
for file in all_files:
    input_path = os.path.join(data_folder, file)  # Full path to input file
    output_filename = file[:-len(".gz")]  # Strip only the trailing ".gz"
    output_path = os.path.join(converted_folder, output_filename)  # Full path to output file

    # Open the .gz file and read it using pandas
    with gzip.open(input_path, 'rt', encoding='utf-8') as f:  # Read in text mode
        try:
            # NOAA by_station files have no header row. With the default
            # header=0 the first observation of every station would be
            # consumed as column names and lost, so read with header=None;
            # the columns then get positional names (0, 1, 2, ...).
            # Malformed lines are still skipped.
            df = pd.read_csv(f, header=None, low_memory=False, on_bad_lines='skip', sep=',')

            # Print row count for debugging
            print(f"✅ Read {file} with {len(df)} rows.")

            # Save the converted .csv file
            df.to_csv(output_path, index=False)

            print(f"📁 Saved converted file to: {output_path}")

        except Exception as e:
            # Best-effort: report the failing file and continue with the rest.
            print(f"❌ Error processing {file}: {e}")


Next step: combine all the CSV files into one. First, normalize every file to the same four columns (id, date, obs, obs_value).

In [30]:
import os
import pandas as pd

# Folder holding the per-station CSV files that get normalized in place
data_folder = "weathercsv_converted"

# Collect the names of the CSV files found in that folder
csv_files = [f for f in os.listdir(data_folder) if f.endswith(".csv")]

# Every file is reduced to its first four columns, renamed to these names
expected_columns = ['id', 'date', 'obs', 'obs_value']

# Rewrite each file so that all of them share the same four-column layout
for file in csv_files:
    file_path = os.path.join(data_folder, file)

    try:
        df = pd.read_csv(file_path, low_memory=False)

        # Discard the extra columns whose name starts with 'Unnamed'
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

        # Files with fewer than four remaining columns are left untouched
        if len(df.columns) < 4:
            print(f"Skipping {file}: Not enough columns to modify.")
            continue

        # Keep the four leading columns and give them the expected names
        df = df.iloc[:, :4]
        df.columns = expected_columns

        # Overwrite the original file with the normalized version
        df.to_csv(file_path, index=False)
        print(f"Fixed and saved {file}")

    except Exception as e:
        print(f"Error processing {file}: {e}")

# From here on, every processed CSV in the folder has the same four columns


Fixed and saved US1CAAM0003.csv
Fixed and saved US1CACL0001.csv
Fixed and saved US1CADN0012.csv
Fixed and saved US1CAFR0033.csv
Fixed and saved US1CAHM0029.csv
Fixed and saved US1CAHM0144.csv
Fixed and saved US1CALK0018.csv
Fixed and saved US1CAMD0033.csv
Fixed and saved US1CAMR0002.csv
Fixed and saved US1CAMR0011.csv
Fixed and saved US1CASC0006.csv
Fixed and saved US1CASD0026.csv
Fixed and saved US1CASK0016.csv
Fixed and saved US1CASL0040.csv
Fixed and saved US1CASU0005.csv
Fixed and saved US1CASZ0043.csv
Fixed and saved US1CAVT0017.csv
Fixed and saved US1CAVT0031.csv
Fixed and saved USC00040134.csv
Fixed and saved USC00040161.csv
Fixed and saved USC00040204.csv
Fixed and saved USC00040332.csv
Fixed and saved USC00040543.csv
Fixed and saved USC00040798.csv
Fixed and saved USC00041018.csv
Fixed and saved USC00041075.csv
Fixed and saved USC00041784.csv
Fixed and saved USC00041799.csv
Fixed and saved USC00041805.csv
Fixed and saved USC00041906.csv
Fixed and saved USC00042027.csv
Fixed an

Load the fire dataset

In [16]:
# Read the fire records and keep only the rows that have no missing values
fire_data = pd.read_csv(r'fire_data.csv').dropna()