In [9]:
# This code updates the combined_incidents_clean.csv using the updated incidents_2012 data
# focusing on 2012 rows and ensuring that the Last_Report_Date field is updated from incidents_2012_updated.json.

import pandas as pd
import numpy as np

# Load the clean CSV file
try:
    df_clean = pd.read_csv('combined_incidents_clean.csv')
    print('Loaded combined_incidents_clean.csv')
except Exception as e:
    print('Error loading combined_incidents_clean.csv:', e)
    raise e

# Load the updated incidents_2012 JSON file
try:
    df_2012 = pd.read_json('structured_incidents_data_2012.json')
    print('Loaded structured_incidents_2012.json')
except Exception as e:
    print('Error loading incidents_2012_updated.json:', e)
    raise e

# Ensure Last_Report_Date column exists in df_clean; if not, create it
if 'Last_Report_Date' not in df_clean.columns:
    df_clean['Last_Report_Date'] = np.nan

# We'll update rows in df_clean that correspond to year 2012
mask_2012 = df_clean['Year'] == 2012

# Merge the 2012 records from the clean CSV file with the updated incidents 2012 JSON based on 'Name'
# (You might need to adjust if the key is different; here we assume 'Name' is common.)

# Select only necessary columns from df_2012: Name and Last_Report_Date
if 'Name' not in df_2012.columns or 'Last_Report_Date' not in df_2012.columns:
    print('Missing required columns in incidents_2012_updated.json')
else:
    df_2012_subset = df_2012[['Name', 'Contain/_Control Date']].copy()
    # The updated Last_Report_Date is stored in the 'Contain/_Control Date' column from our previous merge for consistency
    df_2012_subset = df_2012_subset.rename(columns={'Contain/_Control Date': 'Last_Report_Date_new'})

    # Merge with the 2012 subset of the clean csv
    df_clean_2012 = df_clean[mask_2012].merge(df_2012_subset, on='Name', how='left')

    # Update Last_Report_Date if missing in df_clean or if new value is provided
    df_clean_2012['Last_Report_Date'] = df_clean_2012.apply(
        lambda row: row['Last_Report_Date_new'] if pd.isna(row['Last_Report_Date']) and pd.notna(row['Last_Report_Date_new']) else row['Last_Report_Date'], axis=1
    )

    # Remove the temporary column
    df_clean_2012.drop(columns=['Last_Report_Date_new'], inplace=True)

    # Update the main dataframe with the updated 2012 records
    df_clean.update(df_clean_2012)

    # Save the updated CSV
    df_clean.to_csv('combined_incidents_clean_updated.csv', index=False)
    print('\
Updated combined_incidents_clean.csv saved as combined_incidents_clean_updated.csv')

# Show the updated 2012 rows
print('\
Updated 2012 rows from combined_incidents_clean_updated.csv (first 10 rows):')
print(df_clean[mask_2012].head(10))

Loaded combined_incidents_clean.csv
Loaded structured_incidents_2012.json
Missing required columns in incidents_2012_updated.json
Updated 2012 rows from combined_incidents_clean_updated.csv (first 10 rows):
                      Name   GACC  State  Start_Date Last_Report_Date  \
150              Long Draw     NW     OR  07/08/2012              NaN   
151               Holloway  NW/WB  OR/NV  08/05/2012              NaN   
152        Mustang Complex     EB     ID  07/30/2012              NaN   
153                   Rush     NO     CA  08/12/2012              NaN   
154       Whitewater-Baldy     SW     NM  05/16/2012              NaN   
155              Ash Creek     NR     MT  06/25/2012              NaN   
156            Kinyon Road     EB     ID  07/07/2012              NaN   
157               Halstead     EB     ID  07/27/2012              NaN   
158  Rosebud Creek Complex     NR     MT  08/01/2012              NaN   
159       Miller Homestead     NW     OR  07/08/2012           

In [11]:
# This code updates the 'Last_Report_Date' column for 2012 rows in combined_incidents_clean.csv   
# using the 'Contain_Control_Date' value.  
  
import pandas as pd  
  
# Load the clean CSV file  
df_clean = pd.read_csv('combined_incidents_clean.csv')  
print('Loaded combined_incidents_clean.csv')  
  
# Filter rows from 2012 where Last_Report_Date is missing  
mask_2012 = (df_clean['Year'] == 2012) & (pd.isna(df_clean['Last_Report_Date']))  
  
print('Before update, number of 2012 rows with missing Last_Report_Date:', df_clean.loc[mask_2012].shape[0])  
  
# Update the Last_Report_Date for these rows with the value from Contain_Control_Date  
df_clean.loc[mask_2012, 'Last_Report_Date'] = df_clean.loc[mask_2012, 'Contain_Control_Date']  
  
# Check if the update was successful  
mask_2012_missing_after = (df_clean['Year'] == 2012) & (pd.isna(df_clean['Last_Report_Date']))  
print('After update, number of 2012 rows with missing Last_Report_Date:', df_clean.loc[mask_2012_missing_after].shape[0])  
  
# Save the updated CSV file  
df_clean.to_csv('combined_incidents_clean_updated.csv', index=False)  
print('\nUpdated CSV saved as combined_incidents_clean_updated.csv')  
  
# Show a sample of the updated rows for 2012  
mask_2012_all = (df_clean['Year'] == 2012)  
print('\nSample of updated 2012 rows:')  
print(df_clean.loc[mask_2012_all].head(10))  

Loaded combined_incidents_clean.csv
Before update, number of 2012 rows with missing Last_Report_Date: 51
After update, number of 2012 rows with missing Last_Report_Date: 0

Updated CSV saved as combined_incidents_clean_updated.csv

Sample of updated 2012 rows:
                      Name   GACC  State  Start_Date Last_Report_Date  \
150              Long Draw     NW     OR  07/08/2012       07/30/2012   
151               Holloway  NW/WB  OR/NV  08/05/2012       08/23/2012   
152        Mustang Complex     EB     ID  07/30/2012       10/18/2012   
153                   Rush     NO     CA  08/12/2012       09/04/2012   
154       Whitewater-Baldy     SW     NM  05/16/2012       07/31/2012   
155              Ash Creek     NR     MT  06/25/2012       07/11/2012   
156            Kinyon Road     EB     ID  07/07/2012       07/19/2012   
157               Halstead     EB     ID  07/27/2012       10/18/2012   
158  Rosebud Creek Complex     NR     MT  08/01/2012       08/16/2012   
159      

In [15]:
import pandas as pd  
import os  
  
# Function to load and extract date information from a structured JSON file.  
def load_and_extract_dates(filepath, year, date_column):  
    try:  
        df_raw = pd.read_json(filepath)  
        if 'significant_incidents' in df_raw.columns:  
            df = pd.json_normalize(df_raw['significant_incidents'])  
        else:  
            df = df_raw  
        # Select only the Name and the specified date column, and add the Year column.  
        if date_column in df.columns:  
            df_dates = df[['Name', date_column]].copy()  
            df_dates['Year'] = int(year)  
            return df_dates  
        else:  
            print('Date column ' + date_column + ' not found in ' + filepath)  
            return None  
    except Exception as e:  
        print('Error processing ' + filepath + ': ' + str(e))  
        return None  
  
# Define filepaths and date columns for each year.  
# If the JSON filename consistently uses the pattern structured_incidents_data_YEAR.json then...  
date_columns = {  
    '2009': 'Contain_Control_Date',  
    '2010': 'Contain_Control_Date',  
    '2011': 'Contain_Control_Date',  
    '2021': 'Contain_Control_Date'  
}  
  
# Load the combined_incidents_clean.csv file.  
df_clean = pd.read_csv('combined_incidents_clean.csv')  
print('Loaded combined_incidents_clean.csv with ' + str(len(df_clean)) + ' rows.')  
  
# Process each year from the JSON structured files.  
updates_by_year = {}  
for year, date_column in date_columns.items():  
    filepath = f'structured_incidents_data_{year}.json'  
    if os.path.exists(filepath):  
        print('\\nProcessing ' + year + ' data from ' + filepath)  
        df_dates = load_and_extract_dates(filepath, year, date_column)  
        if df_dates is not None:  
            # Filter df_clean for the current year.  
            df_clean_year = df_clean[df_clean['Year'] == int(year)].copy()  
            # Merge on 'Name' and 'Year'. This will add a column named '<date_column>_y'  
            df_merged = df_clean_year.merge(df_dates, on=['Name', 'Year'], how='left')  
            # Before update, count missing Last_Report_Date values.  
            mask_year = (df_clean['Year'] == int(year)) & (pd.isna(df_clean['Last_Report_Date']))  
            before_count = df_clean.loc[mask_year].shape[0]  
              
            # For the matching rows, update Last_Report_Date with the value from the structured JSON.  
            # The merged dataframe has two date columns: original CSV (as e.g. Contain_Control_Date_x)  
            # and the JSON data as Contain_Control_Date_y (or Contain_or_Last_Report_Date_y).  
            json_date_col = date_column + '_y'  
            for idx, row in df_merged.iterrows():  
                if pd.isna(row['Last_Report_Date']) and pd.notna(row[json_date_col]):  
                    df_clean.loc[(df_clean['Name'] == row['Name']) & (df_clean['Year'] == int(year)), 'Last_Report_Date'] = row[json_date_col]  
              
            # After update, count remaining missing values.  
            mask_year = (df_clean['Year'] == int(year)) & (pd.isna(df_clean['Last_Report_Date']))  
            after_count = df_clean.loc[mask_year].shape[0]  
            updates_by_year[year] = before_count - after_count  
            print('Year ' + year + ': Updated ' + str(updates_by_year[year]) + ' rows with Last_Report_Date')  
    else:  
        print('File not found: ' + filepath)  
  
# Optionally, if you want to replace all values of Last_Report_Date (with structured JSON if available)  
# You could relax the condition and update whenever a JSON date exists, even overwriting non-null ones.  
  
# Save the updated CSV file.  
output_filename = 'combined_incidents_clean_updated_all_years.csv'  
df_clean.to_csv(output_filename, index=False)  
print('\\nUpdated CSV saved as ' + output_filename)  
  
# Summary of updates  
print('\\nSummary of updates:')  
for year, count in updates_by_year.items():  
    print('Year ' + year + ': ' + str(count) + ' rows updated.')  
  
# Check remaining missing Last_Report_Date values per year.  
print('\\nRemaining rows with missing Last_Report_Date:')  
for year in ['2009', '2010', '2011', '2021']:  
    mask_year = (df_clean['Year'] == int(year)) & (pd.isna(df_clean['Last_Report_Date']))  
    count = df_clean.loc[mask_year].shape[0]  
    print('Year ' + year + ': ' + str(count) + ' rows still missing Last_Report_Date')  
  
# Show a sample of the updated rows for each year.  
for year in ['2009', '2010', '2011', '2021']:  
    mask_year = (df_clean['Year'] == int(year))  
    print('\\nSample of updated ' + year + ' rows:')  
    print(df_clean.loc[mask_year].head(3))  

Loaded combined_incidents_clean.csv with 470 rows.
\nProcessing 2009 data from structured_incidents_data_2009.json
Year 2009: Updated 27 rows with Last_Report_Date
\nProcessing 2010 data from structured_incidents_data_2010.json
Year 2010: Updated 9 rows with Last_Report_Date
\nProcessing 2011 data from structured_incidents_data_2011.json
Year 2011: Updated 41 rows with Last_Report_Date
\nProcessing 2021 data from structured_incidents_data_2021.json
Date column Contain_Control_Date not found in structured_incidents_data_2021.json
\nUpdated CSV saved as combined_incidents_clean_updated_all_years.csv
\nSummary of updates:
Year 2009: 27 rows updated.
Year 2010: 9 rows updated.
Year 2011: 41 rows updated.
\nRemaining rows with missing Last_Report_Date:
Year 2009: 0 rows still missing Last_Report_Date
Year 2010: 0 rows still missing Last_Report_Date
Year 2011: 0 rows still missing Last_Report_Date
Year 2021: 24 rows still missing Last_Report_Date
\nSample of updated 2009 rows:
            Na

In [17]:
import pandas as pd  
import os  
  
# Function to load and extract date information from a structured JSON file.  
def load_and_extract_dates(filepath, year, date_column):  
    try:  
        df_raw = pd.read_json(filepath)  
        if 'significant_incidents' in df_raw.columns:  
            df = pd.json_normalize(df_raw['significant_incidents'])  
        else:  
            df = df_raw  
        # Select only the Name and the specified date column, and add the Year column.  
        if date_column in df.columns:  
            df_dates = df[['Name', date_column]].copy()  
            df_dates['Year'] = int(year)  
            return df_dates  
        else:  
            print('Date column ' + date_column + ' not found in ' + filepath)  
            return None  
    except Exception as e:  
        print('Error processing ' + filepath + ': ' + str(e))  
        return None  
  
# Dictionary for defining which date column to use per year  
date_columns = {  
    '2009': 'Contain_Control_Date',  
    '2010': 'Contain_Control_Date',  
    '2011': 'Contain_Control_Date',  
    '2021': 'Contain_or_Last_Report_Date'  
}  
  
# Load the combined_incidents_clean.csv file.  
df_clean = pd.read_csv('combined_incidents_clean.csv')  
print('Loaded combined_incidents_clean.csv with ' + str(len(df_clean)) + ' rows.')  
  
# For each year, update Last_Report_Date with the date from the JSON file.  
updates_by_year = {}  
  
for year, json_date_column in date_columns.items():  
    filepath = f'structured_incidents_data_{year}.json'  
    if os.path.exists(filepath):  
        print('\\nProcessing ' + year + ' data from ' + filepath)  
        df_dates = load_and_extract_dates(filepath, year, json_date_column)  
        if df_dates is not None:  
            # Merge df_clean (for the year) with the JSON dates based on Name and Year.  
            mask_year = (df_clean['Year'] == int(year))  
            df_clean_year = df_clean.loc[mask_year].copy()  
              
            df_merged = df_clean_year.merge(df_dates, on=['Name', 'Year'], how='left', suffixes=('', '_json'))  
              
            # Count how many rows we can update: update Last_Report_Date if JSON date exists.  
            updated_count = 0  
            for idx, row in df_merged.iterrows():  
                json_date = row.get(json_date_column + '_json')  
                # If the JSON date is not missing, update Last_Report_Date.  
                if pd.notna(json_date):  
                    # Update in the original df_clean  
                    mask = (df_clean['Name'] == row['Name']) & (df_clean['Year'] == int(year))  
                    df_clean.loc[mask, 'Last_Report_Date'] = json_date  
                    updated_count += 1  
            updates_by_year[year] = updated_count  
            print('Year ' + year + ': Updated ' + str(updated_count) + ' rows in Last_Report_Date.')  
    else:  
        print('File not found: ' + filepath)  
  
# Save the updated combined file  
output_filename = 'combined_incidents_clean_updated_all_years.csv'  
df_clean.to_csv(output_filename, index=False)  
print('\\nUpdated CSV saved as ' + output_filename)  
  
# Summary of updates  
print('\\nSummary of updates:')  
for year, count in updates_by_year.items():  
    print('Year ' + year + ': ' + str(count) + ' rows updated.')  
  
# Check remaining missing Last_Report_Date values by year.  
print('\\nRemaining rows with missing Last_Report_Date:')  
for year in ['2009', '2010', '2011', '2021']:  
    mask_year = (df_clean['Year'] == int(year)) & (pd.isna(df_clean['Last_Report_Date']))  
    count = df_clean.loc[mask_year].shape[0]  
    print('Year ' + year + ': ' + str(count) + ' rows still missing Last_Report_Date')  
  
# Show a sample of the updated rows for each year.  
for year in ['2009', '2010', '2011', '2021']:  
    mask_year = (df_clean['Year'] == int(year))  
    print('\\nSample of updated ' + year + ' rows:')  
    print(df_clean.loc[mask_year].head(3))  

Loaded combined_incidents_clean.csv with 470 rows.
\nProcessing 2009 data from structured_incidents_data_2009.json
Year 2009: Updated 27 rows in Last_Report_Date.
\nProcessing 2010 data from structured_incidents_data_2010.json
Year 2010: Updated 9 rows in Last_Report_Date.
\nProcessing 2011 data from structured_incidents_data_2011.json
Year 2011: Updated 41 rows in Last_Report_Date.
\nProcessing 2021 data from structured_incidents_data_2021.json
Year 2021: Updated 0 rows in Last_Report_Date.
\nUpdated CSV saved as combined_incidents_clean_updated_all_years.csv
\nSummary of updates:
Year 2009: 27 rows updated.
Year 2010: 9 rows updated.
Year 2011: 41 rows updated.
Year 2021: 0 rows updated.
\nRemaining rows with missing Last_Report_Date:
Year 2009: 0 rows still missing Last_Report_Date
Year 2010: 0 rows still missing Last_Report_Date
Year 2011: 0 rows still missing Last_Report_Date
Year 2021: 24 rows still missing Last_Report_Date
\nSample of updated 2009 rows:
            Name GACC Sta

In [19]:
import pandas as pd  
import os  
  
# Define configuration for each year with JSON file path and the corresponding date column to use  
year_configs = {  
    '2008': {  
        'filepath': 'structured_incidents_data.json',  
        'date_column': 'Contain_Control_Date'  
    },  
    '2009': {  
        'filepath': 'structured_incidents_data_2009.json',  
        'date_column': 'Contain_Control_Date'  
    },  
    '2010': {  
        'filepath': 'structured_incidents_data_2010.json',  
        'date_column': 'Contain_Control_Date'  
    },  
    '2011': {  
        'filepath': 'structured_incidents_data_2011.json',  
        'date_column': 'Contain_Control_Date'  
    },  
    '2021': {  
        'filepath': 'structured_incidents_data_2021.json',  
        'date_column': 'Contain_or_Last_Report_Date'  
    }  
}  
  
# Function to load and extract date information from a given JSON file for a specific year.  
def load_and_extract_dates(config, year):  
    filepath = config['filepath']  
    date_column = config['date_column']  
    try:  
        df_raw = pd.read_json(filepath)  
        # If the data is nested under "significant_incidents", flatten it  
        if 'significant_incidents' in df_raw.columns:  
            df = pd.json_normalize(df_raw['significant_incidents'])  
        else:  
            df = df_raw  
        # Ensure the date column is available  
        if date_column in df.columns:  
            df_dates = df[['Name', date_column]].copy()  
            df_dates['Year'] = int(year)  # add Year column for merging  
            return df_dates  
        else:  
            print('Date column ' + date_column + ' not found in ' + filepath)  
            return None  
    except Exception as e:  
        print('Error processing ' + filepath + ': ' + str(e))  
        return None  
  
# Load the CSV file  
df_clean = pd.read_csv('combined_incidents_clean.csv')  
print('Loaded combined_incidents_clean.csv with ' + str(len(df_clean)) + ' rows.')  
  
# Dictionary to record how many rows are updated per year.  
updates_by_year = {}  
  
# Process each year according to the mapping  
for year, config in year_configs.items():  
    filepath = config['filepath']  
    if os.path.exists(filepath):  
        print('\\nProcessing year ' + year + ' data from ' + filepath)  
        df_dates = load_and_extract_dates(config, year)  
        if df_dates is not None:  
            # Filter CSV rows corresponding to the year  
            mask_year = (df_clean['Year'] == int(year))  
            df_clean_year = df_clean.loc[mask_year].copy()  
          
            # Merge CSV data with JSON dates on Name and Year.  
            df_merged = df_clean_year.merge(df_dates, on=['Name', 'Year'], how='left')  
            # Example: JSON date value appears in a column named like date_column+'_y'  
            json_date_col = config['date_column']  
            # After merging, the JSON column is renamed to e.g. 'Contain_Control_Date_y'  
            merged_date_col = json_date_col + '_y'  
              
            if merged_date_col not in df_merged.columns:  
                print('Merged date column not found for year ' + year + '. Skipping update.')  
                continue  
  
            updated_count = 0  
            # Loop over merged rows and update CSV if JSON date is available.  
            for idx, row in df_merged.iterrows():  
                json_date = row[merged_date_col]  
                if pd.notna(json_date):  
                    # Update in original df_clean using the Name and Year  
                    mask = (df_clean['Name'] == row['Name']) & (df_clean['Year'] == int(year))  
                    df_clean.loc[mask, 'Last_Report_Date'] = json_date  
                    updated_count += 1  
            updates_by_year[year] = updated_count  
            print('Year ' + year + ': Updated ' + str(updated_count) + ' rows in Last_Report_Date.')  
    else:  
        print('File not found for year ' + year + ': ' + filepath)  
  
# Save the updated CSV file.  
output_filename = 'combined_incidents_clean_updated_all_years.csv'  
df_clean.to_csv(output_filename, index=False)  
print('\\nUpdated CSV saved as ' + output_filename)  
  
# Summary of updates.  
print('\\nSummary of updates:')  
for year, count in updates_by_year.items():  
    print('Year ' + year + ': ' + str(count) + ' rows updated.')  
  
# Check remaining missing Last_Report_Date values by year.  
print('\\nRemaining rows with missing Last_Report_Date:')  
for year in year_configs.keys():  
    mask_year = (df_clean['Year'] == int(year)) & (pd.isna(df_clean['Last_Report_Date']))  
    count = df_clean.loc[mask_year].shape[0]  
    print('Year ' + year + ': ' + str(count) + ' rows still missing Last_Report_Date')  
  
# Display a sample of updated rows for each processed year.  
for year in year_configs.keys():  
    mask_year = (df_clean['Year'] == int(year))  
    print('\\nSample of updated ' + year + ' rows:')  
    print(df_clean.loc[mask_year].head(3))  

Loaded combined_incidents_clean.csv with 470 rows.
\nProcessing year 2008 data from structured_incidents_data.json
Year 2008: Updated 0 rows in Last_Report_Date.
\nProcessing year 2009 data from structured_incidents_data_2009.json
Year 2009: Updated 27 rows in Last_Report_Date.
\nProcessing year 2010 data from structured_incidents_data_2010.json
Year 2010: Updated 9 rows in Last_Report_Date.
\nProcessing year 2011 data from structured_incidents_data_2011.json
Year 2011: Updated 41 rows in Last_Report_Date.
\nProcessing year 2021 data from structured_incidents_data_2021.json
Merged date column not found for year 2021. Skipping update.
\nUpdated CSV saved as combined_incidents_clean_updated_all_years.csv
\nSummary of updates:
Year 2008: 0 rows updated.
Year 2009: 27 rows updated.
Year 2010: 9 rows updated.
Year 2011: 41 rows updated.
\nRemaining rows with missing Last_Report_Date:
Year 2008: 0 rows still missing Last_Report_Date
Year 2009: 0 rows still missing Last_Report_Date
Year 2010:

In [21]:
# Let's examine the date columns in our dataset to understand what we're working with
import pandas as pd
import numpy as np
import json
from datetime import datetime

# Load the CSV file with the updated Year values
csv_file = 'combined_incidents_clean_final.csv'
df = pd.read_csv(csv_file)
print(f"Loaded {csv_file} with {len(df)} rows")

# Check the date columns
print("\
Date columns in the dataset:")
date_columns = ['Start_Date', 'Last_Report_Date', 'Contain_Control_Date']
for col in date_columns:
    non_null_count = df[col].notna().sum()
    print(f"{col}: {non_null_count} non-null values out of {len(df)} rows")

# Check for rows where Last_Report_Date is null but Contain_Control_Date is not null
condition = df['Last_Report_Date'].isna() & df['Contain_Control_Date'].notna()
count = condition.sum()
print(f"\
Rows where Last_Report_Date is null but Contain_Control_Date is not null: {count}")

# Show a few examples of these rows
if count > 0:
    print("\
Sample rows where Last_Report_Date is null but Contain_Control_Date is not null:")
    sample = df[condition].head(5)
    print(sample[['Name', 'Year', 'Last_Report_Date', 'Contain_Control_Date']])

print("\
Done")

Loaded combined_incidents_clean_final.csv with 470 rows
Date columns in the dataset:
Start_Date: 470 non-null values out of 470 rows
Last_Report_Date: 372 non-null values out of 470 rows
Contain_Control_Date: 151 non-null values out of 470 rows
Rows where Last_Report_Date is null but Contain_Control_Date is not null: 74
Sample rows where Last_Report_Date is null but Contain_Control_Date is not null:
                       Name    Year Last_Report_Date Contain_Control_Date
9                Glass Fire  2008.0              NaN           03/02/2008
10          Klamath Theater  2008.0              NaN           09/26/2008
11            Basin Complex  2008.0              NaN           07/29/2008
12  Iron & Alps \nComplexes  2008.0              NaN           09/04/2008
13       Dunn Mtn. \nAssist  2008.0              NaN           09/02/2008
Done


In [23]:
# Copy Contain_Control_Date to Last_Report_Date where Last_Report_Date is null
import pandas as pd

# Load the CSV file
csv_file = 'combined_incidents_clean_final.csv'
df = pd.read_csv(csv_file)
print(f"Loaded {csv_file} with {len(df)} rows")

# Count nulls in Last_Report_Date before update
null_last_report_before = df['Last_Report_Date'].isna().sum()
print(f"Null values in Last_Report_Date before update: {null_last_report_before}")

# Identify rows where Last_Report_Date is null but Contain_Control_Date is not null
condition = df['Last_Report_Date'].isna() & df['Contain_Control_Date'].notna()
rows_to_update = condition.sum()
print(f"Rows to update: {rows_to_update}")

# Copy Contain_Control_Date to Last_Report_Date for these rows
df.loc[condition, 'Last_Report_Date'] = df.loc[condition, 'Contain_Control_Date']

# Count nulls in Last_Report_Date after update
null_last_report_after = df['Last_Report_Date'].isna().sum()
print(f"Null values in Last_Report_Date after update: {null_last_report_after}")
print(f"Rows updated: {null_last_report_before - null_last_report_after}")

# Save the updated CSV
output_file = 'combined_incidents_clean_final_updated.csv'
df.to_csv(output_file, index=False)
print(f"\
Saved updated CSV to {output_file}")

# Show a few examples of the updated rows
print("\
Sample rows after update:")
sample = df[condition].head(5)
print(sample[['Name', 'Year', 'Last_Report_Date', 'Contain_Control_Date']])

print("\
Done")

Loaded combined_incidents_clean_final.csv with 470 rows
Null values in Last_Report_Date before update: 98
Rows to update: 74
Null values in Last_Report_Date after update: 24
Rows updated: 74
Saved updated CSV to combined_incidents_clean_final_updated.csv
Sample rows after update:
                       Name    Year Last_Report_Date Contain_Control_Date
9                Glass Fire  2008.0       03/02/2008           03/02/2008
10          Klamath Theater  2008.0       09/26/2008           09/26/2008
11            Basin Complex  2008.0       07/29/2008           07/29/2008
12  Iron & Alps \nComplexes  2008.0       09/04/2008           09/04/2008
13       Dunn Mtn. \nAssist  2008.0       09/02/2008           09/02/2008
Done


The JSON is a list with 24 elements. Sample:
{'Name': 'Dixie', 'GACC': 'NO', 'State': 'CA', 'Start_Date': '13-Jul-21', 'Contain_or_Last_Report_Date': '10/23', 'Size_Acres': '963,309', 'Cause': 'U', 'Estimated_Cost': '$637,428,216'}
{'Name': 'Bootleg', 'GACC': 'NW', 'State': 'OR', 'Start_Date': '06-Jul-21', 'Contain_or_Last_Report_Date': '8/13', 'Size_Acres': '413,717', 'Cause': 'L', 'Estimated_Cost': '$100,900,000'}
{'Name': 'Monument', 'GACC': 'NO', 'State': 'CA', 'Start_Date': '31-Jul-21', 'Contain_or_Last_Report_Date': '10/25', 'Size_Acres': '223,124', 'Cause': 'L', 'Estimated_Cost': '$163,739,291'}
{'Name': 'Caldor', 'GACC': 'NO', 'State': 'CA', 'Start_Date': '14-Aug-21', 'Contain_or_Last_Report_Date': '10/20', 'Size_Acres': '221,835', 'Cause': 'H', 'Estimated_Cost': '$271,147,512'}
{'Name': 'River Complex', 'GACC': 'NO', 'State': 'CA', 'Start_Date': '30-Jul-21', 'Contain_or_Last_Report_Date': '10/24', 'Size_Acres': '199,359', 'Cause': 'L', 'Estimated_Cost': '$95,340,595'}
Total in

Loaded combined_incidents_clean_final_updated.csv with 470 rows
Number of 2021 incidents in CSV: 24
Extracted 0 date values from structured_incidents_data_2021.json
Sample of extracted dates:
Updated 0 rows with dates from structured_incidents_data_2021.json
Remaining null values in Last_Report_Date: 24
Saved updated CSV to combined_incidents_clean_final_updated_2021.csv
Sample of updated 2021 incidents:
              Name    Year  Start_Date Last_Report_Date
315          Dixie  2021.0  07/13/2021              NaN
316        Bootleg  2021.0  07/06/2021              NaN
317       Monument  2021.0  07/31/2021              NaN
318         Caldor  2021.0  08/14/2021              NaN
319  River Complex  2021.0  07/30/2021              NaN
Done
