# In [None]:
import pandas as pd
import datetime
import json
from datetime import datetime
from datetime import timedelta

# Run configuration: which county gets filled and which county supplies the data.

# Target county: its FIPS code, its name, the CSV holding its data and the
# column that will receive the borrowed values.
target = dict(
    fips=6009,
    target_county_name="Calaveras",
    target_data_file_name="",
    target_data_field_name="fmc",
)
# The CSV name embeds the county name, so it is filled in once the dict exists.
target["target_data_file_name"] = "Datasets/merged_tp_precip_wind_fmc_{}.csv".format(
    target["target_county_name"]
)

# Referral county: a county adjacent to the target whose records (such as aws
# or fmc) are used as a stand-in for the target county.
# Adjacent counties can be looked up on this map:
# https://gis.data.ca.gov/datasets/8713ced9b78a4abb97dc130a691a8695/explore?location=39.765076%2C-121.456785%2C8.00
# NOTE: the key is spelled "referral_country_name" but holds a county name;
# the spelling is kept because other parts of the script read this key.
referral = dict(
    referral_fips=6005,
    referral_country_name="Amador",
    referral_data_file_name="Datasets/fuel_with_fips.csv",
    referral_data_field_name="percent",
)

#
# Main program starts here
#
# Read the target county's CSV and report its dimensions
target_dataset = pd.read_csv(target['target_data_file_name'])
print(
    f"The dataset {target['target_data_file_name']} contains (row, column) = ",
    target_dataset.shape,
    sep="\n",
)

# Read the referral dataset from its CSV
referral_dataset = pd.read_csv(referral['referral_data_file_name'])

# Count the referral rows whose 'county' value contains the referral county name
referral_count = (
    referral_dataset['county']
    .str.contains(referral['referral_country_name'])
    .sum()
)
print(f"The referral county {referral['referral_country_name']} has {referral_count} rows of data in the {referral['referral_data_file_name']}")
# With no matching rows there is nothing to borrow from, so stop the program
if int(referral_count) < 1:
    raise SystemExit("The county doesn't exist in the referral dataset. Exits the program. ")
    
# Select the rows of a DataFrame whose column equals a given value
def filter_dataframe_by_value(df, column_name, value_to_find):
    """Return the rows of ``df`` where ``column_name`` equals ``value_to_find``.

    Args:
        df: DataFrame to search.
        column_name: Name of the column to compare.
        value_to_find: Value a row must hold in that column to be kept.

    Returns:
        A DataFrame with only the matching rows (original index labels kept);
        empty when nothing matches.
    """
    matches = df[column_name] == value_to_find
    return df.loc[matches]

# Keep only the referral rows whose 'fips' column equals the referral county's FIPS code
referral_dataset_fips_only = filter_dataframe_by_value(
    df=referral_dataset,
    column_name='fips',
    value_to_find=referral['referral_fips'],
)

# Copy the matched column from the referral dataset to the target dataset.
# For every target row, the referral row with the closest date is used.
def merge_dataframes_on_match(df1, df2, df1_column1, df2_column1, referral_data_field_name, target_data_field_name, max_delta_days=100):
    """Fill a column of ``df1`` with the date-nearest value found in ``df2``.

    For each row of ``df1`` the referral row of ``df2`` whose date is closest
    (in whole days) is looked up, and its ``referral_data_field_name`` value
    is written into ``df1[target_data_field_name]`` in place. Rows of ``df1``
    with no referral row closer than ``max_delta_days`` are left untouched.

    Args:
        df1: Target DataFrame; modified in place.
        df2: Referral DataFrame supplying the values.
        df1_column1: Name of the date column in ``df1``; each value, once
            passed through ``str()``, must match ``%Y%m%d``.
        df2_column1: Name of the date column in ``df2``; values must be
            strings matching ``%Y-%m-%d``.
        referral_data_field_name: Column of ``df2`` to copy from.
        target_data_field_name: Column of ``df1`` to copy into.
        max_delta_days: Exclusive upper bound on the date distance, in days,
            for a referral row to be considered. Defaults to 100.

    Returns:
        None. ``df1`` is updated in place and one line is printed per filled row.

    Raises:
        ValueError: If a date value does not match the expected format.
    """
    # Parse the referral dates once instead of once per target row.
    candidates = [
        (datetime.strptime(date_text, "%Y-%m-%d"), value)
        for date_text, value in zip(df2[df2_column1], df2[referral_data_field_name])
    ]

    # Read the date column directly rather than through iterrows(): iterrows()
    # can upcast an integer date to float when other columns are float, and
    # str() of that float ("20200110.0") no longer matches "%Y%m%d".
    # The pairs are snapshotted because df1 is modified inside the loop.
    target_dates = list(df1[df1_column1].items())

    for index1, raw_date in target_dates:
        row1_datetime = datetime.strptime(str(raw_date), "%Y%m%d")

        # Best candidate so far as (delta_days, referral datetime, value);
        # None means "nothing found", so any real value (including -1) is kept.
        best = None
        for row2_datetime, value in candidates:
            delta_days = abs((row1_datetime - row2_datetime).days)
            if delta_days >= max_delta_days:
                continue
            # "<=" lets a later row win a tie, so equally distant rows resolve
            # to the one that comes last in df2's order.
            if best is None or delta_days <= best[0]:
                best = (delta_days, row2_datetime, value)
                if delta_days == 0:
                    # An exact date match cannot be improved on.
                    break

        if best is None:
            continue

        data_to_fill_delta_days, data_to_fill_datetime, data_to_fill = best
        print(f"Add data to the original {row1_datetime} <- {data_to_fill_datetime} - {data_to_fill_delta_days} days: {data_to_fill}")
        df1.at[index1, target_data_field_name] = data_to_fill
                
# Add the new data from the referral dataset to the target dataset.
# The referral rows are sorted by their 'date' column before the lookup.
merge_dataframes_on_match(target_dataset, referral_dataset_fips_only.sort_values("date"), 
                          'DATE', 'date', 
                          referral['referral_data_field_name'], target['target_data_field_name'])

# Save the data back to its original file.
# index=False keeps the DataFrame's row index out of the CSV: this same file is
# loaded with pd.read_csv at the start of the script, so writing the index
# would add one more "Unnamed: 0"-style column every time the script is re-run.
target_dataset.to_csv(target['target_data_file_name'], index=False)
print(f"Saved to {target['target_data_file_name']}")
