In [None]:
import pandas as pd
import datetime
import json
from datetime import datetime
from datetime import timedelta

# Define the important variables here

# Settings for the county whose dataset receives the new data column.
target = {"fips": 6063, "target_county_name": "Plumas"}
# The data file path is derived from the county name, so it is filled in last.
target["target_data_file_name"] = (
    "Datasets/merged_tp_precip_wind_fmc_" + target["target_county_name"] + ".csv"
)

# Use the adjacent county of the target county to find referral data like aws or fmc
# Use the following link to find the adjacent county
# https://gis.data.ca.gov/datasets/8713ced9b78a4abb97dc130a691a8695/explore?location=39.765076%2C-121.456785%2C8.00
# Settings for the adjacent (referral) county that supplies the missing data.
referral = dict(
    referral_fips=6035,
    referral_country_name="Lassen",
    referral_data_file_name="Datasets/wind_with_fips.csv",
    # Name of the referral column whose values are copied into the target dataset
    referral_data_field_name="aws",
)

#
# Main program starts here
#
# Load the target dataset
# Load the target dataset and report its size
target_dataset = pd.read_csv(target['target_data_file_name'])
print(f"The dataset {target['target_data_file_name']} contains (row, column) = ")
print(target_dataset.shape)

# Load the referral dataset from its CSV file
referral_dataset = pd.read_csv(referral['referral_data_file_name'])

# Count the referral rows whose county name contains the configured name.
# regex=False matches the name literally instead of as a regular expression;
# na=False treats rows with a missing county as non-matching.
referral_count = referral_dataset['county'].str.contains(
    referral['referral_country_name'], regex=False, na=False
).sum()
# Report against the configured file name so the message stays correct
# when the referral file is changed.
print(f"The referral county {referral['referral_country_name']} has {referral_count} rows of data in the {referral['referral_data_file_name']}")
# Without any referral rows there is nothing to copy, so stop here
if int(referral_count) <= 0:
    raise SystemExit("The county doesn't exist in the referral dataset. Exits the program. ")
    
# Extract the whole volume 
def filter_dataframe_by_value(df, column_name, value_to_find):
    """Return the rows of ``df`` whose ``column_name`` equals ``value_to_find``.

    The original row order and index labels are kept; ``df`` itself is not
    modified.
    """
    is_match = df[column_name] == value_to_find
    return df[is_match]

# Keep only the referral rows that belong to the configured referral FIPS code
referral_dataset_fips_only = filter_dataframe_by_value(
    df=referral_dataset,
    column_name='fips',
    value_to_find=referral['referral_fips'],
)

# Copy the matched column from the referral dataset to the target dataset
# the datetime must be close to each other between two datasets
def merge_dataframes_on_match(df1, df2, df1_column1, df2_column1, df2_column_to_sum1):
    """Copy a value from ``df2`` into every row of ``df1``, matched by date.

    For each row of ``df1`` the rows of ``df2`` are scanned in their existing
    order (callers are expected to pass ``df2`` sorted by date, ascending):

    * every ``df2`` row dated less than 50 days before the ``df1`` date
      (later dates included) replaces the current candidate value;
    * the scan stops at the first ``df2`` row dated on or after the ``df1``
      date, and that row's value is used.

    If no ``df2`` row qualifies, 0.0 is written.

    Args:
        df1: Target DataFrame; modified in place.
        df2: Referral DataFrame providing the values.
        df1_column1: Column of ``df1`` holding dates formatted as ``%Y%m%d``
            (converted with ``str()`` before parsing).
        df2_column1: Column of ``df2`` holding date strings formatted as
            ``%Y-%m-%d``.
        df2_column_to_sum1: Column of ``df2`` to read the value from; the same
            name is used for the column written in ``df1``.

    Returns:
        None. The result is stored in ``df1[df2_column_to_sum1]``.

    Raises:
        ValueError: If a date in either column does not match its format.
            All ``df2`` dates are parsed up front, so a malformed referral
            date is reported even if the scan would never have reached it.
    """
    # Parse the referral dates and pull out the values once, instead of
    # repeating the work for every row of df1.
    referral_dates = [datetime.strptime(raw, "%Y-%m-%d") for raw in df2[df2_column1]]
    referral_values = df2[df2_column_to_sum1].tolist()

    for index1, row1 in df1.iterrows():
        row1_datetime = datetime.strptime(str(row1[df1_column1]), "%Y%m%d")
        # Stays equal to the df1 date only when df2 has no rows at all;
        # otherwise it ends up as the last referral date that was examined.
        row2_datetime = row1_datetime
        aws_to_fill = 0.0
        for row2_datetime, candidate in zip(referral_dates, referral_values):
            delta_days = (row1_datetime - row2_datetime).days
            # Anything within the 50-day look-back window becomes the candidate.
            if delta_days < 50:
                aws_to_fill = candidate
            # NOTE(review): the first referral row on or after the df1 date is
            # accepted no matter how far in the future it lies - confirm that
            # this is intended.
            if delta_days < 1:
                break

        print(f"Add data to the original {row1_datetime} <- {row2_datetime} : {aws_to_fill}")
        df1.at[index1, df2_column_to_sum1] = aws_to_fill
                
# Add the new data from the referral dataset to the target dataset
# The referral rows are passed in ascending date order
merge_dataframes_on_match(
    target_dataset,
    referral_dataset_fips_only.sort_values("date"),
    'DATE',
    'date',
    referral['referral_data_field_name'],
)

# Save the data to its original file.
# index=False keeps the row index out of the CSV: this file is read back with
# pd.read_csv on the next run, and a saved index would come back as an extra
# unnamed column each time the script is executed.
target_dataset.to_csv(target['target_data_file_name'], index=False)
print(f"Saved to {target['target_data_file_name']}")


The dataset Datasets/merged_tp_precip_wind_fmc_Plumas.csv contains (row, column) = 
(428, 18)
The referral county Lassen has 10523 rows of data in the wind_with_fips.csv
Add data to the original 2006-08-11 00:00:00 <- 2006-08-11 00:00:00 : 1.8133978
Add data to the original 2006-07-09 00:00:00 <- 2006-07-09 00:00:00 : 0.60901606
Add data to the original 2009-11-08 00:00:00 <- 2009-11-08 00:00:00 : 1.5692291
Add data to the original 2006-08-12 00:00:00 <- 2006-08-12 00:00:00 : 1.6201488000000002
Add data to the original 2007-06-28 00:00:00 <- 2007-06-28 00:00:00 : 3.6319876
Add data to the original 2009-05-28 00:00:00 <- 2009-05-28 00:00:00 : 0.7891748000000001
Add data to the original 2007-04-28 00:00:00 <- 2007-04-28 00:00:00 : 1.5163405
Add data to the original 2008-08-17 00:00:00 <- 2008-08-17 00:00:00 : 0.704914
Add data to the original 2009-09-12 00:00:00 <- 2009-09-12 00:00:00 : 0.6560578
Add data to the original 2008-08-17 00:00:00 <- 2008-08-17 00:00:00 : 0.704914
Add data to t