# In [None]:
import pandas as pd
import datetime
import json
from datetime import datetime
from datetime import timedelta

# Settings for the target dataset, i.e. the CSV that receives the copied columns.
# No FIPS code is kept here: the target data is located by file name only.
target = dict(
    target_county_name="Calaveras",
    date_column_name="DATE",
    datetime_format="%Y%m%d",
    target_data_file_name="",
    target_data_field_name="prcp",
)
# The file name is derived from the county name, so switching to another
# county only requires changing "target_county_name".
target.update(
    target_data_file_name=f"Datasets/merged_tp_precip_wind_fmc_{target['target_county_name']}.csv"
)

# Use a county adjacent to the target county as the source of referral data
# such as aws or fmc.
# Use the following link to find the adjacent county
# https://gis.data.ca.gov/datasets/8713ced9b78a4abb97dc130a691a8695/explore?location=39.765076%2C-121.456785%2C8.00
referral = {
    # Census county FIPS code for Tuolumne County, California (06109).
    # The previous value, 6051, is the code for Mono County, which does not
    # match "referral_country_name" and is not adjacent to Calaveras.
    # NOTE(review): assumes the 'fips' column of the referral CSV stores the
    # state+county code as an integer without the leading zero - confirm
    # against the data file.
    "referral_fips": 6109,
    # Key is spelled "country" (not "county"); it is kept as-is because other
    # code looks the value up under this exact key.
    "referral_country_name": "Tuolumne",
    "date_column_name": "date",
    "datetime_format": "%Y%m%d",
    "referral_data_file_name": "Datasets/tp_zipcode_county_CA.csv",
    # Columns copied from the referral dataset into the target dataset
    "data_fields_to_be_copied": ["prcp", "tmax", "tmin", "tavg"],
}

# Day offsets tried, in this order, when looking for a referral row close to a
# target date: the same day first, then one day earlier, one day later, two
# days earlier, two days later, ... out to twenty days on either side.
date_delta_range = [0] + [
    sign * days for days in range(1, 21) for sign in (-1, 1)
]

# Select every row whose value in one column equals a given value
def filter_dataframe_by_value(df, column_name, value_to_find):
    """Return the rows of ``df`` where ``df[column_name]`` equals ``value_to_find``.

    The input frame is only read, never modified. Matching rows keep their
    original order and index labels; the result is empty when nothing matches.
    """
    is_match = df[column_name].eq(value_to_find)
    return df.loc[is_match]

# Copy the matched columns from the referral dataset to the target dataset.
# The match key is the date. Note the date column might have different names
# in the two datasets, and a referral row only has to be *near* the target date.
def merge_dataframes_on_near_date(target_df, referral_df,
                                  target_data_field_name, referral_data_fields_to_be_copied,
                                  *, target_config=None, referral_config=None,
                                  day_offsets=None):
    """Fill columns of ``target_df`` in place from the nearest-dated referral row.

    For every row of ``target_df`` the day offsets are tried in order; the
    first offset for which ``referral_df`` has a row with that date wins, and
    the listed fields of the first such row are written into the target row.
    Target rows with no referral row at any offset are left unchanged.

    Args:
        target_df: DataFrame to update; modified in place.
        referral_df: DataFrame to copy from; not modified (a sorted copy is
            used internally).
        target_data_field_name: Unused; kept so existing callers keep working.
        referral_data_fields_to_be_copied: Column names to copy from the
            referral row into the target row.
        target_config: Mapping with 'date_column_name' and 'datetime_format'
            for the target data. Defaults to the module-level ``target``.
        referral_config: Same two keys for the referral data. Defaults to the
            module-level ``referral``.
        day_offsets: Iterable of day offsets to try, in priority order.
            Defaults to the module-level ``date_delta_range``.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        ValueError: If a target date does not match the target datetime
            format, or if the referral datetime format produces a string
            that is not a plain integer.
    """
    # Fall back to the module-level settings when no override is supplied
    if target_config is None:
        target_config = target
    if referral_config is None:
        referral_config = referral
    if day_offsets is None:
        day_offsets = date_delta_range

    target_date_column = target_config['date_column_name']
    target_date_format = target_config['datetime_format']
    referral_date_column = referral_config['date_column_name']
    referral_date_format = referral_config['datetime_format']

    # Sort the referral data set by date, ascending
    referral_df = referral_df.sort_values(referral_date_column)
    referral_dates = referral_df[referral_date_column]

    # Read the dates straight from the column rather than through iterrows():
    # iterrows() upcasts an all-numeric row to float, which would turn
    # 20200101 into "20200101.0" and break strptime.
    for index1, raw_date in target_df[target_date_column].items():
        row1_datetime = datetime.strptime(str(raw_date), target_date_format)

        for i in day_offsets:
            # Convert the day to int in order to compare with the referral
            # date column, which is loaded as int by default
            next_day = row1_datetime + timedelta(days=i)
            referral_next_day = int(next_day.strftime(referral_date_format))

            # Find in the referral dataframe by date
            found_df = referral_df[referral_dates == referral_next_day]
            if found_df.empty:
                # Nothing on this day: try the next-closest offset
                continue

            print(f"Add data to the original {row1_datetime} <- {referral_next_day}  delta days {i}: ")
            first_match = found_df.iloc[0]
            for f in referral_data_fields_to_be_copied:
                # Take the value from the first matching row and assign it
                # to the target dataframe
                target_df.at[index1, f] = first_match[f]
                print(f"{f} = {first_match[f]}")
            break

#
# Load both datasets, copy the referral columns into the target, save the result
#
def merge_data_referral_to_target(target, referral):
    """Copy referral-county columns into the target CSV and save it in place.

    Steps: read the target CSV and the referral CSV, keep only the referral
    rows whose 'fips' equals ``referral['referral_fips']``, copy the
    configured fields onto the target rows by nearest date, then overwrite
    the target CSV.

    Args:
        target: Mapping with 'target_data_file_name' and
            'target_data_field_name'.
        referral: Mapping with 'referral_data_file_name',
            'referral_country_name', 'referral_fips' and
            'data_fields_to_be_copied'.

    Returns:
        None. Returns early, without writing anything, when the referral
        county name or the referral fips code has no rows in the referral
        dataset.
    """
    target_path = target['target_data_file_name']
    referral_path = referral['referral_data_file_name']
    referral_county = referral['referral_country_name']

    # Load the target dataset
    target_dataset = pd.read_csv(target_path)
    print(f"The dataset {target['target_data_file_name']} contains (row, column) = ")
    print(target_dataset.shape)

    # Load the referral dataset from a csv
    referral_dataset = pd.read_csv(referral_path)

    # Check if the county exists in the referral dataset.
    # regex=False matches the name literally; na=False counts rows with a
    # missing county as "no match" instead of propagating NaN.
    referral_count = referral_dataset['county'].str.contains(
        referral_county, regex=False, na=False).sum()
    print(f"The referral county {referral['referral_country_name']} has {referral_count} rows of data in the {referral['referral_data_file_name']}")
    # If no data is found, exit the program
    if int(referral_count) <= 0:
        print("The county doesn't exist in the referral dataset. Exits the program.")
        return

    # Create a new data frame containing only rows with the referral fips code
    referral_dataset_fips_only = filter_dataframe_by_value(referral_dataset, 'fips', referral['referral_fips'])
    print(f"referral_dataset with fips {referral['referral_fips']} = {referral_dataset_fips_only.shape}")
    # The county-name check above does not guarantee the fips code matches
    # any row; without rows there is nothing to copy.
    if referral_dataset_fips_only.empty:
        print(f"No rows with fips {referral['referral_fips']} in the referral dataset. Exits the program.")
        return

    # Add the new data from the referral dataset to the target dataset
    merge_dataframes_on_near_date(target_dataset, referral_dataset_fips_only,
                                  target['target_data_field_name'], referral['data_fields_to_be_copied'])

    # Drop leftover "Unnamed: N" index columns written by earlier saves
    stale_index_columns = target_dataset.filter(regex="Unname").columns
    target_dataset.drop(columns=stale_index_columns, inplace=True)
    # Save the data to its original file. index=False keeps the row index out
    # of the file so no new "Unnamed: 0" column appears on the next load.
    target_dataset.to_csv(target_path, index=False)
    print(f"Saved to {target['target_data_file_name']}")

#
# Entry point: run the merge using the module-level target / referral settings
#
merge_data_referral_to_target(target=target, referral=referral)
