## Functional

In [25]:
%%time

import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

# Define the file path for the dataset
file_path = 'G:/001Project/Output/Sheets/cleaned_Results/irrigation_wUse/Section_IWU_2021.csv'

# Load the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Define the list of columns to drop (excluding "LN_ID" and "CLIMATE")
columns_to_drop = ["COUNTY", "CLIMATE", "STATE", "CROP_DIV", "DOM_CROP", "PR_DAYMET", "PR_gridMET", "PR_IMERG", "PR_NLDAS", "PR_PRISM"]
df = df.drop(columns=columns_to_drop)

# Define the lists of products and ensembles
precipitation_products1 = ['DAYMET', 'gridMET', 'IMERG', 'NLDAS', 'PRISM']
evapotranspiration_products1 = ['DISALEXI', 'EEMETRIC', 'GEESEBAL', 'PTJPL', 'SIMS', 'SSEBOP']
precipitation_products2 = set()
evapotranspiration_products2 = set()

# Initialize DataFrames to store results
result_df_list = []
result_df_list2 = []

# Loop through each unique 'LN_ID' and perform the analysis
unique_ln_ids = df['LN_ID'].unique()
for ln_id in unique_ln_ids:
    # Filter the DataFrame for the current 'LN_ID'
    ln_id_df = df[df['LN_ID'] == ln_id].copy()
    ln_id_df = ln_id_df.clip(lower=0)

    ensembles =  ['DISALEXI_DAYMET', 'EEMETRIC_DAYMET', 'GEESEBAL_DAYMET', 'PTJPL_DAYMET', 'SIMS_DAYMET', 'SSEBOP_DAYMET',
                  'DISALEXI_gridMET', 'EEMETRIC_gridMET', 'GEESEBAL_gridMET', 'PTJPL_gridMET', 'SIMS_gridMET', 'SSEBOP_gridMET',
                  'DISALEXI_IMERG', 'EEMETRIC_IMERG', 'GEESEBAL_IMERG', 'PTJPL_IMERG', 'SIMS_IMERG', 'SSEBOP_IMERG',
                  'DISALEXI_NLDAS', 'EEMETRIC_NLDAS', 'GEESEBAL_NLDAS', 'PTJPL_NLDAS', 'SIMS_NLDAS', 'SSEBOP_NLDAS',
                  'DISALEXI_PRISM', 'EEMETRIC_PRISM', 'GEESEBAL_PRISM', 'PTJPL_PRISM', 'SIMS_PRISM', 'SSEBOP_PRISM']
    
    # Extract unique precipitation and evapotranspiration products for the second dataset
    for ensemble in ensembles:
        parts = ensemble.split('_')
        if len(parts) == 2:
            precipitation_products2.add(parts[1])
            evapotranspiration_products2.add(parts[0])

    # Compute the P and ET Variance Contributions
    for prc in precipitation_products1:
        columns = [col for col in ln_id_df.columns if col.endswith(prc)]
        ln_id_df[f'{prc}_AVG'] = ln_id_df[columns].mean(axis=1)

    for eta in evapotranspiration_products1:
        columns = [col for col in ln_id_df.columns if col.startswith(eta)]
        ln_id_df[f'{eta}_AVG'] = ln_id_df[columns].mean(axis=1)

    ln_id_df['Overall_Avg'] = ln_id_df[ensembles].mean(axis=1)

    for prc in precipitation_products1:
        ln_id_df[f'{prc}_VAR'] = (ln_id_df[f'{prc}_AVG'] - ln_id_df['Overall_Avg'])**2

    for eta in evapotranspiration_products1:
        ln_id_df[f'{eta}_VAR'] = (ln_id_df[f'{eta}_AVG'] - ln_id_df['Overall_Avg'])**2

    num_precipitation_products1 = len(precipitation_products1)
    ln_id_df['Precipitation_Variance_Sum'] = ln_id_df[[f'{prc}_VAR' for prc in precipitation_products1]].sum(axis=1) / num_precipitation_products1

    num_evapotranspiration_products1 = len(evapotranspiration_products1)
    ln_id_df['Evapotranspiration_Variance_Sum'] = ln_id_df[[f'{eta}_VAR' for eta in evapotranspiration_products1]].sum(axis=1) / num_evapotranspiration_products1

    
    result_df_list.append(ln_id_df)
    
    ln_id_df2 = df[df['LN_ID'] == ln_id].copy()
    ln_id_df2 = ln_id_df2.clip(lower=0)
    
    # Compute the Cross-Variance Contributions (similar to earlier step but for every ensemble)
    for precip_product in precipitation_products2:
        precip_columns = [col for col in ln_id_df2.columns if col.endswith(precip_product)]
        precip_avg = ln_id_df2[precip_columns].mean(axis=1)
        for col in precip_columns:
            ln_id_df2[col + '_DIFF'] = ln_id_df2[col] - precip_avg

    for evapotrans_product in evapotranspiration_products2:
        evapotrans_columns = [col for col in ln_id_df2[ensembles] if col.startswith(evapotrans_product)]
        ln_id_df2[evapotrans_product + '_AVG'] = ln_id_df2[evapotrans_columns].mean(axis=1)

    ln_id_df2['Ensemble_Average'] = ln_id_df2[ensembles].mean(axis=1)

    for evapotrans_product in evapotranspiration_products2:
        for precip_product in precipitation_products2:
            precip_col = evapotrans_product + '_' +  precip_product + '_DIFF'
            evapotrans_avg_col = evapotrans_product + '_AVG'
            ln_id_df2[evapotrans_product + '_' + precip_product + '_COV'] = (ln_id_df2[precip_col] - ln_id_df2[evapotrans_avg_col] + ln_id_df2['Ensemble_Average'])**2

    ensemble_cov_columns = [col for col in ln_id_df2.columns if col.endswith('_COV')]
    average_ensemble_covariance = ln_id_df2[ensemble_cov_columns].sum(axis=1) / (num_precipitation_products1 * num_evapotranspiration_products1)

    ln_id_df2['Average_Ensemble_Variance'] = average_ensemble_covariance

    # Append the result DataFrame to the list
    result_df_list2.append(ln_id_df2)
    
# Concatenate the two lists of DataFrames into a single DataFrame
combined_df = pd.concat([pd.concat(result_df_list), pd.concat(result_df_list2)], axis=1)

# Reset the index of the combined DataFrame
combined_df.reset_index(drop=True, inplace=True)

#Select Columns to be exported to new Dataframe
selected_columns = ["LN_ID", "Precipitation_Variance_Sum", "Evapotranspiration_Variance_Sum", "Average_Ensemble_Variance"]

# Create a new DataFrame containing only the selected columns
selected_df = combined_df[selected_columns]

selected_df.to_csv('G:/001Project/Output/Sheets/cleaned_Results/irrigation_wUse/Var_partition/Var_partitioning_2021.csv', index=False)
selected_df

CPU times: total: 1h 11min 55s
Wall time: 1h 42min 12s


Unnamed: 0,LN_ID,LN_ID.1,Precipitation_Variance_Sum,Evapotranspiration_Variance_Sum,Average_Ensemble_Variance
0,2401,2401,2195.712133,4541.873152,1.238617e-27
1,2413,2413,54.891528,7843.227898,2.154116e-27
2,2416,2416,163.261266,857.612928,6.893172e-27
3,2422,2422,253.088774,3415.876800,1.723293e-27
4,2424,2424,450.933550,3136.134896,1.217076e-26
...,...,...,...,...,...
114867,2701140,2701140,224.403528,8573.903720,1.238617e-27
114868,2701142,2701142,144.690665,7731.624172,2.668910e+01
114869,2701143,2701143,64.183314,6494.760732,4.589467e+01
114870,2701144,2701144,229.911296,4320.079364,3.015763e-27


## Add "CLIMATE" Column to the VAR Dataset

In [15]:
## Add "CLIMATE" Column to the VAR Dataset
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

Var_data = pd.read_csv('G:/001Project/Output/Sheets/cleaned_Results/irrigation_wUse/Var_partition/Var_partitioning_2016.csv')
IWU_data = pd.read_csv('G:/001Project/Output/Sheets/cleaned_Results/irrigation_wUse/Section_IWU_2016.csv')

## merge the two dataframes
merged_df = pd.merge(Var_data, IWU_data, on='LN_ID')

# Replace values in the "CLIMATE" column
merged_df['CLIMATE'] = merged_df['CLIMATE'].replace({
    'BSh': 'Semi Arid',
    'BSk': 'Semi Arid',
    'Dfa': 'Humid',
    'Cfa': 'Humid',
    'Dwa': 'Humid'
})

#Select Columns to be exported to new Dataframe
selected_columns = ["LN_ID", "CLIMATE", "Precipitation_Variance_Sum", "Evapotranspiration_Variance_Sum", "Average_Ensemble_Variance"]

aggregated_df = merged_df[selected_columns]
aggregated_df.to_csv('G:/001Project/Output/Sheets/cleaned_Results/irrigation_wUse/Var_partition/W_Climate/Var_partitioning_2016.csv', index=False)

# aggregated_df = data_to_export.groupby('CLIMATE').agg({
#     'Precipitation_Variance_Sum': 'mean',
#     'Evapotranspiration_Variance_Sum': 'mean',
#     'Average_Ensemble_Variance': 'mean'
# }).reset_index()

# Calculate Variance_P as the sum of Precipitation_Variance_Sum and Average_Ensemble_Variance
aggregated_df['Variance_P'] = aggregated_df['Precipitation_Variance_Sum'] + aggregated_df['Average_Ensemble_Variance']

# Calculate Variance_ET as the sum of Evapotranspiration_Variance_Sum and Average_Ensemble_Variance
aggregated_df['Variance_ET'] = aggregated_df['Evapotranspiration_Variance_Sum'] + aggregated_df['Average_Ensemble_Variance']

# Calculate Total_Variance as the sum of Variance_P and Variance_ET
aggregated_df['Total_Variance'] = aggregated_df['Variance_P'] + aggregated_df['Variance_ET']

# Calculate the percentage of Variance_P and Variance_ET from Total_Variance
aggregated_df['Percentage_Variance_P'] = (aggregated_df['Variance_P'] / aggregated_df['Total_Variance']) * 100
aggregated_df['Percentage_Variance_ET'] = (aggregated_df['Variance_ET'] / aggregated_df['Total_Variance']) * 100

# Save dataframe to a CSV file
aggregated_df.to_csv('G:/001Project/Output/Sheets/cleaned_Results/irrigation_wUse/Var_partition/W_Climate/Per_Variance/Percentage_var_2016.csv', index=False)