In [1]:
import pandas as pd
import numpy as np
import os
import csv

In [9]:
# Strip the per-interval attribute columns from each layer's interpolated
# output and write the slimmed-down copy into a "cleaned_files" sub-folder.

# Folder holding the per-layer CSV files
input_dir = "C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/"

# Layer files, handled in this order
files_to_process = [
    "L1_cum_interpolated_output.csv",
    "L2_cum_interpolated_output.csv",
    "L3_cum_interpolated_output.csv",
    "L4_cum_interpolated_output.csv",
]

# Columns removed from every file (only those actually present are dropped)
columns_to_drop = [
    'WellName', 'Well', 'Point',
    'X', 'Y', 'Zland',
    'INTERVALSTART', 'INTERVALEND',
    'Coarse', 'Kxy', 'SY', 'Ss', 'Kv',
    'INTERVAL_MIDPOINT', 'INTERVALLENGTH',
    'X.1', 'Y.1',
]

# Destination folder for the cleaned copies; created on first use
cleaned_dir = os.path.join(input_dir, "cleaned_files")
os.makedirs(cleaned_dir, exist_ok=True)

for file_name in files_to_process:
    file_path = os.path.join(input_dir, file_name)

    if os.path.exists(file_path):
        print(f"Processing file: {file_path}")
        layer_df = pd.read_csv(file_path, low_memory=False)

        # Keep the drop list's own order so the log line is stable across files
        available = layer_df.columns
        columns_present = [name for name in columns_to_drop if name in available]
        print(f"Dropping columns: {columns_present}")
        layer_df = layer_df.drop(labels=columns_present, axis='columns')

        # Same file name, different folder
        cleaned_file_path = os.path.join(cleaned_dir, file_name)
        layer_df.to_csv(cleaned_file_path, index=False)
        print(f"Cleaned file saved: {cleaned_file_path}")
    else:
        # A missing layer is reported and skipped, not treated as fatal
        print(f"File not found: {file_path}")

Processing file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/L1_cum_interpolated_output.csv
Dropping columns: ['WellName', 'Well', 'Point', 'X', 'Y', 'Zland', 'INTERVALSTART', 'INTERVALEND', 'Coarse', 'Kxy', 'SY', 'Ss', 'Kv', 'INTERVAL_MIDPOINT', 'INTERVALLENGTH', 'X.1', 'Y.1']
Cleaned file saved: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/cleaned_files\L1_cum_interpolated_output.csv
Processing file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/L2_cum_interpolated_output.csv
Dropping columns: ['WellName', 'Well', 'Point', 'X', 'Y', 'Zland', 'INTERVALSTART', 'INTERVALEND', 'Coarse', 'Kxy', 'SY', 'Ss', 'Kv', 'INTERVAL_MIDPOINT', 'INTERVALLENGTH', 'X.1', 'Y.1']
Cleaned file saved: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/cleaned_files\L2_cum_interpolated_output.csv
Processing file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/L3_cum_interpolated_output.csv
Dropping columns: ['WellName', 'Well', 'Point', 'X', 'Y', 'Zland', 'INTERVALSTART', 'IN

In [11]:
# Combine the cleaned per-layer files into one table with a column per layer
# (L1..L4), joined on OBJECTID. Relies on `files_to_process` and `input_dir`
# from the column-dropping cell earlier in this notebook.

# Path to the directory containing the cleaned files
cleaned_dir = "C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/cleaned_files"

# Running result; stays None until the first file has been read
combined_df = None

# Process each cleaned file
for file_name in files_to_process:
    cleaned_file_path = os.path.join(cleaned_dir, file_name)

    if not os.path.exists(cleaned_file_path):
        print(f"File not found: {cleaned_file_path}")
        continue

    # Load the cleaned file
    print(f"Processing cleaned file: {cleaned_file_path}")
    layer_df = pd.read_csv(cleaned_file_path, low_memory=False)

    # The layer name is the file-name prefix before the first underscore
    # (e.g. 'L1'); RASTERVALU is renamed to it so each layer gets its own column.
    layer_name = file_name.split('_')[0]
    layer_values = layer_df.rename(columns={'RASTERVALU': layer_name})[['OBJECTID', layer_name]]

    if combined_df is None:
        # First file seeds the result
        combined_df = layer_values
    else:
        # validate='one_to_one' makes pandas raise MergeError if OBJECTID repeats
        # on either side. Without it a duplicated key silently multiplies rows,
        # which would only surface later as a row-count mismatch (or not at all).
        # how='inner' still drops OBJECTIDs that are absent from any layer file.
        combined_df = pd.merge(
            combined_df,
            layer_values,
            on=['OBJECTID'],
            how='inner',
            validate='one_to_one',
        )

# Save the combined DataFrame to a new CSV file
output_file = os.path.join(input_dir, "Combined_Cumulative_Layers.csv")
if combined_df is not None:
    combined_df.to_csv(output_file, index=False)
    print(f"Combined DataFrame saved to: {output_file}")
else:
    print("No valid data processed.")

Processing cleaned file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/cleaned_files\L1_cum_interpolated_output.csv
Processing cleaned file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/cleaned_files\L2_cum_interpolated_output.csv
Processing cleaned file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/cleaned_files\L3_cum_interpolated_output.csv
Processing cleaned file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/cleaned_files\L4_cum_interpolated_output.csv
Combined DataFrame saved to: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/Combined_Cumulative_Layers.csv


In [13]:
# Attach the per-layer columns (L1..L4) to the full L1 interval table by
# placing the two tables side by side, then write the result to disk.

# Path to the directory containing the CSV files
input_dir = "C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/"

# Files to concatenate
filtered_file = os.path.join(input_dir, "L1_cum_interpolated_output.csv")
combined_file = os.path.join(input_dir, "Combined_Cumulative_Layers.csv")

# Output file path
output_file = os.path.join(input_dir, "Updated_Filtered_WCRs_AEM.csv")

# Fail early, with the offending path, if either input is missing
if not os.path.exists(filtered_file):
    raise FileNotFoundError(f"Filtered file not found: {filtered_file}")
if not os.path.exists(combined_file):
    raise FileNotFoundError(f"Combined file not found: {combined_file}")

# Read the CSV files
print(f"Reading file: {filtered_file}")
filtered_df = pd.read_csv(filtered_file, low_memory=False)

print(f"Reading file: {combined_file}")
combined_df = pd.read_csv(combined_file, low_memory=False)

# Ensure the files have the same number of rows
if len(filtered_df) != len(combined_df):
    raise ValueError("The two files have a different number of rows. Cannot concatenate column-wise.")

# A column-wise concat pairs rows purely by position, so equal length alone is
# not enough: if the two files list the same records in a different order the
# layer depths would be attached to the wrong intervals without any error.
# When both tables carry OBJECTID, require it to agree row-for-row.
if 'OBJECTID' in filtered_df.columns and 'OBJECTID' in combined_df.columns:
    left_ids = filtered_df['OBJECTID'].reset_index(drop=True)
    right_ids = combined_df['OBJECTID'].reset_index(drop=True)
    # Treat NaN == NaN as a match so missing ids in the same row do not trip the check
    ids_match = (left_ids == right_ids) | (left_ids.isna() & right_ids.isna())
    if not ids_match.all():
        first_bad = int((~ids_match).to_numpy().argmax())
        raise ValueError(
            "OBJECTID differs between the two files at row "
            f"{first_bad}. Cannot concatenate column-wise."
        )

# Concatenate the two DataFrames column-wise
print("Concatenating DataFrames column-wise...")
updated_df = pd.concat([filtered_df, combined_df], axis=1)

# Save the updated DataFrame to a new file
updated_df.to_csv(output_file, index=False)
print(f"Updated file saved to: {output_file}")

Reading file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/L1_cum_interpolated_output.csv
Reading file: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/Combined_Cumulative_Layers.csv
Concatenating DataFrames column-wise...
Updated file saved to: C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/Updated_Filtered_WCRs_AEM.csv


In [15]:
# Helper columns that are no longer needed once the layer columns are attached.
# errors='ignore' lets this cell be re-run after the columns are already gone.
columns_to_drop = ['RASTERVALU', 'X.1', 'Y.1', 'OBJECTID']

updated_df = updated_df.drop(labels=columns_to_drop, axis='columns', errors='ignore')

# Preview the result
updated_df.head()

Unnamed: 0,WellName,Well,Point,X,Y,Zland,INTERVALSTART,INTERVALEND,Coarse,Kxy,SY,Ss,Kv,INTERVAL_MIDPOINT,INTERVALLENGTH,L1,L2,L3,L4
0,WCR2024-000122,308698,1,665784.728675,4109370.0,740.8212179,0.0,55.0,12.0,12.0,12.0,12.0,12.0,27.5,55.0,150.549377,2694.378174,2742.664551,3472.556885
1,WCR2024-000122,308698,2,665784.728675,4109370.0,740.8212179,55.0,57.0,5.0,0.016402,3.0,0.0025,0.0005,56.0,112.0,150.549377,2694.378174,2742.664551,3472.556885
2,WCR2024-000122,308698,3,665784.728675,4109370.0,740.8212179,57.0,63.0,5.0,0.016402,3.0,0.0025,0.0005,60.0,120.0,150.549377,2694.378174,2742.664551,3472.556885
3,WCR2024-000122,308698,4,665784.728675,4109370.0,740.8212179,63.0,75.0,5.0,0.016402,3.0,0.0025,0.0005,69.0,138.0,150.549377,2694.378174,2742.664551,3472.556885
4,WCR2024-000122,308698,5,665784.728675,4109370.0,740.8212179,75.0,77.0,5.0,0.016402,3.0,0.0025,0.0005,76.0,152.0,150.549377,2694.378174,2742.664551,3472.556885


# Where INTERVALEND >= the C2VSimFG layer base, select the data and save the selection as a new DataFrame

In [18]:
# Layer-depth columns of `updated_df`, processed in this order
layers = ['L1', 'L2', 'L3', 'L4']

# Directory to save the output files
output_dir = "C:/Users/betebari/Documents/C2VSim_Texture/Aq-Params/Grouped_Results/"
os.makedirs(output_dir, exist_ok=True)

# Per-layer interval tables and per-layer grouped averages, keyed by layer name
layer_dataframes = {}
grouped_results = {}

# Boolean mask of rows already assigned to an earlier layer.
# It is built on updated_df's own index because it is later combined with
# masks derived from updated_df using & and |, which align on index labels;
# a fresh 0..n-1 index would only line up if updated_df happens to have one.
previous_rows = pd.Series(False, index=updated_df.index)

def process_group(group, layer_column):
    """Clip or discard one well's intervals against the current layer depth.

    Only the last row of ``group`` is inspected, so the caller is expected to
    pass the rows in depth order (this notebook sorts by INTERVALSTART first).

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single well. Must contain 'INTERVALEND' and ``layer_column``.
        The input frame is never modified.
    layer_column : str
        Name of the column holding the layer depth (e.g. 'L1').

    Returns
    -------
    pandas.DataFrame or None
        * A copy of ``group`` whose last INTERVALEND is set to the layer depth,
          when the last interval ends at or below that depth.
        * None when the group is empty, or when the last interval ends above
          the layer depth and ``depth >= 0.75 * last INTERVALEND``.
        * ``group`` itself, unchanged, in every remaining case (for example
          when either value is NaN, so both comparisons are False).
    """
    if group.empty:
        # Nothing to inspect; the caller treats None as "drop this group".
        return None

    # Read the two scalars column-wise instead of building a whole
    # mixed-dtype row with group.iloc[-1].
    last_interval_end = group['INTERVALEND'].iloc[-1]
    depth_for_layer = group[layer_column].iloc[-1]

    if last_interval_end >= depth_for_layer:
        # Work on a copy so the caller's frame is left untouched, and assign by
        # position so only the last row changes even if index labels repeat.
        clipped = group.copy()
        clipped.iloc[-1, clipped.columns.get_loc('INTERVALEND')] = depth_for_layer
        return clipped

    # NOTE(review): this branch is only reached when last_interval_end is below
    # depth_for_layer, and then depth_for_layer >= 0.75 * last_interval_end holds
    # for every non-negative depth, so the 0.75 factor never changes the outcome
    # and every well ending above the layer depth is dropped. If the intent was
    # to keep wells that reach at least 75% of the layer depth, the test would
    # be last_interval_end < 0.75 * depth_for_layer. Confirm before changing.
    if depth_for_layer >= 0.75 * last_interval_end:
        return None

    return group

# Build one interval table per layer. Layers are visited in order and every row
# is assigned to at most one layer: the first one whose depth is at or below
# the row's INTERVALSTART.
for layer in layers:
    print(f"Processing layer: {layer}")

    # Step 1: rows starting at or above this layer's depth that no earlier
    # layer has already taken.
    current_layer_rows = (updated_df['INTERVALSTART'] <= updated_df[layer]) & ~previous_rows

    # Steps 2-4 as one chain:
    #   2. order by well, then by interval start
    #   3. Kxy: the -9999.0 sentinel and NaN both become 0.0001
    #   4. INTERVALLENGTH-weighted products used later for the averages
    #
    # NOTE(review): the products use INTERVALLENGTH as loaded and are computed
    # before process_group() clips INTERVALEND, so the clip does not change any
    # Multiply* value. In the preview printed earlier in this notebook,
    # INTERVALLENGTH equals INTERVALSTART + INTERVALEND (e.g. 55 and 57 -> 112)
    # rather than their difference. Confirm the column upstream.
    layer_df = (
        updated_df[current_layer_rows]
        .sort_values(by=['WellName', 'INTERVALSTART'])
        .assign(Kxy=lambda frame: frame['Kxy'].replace(-9999.0, 0.0001).fillna(0.0001))
        .assign(
            MultiplyCoarse=lambda frame: frame['INTERVALLENGTH'] * frame['Coarse'],
            MultiplyKxy=lambda frame: frame['INTERVALLENGTH'] * frame['Kxy'],
            MultiplyKv=lambda frame: frame['INTERVALLENGTH'] * frame['Kv'],
            MultiplySy=lambda frame: frame['INTERVALLENGTH'] * frame['SY'],
            MultiplySs=lambda frame: frame['INTERVALLENGTH'] * frame['Ss'],
        )
    )

    # Debugging aid: summary statistics of the cleaned Kxy values
    print(f"Statistics for 'Kxy' in {layer}:")
    print(layer_df['Kxy'].describe())

    # Step 5: run the per-well rule; process_group returns None for wells
    # that should be left out of this layer.
    per_well = (process_group(well_rows, layer) for _, well_rows in layer_df.groupby('WellName'))
    filtered_groups = [kept for kept in per_well if kept is not None]

    # Stitch the surviving wells back together (empty frame if none survived)
    if filtered_groups:
        layer_dataframes[layer] = pd.concat(filtered_groups)
    else:
        layer_dataframes[layer] = pd.DataFrame()

    # Rows selected for this layer are off-limits to the layers that follow,
    # including rows of wells that process_group discarded.
    previous_rows = previous_rows | current_layer_rows

    # Preview
    print(f"First few rows of the DataFrame for {layer}:")
    print(layer_dataframes[layer].head())

Processing layer: L1
Statistics for 'Kxy' in L1:
count    5.095101e+06
mean     2.238967e+01
std      3.731831e+01
min      1.431496e-07
25%      1.361523e-01
50%      2.472949e+00
75%      1.911594e+01
max      2.500000e+02
Name: Kxy, dtype: float64
First few rows of the DataFrame for L1:
        WellName  Well  Point              X             Y            Zland  \
347403         1     1     51  713861.829177  4.107089e+06  103.36000330752   
2224612        1     1     71  819457.149950  3.899480e+06  927.58002968256   
4805721        1     1     25  620122.947435  4.320000e+06   52.11000166752   
7535827        1     1      1  578943.523506  4.395457e+06   172.4000055168   
4805722        1     1     26  620122.947435  4.320000e+06   52.11000166752   

         INTERVALSTART  INTERVALEND  Coarse     Kxy  ...  INTERVALLENGTH  \
347403             0.0     3.000000     8.0  0.0001  ...        3.000000   
2224612            0.0    25.000001     5.0  0.0001  ...       25.000001   
480572

In [None]:
# Compute INTERVALLENGTH-weighted averages of each parameter, per well
# location, for every layer, then write one CSV per layer.

# Column whose per-group sum is the denominator of every average
WEIGHT_COLUMN = 'INTERVALLENGTH'

# Pre-multiplied column -> name of the resulting average column.
# The order here is the column order of the output.
AVERAGE_COLUMNS = {
    'MultiplyCoarse': 'Avg_Coarse',
    'MultiplyKxy': 'Avg_Kxy',
    'MultiplyKv': 'Avg_Kv',
    'MultiplySs': 'Avg_Ss',
    'MultiplySy': 'Avg_Sy',
}


def weighted_group_averages(frame, keys=('WellName', 'X', 'Y')):
    """Return sum(Multiply*) / sum(INTERVALLENGTH) for each group of ``keys``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``keys``, WEIGHT_COLUMN and every key of AVERAGE_COLUMNS.
    keys : sequence of str
        Grouping columns; they come back as ordinary columns of the result.

    Returns
    -------
    pandas.DataFrame
        One row per group: the key columns followed by the Avg_* columns.
        A group whose weights sum to zero yields inf/NaN rather than an error.
    """
    # One vectorised pass replaces groupby.apply with a Python lambda per
    # group, which is slow on large frames and triggers a pandas warning
    # about apply operating on the grouping columns.
    sums = frame.groupby(list(keys))[[WEIGHT_COLUMN, *AVERAGE_COLUMNS]].sum()
    averages = sums[list(AVERAGE_COLUMNS)].div(sums[WEIGHT_COLUMN], axis=0)
    return averages.rename(columns=AVERAGE_COLUMNS).reset_index()


for layer in layers:
    print(f"Processing grouped operations for {layer}...")

    # Access the processed DataFrame for the current layer
    layer_df = layer_dataframes[layer]

    # Every column the averages read must be present; report exactly which
    # ones are not (this also catches a layer that ended up with no rows).
    missing = [col for col in (WEIGHT_COLUMN, *AVERAGE_COLUMNS) if col not in layer_df.columns]
    if missing:
        raise ValueError(f"Required column(s) missing in {layer}: {missing}")

    grouped_df = weighted_group_averages(layer_df)

    # Store the grouped result
    grouped_results[layer] = grouped_df

    # Display the first few rows of the results
    print(f"Grouped results for {layer}:")
    print(grouped_df.head())

# Save the grouped results to CSV files
for layer, grouped_df in grouped_results.items():
    output_file = os.path.join(output_dir, f"Grouped_Averages_{layer}.csv")
    grouped_df.to_csv(output_file, index=False)
    print(f"Grouped averages for {layer} saved to: {output_file}")

print("Processing complete.")

Processing grouped operations for L1...


  grouped_df = layer_df.groupby(['WellName', 'X', 'Y']).apply(


Grouped results for L1:
  WellName              X             Y  Avg_Coarse  Avg_Kxy    Avg_Kv  \
0        1  578943.523506  4.395457e+06   49.345973   0.0001  0.230451   
1        1  620122.947435  4.320000e+06   42.388978   0.0001  0.176190   
2        1  713861.829177  4.107089e+06   35.072852   0.0001  0.182915   
3        1  819457.149950  3.899480e+06   30.757834   0.0001  0.234428   
4       10  587088.229224  4.280778e+06   37.165659   0.0001  0.203169   

     Avg_Ss     Avg_Sy  
0  0.001553  10.847443  
1  0.001235  17.056492  
2  0.001195  13.817648  
3  0.001564  15.644755  
4  0.001576  11.186811  
Processing grouped operations for L2...


  grouped_df = layer_df.groupby(['WellName', 'X', 'Y']).apply(


Grouped results for L2:
  WellName              X             Y  Avg_Coarse  Avg_Kxy    Avg_Kv  \
0        1  578943.523506  4.395457e+06   55.980616   0.0001  0.300086   
1    10002  764899.120947  4.105654e+06   13.841682   0.0001  0.068814   
2    10004  608454.874527  4.193323e+06   36.575095   0.0001  0.002518   
3    10005  608453.614092  4.193312e+06   17.200000   0.0001  0.023642   
4    10005  764473.781381  4.052965e+06   29.267875   0.0001  0.220896   

     Avg_Ss     Avg_Sy  
0  0.001031  12.596351  
1  0.002012   8.798748  
2  0.003618  11.691065  
3  0.000365  17.776364  
4  0.001618  14.913320  
Processing grouped operations for L3...


  grouped_df = layer_df.groupby(['WellName', 'X', 'Y']).apply(


Grouped results for L3:
  WellName              X             Y  Avg_Coarse  Avg_Kxy        Avg_Kv  \
0    10056  820781.580838  4.065784e+06   10.000000   0.0001  1.000000e-07   
1    10071  800910.162596  4.082171e+06   32.500000   0.0001  2.502500e-01   
2    10072  800495.833418  4.081557e+06   32.500000   0.0001  2.502500e-01   
3    10083  740406.153000  4.003619e+06    9.107812   0.0001  3.780641e-02   
4    10098  781237.370694  4.090744e+06   32.291667   0.0001  2.483580e-01   

     Avg_Ss     Avg_Sy  
0  0.000001   0.090000  
1  0.001500  16.500000  
2  0.001500  16.500000  
3  0.002351   5.016563  
4  0.001508  16.397727  
Processing grouped operations for L4...
