In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd

# Define the base path where your 'assets' folder is located.
# Assuming 'assets' is in the same directory as your script or notebook.
BASE_PATH = 'assets'
PERM_FOLDER = os.path.join(BASE_PATH, 'permeability')
PORO_FOLDER = os.path.join(BASE_PATH, 'porosity')

# Define the range of sample numbers we will process for images.
# Based on our previous discussion, porosity maps go up to 756.
MIN_SAMPLE_NUM = 1
MAX_SAMPLE_NUM = 756 # As confirmed, porosity maps go up to poro_map_0756

permeability_maps = []
porosity_maps = []
processed_sample_numbers = [] # To keep track of samples successfully processed

print("Starting image loading and preprocessing...")

for sample_num in range(MIN_SAMPLE_NUM, MAX_SAMPLE_NUM + 1):
    perm_filename = os.path.join(PERM_FOLDER, f'perm_map_{sample_num:04d}.tiff')
    poro_filename = os.path.join(PORO_FOLDER, f'poro_map_{sample_num:04d}.tiff')

    try:
        # Load permeability map
        with Image.open(perm_filename) as perm_img:
            perm_array = np.array(perm_img, dtype=np.float32)
            # Normalize to [0, 1] range. Assuming max pixel value is known or it's implicitly handled.
            # If data is already normalized or has a specific max value (e.g., 255 for 8-bit images), adjust accordingly.
            # For scientific data, max_value might be higher or vary, so we'll normalize by its max value
            # or a theoretical maximum if known. Here, assuming we need to normalize based on current max.
            perm_array_normalized = perm_array / perm_array.max() if perm_array.max() > 0 else perm_array
            permeability_maps.append(perm_array_normalized)

        # Load porosity map
        with Image.open(poro_filename) as poro_img:
            poro_array = np.array(poro_img, dtype=np.float32)
            # Normalize to [0, 1] range
            poro_array_normalized = poro_array / poro_array.max() if poro_array.max() > 0 else poro_array
            porosity_maps.append(poro_array_normalized)

        processed_sample_numbers.append(sample_num)

    except FileNotFoundError:
        print(f"Warning: Missing file for sample {sample_num}. Skipping.")
        # If a file is missing for a sample, we should not include this sample
        # in our final image datasets to maintain consistency.
        continue
    except Exception as e:
        print(f"Error processing sample {sample_num}: {e}. Skipping.")
        continue

# Convert lists of arrays to a single NumPy array
# Add a channel dimension for CNN input (e.g., (N, H, W, C) where C=1 for grayscale images)
permeability_maps_np = np.array(permeability_maps)[..., np.newaxis]
porosity_maps_np = np.array(porosity_maps)[..., np.newaxis]

print(f"\nFinished image loading and preprocessing for {len(processed_sample_numbers)} samples.")
print(f"Shape of processed permeability maps: {permeability_maps_np.shape}")
print(f"Shape of processed porosity maps: {porosity_maps_np.shape}")

# Now, we need to filter our numerical dataframe one more time
# to ensure it only contains the sample numbers for which images were successfully loaded.
# Use df_filtered_by_images from previous step.
# Make sure df_filtered_by_images is defined, if not, re-run the previous cells or define it here for execution
# For continuity, let's assume df_filtered_by_images is already defined from previous steps.

# If df_filtered_by_images wasn't already available from the previous run,
# uncomment and run these lines to ensure it's defined:
# df = pd.read_csv('data.xlsx - Sheet1.csv')
# df_cleaned = df.dropna(subset=['Initial Sw', 'Oil Rate (m3/day)', 'Cumulative Oil (M m3)'])
# df_filtered_by_images = df_cleaned[df_cleaned['sample number'] <= MAX_SAMPLE_NUM].copy()


# Filter the numerical data to match the successfully loaded images
df_final_numerical = df_filtered_by_images[df_filtered_by_images['sample number'].isin(processed_sample_numbers)].copy()

print(f"\nShape of final numerical DataFrame after aligning with processed images: {df_final_numerical.shape}")
print(f"Number of unique samples in final numerical DataFrame: {df_final_numerical['sample number'].nunique()}")
print("First few rows of the final numerical DataFrame:")
print(df_final_numerical.head())

# At this point, you have:
# - permeability_maps_np: A NumPy array of all processed permeability maps.
# - porosity_maps_np: A NumPy array of all processed porosity maps.
# - df_final_numerical: A pandas DataFrame containing the numerical data (Initial Sw, Oil Rate, Cumulative Oil)
#                       for exactly the same samples for which images were loaded.

# The next step will be to prepare these data structures for input into a deep neural network,
# including splitting them into training, validation, and test sets.

Starting image loading and preprocessing...


  poro_array_normalized = poro_array / poro_array.max() if poro_array.max() > 0 else poro_array



Finished image loading and preprocessing for 756 samples.
Shape of processed permeability maps: (756, 64, 64, 1)
Shape of processed porosity maps: (756, 64, 64, 1)


NameError: name 'df_filtered_by_images' is not defined

In [4]:
from PIL import Image
import numpy as np

sample_image_path = './assets/permeability/perm_map_0001.tiff'
img = Image.open(sample_image_path)
img_array = np.array(img)

print(f"نوع داده‌ای پیکسل‌ها: {img_array.dtype}")
print(f"کمترین مقدار پیکسل در تصویر نمونه: {img_array.min()}")
print(f"بیشترین مقدار پیکسل در تصویر نمونه: {img_array.max()}")

نوع داده‌ای پیکسل‌ها: float32
کمترین مقدار پیکسل در تصویر نمونه: 0.003000000026077032
بیشترین مقدار پیکسل در تصویر نمونه: 261.2349853515625


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Assuming permeability_maps_np, porosity_maps_np, and df_final_numerical are already defined from previous successful execution.
# If you are restarting the session, you might need to re-run the previous code cells to define these variables.

# --- 1. Prepare Numerical Inputs and Targets ---
# The numerical DataFrame (df_final_numerical) already contains Initial Sw, Oil Rate, and Cumulative Oil
# aligned with the processed image sample numbers.

# Sort the numerical dataframe by 'sample number' and 'Month' to ensure consistency
df_final_numerical = df_final_numerical.sort_values(by=['sample number', 'Month (2026)']).reset_index(drop=True)

# Extract Initial Sw values. Since Sw is per sample, and images are per sample,
# we need to replicate Sw for each month's entry for that sample in the training/splitting process.
# However, a cleaner approach for multi-modal input is to pass Sw as a separate branch in the model.
# For splitting, we will use unique sample numbers.

# Get unique sample numbers for splitting
all_sample_numbers = df_final_numerical['sample number'].unique()

# Separate the numerical input (Initial Sw) and the target variables
# Note: Initial Sw is constant per sample, so we can extract it once per unique sample.
# The `Month (2026)` column also serves as a numerical input.
numerical_input_data = df_final_numerical[['Initial Sw', 'Month (2026)']].values
target_oil_rate = df_final_numerical['Oil Rate (m3/day)'].values
target_cumulative_oil = df_final_numerical['Cumulative Oil (M m3)'].values

# --- 2. Splitting the Dataset (by Sample Number) ---
# It's crucial to split based on unique sample numbers to prevent data leakage.
# Data from all months for a given sample must go into the same split (train/val/test).

# We will split the `all_sample_numbers` first, then use these lists of sample numbers
# to filter the image arrays and the numerical DataFrame.

# First, split into training and a temporary set (validation + test)
train_samples, temp_samples = train_test_split(
    all_sample_numbers,
    test_size=0.3, # e.g., 70% train, 30% temp
    random_state=42 # for reproducibility
)

# Then, split the temporary set into validation and test sets
val_samples, test_samples = train_test_split(
    temp_samples,
    test_size=0.5, # 50% of temp, so 15% of total for validation and 15% for test
    random_state=42 # for reproducibility
)

print(f"\nTotal unique samples: {len(all_sample_numbers)}")
print(f"Training samples: {len(train_samples)}")
print(f"Validation samples: {len(val_samples)}")
print(f"Test samples: {len(test_samples)}")

# --- 3. Create Dictionaries/Mapping for Image Data ---
# Map sample number to its index in the image arrays (0 to 755 for 756 samples)
# This mapping assumes that images were loaded sequentially for sample numbers 1 to 756.
# We need to map the actual sample number (e.g., 1) to its index in the np arrays (e.g., 0).
# The 'processed_sample_numbers' list generated during image loading gives us this mapping.

sample_num_to_img_idx = {num: idx for idx, num in enumerate(processed_sample_numbers)}

# --- 4. Prepare Final Train/Validation/Test Datasets ---
# Initialize lists to hold the data for each split
X_train_perm, X_train_poro, X_train_sw_month = [], [], []
y_train_oil_rate, y_train_cumulative_oil = [], []

X_val_perm, X_val_poro, X_val_sw_month = [], [], []
y_val_oil_rate, y_val_cumulative_oil = [], []

X_test_perm, X_test_poro, X_test_sw_month = [], [], []
y_test_oil_rate, y_test_cumulative_oil = [], []


# Function to append data to lists for a given set of samples
def populate_data_lists(sample_list, perm_list, poro_list, sw_month_list, oil_rate_list, cumulative_oil_list, df_data, img_perm_arr, img_poro_arr, mapping):
    for sample in sample_list:
        # Get the corresponding rows from the numerical DataFrame for this sample
        sample_rows = df_data[df_data['sample number'] == sample]

        # Get the image index for this sample
        img_idx = mapping.get(sample)
        if img_idx is None:
            # This should ideally not happen if filtering was done correctly
            # but acts as a safeguard.
            print(f"Error: Image index not found for sample {sample}. Skipping.")
            continue

        # Append data for each month of this sample
        for _, row in sample_rows.iterrows():
            perm_list.append(img_perm_arr[img_idx])
            poro_list.append(img_poro_arr[img_idx])
            sw_month_list.append([row['Initial Sw'], row['Month (2026)']]) # Combine Sw and Month as a single numerical input for this entry
            oil_rate_list.append(row['Oil Rate (m3/day)'])
            cumulative_oil_list.append(row['Cumulative Oil (M m3)'])

# Populate training data
print("\nPopulating training data...")
populate_data_lists(
    train_samples,
    X_train_perm, X_train_poro, X_train_sw_month,
    y_train_oil_rate, y_train_cumulative_oil,
    df_final_numerical, permeability_maps_np, porosity_maps_np, sample_num_to_img_idx
)

# Populate validation data
print("Populating validation data...")
populate_data_lists(
    val_samples,
    X_val_perm, X_val_poro, X_val_sw_month,
    y_val_oil_rate, y_val_cumulative_oil,
    df_final_numerical, permeability_maps_np, porosity_maps_np, sample_num_to_img_idx
)

# Populate test data
print("Populating test data...")
populate_data_lists(
    test_samples,
    X_test_perm, X_test_poro, X_test_sw_month,
    y_test_oil_rate, y_test_cumulative_oil,
    df_final_numerical, permeability_maps_np, porosity_maps_np, sample_num_to_img_idx
)

# Convert lists to NumPy arrays
X_train_perm_np = np.array(X_train_perm)
X_train_poro_np = np.array(X_train_poro)
X_train_sw_month_np = np.array(X_train_sw_month)
y_train_oil_rate_np = np.array(y_train_oil_rate)
y_train_cumulative_oil_np = np.array(y_train_cumulative_oil)

X_val_perm_np = np.array(X_val_perm)
X_val_poro_np = np.array(X_val_poro)
X_val_sw_month_np = np.array(X_val_sw_month)
y_val_oil_rate_np = np.array(y_val_oil_rate)
y_val_cumulative_oil_np = np.array(y_val_cumulative_oil)

X_test_perm_np = np.array(X_test_perm)
X_test_poro_np = np.array(X_test_poro)
X_test_sw_month_np = np.array(X_test_sw_month)
y_test_oil_rate_np = np.array(y_test_oil_rate)
y_test_cumulative_oil_np = np.array(y_test_cumulative_oil)

print("\n--- Final Dataset Shapes ---")
print(f"X_train_perm_np shape: {X_train_perm_np.shape}")
print(f"X_train_poro_np shape: {X_train_poro_np.shape}")
print(f"X_train_sw_month_np shape: {X_train_sw_month_np.shape}")
print(f"y_train_oil_rate_np shape: {y_train_oil_rate_np.shape}")
print(f"y_train_cumulative_oil_np shape: {y_train_cumulative_oil_np.shape}")

print(f"\nX_val_perm_np shape: {X_val_perm_np.shape}")
print(f"X_val_poro_np shape: {X_val_poro_np.shape}")
print(f"X_val_sw_month_np shape: {X_val_sw_month_np.shape}")
print(f"y_val_oil_rate_np shape: {y_val_oil_rate_np.shape}")
print(f"y_val_cumulative_oil_np shape: {y_val_cumulative_oil_np.shape}")

print(f"\nX_test_perm_np shape: {X_test_perm_np.shape}")
print(f"X_test_poro_np shape: {X_test_poro_np.shape}")
print(f"X_test_sw_month_np shape: {X_test_sw_month_np.shape}")
print(f"y_test_oil_rate_np shape: {y_test_oil_rate_np.shape}")
print(f"y_test_cumulative_oil_np shape: {y_test_cumulative_oil_np.shape}")

# At this point, your data is fully prepared and split into:
# Training Data: (X_train_perm_np, X_train_poro_np, X_train_sw_month_np) for inputs
#                (y_train_oil_rate_np, y_train_cumulative_oil_np) for targets
# Validation Data: Similar arrays for validation
# Test Data: Similar arrays for testing

# The next step is to define and build the Deep Neural Network architecture.

NameError: name 'df_final_numerical' is not defined

In [1]:
import pandas as pd
import numpy as np
import joblib
from tensorflow import keras

# 1. Load data
data = pd.read_csv('processed_tabular_data.csv')
X_numerical_all = data['Initial Sw'].values.reshape(-1, 1)
sample_numbers = data['sample number'].values

# 2. Load scalers and model
num_scaler = joblib.load('num_scaler.gz')
y_scaler = joblib.load('y_scaler.gz')
model = keras.models.load_model('best_model.keras')

# 3. Prepare image data (فرض بر اینه که X_image رو داری یا باید بسازی)
# X_image = ...  # باید این رو طبق پروژه‌ات بسازی یا لود کنی

# 4. Scale numerical data
X_numerical_scaled_all = num_scaler.transform(X_numerical_all)

# 5. Scale image data (فرض بر اینه که image_data مرتب با دیتاسته)
X_image_scaled_all = X_image.copy()
if (perm_max - perm_min) != 0:
    X_image_scaled_all[:, :, :, 0] = (X_image_scaled_all[:, :, :, 0] - perm_min) / (perm_max - perm_min)
if (poro_max - poro_min) != 0:
    X_image_scaled_all[:, :, :, 1] = (X_image_scaled_all[:, :, :, 1] - poro_min) / (poro_max - poro_min)

# 6. Predict
predictions_scaled = model.predict({
    'image_input': np.array(X_image_scaled_all),
    'numerical_input': np.array(X_numerical_scaled_all)
})

# 7. Inverse scaling
predictions_original = y_scaler.inverse_transform(predictions_scaled)

# 8. Post-process negative values
predictions_original[predictions_original < 0] = 0

# 9. Save to DataFrame
target_cols = ['param1', 'param2', ..., 'param12']  # اسامی ۱۲ پارامتر خروجی مدل
df_predictions = pd.DataFrame(predictions_original, columns=target_cols)
df_predictions.insert(0, 'sample number', sample_numbers)

# 10. Save to Excel
df_predictions.to_excel('full_dataset_predictions.xlsx', index=False)
print("✅ Predictions saved to 'full_dataset_predictions.xlsx'")

  saveable.load_own_variables(weights_store.get(inner_path))


NameError: name 'X_image' is not defined