In [8]:
# Cell 1: Import Libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model, Model, Sequential
from tensorflow.keras.layers import Input, Dense, Concatenate, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
import joblib
import random

# Confirm the environment: report the TensorFlow version, then that imports succeeded.
for banner_parts in (
    ("TensorFlow Version:", tf.__version__),
    ("Libraries imported.",),
):
    print(*banner_parts)

TensorFlow Version: 2.19.0
Libraries imported.


In [9]:
# Cell 2: Load All Datasets and Pre-trained Models
print("Loading all datasets and pre-trained models...")

# --- Load Datasets ---
# The two CSV files and a non-empty leaf-image directory are all required by the cells below.
try:
    env_df = pd.read_csv('../data/simulated/environmental_data.csv', parse_dates=['timestamp'])
    soil_df = pd.read_csv('../data/simulated/soil_microbe_data.csv')
    image_dir = '../data/raw/leaf_images/'
    # Raise with an explicit message so the "Details:" part of the error report is never blank.
    # isdir (rather than exists) also rejects a plain file sitting at this path, which would
    # otherwise make os.listdir fail with an exception this handler does not catch.
    if not os.path.isdir(image_dir):
        raise FileNotFoundError(f"Image directory not found: {image_dir}")
    if not os.listdir(image_dir):
        raise FileNotFoundError(f"Image directory is empty: {image_dir}")
except FileNotFoundError as e:
    print(f"ERROR: A required data file or directory is missing. Please ensure all previous steps are complete. Details: {e}")
    raise

# --- Load Models ---
try:
    env_model_path = '../backend/models/environmental_yield_model.h5'
    soil_model_path = '../backend/models/soil_microbe_yield_model.h5'
    vision_model_path = '../backend/models/leaf_vision_model.h5'
    soil_preprocessor_path = '../backend/models/soil_data_preprocessor.joblib'

    # compile=False: only the architecture and weights are needed here; skipping the saved
    # training configuration avoids deserialization errors when loading.
    env_model = load_model(env_model_path, compile=False)
    soil_model = load_model(soil_model_path, compile=False)
    vision_model = load_model(vision_model_path, compile=False)
    # NOTE(review): joblib.load unpickles the file, so only load artifacts produced by this project.
    soil_preprocessor = joblib.load(soil_preprocessor_path)
except Exception as e:
    # Report a hint for the notebook user, then re-raise so the cell still fails loudly.
    print(f"ERROR: Failed to load a model or preprocessor. Make sure all previous notebooks ran successfully. Details: {e}")
    raise

print("All datasets and models loaded successfully.")

Loading all datasets and pre-trained models...
All datasets and models loaded successfully.


In [14]:
# Cell 3: Prepare Data for Multi-Input Model
print("\nPreparing data for the fusion model...")

# We need to create a single dataset where each row corresponds to one final yield prediction
# and has pointers to the environmental sequence, soil data, and a representative leaf image.

# Use soil_df as the base: it has one row per plot_id and contains the final yield.
# Copying keeps the originally loaded frame untouched.
fusion_df = soil_df.copy()

# 1. Prepare Environmental Data Sequences
SEQUENCE_LENGTH = 168 # Must be the same as in notebook 2
ENV_FEATURES = ['temperature', 'humidity', 'soil_moisture', 'soil_ph']

env_sequences = []
short_plots = []  # (plot_id, rows_found) for plots lacking a full sequence
# For each plot, we just grab the first sequence of environmental data for simplicity.
# Note: In a real scenario, you might average sequences or use a more complex sampling method.
for plot_id in fusion_df['plot_id']:
    plot_env_data = env_df[env_df['plot_id'] == plot_id].head(SEQUENCE_LENGTH)
    # head() silently returns fewer rows when a plot has too little data; that would make
    # the stacked array ragged, so record the plot and fail with a clear message below.
    if len(plot_env_data) < SEQUENCE_LENGTH:
        short_plots.append((plot_id, len(plot_env_data)))
        continue
    env_sequences.append(plot_env_data[ENV_FEATURES].values)

if short_plots:
    raise ValueError(
        f"These plots have fewer than {SEQUENCE_LENGTH} environmental readings "
        f"(plot_id, rows found): {short_plots}"
    )
if not env_sequences:
    raise ValueError("No plots found in soil data; cannot build environmental sequences.")

# Shape: (number of plots, SEQUENCE_LENGTH, number of environmental features)
X_env = np.array(env_sequences)

# Normalize the environmental data just like we did in notebook 2.
# NOTE(review): a new scaler is fitted here for simplicity. The frozen environmental model
# presumably saw data scaled by notebook 2's scaler; reuse that one if it was saved — confirm.
from sklearn.preprocessing import MinMaxScaler
env_scaler = MinMaxScaler()
# The scaler works on 2-D data: flatten (plots, steps) into rows, scale, then restore the shape.
X_env_reshaped = X_env.reshape(-1, X_env.shape[-1])
X_env_scaled_reshaped = env_scaler.fit_transform(X_env_reshaped)
X_env = X_env_scaled_reshaped.reshape(X_env.shape)


# 2. Prepare Soil Data
# Remove the target and the identifier/label columns, then run the remaining columns
# through the soil preprocessor that was loaded from disk.
non_feature_columns = ['yield_kg_ha', 'plot_id', 'crop_mix']
X_soil_raw = fusion_df.drop(columns=non_feature_columns)
X_soil = soil_preprocessor.transform(X_soil_raw)


# 3. Prepare Image Data
# We need to associate each plot with a representative image.
# For this simulation, we randomly pick an image from a relevant category.
IMG_WIDTH, IMG_HEIGHT = 224, 224

# A dedicated, seeded generator makes the simulated plot -> image assignment reproducible
# across kernel restarts without touching the global `random` state.
IMAGE_SAMPLING_SEED = 42
image_rng = random.Random(IMAGE_SAMPLING_SEED)

# Class names are the sub-directories of image_dir (e.g. 'arecanut_healthy').
# Stray files such as '.DS_Store' are skipped so they are never treated as a class.
vision_classes = sorted(
    entry for entry in os.listdir(image_dir)
    if os.path.isdir(os.path.join(image_dir, entry))
)
if not vision_classes:
    raise FileNotFoundError(f"No class sub-directories found in: {image_dir}")

HEALTH_STATUSES = ['healthy', 'pest_damage', 'nutrient_deficiency']

image_paths = []
for primary_crop in fusion_df['primary_crop']:
    # Create a plausible image category, e.g. 'banana_healthy', with a randomly assigned status.
    crop_name = primary_crop.lower().replace(' ', '_')
    status = image_rng.choice(HEALTH_STATUSES)
    plausible_class = f"{crop_name}_{status}"

    # If that class folder doesn't exist, default to the first available one.
    if plausible_class not in vision_classes:
        plausible_class = vision_classes[0]

    class_path = os.path.join(image_dir, plausible_class)
    # Sort the listing: os.listdir order is arbitrary, which would defeat the seed.
    candidate_images = sorted(os.listdir(class_path))
    if not candidate_images:
        raise FileNotFoundError(f"No images found in class directory: {class_path}")
    image_paths.append(os.path.join(class_path, image_rng.choice(candidate_images)))

# Function to load and preprocess a single image
def preprocess_image(path):
    """Load one image from disk and return it as a rescaled array.

    Args:
        path: Filesystem path of the image to load.

    Returns:
        The image resized to IMG_HEIGHT x IMG_WIDTH and converted to an array,
        with every value divided by 255.0.
    """
    # Keras expects target_size as (height, width); the two only coincide while the image is square.
    img = load_img(path, target_size=(IMG_HEIGHT, IMG_WIDTH))
    img_array = img_to_array(img)
    img_array /= 255.0 # Rescale
    return img_array

# Load every selected image and stack the results into a single array.
X_vision = np.array(list(map(preprocess_image, image_paths)))


# 4. Prepare Target Variable
y = fusion_df['yield_kg_ha'].values

# Report the shape of each model input and of the target as a quick sanity check.
print("Data preparation complete.")
for shape_label, prepared_array in (
    ("Shape of Environmental Input (X_env):", X_env),
    ("Shape of Soil Input (X_soil):", X_soil),
    ("Shape of Vision Input (X_vision):", X_vision),
    ("Shape of Target (y):", y),
):
    print(shape_label, prepared_array.shape)


Preparing data for the fusion model...
Data preparation complete.
Shape of Environmental Input (X_env): (15, 168, 4)
Shape of Soil Input (X_soil): (15, 9)
Shape of Vision Input (X_vision): (15, 224, 224, 3)
Shape of Target (y): (15,)


In [11]:
# Cell 4: Build the Fusion Model
print("\nBuilding the Fusion Model...")

# Freeze the layers of all base models so their weights are not changed during fusion training.
env_model.trainable = False
soil_model.trainable = False
vision_model.trainable = False

# Define the three input layers for our new model.
input_env = Input(shape=(SEQUENCE_LENGTH, X_env.shape[2]), name='environmental_input')
input_soil = Input(shape=(X_soil.shape[1],), name='soil_input')
# Image tensors are laid out as (height, width, channels).
input_vision = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name='vision_input')

# Rebuild each pre-trained model as a feature extractor by dropping its final output layer.
# The branches reuse the original layer objects, so the freezing above applies to them too.

# For the Sequential models, create new models containing all layers except the last one.
env_branch = Sequential(env_model.layers[:-1], name="env_feature_extractor")
soil_branch = Sequential(soil_model.layers[:-1], name="soil_feature_extractor")

# The vision model was built with the Functional API, so the branch runs from its input
# to the output of its second-to-last layer.
vision_branch = Model(inputs=vision_model.input, outputs=vision_model.layers[-2].output, name="vision_feature_extractor")

# Pass the main inputs through their respective feature-extracting branches.
# training=False keeps the frozen branches in inference mode while the head is trained,
# so any Dropout/BatchNormalization layers inside them do not follow the training phase.
env_features = env_branch(input_env, training=False)
soil_features = soil_branch(input_soil, training=False)
vision_features = vision_branch(input_vision, training=False)

# Concatenate (fuse) the features from all three branches.
combined_features = Concatenate()([env_features, soil_features, vision_features])

# Add our new "head" model on top of the fused features to make the final prediction.
x = Dense(128, activation='relu')(combined_features)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
# Single linear unit: the model regresses one yield value per plot.
final_output = Dense(1, name='yield_prediction')(x)

# Create the final fusion model.
fusion_model = Model(inputs=[input_env, input_soil, input_vision], outputs=final_output)

# Compile the fusion model.
fusion_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

fusion_model.summary()


Building the Fusion Model...


In [12]:
# Cell 5: Train the Fusion Model
print("\nTraining the fusion model...")

# NOTE: The dataset is very small (15 plots), so training is quick but not very robust.
# This cell demonstrates the process; real-world use would require much more data.

# The three arrays are passed in the same order as the model's inputs:
# environmental sequences, soil features, leaf images.
fusion_inputs = [X_env, X_soil, X_vision]

history = fusion_model.fit(
    x=fusion_inputs,
    y=y,
    validation_split=0.2,  # fraction of the samples held out for validation
    epochs=50,
    verbose=1,
)

print("Fusion model training complete.")


Training the fusion model...
Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12s/step - loss: 1557691.6250 - mean_absolute_error: 1120.3102 - val_loss: 2368218.2500 - val_mean_absolute_error: 1388.8422
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 354ms/step - loss: 1521740.8750 - mean_absolute_error: 1105.7889 - val_loss: 2318738.7500 - val_mean_absolute_error: 1372.5343
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347ms/step - loss: 1489623.3750 - mean_absolute_error: 1092.5873 - val_loss: 2270357.2500 - val_mean_absolute_error: 1356.3999
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384ms/step - loss: 1464979.6250 - mean_absolute_error: 1080.6664 - val_loss: 2222358.5000 - val_mean_absolute_error: 1340.2046
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 447ms/step - loss: 1434936.6250 - mean_absolute_error: 1068.5778 - val_loss: 2175855.5000 - val_me

In [15]:
# Cell 6: Save the Final Fusion Model
print("\nSaving the final fusion model...")

# Both artifacts are written into the same models directory.
model_dir = '../backend/models/'
model_path = os.path.join(model_dir, 'fusion_yield_model.h5')
scaler_path = os.path.join(model_dir, 'env_data_scaler.joblib')

# Create the target directory up front; exist_ok avoids an error when it is already there.
os.makedirs(model_dir, exist_ok=True)

# Persist the trained fusion model.
fusion_model.save(model_path)
print(f"Fusion model saved successfully to: {model_path}")

# Persist the scaler that was fitted on the environmental data.
joblib.dump(env_scaler, scaler_path)
print(f"Environmental data scaler saved successfully to: {scaler_path}")

print("\nThis is the final, unified model for your backend API.")




Saving the final fusion model...
Fusion model saved successfully to: ../backend/models/fusion_yield_model.h5
Environmental data scaler saved successfully to: ../backend/models/env_data_scaler.joblib

This is the final, unified model for your backend API.
