### Setting up Workspace

#### Importing libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as sk
import tensorflow as tf
import keras
from keras.models import Sequential
import math
from pathlib import Path
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score,recall_score, f1_score
import os
import csv
from zipfile import ZipFile
import json

In [None]:
# Read the Kaggle API credentials from kaggle.json and export them as
# environment variables for the `kaggle` command used in the next cell.
# The context manager closes the file handle once the JSON has been parsed.
with open('kaggle.json') as credentials_file:
    kaggle_credentials = json.load(credentials_file)
os.environ['KAGGLE_USERNAME'] = kaggle_credentials['username']
os.environ['KAGGLE_KEY'] = kaggle_credentials['key']

In [None]:
!kaggle datasets download -d abdallahalidev/plantvillage-dataset

Dataset URL: https://www.kaggle.com/datasets/abdallahalidev/plantvillage-dataset
License(s): CC-BY-NC-SA-4.0
Downloading plantvillage-dataset.zip to /content
 97% 1.98G/2.04G [00:02<00:00, 895MB/s]
100% 2.04G/2.04G [00:02<00:00, 969MB/s]


In [None]:
# Unpack the downloaded archive into the current working directory.
with ZipFile(r'/content/plantvillage-dataset.zip', 'r') as zipObj:
   zipObj.extractall()

In [None]:
# Directory handed to flow_from_directory below (the colour variant of the dataset).
base_dir = r'/content/plantvillage dataset/color'

In [None]:
# Images are resized to img_size x img_size and served in batches of batch_size.
img_size = 224
batch_size = 32

In [None]:
# Shared image generator: pixel values are multiplied by 1/255 and 20% of the
# files are reserved for the 'validation' subset.
Data_Gen = ImageDataGenerator(
    rescale = 1./255,
    validation_split = 0.2
)

In [None]:
# Training subset with one-hot labels. shuffle=False keeps the file order
# fixed; the weather generator built later looks files up by their position in
# `filepaths`, so it relies on that order.
train_generator = Data_Gen.flow_from_directory(
    base_dir,
    target_size = (img_size, img_size),
    batch_size = batch_size,
    class_mode = 'categorical',
    subset = 'training',
    shuffle = False
)

Found 43456 images belonging to 38 classes.


In [None]:
# Validation subset, configured exactly like the training generator
# (one-hot labels, fixed file order).
val_generator = Data_Gen.flow_from_directory(
    base_dir,
    target_size = (img_size, img_size),
    batch_size = batch_size,
    class_mode = 'categorical',
    subset = 'validation',
    shuffle = False
)

Found 10849 images belonging to 38 classes.


In [None]:
# Invert the generator's {class name: index} dict so that a class name can be
# looked up from its integer index.
class_indices = {v: k for k, v in train_generator.class_indices.items()}

#### Weather data parameters

In [None]:
# ---------- 1) disease list (user-provided) ----------
# One 'Plant___Condition' label per class; a label's position in the list is
# its numeric id.
DISEASE_LIST = [
    'Apple___Apple_scab',
    'Apple___Black_rot',
    'Apple___Cedar_apple_rust',
    'Apple___healthy',
    'Blueberry___healthy',
    'Cherry_(including_sour)___Powdery_mildew',
    'Cherry_(including_sour)___healthy',
    'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
    'Corn_(maize)___Common_rust_',
    'Corn_(maize)___Northern_Leaf_Blight',
    'Corn_(maize)___healthy',
    'Grape___Black_rot',
    'Grape___Esca_(Black_Measles)',
    'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)',
    'Grape___healthy',
    'Orange___Haunglongbing_(Citrus_greening)',
    'Peach___Bacterial_spot',
    'Peach___healthy',
    'Pepper,_bell___Bacterial_spot',
    'Pepper,_bell___healthy',
    'Potato___Early_blight',
    'Potato___Late_blight',
    'Potato___healthy',
    'Raspberry___healthy',
    'Soybean___healthy',
    'Squash___Powdery_mildew',
    'Strawberry___Leaf_scorch',
    'Strawberry___healthy',
    'Tomato___Bacterial_spot',
    'Tomato___Early_blight',
    'Tomato___Late_blight',
    'Tomato___Leaf_Mold',
    'Tomato___Septoria_leaf_spot',
    'Tomato___Spider_mites Two-spotted_spider_mite',
    'Tomato___Target_Spot',
    'Tomato___Tomato_Yellow_Leaf_Curl_Virus',
    'Tomato___Tomato_mosaic_virus',
    'Tomato___healthy',
]
# Reverse lookup: label -> position in DISEASE_LIST.
DISEASE_TO_ID = dict(zip(DISEASE_LIST, range(len(DISEASE_LIST))))

In [None]:
def parse_disease_name(disease_name):
    """Split a ``Plant___Condition`` label into ``(plant_type, disease_type)``.

    Both parts are stripped of surrounding whitespace. A label that does not
    consist of exactly two ``___``-separated pieces is returned unchanged and
    paired with ``"unknown"``.
    """
    pieces = [piece.strip() for piece in disease_name.split('___')]
    if len(pieces) != 2:
        return disease_name, "unknown"
    plant_type, disease_type = pieces
    return plant_type, disease_type

# Base weather profiles for each plant type (healthy growing conditions).
# Each row is written in the order of _PLANT_PROFILE_FIELDS:
#   temp_range            (min, max) in °C - optimal growing range
#   humidity_base         % relative humidity
#   soil_moisture_target  m3/m3
#   temp_tolerance        °C - temperature variation tolerance
_PLANT_PROFILE_FIELDS = ('temp_range', 'humidity_base', 'soil_moisture_target', 'temp_tolerance')
_PLANT_PROFILE_ROWS = {
    'Apple':                   ((18, 24), 65, 0.25, 3),
    'Tomato':                  ((20, 26), 70, 0.30, 2),
    'Corn_(maize)':            ((22, 28), 60, 0.35, 4),
    'Potato':                  ((16, 22), 75, 0.28, 2),
    'Grape':                   ((20, 25), 60, 0.20, 3),
    'Peach':                   ((21, 27), 65, 0.25, 3),
    'Cherry_(including_sour)': ((18, 24), 68, 0.22, 3),
    'Strawberry':              ((18, 24), 70, 0.30, 2),
    'Pepper,_bell':            ((21, 27), 65, 0.28, 2),
    'Squash':                  ((20, 26), 70, 0.32, 3),
    'Orange':                  ((24, 30), 60, 0.25, 4),
    'Soybean':                 ((22, 28), 65, 0.30, 3),
    'Raspberry':               ((18, 23), 72, 0.28, 2),
    'Blueberry':               ((16, 22), 75, 0.35, 2),
}
BASE_PLANT_PROFILES = {
    plant: dict(zip(_PLANT_PROFILE_FIELDS, values))
    for plant, values in _PLANT_PROFILE_ROWS.items()
}

# Disease modification rules (pathogen-friendly modifications), applied on top
# of a plant's base profile. Each row follows _DISEASE_MOD_FIELDS:
#   humidity_shift      added to the plant's humidity_base (%)
#   temp_stress_factor  multiplier applied to the plant's temp_tolerance
#   moisture_shift      added to the plant's soil_moisture_target (m3/m3)
#   leaf_wetness_boost  extra leaf-wetness hours
# Key order matters: get_disease_category returns the first key that occurs in
# the disease label.
# NOTE(review): the earlier annotations called factors below 1 "more
# temperature variation", but the weather builder multiplies the temperature
# noise width by this factor, so values below 1 narrow it - confirm the intent.
_DISEASE_MOD_FIELDS = ('humidity_shift', 'temp_stress_factor', 'moisture_shift', 'leaf_wetness_boost')
_DISEASE_MOD_ROWS = {
    # No modifications - use base profile
    'healthy': (0, 1.0, 0, 0),
    # Fungal diseases - love moisture and moderate temps
    'blight': (+8, 0.8, +0.08, +6),
    'rust':   (+10, 0.7, +0.06, +8),
    'rot':    (+12, 0.6, +0.10, +10),
    'scab':   (+6, 0.8, +0.05, +5),
    'mildew': (+15, 0.7, +0.08, +12),
    # Bacterial diseases - also like moisture but different patterns
    'spot':   (+5, 0.9, +0.04, +4),
    # Viral diseases - stress conditions
    'virus':  (-2, 0.5, -0.02, +2),
    'mosaic': (-3, 0.6, -0.03, +1),
}
DISEASE_MODIFICATIONS = {
    category: dict(zip(_DISEASE_MOD_FIELDS, values))
    for category, values in _DISEASE_MOD_ROWS.items()
}

In [None]:
def get_disease_category(disease_type):
    """Map a disease label onto a weather-modification category.

    The lower-cased label is first compared against every key of
    ``DISEASE_MODIFICATIONS`` in dict order, and the first key contained in
    the label is returned. If no key matches, a fixed keyword table is tried.
    Labels that still match nothing are treated as ``'spot'``.
    """
    label = disease_type.lower()

    # Primary lookup: first category name that appears inside the label.
    matched = next((name for name in DISEASE_MODIFICATIONS if name in label), None)
    if matched is not None:
        return matched

    # Keyword fallback, checked top to bottom. It only matters for keywords
    # that are not keys of DISEASE_MODIFICATIONS.
    fallback_rules = (
        ('blight', 'blight'),
        ('rust', 'rust'),
        ('rot', 'rot'),
        ('spot', 'spot'),
        ('mildew', 'mildew'),
        ('virus', 'virus'),
        ('mosaic', 'virus'),
        ('healthy', 'healthy'),
    )
    for keyword, category in fallback_rules:
        if keyword in label:
            return category

    return 'spot'  # Default to bacterial-like conditions

#### Build the weather generator function

In [None]:
def build_targeted_hourly_weather(start_dt: datetime, end_dt: datetime,
                                 plant_type: str, disease_type: str, seed=42) -> pd.DataFrame:
    """
    Build synthetic hourly weather data tailored to a plant-disease combination.

    Args:
        start_dt: Timestamp of the first hourly row.
        end_dt: End of the span; one row is produced for every full hour
            between ``start_dt`` and ``end_dt``.
        plant_type: Key into ``BASE_PLANT_PROFILES``; unknown plants fall back
            to the 'Tomato' profile.
        disease_type: Disease label, mapped to a ``DISEASE_MODIFICATIONS``
            entry through ``get_disease_category``.
        seed: Passed to ``np.random.seed``. Note that this reseeds NumPy's
            global random state as a side effect.

    Returns:
        DataFrame indexed by ``timestamp`` with the columns air_temp_C,
        rel_humidity_%, leaf_wetness_hours_last24, precip_mm_hr,
        soil_moisture_m3m3, dew_point_C, vpd_kPa, wind_speed_m_s, solar_W_m2,
        soil_temp_C and frost_flag.

    Raises:
        ValueError: If the span covers less than one full hour.
    """
    np.random.seed(seed)
    hours = int((end_dt - start_dt).total_seconds() // 3600)
    if hours <= 0:
        raise ValueError("end_dt must be at least one hour after start_dt")
    timestamps = [start_dt + timedelta(hours=i) for i in range(hours)]
    hour_of_day = np.array([t.hour for t in timestamps])

    # Get base plant profile
    plant_profile = BASE_PLANT_PROFILES.get(plant_type, BASE_PLANT_PROFILES['Tomato'])

    # Get disease modifications
    disease_category = get_disease_category(disease_type)
    disease_mods = DISEASE_MODIFICATIONS.get(disease_category, DISEASE_MODIFICATIONS['healthy'])

    # Temperature: the centre comes from the plant's range, the noise width is
    # the plant tolerance scaled by the disease's stress factor.
    temp_min, temp_max = plant_profile['temp_range']
    temp_center = (temp_min + temp_max) / 2
    temp_tolerance = plant_profile['temp_tolerance'] * disease_mods['temp_stress_factor']

    # Sinusoidal daily cycle (crosses the centre at 06:00) plus Gaussian noise
    temp_daily = temp_center + (temp_max - temp_center) * 0.7 * np.sin((hour_of_day - 6) / 24 * 2 * np.pi)
    air_temp = temp_daily + np.random.normal(0, temp_tolerance, hours)

    # Humidity: plant base + disease shift, lowered when warmer than the centre
    target_humidity = plant_profile['humidity_base'] + disease_mods['humidity_shift']
    rel_humidity = target_humidity - 0.9 * (air_temp - temp_center) + np.random.normal(0, 4.5, hours)
    rel_humidity = np.clip(rel_humidity, 20, 100)

    # Precipitation: a few storms, each decaying exponentially from its start
    precip = np.zeros(hours)
    days = max(1, (end_dt - start_dt).days)
    n_storms = max(3, days // 7)
    if disease_category in ['blight', 'rust', 'rot', 'mildew']:
        n_storms = int(n_storms * 1.3)  # More rain events for fungal diseases
    # Sampling without replacement cannot draw more start hours than exist.
    n_storms = min(n_storms, hours)

    storm_centers = np.random.choice(hours, n_storms, replace=False)
    for c in storm_centers:
        dur = np.random.randint(3, 18)
        intens = np.random.uniform(0.5, 20)
        end_idx = min(hours, c + dur)
        span = np.arange(end_idx - c)
        precip[c:end_idx] += intens * np.exp(-0.25 * span)

    # Leaf wetness with disease-specific boost
    leaf_wetness = np.zeros(hours, dtype=int)
    base_wetness_boost = disease_mods['leaf_wetness_boost']
    for i in range(hours):
        # The 24 most recent hourly samples, current hour included. The
        # previous slice started at i-24 and therefore covered 25 hours.
        last24 = precip[max(0, i-23):i+1]
        dew = 1 if (rel_humidity[i] > 92 and hour_of_day[i] < 8) else 0
        base_wetness = np.sum(last24 > 0) + dew*3 + np.random.randint(0,3)
        leaf_wetness[i] = min(24, int(base_wetness + base_wetness_boost))

    # Soil moisture: starts at the (shifted) target, decays each hour, dries
    # faster when warm and is recharged by rain.
    target_moisture = plant_profile['soil_moisture_target'] + disease_mods['moisture_shift']
    soil_moisture = np.zeros(hours)
    soil_moisture[0] = target_moisture
    for i in range(1, hours):
        gain = min(0.18, precip[i] / 60.0) if precip[i] > 0 else 0.
        soil_moisture[i] = soil_moisture[i-1] * 0.994 - 0.0007 * max(0, air_temp[i]-temp_center) + gain
    soil_moisture = np.clip(soil_moisture, 0.03, 0.6)

    # Derived parameters: dew point, saturation (es) and actual (ea) vapour
    # pressure, and their difference as the vapour-pressure deficit.
    dew_point = air_temp - (100 - rel_humidity) / 5.0
    es = 0.6108 * np.exp(17.27 * air_temp / (air_temp + 237.3))
    ea = es * (rel_humidity / 100.0)
    vpd = es - ea

    wind_speed = np.abs(np.random.normal(2.5, 1.1, hours))
    # Half-sine daylight curve, zero outside 06:00-18:00, plus noise
    solar = np.maximum(0, 600 * np.sin(np.clip((hour_of_day-6)/12 * np.pi, -np.pi, np.pi))) + np.random.normal(0,25,hours)
    solar = np.clip(solar, 0, None)
    soil_temp = air_temp * 0.85 + 2 + np.random.normal(0,0.6,hours)
    frost_flag = (air_temp < 0).astype(int)

    df = pd.DataFrame({
        'timestamp': timestamps,
        'air_temp_C': np.round(air_temp,2),
        'rel_humidity_%': np.round(rel_humidity,1),
        'leaf_wetness_hours_last24': leaf_wetness,
        'precip_mm_hr': np.round(precip,3),
        'soil_moisture_m3m3': np.round(soil_moisture,4),
        'dew_point_C': np.round(dew_point,2),
        'vpd_kPa': np.round(vpd,3),
        'wind_speed_m_s': np.round(wind_speed,2),
        'solar_W_m2': np.round(solar,1),
        'soil_temp_C': np.round(soil_temp,2),
        'frost_flag': frost_flag
    })
    df = df.set_index('timestamp')
    return df

In [None]:
def build_targeted_weather_generator(train_generator, window_days=7, seed=42):
    """
    Build a weather generator with plant-disease specific weather patterns.

    Every image file known to ``train_generator`` gets a synthetic timestamp
    and a precomputed ``(window_days * 24, n_features)`` weather window. The
    returned generator then yields, for each image batch, the weather windows
    of exactly the files in that batch together with the batch labels.

    Args:
        train_generator: Keras directory iterator; its ``filepaths`` (or
            ``directory`` + ``filenames``), ``batch_index``, ``batch_size``
            and ``reset()`` are used.
        window_days: Length of the weather window per sample, in days.
        seed: Base seed; ``np.random.seed`` is called with it, so NumPy's
            global random state is reseeded.

    Returns:
        ``(generator, df)`` where ``generator`` yields
        ``(weather_batch, label_batch)`` forever and ``df`` has one row per
        image (filepath, disease_name, disease_id, plant_type, disease_type,
        timestamp).
    """
    # Get filepaths from image generator
    if hasattr(train_generator, 'filepaths'):
        all_filepaths = train_generator.filepaths
    else:
        all_filepaths = [os.path.join(train_generator.directory, f) for f in train_generator.filenames]

    # Build DataFrame with plant-disease info (class = parent folder name)
    df = pd.DataFrame({'filepath': all_filepaths})
    df['disease_name'] = df['filepath'].apply(lambda p: Path(p).parent.name)
    df['disease_id'] = df['disease_name'].map(lambda n: DISEASE_TO_ID.get(n, -1))

    # Parse plant and disease types
    parsed = df['disease_name'].apply(parse_disease_name)
    df['plant_type'] = [p[0] for p in parsed]
    df['disease_type'] = [p[1] for p in parsed]

    # Assign timestamps grouped by plant-disease combination for more realistic patterns
    start_date = datetime(2025, 1, 1)
    end_date = datetime(2025, 8, 21)
    seconds_span = int((end_date - start_date).total_seconds())
    df['timestamp'] = pd.NaT

    np.random.seed(seed)
    for _, g in df.groupby(['plant_type', 'disease_type']):
        idx = g.index
        n = len(idx)

        # Each group's samples are scattered around a handful of cluster
        # centres, so images of one plant-disease pair share similar dates.
        cluster_centers = np.random.uniform(0, seconds_span, min(n//10 + 1, 20))

        timestamps = []
        for _ in range(n):
            center = np.random.choice(cluster_centers)
            jitter = np.random.normal(0, seconds_span * 0.1)  # 10% of total span
            timestamp_secs = np.clip(center + jitter, 0, seconds_span)
            timestamps.append(start_date + timedelta(seconds=int(timestamp_secs)))

        df.loc[idx, 'timestamp'] = timestamps

    df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Precompute targeted weather for each sample
    print("Generating plant-disease specific weather patterns...")
    precomputed = {}

    for i, row in df.iterrows():
        # Generate one extra day on each side of the requested window
        min_ts = row['timestamp'] - timedelta(days=window_days+1)
        max_ts = row['timestamp'] + timedelta(days=1)

        weather_df = build_targeted_hourly_weather(
            min_ts, max_ts,
            row['plant_type'], row['disease_type'],
            seed=seed+i  # Different seed per sample for variety
        )

        # Extract time series for this sample
        timeseries_data = extract_timeseries_features(
            weather_df, row['timestamp'], window_days=window_days
        )
        precomputed[row['filepath']] = timeseries_data

    def targeted_weather_generator():
        """Generator yielding plant-disease specific weather patterns"""
        train_generator.reset()

        while True:
            try:
                # The iterator advances batch_index (or wraps it to 0) while
                # producing a batch. Read it first so that it still identifies
                # the batch about to be returned; reading it afterwards paired
                # each label batch with the following batch's weather.
                batch_start = train_generator.batch_index * train_generator.batch_size
                image_batch, label_batch = next(train_generator)
            except StopIteration:
                train_generator.reset()
                continue

            batch_stop = batch_start + len(label_batch)
            # index_array holds the sample order used for the current epoch,
            # which keeps the lookup right even for a shuffling iterator.
            sample_order = getattr(train_generator, 'index_array', None)
            if sample_order is not None:
                sample_ids = sample_order[batch_start:batch_stop]
            else:
                sample_ids = range(batch_start, min(batch_stop, len(all_filepaths)))

            batch_weather = np.stack(
                [precomputed[all_filepaths[j]] for j in sample_ids], axis=0
            )
            yield batch_weather, label_batch

    return targeted_weather_generator(), df

In [None]:
def extract_timeseries_features(weather_df: pd.DataFrame, end_ts: datetime, window_days: int = 7) -> np.ndarray:
    """
    Return the raw hourly series for the ``window_days`` ending at ``end_ts``.

    If fewer than ``window_days * 24`` rows fall inside the window, copies of
    the first row of ``weather_df`` are placed in front of it to make up the
    difference.

    Returns a float32 array of shape (timesteps, features).
    """
    n_steps = window_days * 24  # 168 hours for 7 days
    first_ts = end_ts - timedelta(hours=n_steps) + timedelta(hours=1)
    segment = weather_df.loc[first_ts:end_ts]

    missing = n_steps - len(segment)
    if missing > 0:
        # Repeat the first available row, stamped on the hours just before the window
        filler = pd.concat([weather_df.iloc[0:1].copy()] * missing, ignore_index=False)
        filler.index = [first_ts - timedelta(hours=k) for k in range(missing, 0, -1)]
        segment = pd.concat([filler, segment]).sort_index()

    # Keep exactly the last n_steps rows
    segment = segment.tail(n_steps)

    # Columns exported as model features (timestamp is the index, not a feature)
    feature_cols = [
        'air_temp_C',
        'rel_humidity_%',
        'leaf_wetness_hours_last24',
        'precip_mm_hr',
        'soil_moisture_m3m3',
        'dew_point_C',
        'vpd_kPa',
        'wind_speed_m_s',
        'solar_W_m2',
        'soil_temp_C',
        'frost_flag',
    ]

    return segment[feature_cols].values.astype(np.float32)

### Generate Weather data

In [None]:
# Build one weather generator (plus its per-image index table) for each image
# split, using 7-day weather windows.
w_train_gen, train_image_index_df = build_targeted_weather_generator(train_generator, window_days=7)
w_val_gen, val_image_index_df = build_targeted_weather_generator(val_generator, window_days=7)

Generating plant-disease specific weather patterns...
Generating plant-disease specific weather patterns...


In [None]:
# Sanity check: pull one training batch and print its shapes. This consumes
# the first batch of the generator.
weather_batch, label_batch = next(w_train_gen)
print("Weather shape:", weather_batch.shape)
print("Label shape:", label_batch.shape)

Weather shape: (32, 168, 11)
Label shape: (32, 38)


In [None]:
# Same sanity check for the validation generator (also consumes one batch).
weather_batch, label_batch = next(w_val_gen)
print("Weather shape:", weather_batch.shape)
print("Label shape:", label_batch.shape)

Weather shape: (32, 168, 11)
Label shape: (32, 38)


In [None]:
def _take_last_hours(weather_gen, n_hours=48):
    """Yield ``(weather, labels)`` batches cut down to the final ``n_hours`` timesteps."""
    for weather_batch, label_batch in weather_gen:
        # `-n_hours:` keeps a range of timesteps; a bare `-n_hours` would pick
        # a single timestep and drop the time axis.
        yield weather_batch[:, -n_hours:, :], label_batch


# A generator cannot be sliced directly (that raised "TypeError: 'generator'
# object is not subscriptable"), so every batch is sliced as it is produced.
# The short-window generators get their own names; w_train_gen / w_val_gen
# keep yielding the full 168-step windows for the models that expect them.
# Note that the wrappers draw from the same underlying generators.
w_train_gen_short = _take_last_hours(w_train_gen)
w_val_gen_short = _take_last_hours(w_val_gen)

TypeError: 'generator' object is not subscriptable

In [None]:
def build_weather_branch_short_window(self=None, n_weather_features=11, n_classes=38):
    """Weather model with shorter time window (last 48 hours instead of 168).

    This was written as a method, but it lives at module level. ``self`` is
    therefore optional: when an object is passed, its ``n_weather_features``
    and ``n_classes`` attributes are used, otherwise the keyword arguments
    apply.

    Args:
        self: Optional object providing ``n_weather_features`` and ``n_classes``.
        n_weather_features: Number of weather features per timestep.
        n_classes: Number of output classes.

    Returns:
        An uncompiled Keras ``Model`` named 'weather_short' with two outputs:
        the 32-unit 'weather_features_short' embedding and the softmax
        'weather_pred_short' prediction.
    """
    # Imported locally because the notebook only imports these names in later cells.
    from keras import layers
    from keras.layers import Input
    from keras.models import Model

    if self is not None:
        n_weather_features = self.n_weather_features
        n_classes = self.n_classes

    # Shorter window - last 48 hours instead of 168
    short_timesteps = 48
    input_weather = Input(shape=(short_timesteps, n_weather_features), name='weather_short_input')

    # Standardise every feature over the time axis, per sample
    x = layers.Lambda(lambda x: (x - tf.reduce_mean(x, axis=1, keepdims=True)) /
                     (tf.math.reduce_std(x, axis=1, keepdims=True) + 1e-8))(input_weather)

    # Lighter LSTM architecture
    x = layers.LSTM(32, return_sequences=True, dropout=0.3)(x)
    x = layers.BatchNormalization()(x)
    x = layers.LSTM(16, return_sequences=False, dropout=0.3)(x)
    x = layers.BatchNormalization()(x)

    # Compact dense layers
    x = layers.Dense(32, activation='relu')(x)
    x = layers.Dropout(0.4)(x)

    weather_features_short = layers.Dense(32, activation='relu', name='weather_features_short')(x)
    weather_pred_short = layers.Dense(n_classes, activation='softmax', name='weather_pred_short')(weather_features_short)

    return Model(inputs=input_weather, outputs=[weather_features_short, weather_pred_short],
                name='weather_short')

### Models

In [None]:
from keras import layers, models
from tensorflow.keras.optimizers import Adam
def create_improved_weather_model(timesteps=168, n_features=11, n_classes=38):
    """
    Build and compile a three-stage stacked-LSTM classifier for weather windows.

    Input is ``(timesteps, n_features)``; output is a softmax over
    ``n_classes``. The model is compiled with Adam and categorical
    cross-entropy and reports accuracy.
    """
    def lstm_stage(units, keep_sequence):
        # One recurrent stage: LSTM followed by batch normalisation
        return [
            layers.LSTM(units, return_sequences=keep_sequence, dropout=0.2, recurrent_dropout=0.2),
            layers.BatchNormalization(),
        ]

    # Input, then L2-normalise each timestep's feature vector
    stack = [
        layers.Input(shape=(timesteps, n_features)),
        layers.Lambda(lambda x: tf.nn.l2_normalize(x, axis=-1)),
    ]

    # 128 -> 64 -> 32 units; only the last stage collapses the time axis
    stack += lstm_stage(128, True)
    stack += lstm_stage(64, True)
    stack += lstm_stage(32, False)

    # Plain feed-forward head with dropout, then the softmax output
    stack += [
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(n_classes, activation='softmax'),
    ]

    model = keras.Sequential(stack)
    model.compile(
        optimizer=Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [None]:
# Train the short-window (48 h) weather classifier.
#
# As written before, this cell could not run: the builder was called without
# its `self` argument, the returned model was never compiled, it has two
# outputs while the generators yield a single label array, and the generators
# yield 168-step windows while the model expects 48 steps.
from types import SimpleNamespace

from keras.layers import Input
from keras.models import Model

# Define the number of steps per epoch for training and validation
# This should be the total number of samples divided by the batch size
steps_per_epoch_train = train_generator.samples // train_generator.batch_size
steps_per_epoch_val = val_generator.samples // val_generator.batch_size

SHORT_WINDOW_HOURS = 48


def _last_hours_only(weather_gen, n_hours=SHORT_WINDOW_HOURS):
    """Yield (weather, labels) batches reduced to the final ``n_hours`` timesteps."""
    for weather_batch, label_batch in weather_gen:
        yield weather_batch[:, -n_hours:, :], label_batch


# The builder reads n_weather_features / n_classes from its first argument;
# the values match the (32, 168, 11) weather and (32, 38) label batches above.
branch = build_weather_branch_short_window(
    SimpleNamespace(n_weather_features=11, n_classes=38)
)
# Keep only the softmax head (second output) so one label array is enough.
model = Model(inputs=branch.input, outputs=branch.outputs[1], name='weather_short_classifier')
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

# Train the model
history = model.fit(
    _last_hours_only(w_train_gen),
    steps_per_epoch=steps_per_epoch_train,
    epochs=15, # You can adjust the number of epochs
    validation_data=_last_hours_only(w_val_gen),
    validation_steps=steps_per_epoch_val
)

NameError: name 'train_generator' is not defined

In [None]:
def create_fixed_weather_model(timesteps=168, n_features=11, n_classes=38):
    """
    Build and compile a two-stage, L2-regularised LSTM weather classifier.

    Takes ``(timesteps, n_features)`` windows, standardises every feature over
    the time axis and ends in a softmax over ``n_classes``. Compiled with Adam
    (gradient norm clipped to 1.0) and categorical cross-entropy.
    """
    from tensorflow.keras.regularizers import l2

    def weight_penalty():
        # Fresh regulariser instance for each layer
        return l2(0.01)

    def standardise(x):
        # Per-sample z-score along the time axis; epsilon guards constant series
        return ((x - tf.reduce_mean(x, axis=1, keepdims=True)) /
                (tf.math.reduce_std(x, axis=1, keepdims=True) + 1e-8))

    net = Sequential([
        # Input shape: (batch_size, timesteps, features)
        Input(shape=(timesteps, n_features)),
        Lambda(standardise),

        # Recurrent part: 64 units over the sequence, then 32 units returning
        # the last state only
        LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.3,
             kernel_regularizer=weight_penalty()),
        BatchNormalization(),
        LSTM(32, return_sequences=False, dropout=0.3, recurrent_dropout=0.3,
             kernel_regularizer=weight_penalty()),
        BatchNormalization(),

        # Dense head
        Dense(64, activation='relu', kernel_regularizer=weight_penalty()),
        Dropout(0.5),
        Dense(32, activation='relu', kernel_regularizer=weight_penalty()),
        Dropout(0.4),

        # Class probabilities
        Dense(n_classes, activation='softmax'),
    ])

    net.compile(
        optimizer=Adam(learning_rate=0.001, clipnorm=1.0),  # gradient clipping
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return net

In [None]:
# Train the 168-step weather classifier built by create_fixed_weather_model.
# The layer names imported here are the ones that function looks up when it
# is called, so the imports must run before the call below.
from keras.layers import Input, LSTM, Dense, Dropout, BatchNormalization, Lambda
from keras.models import Sequential
from tensorflow.keras.optimizers import Adam
# Whole batches per epoch (floor division, so a final partial batch is not
# counted); needed because the weather generators never terminate.
steps_per_epoch_train = train_generator.samples // train_generator.batch_size
steps_per_epoch_val = val_generator.samples // val_generator.batch_size

model = create_fixed_weather_model()
model.summary()

# Train the model
history = model.fit(
    w_train_gen,
    steps_per_epoch=steps_per_epoch_train,
    epochs=15, # You can adjust the number of epochs
    validation_data=w_val_gen,
    validation_steps=steps_per_epoch_val
)

In [None]:
# Combined Data Generator for Fusion Model
import tensorflow as tf

def create_combined_generator(image_gen, weather_gen, soil_x, soil_y, batch_size=32):
    """
    Creates a combined generator that yields (images, weather, soil) data and labels

    Soil rows are taken by position: batch ``k`` of an epoch uses
    ``soil_x[k * batch_size : ...]``. The position wraps around after one pass
    over the image files, so the soil rows stay aligned with an image iterator
    that loops forever instead of raising ``StopIteration``.

    Args:
        image_gen: Image iterator providing ``reset()``, ``filepaths`` or
            ``filenames``, and ``(image_batch, label_batch)`` via ``next()``.
        weather_gen: Weather data generator yielding ``(weather_batch, labels)``.
        soil_x: Soil features array (2-D), assumed to be ordered like the image files.
        soil_y: Soil labels array; accepted for interface compatibility, not used.
        batch_size: Batch size (should match image_gen batch_size)

    Yields:
        ([image_batch, weather_batch, soil_batch], label_batch), with all four
        trimmed to the shortest of the three input batches. The generator ends
        when ``weather_gen`` is exhausted.
    """
    # Reset generators to ensure alignment
    image_gen.reset()

    if hasattr(image_gen, 'filepaths'):
        total_samples = len(image_gen.filepaths)
    else:
        total_samples = len(image_gen.filenames)

    # Ceiling division; at least one so the modulo below is always defined.
    batches_per_epoch = max(1, (total_samples + batch_size - 1) // batch_size)
    batch_idx = 0

    while True:
        try:
            image_batch, label_batch = next(image_gen)
        except StopIteration:
            # Finite image iterator: start the next pass from the beginning
            image_gen.reset()
            batch_idx = 0
            continue

        try:
            weather_batch, _ = next(weather_gen)
        except StopIteration:
            # No weather left. Resetting the image iterator and retrying here
            # would loop forever, so the combined stream ends instead.
            return

        # Get corresponding soil batch
        start_idx = batch_idx * batch_size
        end_idx = min(start_idx + batch_size, total_samples)

        if start_idx < len(soil_x):
            soil_batch = soil_x[start_idx:min(end_idx, len(soil_x))]

            # Pad (by repeating the last row) or trim to the image batch size
            shortfall = len(image_batch) - len(soil_batch)
            if shortfall > 0:
                padding = np.tile(soil_batch[-1:], (shortfall, 1))
                soil_batch = np.vstack([soil_batch, padding])
            elif shortfall < 0:
                soil_batch = soil_batch[:len(image_batch)]
        else:
            # Fewer soil rows than images: fall back to the first rows
            soil_batch = soil_x[:len(image_batch)]

        # Ensure all batches have the same length
        min_batch_size = min(len(image_batch), len(weather_batch), len(soil_batch))

        yield (
            [
                image_batch[:min_batch_size],
                weather_batch[:min_batch_size],
                soil_batch[:min_batch_size]
            ],
            label_batch[:min_batch_size]
        )

        # Wrap at the epoch boundary. Without this the counter kept growing,
        # and every batch after the first epoch received the first soil rows.
        batch_idx = (batch_idx + 1) % batches_per_epoch

# Create the combined generators using your existing data
# NOTE(review): train_gen, validation_gen and the x_*_soil / y_*_soil arrays
# are not defined in the visible part of this notebook - presumably they come
# from another notebook or an earlier session; verify before running.
print("Creating combined training generator...")
combined_train_gen = create_combined_generator(
    train_gen, w_train_gen, x_train_soil, y_train_soil, batch_size=batch_size
)

print("Creating combined validation generator...")
combined_val_gen = create_combined_generator(
    validation_gen, w_val_gen, x_val_soil, y_val_soil, batch_size=batch_size
)

# Test the combined generator: pull one batch and print the shape of each part
print("\nTesting combined generator...")
test_batch = next(combined_train_gen)
images, weather, soil = test_batch[0]
labels = test_batch[1]

print(f"Image batch shape: {images.shape}")
print(f"Weather batch shape: {weather.shape}")
print(f"Soil batch shape: {soil.shape}")
print(f"Labels shape: {labels.shape}")

# Keras-compatible combined data generator using tf.keras.utils.Sequence
class CombinedDataSequence(tf.keras.utils.Sequence):
    """
    Keras-compatible combined data generator that inherits from tf.keras.utils.Sequence

    Batch ``idx`` is assembled from the image iterator (images and one-hot
    labels), the pre-generated weather batches and a positional slice of the
    soil array. It is returned as
    ``({'image_input', 'weather_input', 'soil_input'}, labels)``.
    """

    def __init__(self, image_gen, weather_gen, soil_x, soil_y, **kwargs):
        """
        Args:
            image_gen: Image iterator (``batch_size``, ``classes`` or ``n``, ...).
            weather_gen: Iterable yielding ``(weather_batch, labels)``; it is
                consumed here, one item per batch of an epoch.
            soil_x: Soil features array, indexed by sample position.
            soil_y: Soil labels array; stored but not used for batching.
            **kwargs: Forwarded to the base class constructor.
        """
        # The base constructor sets up state Keras expects on Sequence objects.
        super().__init__(**kwargs)
        self.image_gen = image_gen
        self.weather_gen = weather_gen
        self.soil_x = soil_x
        self.soil_y = soil_y
        self.batch_size = image_gen.batch_size

        # Calculate total number of batches and samples
        self.n_samples = len(image_gen.classes) if hasattr(image_gen, 'classes') else image_gen.n
        self.samples = self.n_samples  # Add samples attribute for Keras compatibility
        self.total_batches = int(np.ceil(self.n_samples / self.batch_size))

        # Pre-generate weather data for better performance
        self._pregenerate_weather_data()

    def _pregenerate_weather_data(self):
        """Pull one weather batch per epoch batch up front and keep them in memory."""
        print("Pre-generating weather data...")
        self.weather_data = []
        temp_gen = iter(self.weather_gen)

        for _ in range(self.total_batches):
            try:
                weather_batch, _ = next(temp_gen)
            except StopIteration:
                # Restart a re-iterable source. A one-shot generator that is
                # exhausted raises StopIteration again here.
                temp_gen = iter(self.weather_gen)
                weather_batch, _ = next(temp_gen)
            self.weather_data.append(weather_batch)

    def __len__(self):
        """Number of batches per epoch"""
        return self.total_batches

    def __getitem__(self, idx):
        """Get batch at index idx"""
        start_idx = idx * self.batch_size
        end_idx = min(start_idx + self.batch_size, self.n_samples)

        if hasattr(self.image_gen, '_get_batches_of_transformed_samples'):
            # Load the whole batch with one call to the iterator's batch
            # loader instead of one call per image.
            sample_indices = np.arange(start_idx, end_idx) % self.image_gen.n
            loaded = self.image_gen._get_batches_of_transformed_samples(sample_indices)
            # The loader returns (images, labels) or only images, depending on
            # the iterator's class_mode.
            image_batch = loaded[0] if isinstance(loaded, tuple) else loaded
            label_batch = np.asarray(self.image_gen.classes)[sample_indices]
            label_batch = tf.keras.utils.to_categorical(label_batch, num_classes=self.image_gen.num_classes)
        else:
            # Fallback: reset and skip to position
            self.image_gen.reset()
            target_batch = idx % len(self.image_gen)
            for _ in range(target_batch):
                next(self.image_gen)
            image_batch, label_batch = next(self.image_gen)

        # Get weather batch
        weather_batch = self.weather_data[idx % len(self.weather_data)]

        # Get soil batch
        if start_idx < len(self.soil_x):
            soil_batch = self.soil_x[start_idx:end_idx]
        else:
            # Cycle through soil data
            cycle_start = start_idx % len(self.soil_x)
            cycle_end = min(cycle_start + self.batch_size, len(self.soil_x))
            soil_batch = self.soil_x[cycle_start:cycle_end]

            # Handle wraparound if needed
            if len(soil_batch) < self.batch_size:
                remaining = self.batch_size - len(soil_batch)
                additional = self.soil_x[:remaining]
                soil_batch = np.vstack([soil_batch, additional])

        # Ensure all batches have consistent size
        actual_batch_size = min(len(image_batch), len(weather_batch), len(soil_batch))

        # Return as dictionary format (recommended for multi-input models)
        return (
            {
                'image_input': image_batch[:actual_batch_size],
                'weather_input': weather_batch[:actual_batch_size],
                'soil_input': soil_batch[:actual_batch_size]
            },
            label_batch[:actual_batch_size]
        )

    def on_epoch_end(self):
        """Called at the end of each epoch"""
        if hasattr(self.image_gen, 'on_epoch_end'):
            self.image_gen.on_epoch_end()

# Create Keras-compatible combined generators using your existing data
# NOTE(review): train_gen, validation_gen and the soil arrays are not defined
# in the visible part of this notebook - verify where they come from.
print("Creating Keras-compatible combined generators...")
keras_train_gen = CombinedDataSequence(train_gen, w_train_gen, x_train_soil, y_train_soil)
keras_val_gen = CombinedDataSequence(validation_gen, w_val_gen, x_val_soil, y_val_soil)

# Test the Keras-compatible generator
print("\nTesting Keras-compatible generator...")
test_batch = keras_train_gen[0]  # Get first batch using indexing
input_dict, labels = test_batch

print(f"Image batch shape: {input_dict['image_input'].shape}")
print(f"Weather batch shape: {input_dict['weather_input'].shape}")
print(f"Soil batch shape: {input_dict['soil_input'].shape}")
print(f"Labels shape: {labels.shape}")
print(f"Total batches in training generator: {len(keras_train_gen)}")

# Now you can use these with model.fit()
# fusion_model.fit(keras_train_gen, epochs=10, validation_data=keras_val_gen)

# Helper function to create test generator when needed
def create_combined_test_sequence():
    """Build the combined (image, weather, soil) sequence for the test split.

    Reads the module-level ``Test_gen``, ``x_test_soil`` and ``y_test_soil``,
    which must exist when this is called. The per-image index table returned
    by the weather builder is discarded.
    """
    weather_and_index = build_targeted_weather_generator(Test_gen, window_days=7)
    test_weather_gen = weather_and_index[0]
    return CombinedDataSequence(Test_gen, test_weather_gen, x_test_soil, y_test_soil)