In [None]:

import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout, Embedding, LSTM, Concatenate, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Configuration
class Config:
    """Hyper-parameters and file locations shared by the whole pipeline.

    The two data locations can be overridden without editing the notebook by
    setting the environment variables ``SYMPTOMS_CSV_FILE`` and
    ``SKIN_DISEASE_IMAGE_DIR`` before this cell runs; when they are unset the
    original defaults are used.
    """

    # --- model input geometry -------------------------------------------
    # Target image size passed to the image resize step and the image Input.
    IMAGE_SIZE = (224, 224)
    # Length every token sequence is padded/truncated to.
    MAX_LEN = 100
    # Tokenizer vocabulary cap; also the Embedding layer's input_dim.
    VOCAB_SIZE = 5000

    # --- optimisation ---------------------------------------------------
    BATCH_SIZE = 32
    EPOCHS = 50
    LEARNING_RATE = 0.001

    # --- data locations -------------------------------------------------
    # CSV that must contain at least the "label" and "symptom" columns.
    CSV_FILE = os.environ.get("SYMPTOMS_CSV_FILE", "symptoms.csv")
    # Root folder holding one sub-folder of images per label.
    IMAGE_FOLDER = os.environ.get(
        "SKIN_DISEASE_IMAGE_DIR",
        r"C:\Users\User\Downloads\Skin-disease-dataset\train",
    )
    # Where the best checkpoint is written during training.
    MODEL_SAVE_PATH = "best_multimodal_model.h5"

    # --- dataset split fractions (of the full dataset) ------------------
    VALIDATION_SPLIT = 0.2
    TEST_SPLIT = 0.1

In [None]:

class MultimodalDiseaseDetector:
    """Image + symptom-text disease classifier driven by a config object.

    The tokenizer, label encoder and Keras model start out as ``None`` and are
    filled in by the data-loading and model-building steps.
    """

    def __init__(self, config):
        """Store ``config`` and initialise the lazily-created components."""
        self.config = config
        self.tokenizer = None
        self.label_encoder = None
        self.model = None

    def load_and_preprocess_data(self):
        """Load the symptom CSV and attach label ids, token ids and image paths.

        Side effects: fits ``self.label_encoder`` on the ``label`` column and
        ``self.tokenizer`` on the ``symptom`` column.

        Returns:
            DataFrame with the added columns ``label_id``, ``token_ids`` and
            ``image_path``; rows for which no image could be assigned are
            removed and the index is reset.

        Raises:
            KeyError: if the CSV lacks the ``label`` or ``symptom`` column.
        """
        print("Loading and preprocessing data...")

        df = pd.read_csv(self.config.CSV_FILE)

        required = ["label", "symptom"]
        missing = [col for col in required if col not in df.columns]
        if missing:
            raise KeyError(
                f"{self.config.CSV_FILE} is missing required column(s): {missing}"
            )

        # Only the columns the pipeline actually uses decide whether a row is
        # usable; resetting the index keeps it contiguous for the image picker.
        df = df.dropna(subset=required).reset_index(drop=True)

        print(f"Loaded {len(df)} samples")
        print(f"Unique labels: {df['label'].unique()}")

        # Label encoding
        self.label_encoder = LabelEncoder()
        df["label_id"] = self.label_encoder.fit_transform(df["label"])

        # Tokenize symptoms (coerced to str so stray numeric cells do not break
        # the tokenizer's text handling).
        symptoms = df["symptom"].astype(str)
        self.tokenizer = Tokenizer(num_words=self.config.VOCAB_SIZE, oov_token="<OOV>")
        self.tokenizer.fit_on_texts(symptoms)

        # One variable-length list of token ids per row.
        df["token_ids"] = pd.Series(
            self.tokenizer.texts_to_sequences(symptoms), index=df.index, dtype=object
        )

        # NOTE(review): _create_balanced_image_paths is defined in a separate
        # notebook cell — confirm it is bound to this class before calling.
        df["image_path"] = self._create_balanced_image_paths(df)

        # Remove samples with missing images
        df = df[df["image_path"].notna()].reset_index(drop=True)

        print(f"Final dataset size: {len(df)} samples")
        return df

In [None]:
def _create_balanced_image_paths(self, df):
    """Create balanced image paths for each label"""
    image_paths = []
    
    for idx, row in df.iterrows():
        label = row["label"]
        label_dir = os.path.join(self.config.IMAGE_FOLDER, label)
        
        if not os.path.isdir(label_dir):
            print(f"Warning: Folder not found for label: {label}")
            image_paths.append(None)
            continue
            
        image_list = [f for f in os.listdir(label_dir) 
                     if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))]
        
        if not image_list:
            print(f"Warning: No images found in folder: {label_dir}")
            image_paths.append(None)
            continue
            
        # Use modulo for balanced selection instead of random
        img_idx = idx % len(image_list)
        img_path = os.path.join(label_dir, image_list[img_idx])
        image_paths.append(img_path)
        
    return image_paths

In [None]:
def create_tf_dataset(self, df, is_training=True):
        """Build a batched ``tf.data`` pipeline from a prepared DataFrame.

        Args:
            df: DataFrame with ``token_ids``, ``image_path`` and ``label_id``
                columns.
            is_training: when True the samples are shuffled.

        Returns:
            Dataset yielding ``({"image_input", "text_input"}, label)`` batches
            of ``config.BATCH_SIZE``; samples whose label was set to -1 by the
            preprocessing step are dropped.
        """
        # Pad sequences; int32 matches the dtype declared for the py_function
        # outputs in the preprocessing step.
        padded_sequences = pad_sequences(
            df["token_ids"].tolist(),
            maxlen=self.config.MAX_LEN,
            padding='post',
            dtype='int32'
        )

        dataset = tf.data.Dataset.from_tensor_slices({
            "image_path": df["image_path"].values,
            "tokens": padded_sequences,
            # Label ids are cast explicitly so their dtype does not depend on
            # the platform's default integer width.
            "labels": df["label_id"].values.astype(np.int32)
        })

        if is_training:
            # Shuffle the lightweight (path, tokens, label) records *before*
            # decoding so the shuffle buffer never holds decoded images, and
            # size the buffer to the data for a full shuffle.
            dataset = dataset.shuffle(buffer_size=max(len(df), 1))

        # Map preprocessing function
        dataset = dataset.map(
            self._preprocess_function,
            num_parallel_calls=tf.data.AUTOTUNE
        )

        # Filter out samples flagged as invalid (label == -1)
        dataset = dataset.filter(lambda features, label: tf.not_equal(label, -1))

        dataset = dataset.batch(self.config.BATCH_SIZE)
        dataset = dataset.prefetch(tf.data.AUTOTUNE)

        return dataset

✅ Progress saved to: multimodal_dataset.csv


In [None]:
def _preprocess_function(self, data):
        """Turn one ``{"image_path", "tokens", "labels"}`` record into model inputs.

        The image is read, decoded to 3 channels, resized to
        ``config.IMAGE_SIZE`` and scaled to [0, 1]. If anything fails, a
        placeholder sample with label -1 is returned so the caller can filter
        it out.

        Returns:
            ``({"image_input": image, "text_input": tokens}, label)`` with
            static shapes set.
        """
        image_shape = (*self.config.IMAGE_SIZE, 3)

        def preprocess_py(image_path, tokens, label):
            try:
                # Load and preprocess image
                raw = tf.io.read_file(image_path)
                # expand_animations=False keeps animated GIFs 3-D (first frame).
                image = tf.image.decode_image(raw, channels=3, expand_animations=False)
                image = tf.image.resize(image, self.config.IMAGE_SIZE)
                image = tf.cast(image, tf.float32) / 255.0

                # Ensure proper shapes
                image = tf.reshape(image, image_shape)

                # Cast to the dtypes declared in Tout below; py_function does
                # not convert between integer widths on its own.
                return image, tf.cast(tokens, tf.int32), tf.cast(label, tf.int32)

            except Exception as e:
                # Best-effort: report the problem and return placeholder data
                # that is filtered out downstream via the -1 label.
                print(f"Warning: skipping sample, could not load image {image_path}: {e}")
                return (tf.zeros(image_shape, dtype=tf.float32),
                        tf.zeros([self.config.MAX_LEN], dtype=tf.int32),
                        tf.constant(-1, dtype=tf.int32))

        image, tokens, label = tf.py_function(
            preprocess_py,
            [data["image_path"], data["tokens"], data["labels"]],
            [tf.float32, tf.int32, tf.int32]
        )

        # py_function loses static shape information; restore it.
        image.set_shape(image_shape)
        tokens.set_shape([self.config.MAX_LEN])
        label.set_shape([])

        return {"image_input": image, "text_input": tokens}, label
    

In [None]:
def build_model(self):
        """Build and compile the two-branch (image + text) classifier.

        The image branch is an ImageNet-pretrained MobileNetV2 with all but its
        last 20 layers frozen; the text branch is an Embedding + LSTM. Both are
        concatenated and fed to a dense head with a softmax over the classes
        known to ``self.label_encoder``.

        Returns:
            The compiled Keras model (also stored in ``self.model``).
        """
        print("Building multimodal model...")

        # Image branch - MobileNetV2
        img_input = Input(shape=(*self.config.IMAGE_SIZE, 3), name="image_input")
        # The data pipeline feeds pixels in [0, 1], while the ImageNet weights
        # of MobileNetV2 expect [-1, 1]; rescale inside the model so training
        # and inference stay consistent.
        img_scaled = tf.keras.layers.Rescaling(
            scale=2.0, offset=-1.0, name="mobilenet_rescale"
        )(img_input)
        cnn_base = MobileNetV2(
            include_top=False,
            weights='imagenet',
            input_tensor=img_scaled
        )

        # Fine-tune last few layers
        for layer in cnn_base.layers[:-20]:
            layer.trainable = False
        for layer in cnn_base.layers[-20:]:
            layer.trainable = True

        x1 = GlobalAveragePooling2D()(cnn_base.output)
        x1 = BatchNormalization()(x1)
        x1 = Dense(256, activation='relu')(x1)
        x1 = Dropout(0.3)(x1)

        # Text branch - LSTM
        txt_input = Input(shape=(self.config.MAX_LEN,), name="text_input")
        x2 = Embedding(
            input_dim=self.config.VOCAB_SIZE,
            output_dim=128,
            mask_zero=True
        )(txt_input)
        x2 = LSTM(128, dropout=0.2, recurrent_dropout=0.2)(x2)
        x2 = BatchNormalization()(x2)
        x2 = Dense(128, activation='relu')(x2)
        x2 = Dropout(0.3)(x2)

        # Fusion layer
        merged = Concatenate()([x1, x2])
        x = Dense(256, activation='relu')(merged)
        x = BatchNormalization()(x)
        x = Dropout(0.4)(x)
        x = Dense(128, activation='relu')(x)
        x = Dropout(0.3)(x)

        # Output layer
        num_classes = len(self.label_encoder.classes_)
        output = Dense(num_classes, activation='softmax', name='predictions')(x)

        # Create model
        self.model = Model(inputs=[img_input, txt_input], outputs=output)

        # 'top_3_accuracy' is not a built-in metric identifier, so the metric
        # object is created explicitly (sparse variant: labels are class ids).
        top_k = tf.keras.metrics.SparseTopKCategoricalAccuracy(
            k=min(3, num_classes), name='top_3_accuracy'
        )

        optimizer = Adam(learning_rate=self.config.LEARNING_RATE)
        self.model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy', top_k]
        )

        return self.model

: 

In [None]:
def train(self, train_dataset, val_dataset):
        """Fit ``self.model`` and return the Keras ``History`` object.

        Three callbacks are attached: early stopping on validation accuracy
        (restoring the best weights), learning-rate halving when validation
        loss plateaus, and a best-only checkpoint written to
        ``config.MODEL_SAVE_PATH``.
        """
        print("Starting training...")

        # Stop once validation accuracy has not improved for 10 epochs.
        stop_early = EarlyStopping(
            monitor='val_accuracy',
            patience=10,
            restore_best_weights=True,
            verbose=1
        )
        # Halve the learning rate after 5 epochs without val_loss improvement.
        shrink_lr = ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-7,
            verbose=1
        )
        # Persist only the best model seen so far.
        keep_best = ModelCheckpoint(
            self.config.MODEL_SAVE_PATH,
            monitor='val_accuracy',
            save_best_only=True,
            verbose=1
        )

        return self.model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=self.config.EPOCHS,
            callbacks=[stop_early, shrink_lr, keep_best],
            verbose=1
        )

❌ Error processing: C:\Users\User\Downloads\Skin-disease-dataset\train\Acne\pigmentation_0_1889.jpeg, Small red, tender bumps called papules that can get worse.
   ⚠️ Exception: Failed copying input tensor from /job:localhost/replica:0/task:0/device:GPU:0 to /job:localhost/replica:0/task:0/device:CPU:0 in order to run ResizeBilinear: Dst tensor is not initialized. [Op:ResizeBilinear]
❌ Error processing: C:\Users\User\Downloads\Skin-disease-dataset\train\Acne\aug_3761_acne-histology-5.jpg, Small red, tender bumps called papules
   ⚠️ Exception: {{function_node __wrapped__DecodeJpeg_device_/job:localhost/replica:0/task:0/device:CPU:0}} OOM when allocating tensor with shape[488,720,3] and type uint8 on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu [Op:DecodeJpeg]
❌ Error processing: C:\Users\User\Downloads\Skin-disease-dataset\train\Acne\aug_128_hidradenitis-suppurativa-64.jpg, Small red, tender bumps called papules mostly visible.
   ⚠️ Exception: Failed copying input ten

In [None]:
def evaluate(self, test_dataset, df_test):
        """Print a classification report and plot the confusion matrix.

        Args:
            test_dataset: dataset yielding ``(features, labels)`` batches in a
                stable order.
            df_test: DataFrame with a ``label_id`` column; used for the ground
                truth only when labels cannot be read from ``test_dataset``.

        Returns:
            ``(y_pred, predictions)``: predicted class ids and the raw
            per-class probabilities.

        Raises:
            ValueError: if the number of labels and predictions differ.
        """
        print("Evaluating model...")

        # Predictions
        predictions = self.model.predict(test_dataset)
        y_pred = np.argmax(predictions, axis=1)

        # Read the ground truth from the dataset itself: the pipeline may have
        # filtered out unreadable samples, in which case df_test would contain
        # more rows than there are predictions.
        try:
            y_true = np.concatenate(
                [np.asarray(labels) for _, labels in test_dataset]
            )
        except (TypeError, ValueError):
            y_true = df_test['label_id'].values

        if len(y_true) != len(y_pred):
            raise ValueError(
                f"Got {len(y_true)} labels but {len(y_pred)} predictions"
            )

        # Pin the full class list so the report and matrix stay aligned with
        # the class names even when a class is absent from the test split.
        class_names = [str(name) for name in self.label_encoder.classes_]
        class_ids = np.arange(len(class_names))

        # Classification report
        print("\nClassification Report:")
        print(classification_report(
            y_true, y_pred,
            labels=class_ids,
            target_names=class_names,
            zero_division=0
        ))

        # Confusion matrix
        cm = confusion_matrix(y_true, y_pred, labels=class_ids)
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax
        )
        ax.set_title('Confusion Matrix')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')
        plt.show()

        return y_pred, predictions

ValueError: Invalid `predicate`. `predicate` must return a `tf.bool` scalar tensor, but its return type is TensorSpec(shape=(None,), dtype=tf.bool, name=None).

In [None]:
def predict_single(self, image_path=None, text="", confidence_threshold=0.5):
        """Predict the disease for one image and/or one symptom description.

        Args:
            image_path: path of the image; ``None`` (or an unreadable file)
                results in an all-zero image being used.
            text: symptom description; an empty value is replaced by
                ``"no symptoms described"``.
            confidence_threshold: minimum probability for a class to be
                reported.

        Returns:
            Up to three ``{'disease', 'confidence'}`` dicts, most probable
            first, or a single ``'uncertain'`` entry when no class reaches the
            threshold.
        """
        height, width = self.config.IMAGE_SIZE
        blank = np.zeros((height, width, 3), dtype=np.float32)

        if image_path is None:
            image = blank
        else:
            try:
                # The context manager releases the file handle; PIL's resize
                # expects (width, height), the reverse of the config tuple.
                with Image.open(image_path) as img:
                    rgb = img.convert("RGB").resize((width, height))
                image = np.asarray(rgb, dtype=np.float32) / 255.0
            except Exception as e:
                # Best-effort: fall back to a blank image rather than failing.
                print(f"Error loading image: {e}")
                image = blank

        image = np.expand_dims(image, axis=0)

        # Process text
        if not text:
            text = "no symptoms described"

        tokens = self.tokenizer.texts_to_sequences([text])
        # Must use the same 'post' padding as the training pipeline; the
        # default ('pre') would shift every token relative to training.
        tokens = pad_sequences(tokens, maxlen=self.config.MAX_LEN, padding='post')

        # Predict
        predictions = self.model.predict({
            "image_input": image,
            "text_input": tokens
        })

        # Keep the three most probable classes that clear the threshold.
        pred_probs = predictions[0]
        top_indices = np.argsort(pred_probs)[::-1][:3]
        results = [
            {
                'disease': self.label_encoder.classes_[idx],
                'confidence': float(pred_probs[idx])
            }
            for idx in top_indices
            if pred_probs[idx] >= confidence_threshold
        ]

        return results if results else [{'disease': 'uncertain', 'confidence': 0.0}]


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 image_input (InputLayer)       [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['image_input[0][0]']            
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                                           

In [None]:
def main():
    """Run the full pipeline: load data, split, train, evaluate, demo-predict."""
    # Seed NumPy and TensorFlow so shuffling and weight initialisation are
    # repeatable across runs.
    seed = 42
    np.random.seed(seed)
    tf.random.set_seed(seed)

    config = Config()
    detector = MultimodalDiseaseDetector(config)

    # Load and preprocess data
    df = detector.load_and_preprocess_data()

    # First carve off validation + test together, then split that holdout so
    # the final fractions match VALIDATION_SPLIT and TEST_SPLIT.
    holdout_fraction = config.VALIDATION_SPLIT + config.TEST_SPLIT
    train_df, temp_df = train_test_split(
        df, test_size=holdout_fraction,
        stratify=df['label_id'], random_state=seed
    )

    val_df, test_df = train_test_split(
        temp_df, test_size=config.TEST_SPLIT / holdout_fraction,
        stratify=temp_df['label_id'], random_state=seed
    )

    print(f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")

    # Create datasets
    train_dataset = detector.create_tf_dataset(train_df, is_training=True)
    val_dataset = detector.create_tf_dataset(val_df, is_training=False)
    test_dataset = detector.create_tf_dataset(test_df, is_training=False)

    # Build and train model
    model = detector.build_model()
    model.summary()

    # Train
    detector.train(train_dataset, val_dataset)

    # Evaluate
    detector.evaluate(test_dataset, test_df)

    # Example prediction on a real held-out sample (a placeholder path would
    # silently fall back to an all-zero image).
    sample = test_df.iloc[0]
    results = detector.predict_single(
        image_path=sample["image_path"],
        text=str(sample["symptom"])
    )
    print(f"Prediction results: {results}")


if __name__ == "__main__":
    main()

Epoch 1/10


InvalidArgumentError: Graph execution error:

2 root error(s) found.
  (0) INVALID_ARGUMENT:  ValueError: Tensor conversion requested dtype int64 for Tensor with dtype int32: <tf.Tensor: shape=(), dtype=int32, numpy=0>
Traceback (most recent call last):

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 269, in __call__
    return func(device, token, args)

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 147, in __call__
    outputs = self._call(device, args)

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 163, in _call
    outputs = [

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 164, in <listcomp>
    _maybe_copy_to_context_device(self._convert(x, dtype=dtype),

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 131, in _convert
    return ops.convert_to_tensor(value, dtype=dtype)

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\profiler\trace.py", line 183, in wrapped
    return func(*args, **kwargs)

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1599, in convert_to_tensor
    raise ValueError(

ValueError: Tensor conversion requested dtype int64 for Tensor with dtype int32: <tf.Tensor: shape=(), dtype=int32, numpy=0>


	 [[{{node EagerPyFunc}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_6]]
  (1) INVALID_ARGUMENT:  ValueError: Tensor conversion requested dtype int64 for Tensor with dtype int32: <tf.Tensor: shape=(), dtype=int32, numpy=0>
Traceback (most recent call last):

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 269, in __call__
    return func(device, token, args)

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 147, in __call__
    outputs = self._call(device, args)

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 163, in _call
    outputs = [

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 164, in <listcomp>
    _maybe_copy_to_context_device(self._convert(x, dtype=dtype),

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\ops\script_ops.py", line 131, in _convert
    return ops.convert_to_tensor(value, dtype=dtype)

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\profiler\trace.py", line 183, in wrapped
    return func(*args, **kwargs)

  File "c:\Users\User\anaconda3\envs\tf_gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1599, in convert_to_tensor
    raise ValueError(

ValueError: Tensor conversion requested dtype int64 for Tensor with dtype int32: <tf.Tensor: shape=(), dtype=int32, numpy=0>


	 [[{{node EagerPyFunc}}]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_28913]

In [None]:

def load_image(img_path, image_size=None):
    """Load an image as an RGB array scaled to [0, 1].

    Args:
        img_path: path of the image file.
        image_size: optional ``(height, width)`` target size; defaults to
            ``Config.IMAGE_SIZE`` (the notebook defines no standalone
            ``IMAGE_SIZE`` global).

    Returns:
        Float array of shape ``(height, width, 3)``.
    """
    if image_size is None:
        image_size = Config.IMAGE_SIZE
    height, width = image_size

    # The context manager closes the file; PIL's resize takes (width, height).
    with Image.open(img_path) as img:
        rgb = img.convert("RGB").resize((width, height))
    return np.array(rgb) / 255.0

def predict_disease(image=None, text=None, detector=None):
    """Return the most probable disease label for an image and/or description.

    Args:
        image: path of an image file, or ``None`` to use an all-zero image.
        text: symptom description, or ``None`` to use ``"<no description>"``.
        detector: optional trained detector providing ``tokenizer``, ``model``,
            ``label_encoder`` and ``config``. When omitted, the module-level
            ``tokenizer``, ``model`` and ``label_encoder`` variables are used
            (they must already exist in the kernel) together with ``Config``.

    Returns:
        The class name with the highest predicted probability.
    """
    if detector is not None:
        tok = detector.tokenizer
        net = detector.model
        encoder = detector.label_encoder
        cfg = detector.config
    else:
        # Legacy path: rely on objects created earlier in the session.
        tok, net, encoder, cfg = tokenizer, model, label_encoder, Config

    if image is None:
        image = np.zeros((*cfg.IMAGE_SIZE, 3))
    else:
        image = load_image(image)
    image = np.expand_dims(image, axis=0)

    if text is None:
        text = "<no description>"
    tokens = tok.texts_to_sequences([text])
    # 'post' padding mirrors how sequences are padded for training.
    tokens = pad_sequences(tokens, maxlen=cfg.MAX_LEN, padding='post')

    preds = net.predict({"image_input": image, "text_input": tokens})
    pred_idx = np.argmax(preds[0])
    return encoder.classes_[pred_idx]

# Example usage:
# predict_disease(image="dataset/acne/img1.jpg", text="red bumps on face")
