# Disease Detection Fusion Model
This notebook creates a multimodal fusion model for disease detection using YOLO (image) and TabNet (symptoms).

## Get YOLO Outputs for Each Disease Image

In [2]:
from ultralytics import YOLO
import numpy as np
import os

# Load disease YOLO model (update path to your disease model)
model = YOLO('Disease_training/kaggle/working/runs/segment/train/weights/best.pt')  # Update this path

# List of disease image paths
image_folder = 'Disease_training/kaggle/working/dataset/images/train'  
image_files = sorted([os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(('.jpg', '.jpeg', '.png'))])[:200]

yolo_scores_disease = []
for img_path in image_files:
    results = model(img_path)
    # Get max confidence for any detection in the image, or 0 if no detection
    if len(results[0].boxes) > 0:
        conf = float(results[0].boxes.conf.max())
    else:
        conf = 0.0
    yolo_scores_disease.append(conf)

yolo_scores_disease = np.array(yolo_scores_disease).reshape(-1, 1)
np.save('yolo_disease_confidences.npy', yolo_scores_disease)


image 1/1 c:\Users\hemes\Desktop\AGRITHON\Disease_training\kaggle\working\dataset\images\train\disease10_aug0.jpg: 640x640 1 disease, 16.6ms
Speed: 9.1ms preprocess, 16.6ms inference, 197.3ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 c:\Users\hemes\Desktop\AGRITHON\Disease_training\kaggle\working\dataset\images\train\disease10_aug1.jpg: 640x640 1 disease, 16.5ms
Speed: 2.2ms preprocess, 16.5ms inference, 4.5ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 c:\Users\hemes\Desktop\AGRITHON\Disease_training\kaggle\working\dataset\images\train\disease10_aug3.jpg: 640x640 1 disease, 17.0ms
Speed: 2.4ms preprocess, 17.0ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 c:\Users\hemes\Desktop\AGRITHON\Disease_training\kaggle\working\dataset\images\train\disease10_aug4.jpg: 640x640 1 disease, 20.1ms
Speed: 2.9ms preprocess, 20.1ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 c:\Users\hemes\Desktop\AGRITHON

## Get TabNet Outputs for Each Disease Symptom Row

In [3]:
import pandas as pd
from pytorch_tabnet.tab_model import TabNetClassifier

# Load disease symptom data
symptom_df = pd.read_csv('disease_symptom_dataset_200.csv').iloc[:200]
X_symptoms = symptom_df.drop(columns=['Disease_Present_Label']).values
y_labels = symptom_df['Disease_Present_Label'].values

# Load disease TabNet model
tabnet_model_disease = TabNetClassifier()
tabnet_model_disease.load_model('tabnet_disease.zip.zip')

tabnet_probs_disease = tabnet_model_disease.predict_proba(X_symptoms)[:, 1].reshape(-1, 1)
np.save('tabnet_disease_probs.npy', tabnet_probs_disease)
np.save('disease_labels.npy', y_labels)

  saved_state_dict = torch.load(f, map_location=self.device)


## Build the Disease Fusion Dataset

In [4]:
yolo_scores_disease = np.load('yolo_disease_confidences.npy')
tabnet_probs_disease = np.load('tabnet_disease_probs.npy')
y_labels_disease = np.load('disease_labels.npy')

yolo_weight = 2.0  # Increase this value to give more priority to YOLO
fusion_features_disease = np.concatenate([yolo_scores_disease * yolo_weight, tabnet_probs_disease], axis=1)

## Train and Evaluate Disease Fusion Classifier

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

X_train, X_val, y_train, y_val = train_test_split(fusion_features_disease, y_labels_disease, test_size=0.2, random_state=42)

# Use MLP for disease fusion
clf_disease = MLPClassifier(hidden_layer_sizes=(16, 8), max_iter=200, random_state=42)
clf_disease.fit(X_train, y_train)

y_pred = clf_disease.predict(X_val)
print("Disease Fusion Model Accuracy:", accuracy_score(y_val, y_pred))
print("Precision:", precision_score(y_val, y_pred))
print("Recall:", recall_score(y_val, y_pred))
print("F1 Score:", f1_score(y_val, y_pred))

Disease Fusion Model Accuracy: 0.8
Precision: 0.8
Recall: 1.0
F1 Score: 0.8888888888888888


## Save Disease Fusion Model

In [6]:
import joblib
joblib.dump(clf_disease, 'fusion_disease_classifier.joblib')
# To load: clf_disease = joblib.load('fusion_disease_classifier.joblib')

['fusion_disease_classifier.joblib']

## Disease Inference: Predict for a New Image and Symptom Input

In [7]:
from ultralytics import YOLO
from pytorch_tabnet.tab_model import TabNetClassifier
import numpy as np
import pandas as pd
import joblib

# Load disease models
yolo_model_disease = YOLO('Disease_training/kaggle/working/runs/segment/train/weights/best.pt')  # Update this path
tabnet_model_disease = TabNetClassifier()
tabnet_model_disease.load_model('tabnet_disease.zip.zip')
fusion_clf_disease = joblib.load('fusion_disease_classifier.joblib')

def predict_disease(image_path, symptom_answers, yolo_weight=2.0):
    """
    image_path: str, path to the disease image file
    symptom_answers: list or np.array of 30 binary values (0/1)
    """
    # 1. YOLO inference
    yolo_result = yolo_model_disease(image_path)
    if len(yolo_result[0].boxes) > 0:
        yolo_conf = float(yolo_result[0].boxes.conf.max())
    else:
        yolo_conf = 0.0

    # 2. TabNet inference
    X_symptom = np.array(symptom_answers).reshape(1, -1)
    tabnet_prob = tabnet_model_disease.predict_proba(X_symptom)[0, 1]

    # 3. Fusion (apply YOLO weight)
    fusion_input = np.array([[yolo_conf * yolo_weight, tabnet_prob]])
    fusion_pred = fusion_clf_disease.predict(fusion_input)[0]
    fusion_proba = fusion_clf_disease.predict_proba(fusion_input)[0, 1]

    print(f"YOLO confidence (weighted): {yolo_conf * yolo_weight:.2f}")
    print(f"YOLO confidence (unweighted): {yolo_conf:.2f}")
    print(f"TabNet probability: {tabnet_prob:.2f}")
    print(f"Disease Fusion prediction: {fusion_pred} (probability: {fusion_proba:.2f})")
    return fusion_pred, fusion_proba

  saved_state_dict = torch.load(f, map_location=self.device)


## Test Disease Prediction

In [13]:
# Example disease symptoms (30 binary values)
test_disease_symptoms = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
test_disease_image_path = "Disease_training/kaggle/working/dataset/images/val/disease2_aug4.jpg"  # Update with your test disease image

predict_disease(test_disease_image_path, test_disease_symptoms)


image 1/1 c:\Users\hemes\Desktop\AGRITHON\Disease_training\kaggle\working\dataset\images\val\disease2_aug4.jpg: 640x640 1 disease, 186.9ms
Speed: 555.4ms preprocess, 186.9ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 640)
YOLO confidence (weighted): 1.84
YOLO confidence (unweighted): 0.92
TabNet probability: 0.94
Disease Fusion prediction: 1 (probability: 0.80)


(np.int64(1), np.float64(0.7963536423104928))