In [None]:
import numpy as np
import json
from transformers import TFDistilBertForSequenceClassification, DistilBertTokenizer
from tensorflow.keras.models import load_model
import joblib

Some layers from the model checkpoint at DistilBERT_Final_model/DistilBERT_with_LRDecay_model_5e-5 were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at DistilBERT_Final_model/DistilBERT_with_LRDecay_model_5e-5 and are newly initialized: ['dropout_59']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inf

DistilBERT model and tokenizer loaded successfully!
VGG16 model loaded successfully!
Text LabelEncoder loaded successfully!
Image LabelEncoder loaded successfully!


In [None]:
# Restore the text classifier.
# Both artifacts were written with the Hugging Face `save_pretrained` layout,
# so each one is a directory rather than a single file.
TEXT_MODEL_DIR = 'DistilBERT_Final_model/DistilBERT_with_LRDecay_model_5e-5'
TEXT_TOKENIZER_DIR = 'DistilBERT_Final_model/DistilBERT_with_LRDecay_tokenizer_5e-5'

text_model = TFDistilBertForSequenceClassification.from_pretrained(TEXT_MODEL_DIR)
tokenizer = DistilBertTokenizer.from_pretrained(TEXT_TOKENIZER_DIR)
print("DistilBERT model and tokenizer loaded successfully!")



In [None]:
# Restore the image classifier (VGG16-based) from its single-file .keras archive.
IMAGE_MODEL_PATH = 'my_model_VGG16_reducelr_1e-5.keras'

image_model = load_model(IMAGE_MODEL_PATH)
print("VGG16 model loaded successfully!")

In [None]:
# Label decoding artifacts for the two classifiers.

# Text side: the saved label encoder, restored with joblib.
# NOTE(review): joblib.load unpickles its input, so only load files you trust.
text_label_encoder = joblib.load('text_label_encoder.joblib')
print("Text LabelEncoder loaded successfully!")

# Image side: the JSON file is read into `image_class_indices`; the decoder is
# its inverse (value -> key), presumably class index -> class name.
with open('class_indices_my_model_VGG16.json', 'r') as f:
    image_class_indices = json.load(f)
image_label_decoder = dict(
    (class_index, class_name) for class_name, class_index in image_class_indices.items()
)
print("Image LabelEncoder loaded successfully!")



In [None]:
# Define Preprocessing Functions for text
def preprocess_text(text):
    """Tokenize ``text`` for the DistilBERT classifier.

    Uses the module-level ``tokenizer`` and asks it for TensorFlow tensors,
    padded and truncated to at most 128 tokens.

    Args:
        text: Raw text passed straight to the tokenizer.

    Returns:
        Tuple ``(input_ids, attention_mask)`` taken from the tokenizer output.
    """
    encoded = tokenizer(
        text,
        max_length=128,
        truncation=True,
        padding=True,
        return_tensors="tf",
    )
    input_ids = encoded['input_ids']
    attention_mask = encoded['attention_mask']
    return input_ids, attention_mask

In [None]:
# Define Preprocessing Functions for image
def preprocess_image(img_path):
    """Load an image from disk and prepare it as a one-image batch for VGG16.

    Args:
        img_path: Path of the image file to load.

    Returns:
        Array of shape (1, 224, 224, channels) after VGG16 ``preprocess_input``.
    """
    from tensorflow.keras.applications.vgg16 import preprocess_input
    from tensorflow.keras.preprocessing import image

    # Load and resize to the 224x224 input size used here.
    pil_img = image.load_img(img_path, target_size=(224, 224))
    # PIL image -> (height, width, channels) array.
    pixels = image.img_to_array(pil_img)
    # Prepend the batch axis: (height, width, channels) -> (1, height, width, channels).
    batch = pixels[np.newaxis, ...]
    # VGG16 preprocessing converts RGB to BGR and zero-centers each channel on
    # the ImageNet means; it does NOT rescale pixel values to [0, 1].
    # NOTE(review): this must match the preprocessing used when image_model was trained.
    return preprocess_input(batch)

In [None]:
# Define Late Fusion with Weighted Soft Voting
def _softmax(logits):
    """Row-wise softmax that stays finite for large logits."""
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing to inf (which would yield NaN rows).
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)


def _align_image_probs(image_probs, text_label_encoder, image_label_decoder):
    """Reorder image probability columns to follow ``text_label_encoder.classes_``."""
    classes = getattr(text_label_encoder, "classes_", None)
    if classes is None or not image_label_decoder:
        # Nothing to align with: keep the positional behaviour.
        return image_probs

    try:
        # Labels are compared as strings so that e.g. 10 (text side) matches
        # "10" (class name read from JSON on the image side).
        column_by_label = {str(label): int(index) for index, label in image_label_decoder.items()}
        order = [column_by_label[str(label)] for label in classes]
    except (KeyError, TypeError, ValueError):
        order = None

    # Only reorder when the mapping is a true permutation of the image columns.
    if order is None or sorted(order) != list(range(image_probs.shape[1])):
        import warnings

        warnings.warn(
            "image_label_decoder labels do not match text_label_encoder.classes_; "
            "fusing image and text probabilities by column position.",
            stacklevel=3,
        )
        return image_probs

    return image_probs[:, order]


def late_fusion_weighted_soft_voting(text_input, img_path, text_label_encoder, image_label_decoder, text_weight=0.61, image_weight=0.39):
    """
    Perform late fusion using weighted soft voting with probability outputs from both models.

    Relies on the module-level ``text_model`` and ``image_model`` as well as
    ``preprocess_text`` and ``preprocess_image``.

    The two models may index their classes in different orders (for example a
    numeric sort on one side and a string sort on the other), so the image
    probabilities are matched to the text classes by label name through
    ``image_label_decoder`` before they are summed. If the labels cannot be
    matched, a warning is emitted and columns are fused by position.

    Args:
        text_input (str): Text input for the DistilBERT model.
        img_path (str): Path to the image for the VGG16 model.
        text_label_encoder (LabelEncoder): The saved label encoder for the text model.
        image_label_decoder (dict): Decoder for the image model class indices
            (class index -> class name). May be None/empty to fuse by position.
        text_weight (float): Weight for the text model predictions.
        image_weight (float): Weight for the image model predictions.

    Returns:
        Final predicted class label after weighted soft voting fusion, decoded
        with ``text_label_encoder``.
    """
    # Preprocess both modalities
    text_ids, text_mask = preprocess_text(text_input)
    img_array = preprocess_image(img_path)

    # Text model returns raw logits; convert them to probabilities
    text_logits = text_model(text_ids, attention_mask=text_mask).logits.numpy()
    text_probs = _softmax(text_logits)

    # Image model output is used directly as probabilities, re-ordered so that
    # column j refers to the same class as column j of text_probs
    image_probs = np.asarray(image_model.predict(img_array))
    image_probs = _align_image_probs(image_probs, text_label_encoder, image_label_decoder)

    # Weighted sum of the two probability vectors
    combined_probs = text_probs * text_weight + image_probs * image_weight

    # Get final class index with maximum combined score
    final_class = np.argmax(combined_probs, axis=1)[0]

    # Decode the final class index to the original label
    final_label = text_label_encoder.inverse_transform([final_class])[0]

    return final_label

In [None]:
# Example usage: fuse one text snippet with one product image.
text_input = "oliva"
img_path = (
    "C:/Users/User/OneDrive - ingenium digital diagnostics GmbH/Desktop/DataScientest/"
    "Rakuten project/images/images/image_train/image_1325918866_product_4239126071.jpg"
)  # Replace with the path to your image
final_prediction = late_fusion_weighted_soft_voting(
    text_input,
    img_path,
    text_label_encoder,
    image_label_decoder,
    text_weight=0.61,
    image_weight=0.39,
)
print(f"Final Predicted Class: {final_prediction}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293ms/step
Final Predicted Class: 10
