**Importing Libraries** 

In [None]:
import os
import cv2
import time
import torch
import random
import warnings
import numpy as np
import pandas as pd
import torch 
from PIL import Image
from glob import glob
from json import loads,dumps
import matplotlib.pyplot as plt

**Loading Model** 

In [None]:
!pip install open_clip_torch==2.23.0 transformers==4.35.2 matplotlib

In [None]:
from open_clip import create_model_from_pretrained, get_tokenizer # works on open-clip-torch>=2.23.0, timm>=0.9.8

# Single source of truth for the checkpoint, so the model weights and the
# tokenizer are always loaded from the same Hugging Face Hub repository.
BIOMEDCLIP_HUB_ID = 'hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224'

# `model` is the image/text encoder pair; `preprocess` is the image transform
# returned alongside it and applied to every image before inference.
model, preprocess = create_model_from_pretrained(BIOMEDCLIP_HUB_ID)
tokenizer = get_tokenizer(BIOMEDCLIP_HUB_ID)

**Preparing Data** 

In [None]:
# Get a list of all image ids (file names) for the two classes used below.
# os.listdir returns entries in arbitrary order, so the listings are sorted to
# keep the image order - and therefore the per-image output - stable across runs.

PHOTOS_DIR = '/kaggle/input/bach-breast-cancer-histology-images/ICIAR2018_BACH_Challenge/ICIAR2018_BACH_Challenge/Photos'

invasive_ids = sorted(os.listdir(os.path.join(PHOTOS_DIR, 'Invasive')))
normal_ids = sorted(os.listdir(os.path.join(PHOTOS_DIR, 'Normal')))

In [None]:
# Merge the two class listings (Normal first, then Invasive) into a pair of
# parallel lists: final_ids[k] is a file name and final_labels[k] is the name
# of the class folder that file came from.

final_ids = [*normal_ids, *invasive_ids]
final_labels = ['Normal'] * len(normal_ids) + ['Invasive'] * len(invasive_ids)

# Both counts should be identical.
print(len(final_ids), len(final_labels), sep='\n')

In [None]:
# Obtaining final image paths and binary labels (0 = Normal, 1 = Invasive).
# Directory entries that are not .tif files are skipped, so img_paths and
# img_labels may be shorter than final_ids but always stay aligned.

PHOTOS_DIR = '/kaggle/input/bach-breast-cancer-histology-images/ICIAR2018_BACH_Challenge/ICIAR2018_BACH_Challenge/Photos'

img_paths = []
img_labels = []

for file_name, class_name in zip(final_ids, final_labels):
    # Require the dot so that only genuine '.tif' extensions match.
    if not file_name.endswith('.tif'):
        continue
    # The class name doubles as the sub-folder name under Photos/.
    img_paths.append(os.path.join(PHOTOS_DIR, class_name, file_name))
    img_labels.append(0 if class_name == 'Normal' else 1)

print(len(img_paths))
print(len(img_labels))

**Running Model** 

In [None]:
# Text prompts for zero-shot classification. Their position in `labels` is the
# class index used downstream: index 0 -> str1, index 1 -> str2.
# The literals are split across lines for readability only; adjacent string
# literals are concatenated, so the resulting text is unchanged.

# Prompt for index 0.
str1 = (
    'Well-defined cell patterns with normal tissue architecture, '
    'do not invade beyond their original layer. '
    'Nuclei uniform in size, shape, with minimal atypia. '
    'Low mitotic activity with few dividing cells. '
    'Little to no stromal reaction.'
)

# Prompt for index 1.
# NOTE: there is intentionally no space after "irregular." - the prompt text is
# kept exactly as it was so the text embedding does not change.
str2 = (
    'Irregular cells, disorganized arrangements, infiltrating surrounding tissues, '
    'Blurred boundaries, breaking through basement membranes. '
    'Nuclei showing pleomorphism, variations in size, shape, staining intensity, larger, irregular.'
    'Increased mitotic figures, rapid cell division. '
    'Desmoplastic (fibrous) stromal response.'
)

labels = [str1, str2]

In [None]:
# Zero-shot inference: score every image against the two text prompts and keep
# the index of the best-matching prompt as the predicted class.

# NOTE(review): the checkpoint name contains "PubMedBERT_256", which suggests a
# 256-token text context; 500 is kept as-is here - confirm which was intended.
context_length = 500

labels = [
    str1,
    str2
]

predicted_labels = []
model.eval()  # switch the model to evaluation mode before inference

# The prompts are the same for every image, so tokenize them once up front
# instead of on every loop iteration.
texts = tokenizer(labels, context_length=context_length)

for img_path, true_label in zip(img_paths, img_labels):
    # Context manager closes the file handle once the pixels have been read.
    with Image.open(img_path) as img:
        image = preprocess(img)

    with torch.no_grad():
        image = image.unsqueeze(0)  # add a leading batch dimension
        image_features, text_features, logit_scale = model(image, texts)

        # Scaled image/text similarities passed through softmax over the
        # prompts, so despite the name these are probabilities.
        logits = (logit_scale * image_features @ text_features.t()).softmax(dim=-1)
        pred_index = torch.argmax(logits, dim=-1).item()

        print(f"Primary label: {true_label}, Predicted label: {pred_index}, Logits: {logits}")

        predicted_labels.append(pred_index)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Pin the class order explicitly so that the rows/columns of the report and of
# the confusion matrix always line up with the display names below, even if one
# class never appears among the predictions.
class_ids = [0, 1]
class_names = ['Normal', 'Invasive Carcinoma']

report = classification_report(
    img_labels,
    predicted_labels,
    labels=class_ids,
    target_names=class_names,
    zero_division=0,  # report 0 (without a warning) for a class that is never predicted
)
print(report)

# Rows are true classes, columns are predicted classes, both in class_ids order.
cm = confusion_matrix(img_labels, predicted_labels, labels=class_ids)

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')
plt.show()