In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Installing and Uninstalling Required Packages
!pip install transformers datasets accelerate evaluate gradio torchvision torch matplotlib
!pip uninstall openai -y
!pip install openai==0.28.0

# Importing Libraries and Packages
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import ViltForQuestionAnswering, ViltProcessor
from torch.utils.data import DataLoader
from torch.amp import autocast, GradScaler
from transformers import AdamW, get_scheduler
from sklearn.utils.class_weight import compute_class_weight
from evaluate import load
from PIL import Image
import pandas as pd
import numpy as np
import os
import gradio as gr
import matplotlib.pyplot as plt
from torchvision import models, transforms
from tqdm.auto import tqdm


Collecting datasets
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting gradio
  Downloading gradio-5.1.0-py3-none-any.whl.metadata (15 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl

In [3]:

# Loading Dataset
df = pd.read_csv('/content/drive/MyDrive/iLab/Data/Single Answer Data/Final/Final Single Answer Dataset.csv')
df2 = pd.read_csv('/content/drive/MyDrive/iLab/Data/Single Answer Data/skincap_v240623(1).csv')

df['image_path'] = df['skincap_file_path'].apply(lambda x: os.path.join('/content/drive/MyDrive/iLab/Images/skincap', x))

binary_column_names = ['Papule','Plaque','Crust','White(Hypopigmentation)','Erosion',
      'Nodule', 'Scale', 'Brown(Hyperpigmentation)', 'Erythema', 'Yellow']

df = pd.merge(df, df2[['id']+binary_column_names])

df['binary_variables'] = df[binary_column_names].values.tolist()

train_data_raw = pd.read_csv('/content/drive/MyDrive/iLab/img_train.csv')
val_data_raw = pd.read_csv('/content/drive/MyDrive/iLab/img_val.csv')
test_data_raw = pd.read_csv('/content/drive/MyDrive/iLab/img_test.csv')

train_data_combined = pd.concat([train_data_raw,val_data_raw])

#train_df = df[df['skincap_file_path'].isin(np.unique(train_data_raw['skincap_file_path']))]
#val_df = df[df['skincap_file_path'].isin(np.unique(val_data_raw['skincap_file_path']))]
#test_df = df[df['skincap_file_path'].isin(np.unique(test_data_raw['skincap_file_path']))]

train_df = df[df['skincap_file_path'].isin(np.unique(train_data_combined['skincap_file_path']))]
test_df = df[df['skincap_file_path'].isin(np.unique(test_data_raw['skincap_file_path']))]

# Function to Load Image
def load_image(image_path):
    image = Image.open(image_path).convert("RGB")
    image = image.resize((384, 384))
    return image

# **Dataset Creation and Preprocessing**




In [4]:
# Defining SkinCap Dataset Class
class SkinCapDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, processor):
        self.data = dataframe
        self.processor = processor

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = self.data.iloc[idx]['image_path']
        image = load_image(img_name)
        question = self.data.iloc[idx]['question']
        answer = self.data.iloc[idx]['answer']

        binary_variables = self.data.iloc[idx]['binary_variables']

        encoding = self.processor(image, question, return_tensors="pt")
        return encoding, answer, torch.tensor(binary_variables)

# Initialize Processor and Dataset
processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
train_data = SkinCapDataset(train_df, processor)
val_data = SkinCapDataset(test_df, processor)


# Function for Batch Collation
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    input_ids = [item[0]["input_ids"].squeeze(0) for item in batch]
    pixel_values = [item[0]["pixel_values"].squeeze(0) for item in batch]
    attention_mask = [item[0]["attention_mask"].squeeze(0) for item in batch]
    answers = [item[1] for item in batch]
    binary_variables = [item[2] for item in batch]

    input_ids_padded = pad_sequence(input_ids, batch_first=True)
    attention_mask_padded = pad_sequence(attention_mask, batch_first=True)

    binary_variables_tensor = torch.stack(binary_variables)

    return {"input_ids": input_ids_padded, "pixel_values": torch.stack(pixel_values), "attention_mask": attention_mask_padded, "binary_variables": binary_variables_tensor}, answers

# Dataloaders
train_dataloader = DataLoader(train_data, shuffle=True, batch_size=8, collate_fn=collate_fn)
val_dataloader = DataLoader(val_data, batch_size=8, collate_fn=collate_fn)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/320 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



# **Model Architecture with Dropout Layer**

In [5]:
# Defining Model Class with Dropout Layer
class ViltForQuestionAnsweringWithBinary(nn.Module):
    # Add num_binary_features
    def __init__(self, model_name, num_labels, num_binary_features):
        super(ViltForQuestionAnsweringWithBinary, self).__init__()
        self.model = ViltForQuestionAnswering.from_pretrained(model_name)
        self.dropout = nn.Dropout(p=0.1)
        # Updated classifier to incorporate binary features
        self.classifier = nn.Linear(self.model.config.hidden_size + num_binary_features, num_labels)

    def forward(self, input_ids, pixel_values, attention_mask, binary_variables):
        outputs = self.model.vilt(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask)
        pooled_output = outputs[0][:, 0]

        # Concatenate pooled output with binary variables
        combined_output = torch.cat((pooled_output, binary_variables), dim = 1)

        combined_output = self.dropout(combined_output)
        logits = self.classifier(combined_output)
        return logits

# **Class Weight Calculation and Model Training Setup**

In [6]:
# Compute Class Weights
unique_answers = df['answer'].unique()
answer_to_idx = {answer: idx for idx, answer in enumerate(unique_answers)}
class_weights = compute_class_weight(class_weight='balanced', classes=np.array(list(answer_to_idx.values())), y=df['answer'].map(answer_to_idx).values)
weights = torch.tensor(class_weights, dtype=torch.float).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# Initializing Model and Moving to GPU
model = ViltForQuestionAnsweringWithBinary("dandelin/vilt-b32-finetuned-vqa", num_labels=len(df['answer'].unique()), num_binary_features=len(binary_column_names))
model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# Loss Function, Optimizer, and Learning Rate Scheduler
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
scaler = GradScaler()
lr_scheduler = get_scheduler(name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=20*len(train_dataloader))

config.json:   0%|          | 0.00/136k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/470M [00:00<?, ?B/s]

# **Train Test Model Training Loop**

In [12]:
# Training Loop with Validation
num_epochs = 20
progress_bar = tqdm(range(num_epochs * len(train_dataloader)))

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    # Training phase
    for batch in train_dataloader:
        encoding, answers = batch
        input_ids = encoding['input_ids'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        pixel_values = encoding['pixel_values'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        attention_mask = encoding['attention_mask'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        labels = torch.tensor([answer_to_idx[ans] for ans in answers], device=torch.device("cuda" if torch.cuda.is_available() else "cpu")).long()

        binary_variables = encoding['binary_variables'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

        with autocast(device_type='cuda'):
            outputs = model(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, binary_variables=binary_variables)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        running_loss += loss.item()
        progress_bar.update(1)

    avg_train_loss = running_loss / len(train_dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}")

    # Validation phase
    model.eval()  # Set model to evaluation mode
    val_running_loss = 0.0

    with torch.no_grad():  # No need to compute gradients during validation
        for batch in val_dataloader:
            encoding, answers = batch
            input_ids = encoding['input_ids'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
            pixel_values = encoding['pixel_values'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
            attention_mask = encoding['attention_mask'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
            labels = torch.tensor([answer_to_idx[ans] for ans in answers], device=torch.device("cuda" if torch.cuda.is_available() else "cpu")).long()

            binary_variables = encoding['binary_variables'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

            outputs = model(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, binary_variables=binary_variables)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item()

    avg_val_loss = val_running_loss / len(val_dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}")



  0%|          | 0/76460 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
#Save Model Function
def save_model(model, path="/content/drive/MyDrive/iLab/vilt_skincap_model_with_binary_split_single_answer_9_annot_train_val.pth"):
   torch.save(model.state_dict(), path)
   print(f"Model saved at {path}")

save_model(model)

NameError: name 'model' is not defined

In [None]:
#Saving optimizer and other states along with the model
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'epoch': epoch,
    'loss': loss,
}, '/content/drive/MyDrive/iLab/single_answer_model_checkpoint_9_annot_train_val.pth')

# Loading the checkpoint later
#checkpoint = torch.load('model_checkpoint.pth')
#model.load_state_dict(checkpoint['model_state_dict'])

# **Model Training Loop**

In [13]:
#Save Model Function
def save_model(model, path):
   torch.save(model.state_dict(), path)
   print(f"Model saved at {path}")

# Training Loop
num_epochs = 20
progress_bar = tqdm(range(num_epochs * len(train_dataloader)))

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_dataloader:
        encoding, answers = batch
        input_ids = encoding['input_ids'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        pixel_values = encoding['pixel_values'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        attention_mask = encoding['attention_mask'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        labels = torch.tensor([answer_to_idx[ans] for ans in answers], device=torch.device("cuda" if torch.cuda.is_available() else "cpu")).long()


        binary_variables = encoding['binary_variables'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

        with autocast(device_type='cuda'):
            outputs = model(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, binary_variables=binary_variables)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        running_loss += loss.item()
        progress_bar.update(1)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}")

    if epoch > 5 and (epoch+1) % 5 == 0:
        pth = f"/content/drive/MyDrive/iLab/vilt_skincap_model_with_binary_split_single_answer_10_Annot_Epoch_{epoch+1}.pth"
        save_model(model, pth)



  0%|          | 0/76460 [00:00<?, ?it/s]

Epoch [1/20], Loss: 7.7339
Epoch [2/20], Loss: 7.0349
Epoch [3/20], Loss: 6.4435
Epoch [4/20], Loss: 5.9671
Epoch [5/20], Loss: 5.4791
Epoch [6/20], Loss: 5.0747
Epoch [7/20], Loss: 4.6730
Epoch [8/20], Loss: 4.2851
Epoch [9/20], Loss: 3.9129
Epoch [10/20], Loss: 3.5529
Model saved at /content/drive/MyDrive/iLab/vilt_skincap_model_with_binary_split_single_answer_10_Annot_Epoch_10.pth
Epoch [11/20], Loss: 3.1853
Epoch [12/20], Loss: 2.8330
Epoch [13/20], Loss: 2.4806
Epoch [14/20], Loss: 2.1814
Epoch [15/20], Loss: 1.8687
Model saved at /content/drive/MyDrive/iLab/vilt_skincap_model_with_binary_split_single_answer_10_Annot_Epoch_15.pth
Epoch [16/20], Loss: 1.6103
Epoch [17/20], Loss: 1.3570
Epoch [18/20], Loss: 1.1075
Epoch [19/20], Loss: 0.9222
Epoch [20/20], Loss: 0.7450
Model saved at /content/drive/MyDrive/iLab/vilt_skincap_model_with_binary_split_single_answer_10_Annot_Epoch_20.pth


In [None]:
#Save Model Function
def save_model(model, path="/content/drive/MyDrive/iLab/vilt_skincap_model_with_binary_split_single_answer_9_annot.pth"):
   torch.save(model.state_dict(), path)
   print(f"Model saved at {path}")

save_model(model)

In [None]:
#Saving optimizer and other states along with the model
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'epoch': epoch,
    'loss': loss,
}, '/content/drive/MyDrive/iLab/single_answer_model_checkpoint_9_annot.pth')

# Loading the checkpoint later
#checkpoint = torch.load('model_checkpoint.pth')
#model.load_state_dict(checkpoint['model_state_dict'])

# **Validation with BERTScore**

In [26]:
#Assume the model architecture is the same as the one you're saving
model = ViltForQuestionAnsweringWithBinary("dandelin/vilt-b32-finetuned-vqa", num_labels=len(df['answer'].unique()), num_binary_features=len(binary_column_names))
model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [27]:
# Loading the checkpoint later
checkpoint = torch.load('/content/drive/MyDrive/iLab/vilt_skincap_model_with_binary_split_single_answer_10_Annot_Epoch_20.pth')
model.load_state_dict(checkpoint)

  checkpoint = torch.load('/content/drive/MyDrive/iLab/vilt_skincap_model_with_binary_split_single_answer_10_Annot_Epoch_20.pth')


<All keys matched successfully>

In [20]:
# Function to test a single image, question, and binary variable combination
def test_single_example(model, processor, test_df, idx):

    sample = test_df.iloc[idx]
    image_path = sample['image_path']
    question = sample['question']
    binary_variables = torch.tensor(sample['binary_variables'], device=torch.device("cuda" if torch.cuda.is_available() else "cpu")).unsqueeze(0)

    image = load_image(image_path)

    print(question)
    print(sample['answer'])

    encoding = processor(image, question, return_tensors="pt", padding=True)
    input_ids = encoding['input_ids'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    pixel_values = encoding['pixel_values'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    attention_mask = encoding['attention_mask'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    model.eval()
    with torch.no_grad():
        logits = model(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, binary_variables=binary_variables)

    predicted_class_idx = logits.argmax(dim=-1).item()

    predicted_answer = list(answer_to_idx.keys())[list(answer_to_idx.values()).index(predicted_class_idx)]

    return predicted_answer



In [10]:
idx = 15
predicted_answer = test_single_example(model, processor, test_df, idx)
print(f"Predicted Answer: {predicted_answer}")

Which skin condition is observed in the image?
Seborrheic Keratosis
Predicted Answer: Melanocytic Nevi


In [11]:
!pip install bert_score
!pip install nltk
!pip install pycocoevalcap
from bert_score import score
import nltk
from bert_score import score
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.cider.cider import Cider

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13
Collecting pycocoevalcap
  Downloading pycocoevalcap-1.2-py3-none-any.whl.metadata (3.2 kB)
Downloading pycocoevalcap-1.2-py3-none-any.whl (104.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.3/104.3 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycocoevalcap
Successfully installed pycocoevalcap-1.2


In [12]:
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [21]:
idx_to_answer = {v: k for k, v in answer_to_idx.items()}

In [28]:
# Evaluating the Model with BERTScore, METEOR, and CIDEr
model.eval()

all_predicted_answers = []
all_ground_truth_answers = []

for batch in val_dataloader:
    encoding, answers = batch
    input_ids = encoding['input_ids'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    pixel_values = encoding['pixel_values'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    attention_mask = encoding['attention_mask'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    binary_variables = encoding['binary_variables'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    #print(binary_variables)
    # Get model predictions
    outputs = model(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, binary_variables=binary_variables)
    predicted = outputs.argmax(-1)

    # Convert predicted indices back to text (answers)
    predicted_answers = [idx_to_answer[pred.item()] for pred in predicted]

    # Add predicted and ground truth answers to lists
    all_predicted_answers.extend(predicted_answers)
    all_ground_truth_answers.extend(answers)

# Compute BERTScore
P, R, F1 = score(all_predicted_answers, all_ground_truth_answers, lang="en", rescale_with_baseline=True)
average_f1 = F1.mean().item() * 100
print(f"Validation BERTScore F1: {average_f1:.2f}%")

# Tokenization function for METEOR
def tokenize(text):
    return nltk.word_tokenize(text.lower())

# Compute METEOR score using nltk
meteor = nltk.translate.meteor_score.meteor_score
meteor_scores = [meteor([tokenize(gt)], tokenize(pred)) for gt, pred in zip(all_ground_truth_answers, all_predicted_answers)]
average_meteor = sum(meteor_scores) / len(meteor_scores) * 100
print(f"Validation METEOR Score: {average_meteor:.2f}%")

# Prepare for CIDEr calculation (requires pycocoevalcap library)
cider_scorer = Cider()
cider_scores, _ = cider_scorer.compute_score(
    {i: [gt] for i, gt in enumerate(all_ground_truth_answers)},
    {i: [pred] for i, pred in enumerate(all_predicted_answers)}
)
print(f"Validation CIDEr Score: {cider_scores:.2f}")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation BERTScore F1: 67.54%
Validation METEOR Score: 30.14%
Validation CIDEr Score: 1.48


In [15]:
predicted_answers_output = pd.DataFrame(columns=["Predicted Answer","Actual Answer"])
predicted_answers_output['Predicted Answer'] = all_predicted_answers
predicted_answers_output['Actual Answer'] = all_ground_truth_answers
predicted_answers_output.to_csv('/content/drive/MyDrive/iLab/Single_Answer_predicted_answers_output_9_annot.csv')

In [29]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from sklearn.metrics import f1_score
import numpy as np

# Download necessary NLTK resources
nltk.download('punkt')

# Initialize BLEU smoothing function (to handle short sentences)
smooth_fn = SmoothingFunction().method1

# Initialize variables for scores
total_exact_matches = 0
all_bleu_scores = []
all_f1_scores = []

# Iterate over the predicted and ground-truth answers
for pred_answer, true_answer in zip(all_predicted_answers, all_ground_truth_answers):

    # 1. Accuracy (Exact Match)
    if pred_answer == true_answer:
        total_exact_matches += 1

    # Tokenize predicted and true answers for BLEU and F1
    pred_tokens = nltk.word_tokenize(pred_answer)
    true_tokens = nltk.word_tokenize(true_answer)

    # 2. BLEU Score (we'll use BLEU-1 and BLEU-2)
    bleu_1 = sentence_bleu([true_tokens], pred_tokens, weights=(1, 0, 0, 0), smoothing_function=smooth_fn)
    bleu_2 = sentence_bleu([true_tokens], pred_tokens, weights=(0.5, 0.5, 0, 0), smoothing_function=smooth_fn)

    all_bleu_scores.append((bleu_1, bleu_2))

    # 3. F1 Score (Token-level matching)
    # Convert tokens to sets for calculating precision and recall
    pred_set = set(pred_tokens)
    true_set = set(true_tokens)

    common_tokens = pred_set & true_set

    if len(pred_set) == 0 or len(true_set) == 0:
        f1 = 0
    else:
        precision = len(common_tokens) / len(pred_set)
        recall = len(common_tokens) / len(true_set)
        if precision + recall == 0:
            f1 = 0
        else:
            f1 = 2 * (precision * recall) / (precision + recall)

    all_f1_scores.append(f1)

# Calculate final metrics

# Accuracy
accuracy = total_exact_matches / len(all_predicted_answers)

# BLEU Score (Average BLEU-1 and BLEU-2)
average_bleu_1 = np.mean([score[0] for score in all_bleu_scores])
average_bleu_2 = np.mean([score[1] for score in all_bleu_scores])

# F1 Score (Average)
average_f1 = np.mean(all_f1_scores)

# Output results
print(f"Exact Match Accuracy: {accuracy * 100:.2f}%")
print(f"Average BLEU-1 Score: {average_bleu_1:.4f}")
print(f"Average BLEU-2 Score: {average_bleu_2:.4f}")
print(f"Average F1 Score: {average_f1:.4f}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Exact Match Accuracy: 54.42%
Average BLEU-1 Score: 0.5532
Average BLEU-2 Score: 0.1952
Average F1 Score: 0.5556


# **Validation**

In [None]:
# Evaluating the Model
metric = load("accuracy")

model.eval()
correct = 0
total = 0
for batch in val_dataloader:
    encoding, answers = batch
    input_ids = encoding['input_ids'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    pixel_values = encoding['pixel_values'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    attention_mask = encoding['attention_mask'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    labels = torch.tensor([answer_to_idx[ans] for ans in answers], device=torch.device("cuda" if torch.cuda.is_available() else "cpu")).long()

    binary_variables = encoding['binary_variables'].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    outputs = model(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, binary_variables=binary_variables)
    predicted = outputs.argmax(-1)
    correct += (predicted == labels).sum().item()
    total += labels.size(0)

accuracy = correct / total
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Prediction with Uncertainty **Estimation**

In [None]:
# Function to Predict with Uncertainty
def predict_with_uncertainty(image, question, num_samples=100):
    image = image.convert("RGB")
    image = image.resize((384, 384))

    model.train()
    encoding = processor(image, question, return_tensors="pt").to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    logits_list = []
    for _ in range(num_samples):
        outputs = model(input_ids=encoding['input_ids'], pixel_values=encoding['pixel_values'], attention_mask=encoding['attention_mask'])
        logits_list.append(outputs)

    model.eval()
    logits = torch.stack(logits_list)
    mean_logits = logits.mean(dim=0)
    std_logits = logits.std(dim=0)

    predicted_answer_idx = mean_logits.argmax(-1).item()
    uncertainty = std_logits.mean().item()

    return unique_answers[predicted_answer_idx], uncertainty

# **Model Saving**

In [None]:
# Save Model Function
def save_model(model, path="/content/drive/MyDrive/iLab/vilt_skincap_model_with_binary.pth"):
    torch.save(model.state_dict(), path)
    print(f"Model saved at {path}")

save_model(model)

Model saved at /content/drive/MyDrive/iLab/vilt_skincap_model.pth


# **Integration with OpenAI for Detailed Descriptions**

In [None]:
# Import OpenAI
import openai
import gradio as gr

openai.api_key = "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"

# Fetching Detailed Explanation from OpenAI
def get_detailed_answer(predicted_answer):
    prompt = f"Please provide a vivid and detailed explanation about the skin condition '{predicted_answer}'."

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful medical assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=400,
            temperature=0.7
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        return f"Error: {str(e)}"

# **Gradio Interface**

In [None]:
# Gradio Interface for Skintelligence

def predict(image, question):
    predicted_answer, uncertainty = predict_with_uncertainty(image, question)

    if uncertainty > 1.5:
        return f"Skintelligence Predicted Answer: {predicted_answer}, but the model is uncertain.", ""

    #detailed_answer = get_detailed_answer(predicted_answer)

    return f"Skintelligence Predicted Answer: {predicted_answer}"#, f"Vivid Description: {detailed_answer}"

# CSS Styling for Gradio Interface
css = """
body {
    background: linear-gradient(to right, #1a1f36, #283c86);
    color: #ffffff;
    font-family: 'Roboto', sans-serif;
}

.gradio-container {
    background-color: rgba(255, 255, 255, 0.1);
    border-radius: 15px;
    padding: 30px;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
}

.gradio-title {
    font-family: 'Poppins', sans-serif;
    font-size: 4em;
    text-align: center;
    color: #00acc1;
    margin-top: 100px;
}

.gradio-description {
    font-family: 'Lato', sans-serif;
    font-size: 1.5em;
    text-align: center;
    margin-bottom: 50px;
    color: #cfd8dc;
    max-width: 900px;
    margin-left: auto;
    margin-right: auto;
}

.gradio-inputs, .gradio-outputs {
    margin-top: 30px;
    border-top: 2px solid #00acc1;
    padding-top: 20px;
}

.gradio-button {
    background-color: #00acc1;
    color: #ffffff;
    font-size: 1.3em;
    padding: 12px 30px;
    border-radius: 8px;
    transition: box-shadow 0.3s ease;
    display: block;
    margin: 40px auto;
}

.gradio-button:hover {
    box-shadow: 0 0 20px rgba(0, 172, 193, 0.8);
}

.gradio-clear-button {
    background-color: #ff6f61;
    color: #ffffff;
    font-size: 1.1em;
    padding: 10px 25px;
    border-radius: 8px;
    transition: box-shadow 0.3s ease;
    display: block;
    margin: 20px auto;
}

.gradio-clear-button:hover {
    box-shadow: 0 0 20px rgba(255, 111, 97, 0.8);
}

.gradio-image-box {
    border: 2px solid #00acc1;
    border-radius: 12px;
    transition: border-color 0.3s ease;
}

.gradio-image-box:hover {
    border-color: #00acc1;
}

.gradio-textbox {
    font-size: 1.1em;
    padding: 15px;
    background-color: #283c86;
    border-radius: 10px;
    color: #ffffff;
    border: 1px solid #00acc1;
}

.gradio-outputs textarea {
    font-size: 1.2em;
    line-height: 1.6;
    background-color: #1a1f36;
    color: #ffffff;
    border: 1px solid #00acc1;
    padding: 20px;
    border-radius: 10px;
}
"""

# Launching Gradio Interface
with gr.Blocks(css=css) as demo:

    with gr.Column():
        gr.Markdown("""
        <div style="text-align: center;">
            <h1 style="font-size: 5em; color: #00acc1; font-family: 'Poppins', sans-serif;">
                Welcome to Skintelligence
            </h1>
            <p style="font-size: 1.8em; color: #cfd8dc; font-family: 'Roboto', sans-serif; max-width: 900px; margin: 0 auto;">
                The future of dermatology is here! Upload an image of any skin condition, ask your question, and let our cutting-edge AI analyze and provide a smart, intuitive diagnosis with a vivid explanation. Revolutionizing skin health, one scan at a time.
            </p>
        </div>
        """)
        start_button = gr.Button("Start Your Diagnosis")

    with gr.Row(visible=False) as interface_row:
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Skin Image", elem_id="gradio-image-box")
            question_input = gr.Textbox(lines=2, placeholder="Ask a question about the skin condition", label="Your Question", elem_id="gradio-textbox")
            submit_button = gr.Button("Get Diagnosis", elem_id="gradio-button")
            clear_button = gr.Button("Clear", elem_id="gradio-clear-button")

        with gr.Column():
            output_predicted = gr.Textbox(label="Skintelligence Predicted Answer", elem_id="gradio-outputs")
            output_vivid = gr.Textbox(label="Vivid Description", elem_id="gradio-outputs")

        submit_button.click(predict, inputs=[image_input, question_input], outputs=[output_predicted, output_vivid])

        clear_button.click(lambda: (None, "", "", ""), inputs=[], outputs=[image_input, question_input, output_predicted, output_vivid])

    start_button.click(lambda: gr.update(visible=False), outputs=[start_button])
    start_button.click(lambda: gr.update(visible=True), outputs=[interface_row])

demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://6eb069041aeceaa9cf.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


