In [1]:
# استيراد المكتبات اللازمة
# استيراد المكتبات اللازمة
import os
import torch
import numpy as np
from datasets import load_dataset
from PIL import Image
import requests
from io import BytesIO
from transformers import ViltProcessor, ViltForQuestionAnswering, AutoProcessor
from transformers import TrainingArguments, Trainer
import json
import pandas as pd
from dataclasses import dataclass
from typing import Dict, List, Optional, Union
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import random


In [2]:
# تعيين البذرة العشوائية لضمان إمكانية تكرار النتائج
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    
set_seed(42)

# تعيين الأجهزة
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:

# استخدام نموذج أكثر تقدمًا لتحسين الأداء
model_name = "dandelin/vilt-b32-mlm"  # نموذج أساسي أفضل

# تنزيل المعالج المناسب
processor = AutoProcessor.from_pretrained(model_name)


preprocessor_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/320 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/653 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [4]:
# تحميل بيانات JSON
json_path = "/kaggle/input/vizwiz/Annotations/Annotations/train.json"
image_folder = "/kaggle/input/vizwiz/train/train/"

In [5]:
# تحميل البيانات وتنظيفها
print("Loading dataset from JSON...")
try:
    train_dataset = load_dataset("json", data_files=json_path)["train"]
    print(f"Dataset loaded successfully with {len(train_dataset)} examples")
    
    # تنظيف البيانات - إزالة الأمثلة ذات الإجابات الفارغة أو unanswerable
    def is_valid_example(example):
        if "answers" not in example or not example["answers"]:
            return False
            
        if isinstance(example["answers"], list):
            for answer in example["answers"]:
                answer_text = answer.get("answer", "") if isinstance(answer, dict) else answer
                if answer_text and answer_text != "unanswerable":
                    return True
        return False
    
    train_dataset = train_dataset.filter(is_valid_example)
    print(f"Dataset cleaned: {len(train_dataset)} valid examples remaining")
    
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# تعزيز البيانات (Data Augmentation)
def augment_example(example):
    """تطبيق تقنيات تعزيز البيانات على الأمثلة"""
    # نسخة طبق الأصل من المثال
    return example

# تقسيم البيانات مع تحسين توزيع الفئات
print("Splitting dataset...")
train_val_split = train_dataset.train_test_split(test_size=0.15, seed=42)
train_data = train_val_split["train"]
val_data = train_val_split["test"]

# تحليل توزيع الإجابات
print("Analyzing answer distribution...")
answer_counts = {}
for example in train_data:
    if "answers" in example and example["answers"]:
        for answer in example["answers"]:
            answer_text = answer.get("answer", "") if isinstance(answer, dict) else answer
            if answer_text and answer_text != "unanswerable":
                answer_counts[answer_text] = answer_counts.get(answer_text, 0) + 1

# ترتيب الإجابات حسب تكرارها
sorted_answers = sorted(answer_counts.items(), key=lambda x: x[1], reverse=True)
print(f"Top 10 answers: {sorted_answers[:10]}")

# اختيار الإجابات الأكثر شيوعًا فقط (للحد من الفئات وتحسين الدقة)
min_answer_freq = 3  # الحد الأدنى للتكرار
answer_list = [answer for answer, count in sorted_answers if count >= min_answer_freq]
print(f"Using {len(answer_list)} answers that appear at least {min_answer_freq} times")

# إضافة فئة "أخرى" للإجابات النادرة
answer_list.append("other")

# إنشاء قاموس الإجابات
answer2id = {answer: idx for idx, answer in enumerate(answer_list)}
id2answer = {idx: answer for answer, idx in answer2id.items()}

# تحميل النموذج الأساسي
model = ViltForQuestionAnswering.from_pretrained(model_name)
model.config.id2label = id2answer
model.config.label2id = answer2id
# إعادة تهيئة المصنف بعدد الفئات الجديد
classifier_dropout = 0.1  # إضافة dropout لتقليل الـ overfitting
hidden_size = model.config.hidden_size
num_labels = len(answer2id)

Loading dataset from JSON...


Generating train split: 0 examples [00:00, ? examples/s]

Dataset loaded successfully with 20523 examples


Filter:   0%|          | 0/20523 [00:00<?, ? examples/s]

Dataset cleaned: 20484 valid examples remaining
Splitting dataset...
Analyzing answer distribution...
Top 10 answers: [('unsuitable', 21493), ('no', 4511), ('yes', 3959), ('white', 2063), ('grey', 1923), ('black', 1713), ('blue', 1562), ('red', 1011), ('pink', 721), ('brown', 703)]
Using 6523 answers that appear at least 3 times


pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of ViltForQuestionAnswering were not initialized from the model checkpoint at dandelin/vilt-b32-mlm and are newly initialized: ['classifier.0.bias', 'classifier.0.weight', 'classifier.1.bias', 'classifier.1.weight', 'classifier.3.bias', 'classifier.3.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# تحسين المصنف باستخدام طبقات إضافية
class EnhancedClassifier(nn.Module):
    def __init__(self, hidden_size, num_labels, dropout_prob=0.1):
        super().__init__()
        self.dense1 = nn.Linear(hidden_size, hidden_size)
        self.dropout1 = nn.Dropout(dropout_prob)
        self.dense2 = nn.Linear(hidden_size, hidden_size // 2)
        self.dropout2 = nn.Dropout(dropout_prob)
        self.classifier = nn.Linear(hidden_size // 2, num_labels)
        
    def forward(self, x):
        x = self.dropout1(F.gelu(self.dense1(x)))
        x = self.dropout2(F.gelu(self.dense2(x)))
        return self.classifier(x)

# تطبيق المصنف المحسن
model.classifier = EnhancedClassifier(hidden_size, num_labels, classifier_dropout)
model.to(device)

ViltForQuestionAnswering(
  (vilt): ViltModel(
    (embeddings): ViltEmbeddings(
      (text_embeddings): TextEmbeddings(
        (word_embeddings): Embedding(30522, 768)
        (position_embeddings): Embedding(40, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (patch_embeddings): ViltPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32))
      )
      (token_type_embeddings): Embedding(2, 768)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViltEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViltLayer(
          (attention): ViltAttention(
            (attention): ViltSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=76

In [7]:
# تحسين وظيفة تحميل الصور مع المعالجة المسبقة
def load_image(image_name):
    image_path = os.path.join(image_folder, image_name)
    if os.path.exists(image_path):
        try:
            img = Image.open(image_path).convert("RGB")
            
            # تطبيق تقنيات معالجة الصور المتقدمة
            img = img.resize((224, 224))
            
            # زيادة التباين قليلاً
            from PIL import ImageEnhance
            enhancer = ImageEnhance.Contrast(img)
            img = enhancer.enhance(1.1)
            
            return img
        except Exception as e:
            print(f"Error loading image {image_name}: {e}")
            return Image.new("RGB", (224, 224), color="white")
    else:
        print(f"⚠️ Warning: Image not found - {image_name}")
        return Image.new("RGB", (224, 224), color="white")


In [8]:
# تعريف معالج خاص محسن لتجميع البيانات
@dataclass
class CustomDataCollator:
    processor: any
    
    def __call__(self, features):
        if not features:
            return {}
            
        batch = {}
        
        # معالجة الحقول
        if "pixel_values" in features[0]:
            batch["pixel_values"] = torch.stack([f["pixel_values"] for f in features])
        
        for field in ["input_ids", "attention_mask", "token_type_ids"]:
            if field in features[0]:
                # استخدام padding أكثر كفاءة
                values = [f[field] for f in features]
                max_length = max(len(v) for v in values)
                
                padded_values = []
                for v in values:
                    padding = [0] * (max_length - len(v))
                    padded_values.append(v + padding)
                
                batch[field] = torch.tensor(padded_values)
        
        if "labels" in features[0]:
            batch["labels"] = torch.tensor([f["labels"] for f in features])
        
        return batch

# دالة معالجة بيانات محسنة
def preprocess_function(examples):
    valid_questions = []
    valid_images = []
    valid_labels = []
    
    for i in range(len(examples.get("question", []))):
        # التحقق من وجود البيانات المطلوبة
        if "image" not in examples or i >= len(examples["image"]) or "answers" not in examples or i >= len(examples["answers"]):
            continue
            
        image_name = examples["image"][i]
        img = load_image(image_name)
        
        # معالجة الإجابات
        has_valid_answer = False
        if isinstance(examples["answers"][i], list) and examples["answers"][i]:
            for answer in examples["answers"][i]:
                answer_text = answer.get("answer", "") if isinstance(answer, dict) else answer
                if answer_text and answer_text != "unanswerable":
                    if answer_text in answer2id:
                        valid_labels.append(answer2id[answer_text])
                    else:
                        # استخدام فئة "أخرى" للإجابات النادرة
                        valid_labels.append(answer2id["other"])
                    has_valid_answer = True
                    break
        
        if has_valid_answer:
            valid_questions.append(examples["question"][i])
            valid_images.append(img)
    
    # التحقق من وجود بيانات صالحة
    if not valid_questions:
        return {"input_ids": [], "attention_mask": [], "pixel_values": [], "labels": []}
    
    # معالجة البيانات
    try:
        encoding = processor(
            images=valid_images,
            text=valid_questions,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        
        # إضافة التسميات
        encoding["labels"] = valid_labels
        
        # تحويل التنسورات إلى قوائم
        result = {k: v.tolist() if isinstance(v, torch.Tensor) else v for k, v in encoding.items()}
        return result
    except Exception as e:
        print(f"Error in preprocessing: {e}")
        return {"input_ids": [], "attention_mask": [], "pixel_values": [], "labels": []}

# استخدام المزيد من البيانات للتدريب مع مراعاة حدود الذاكرة
print("Preparing datasets for training...")
train_sample_size = min(8000, len(train_data))  # زيادة حجم البيانات للتدريب
eval_sample_size = min(1200, len(val_data))

# اختيار العينات بطريقة موزونة لتحسين تمثيل الفئات النادرة
weighted_train_indices = []
answer_probabilities = {}

# حساب احتمالية اختيار كل إجابة بشكل عكسي مع تكرارها
total_answers = sum(answer_counts.values())
for answer, count in answer_counts.items():
    answer_probabilities[answer] = 1.0 / (count / total_answers)

# تطبيع الاحتمالات
max_prob = max(answer_probabilities.values())
for answer in answer_probabilities:
    answer_probabilities[answer] /= max_prob

# إنشاء قائمة بالمؤشرات الموزونة
for idx, example in enumerate(train_data):
    if idx >= len(train_data):
        break
    
    if "answers" in example and example["answers"]:
        for answer in example["answers"]:
            answer_text = answer.get("answer", "") if isinstance(answer, dict) else answer
            if answer_text in answer_probabilities:
                # إضافة المؤشر بناءً على الاحتمالية
                if random.random() < answer_probabilities[answer_text]:
                    weighted_train_indices.append(idx)
                    break

# التأكد من أن لدينا ما يكفي من البيانات
if len(weighted_train_indices) < train_sample_size:
    # إضافة مؤشرات عشوائية إضافية إذا لزم الأمر
    additional_indices = random.sample(
        [i for i in range(len(train_data)) if i not in weighted_train_indices],
        min(train_sample_size - len(weighted_train_indices), len(train_data) - len(weighted_train_indices))
    )
    weighted_train_indices.extend(additional_indices)

# التأكد من عدم تجاوز الحد الأقصى
weighted_train_indices = weighted_train_indices[:train_sample_size]
eval_indices = random.sample(range(len(val_data)), eval_sample_size)

small_train_dataset = train_data.select(weighted_train_indices)
small_eval_dataset = val_data.select(eval_indices)

print(f"Selected {len(small_train_dataset)} examples for training")
print(f"Selected {len(small_eval_dataset)} examples for evaluation")


Preparing datasets for training...
Selected 8000 examples for training
Selected 1200 examples for evaluation


In [9]:

print("Preprocessing training data...")
train_dataset = small_train_dataset.map(
    preprocess_function,
    batched=True,
    batch_size=4,  # تحسين حجم الدفعة
    remove_columns=train_data.column_names
)



Preprocessing training data...


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

In [10]:
print("Preprocessing evaluation data...")
eval_dataset = small_eval_dataset.map(
    preprocess_function,
    batched=True,
    batch_size=4,
    remove_columns=val_data.column_names
)

Preprocessing evaluation data...


Map:   0%|          | 0/1200 [00:00<?, ? examples/s]

In [11]:

# تنظيف البيانات
train_dataset = train_dataset.filter(lambda x: len(x["input_ids"]) > 0)
eval_dataset = eval_dataset.filter(lambda x: len(x["input_ids"]) > 0)

print(f"Preprocessed training examples: {len(train_dataset)}")
print(f"Preprocessed evaluation examples: {len(eval_dataset)}")

Filter:   0%|          | 0/8000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1200 [00:00<?, ? examples/s]

Preprocessed training examples: 8000
Preprocessed evaluation examples: 1200


In [12]:
!pip install evaluate
import evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [13]:

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred

    # ✅ تحويل القيم الاحتمالية إلى التصنيفات المتوقعة
    predicted_classes = np.argmax(logits, axis=1)

    # ✅ تحويل `one-hot encoding` إلى أرقام صحيحة
    true_classes = np.argmax(labels, axis=1)

    # ✅ حساب الدقة
    acc = accuracy.compute(predictions=predicted_classes, references=true_classes)

    return {"accuracy": acc["accuracy"]}

class CustomDataCollator:
    def __init__(self, processor, num_classes=6524):  # حدد عدد الفئات
        self.processor = processor
        self.num_classes = num_classes

    def __call__(self, features):
        batch = {}

        batch["pixel_values"] = torch.stack(
            [torch.tensor(f["pixel_values"]) if isinstance(f["pixel_values"], list) else f["pixel_values"]
             for f in features]
        )

        batch["input_ids"] = torch.stack(
            [torch.tensor(f["input_ids"]) if isinstance(f["input_ids"], list) else f["input_ids"]
             for f in features]
        )

        batch["attention_mask"] = torch.stack(
            [torch.tensor(f["attention_mask"]) if isinstance(f["attention_mask"], list) else f["attention_mask"]
             for f in features]
        )

        # ✅ تحويل `labels` إلى One-Hot
        labels = [f["labels"] for f in features]
        labels_tensor = torch.zeros((len(labels), self.num_classes))  # إنشاء مصفوفة أصفار
        labels_tensor.scatter_(1, torch.tensor(labels).unsqueeze(1), 1)  # تحويل إلى One-Hot

        batch["labels"] = labels_tensor

        return batch



# إنشاء معالج تجميع البيانات
data_collator = CustomDataCollator(processor=processor)

# تعيين معلمات التدريب
training_args = TrainingArguments(
    output_dir="./vqa_finetuned_model",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    evaluation_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    num_train_epochs=3,
    learning_rate=3e-5,
    weight_decay=0.01,
    fp16=True,
    gradient_accumulation_steps=4,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    report_to="none",
    remove_unused_columns=False,
    warmup_steps=200,
    dataloader_num_workers=2,
    lr_scheduler_type="cosine",
)

class MixupTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mixup_alpha = 0.2  # معامل المزج
        
    def training_step(self, model, inputs, num_items_in_batch):
        """تطبيق تقنية المزج على دفعة التدريب"""
        if self.mixup_alpha > 0 and "pixel_values" in inputs and "labels" in inputs:
            # تطبيق المزج بين الصور
            batch_size = inputs["pixel_values"].size(0)
            if batch_size > 1:  # نحتاج على الأقل صورتين للمزج
                # توليد معاملات المزج
                lam = np.random.beta(self.mixup_alpha, self.mixup_alpha, batch_size)
                lam = torch.from_numpy(lam).float().to(inputs["pixel_values"].device)
                lam = lam.view(-1, 1, 1, 1)
                
                # تشويش الفهارس
                index = torch.randperm(batch_size).to(inputs["pixel_values"].device)
                
                # مزج الصور
                mixed_pixel_values = lam * inputs["pixel_values"] + (1 - lam) * inputs["pixel_values"][index, :]
                inputs["pixel_values"] = mixed_pixel_values
                
                # لا نقوم بمزج التسميات، بل نستخدم التسمية الأصلية (هذا يكفي لمهمة VQA)
        
        # التأكد من وجود input_ids أو inputs_embeds
        if "input_ids" not in inputs and "inputs_embeds" not in inputs:
            raise ValueError("يجب توفير input_ids أو inputs_embeds للنموذج")
            
        # استدعاء دالة training_step الأصلية وتمرير جميع الوسائط المطلوبة
        return super().training_step(model, inputs, num_items_in_batch)



# إعداد المدرب
trainer = MixupTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)

# ✅ تجميد الطبقات السفلى لتسريع التدريب
for name, param in model.vilt.named_parameters():
    if "encoder.layer.0" in name or "encoder.layer.1" in name or "encoder.layer.2" in name:
        param.requires_grad = False

# عرض المعلمات القابلة للتدريب
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Total parameters: 114,992,636
Trainable parameters: 79,553,276 (69.18%)




In [14]:
print("training ...")
trainer.train()

training ...




Step,Training Loss,Validation Loss,Accuracy
100,4348.1816,4297.930176,0.0
200,491.3007,254.207611,0.0
300,8.4464,6.272337,0.3475
400,7.5885,5.873587,0.3475
500,7.6896,5.678341,0.3475
600,7.0552,5.717703,0.3475
700,7.6287,5.682053,0.3475




TrainOutput(global_step=750, training_loss=953.8862706197103, metrics={'train_runtime': 6153.0314, 'train_samples_per_second': 3.901, 'train_steps_per_second': 0.122, 'total_flos': 527143380480000.0, 'train_loss': 953.8862706197103, 'epoch': 3.0})

In [15]:
# حفظ النموذج النهائي
print("Saving final model...")
model.save_pretrained("./vqa_finetuned_model_final")
processor.save_pretrained("./vqa_finetuned_model_final")
print("Model saved successfully!")

Saving final model...
Model saved successfully!


In [16]:
# كود محسن للتحقق من أداء النموذج
def test_model(image_name, question):
    img = load_image(image_name)
    inputs = processor(img, question, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model(**inputs)
    
    # الحصول على أفضل 3 إجابات محتملة
    logits = outputs.logits
    probs = F.softmax(logits, dim=-1)[0]
    top_probs, top_indices = probs.topk(3)
    
    results = []
    for prob, idx in zip(top_probs.cpu().numpy(), top_indices.cpu().numpy()):
        answer = model.config.id2label.get(idx, "غير معروف")
        results.append((answer, prob * 100))
    
    print(f"سؤال: {question}")
    print(f"الإجابة الأكثر احتمالا: {results[0][0]} (الثقة: {results[0][1]:.2f}%)")
    print(f"إجابات أخرى محتملة: {results[1][0]} ({results[1][1]:.2f}%), {results[2][0]} ({results[2][1]:.2f}%)")
    
    return results

In [19]:

# دالة للتحقق من دقة النموذج على مجموعة البيانات
def evaluate_model_accuracy(dataset, num_samples=100):
    if len(dataset) == 0:
        print("لا توجد بيانات للتقييم")
        return 0
    
    # اختيار عينة عشوائية للتقييم
    indices = random.sample(range(len(dataset)), min(num_samples, len(dataset)))
    correct = 0
    
    for idx in indices:
        example = dataset[idx]
        if "image" not in example or "question" not in example or "answers" not in example:
            continue
            
        img = load_image(example["image"])
        question = example["question"]
        
        # الحصول على الإجابة الصحيحة
        correct_answer = None
        if isinstance(example["answers"], list) and example["answers"]:
            answer = example["answers"][0]
            correct_answer = answer.get("answer", "") if isinstance(answer, dict) else answer
        
        if not correct_answer or correct_answer == "unanswerable":
            continue
            
        # التنبؤ بالإجابة
        inputs = processor(img, question, return_tensors="pt", truncation=True, padding="max_length", max_length=40)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        
        with torch.no_grad():
            outputs = model(**inputs)
        
        # الحصول على الإجابة المتوقعة
        logits = outputs.logits
        predicted_answer_id = logits.argmax(-1).item()
        predicted_answer = model.config.id2label.get(predicted_answer_id, "غير معروف")
        
        # مقارنة الإجابة المتوقعة بالصحيحة
        if predicted_answer.lower() == correct_answer.lower():
            correct += 1
    
    accuracy = correct / len(indices) * 100
    print(f"دقة النموذج على {len(indices)} عينة: {accuracy:.2f}%")
    return accuracy

In [20]:
# تقييم النموذج بعد التدريب
print("Evaluating final model...")
final_accuracy = evaluate_model_accuracy(val_data, num_samples=200)
print(f"Final model accuracy: {final_accuracy:.2f}%")

Evaluating final model...
دقة النموذج على 200 عينة: 4.00%
Final model accuracy: 4.00%


In [21]:
for i in range(5):
    sample = val_data[i]
    print(f"🔹 السؤال: {sample['question']}")
    print(f"✅ الإجابة الصحيحة: {sample['answers']}")
    print("-" * 50)


🔹 السؤال: what number is the needle pointing to?
✅ الإجابة الصحيحة: [{'answer': 'illegible', 'answer_confidence': 'no'}, {'answer': 'unsuitable', 'answer_confidence': 'yes'}, {'answer': 'unsuitable', 'answer_confidence': 'yes'}, {'answer': 'unsuitable', 'answer_confidence': 'yes'}, {'answer': 'unsuitable', 'answer_confidence': 'yes'}, {'answer': 'unsuitable', 'answer_confidence': 'yes'}, {'answer': 'unsuitable', 'answer_confidence': 'yes'}, {'answer': 'blurry', 'answer_confidence': 'no'}, {'answer': 'unsuitable', 'answer_confidence': 'yes'}, {'answer': 'unsuitable', 'answer_confidence': 'no'}]
--------------------------------------------------
🔹 السؤال: What color is the keyboard?
✅ الإجابة الصحيحة: [{'answer': 'black', 'answer_confidence': 'yes'}, {'answer': 'black', 'answer_confidence': 'yes'}, {'answer': 'black', 'answer_confidence': 'yes'}, {'answer': 'black', 'answer_confidence': 'yes'}, {'answer': 'black', 'answer_confidence': 'yes'}, {'answer': 'black', 'answer_confidence': 'yes

In [None]:
!zip -r vqa_finetuned_model_final.zip ./vqa_finetuned_model_final


In [None]:
from IPython.display import FileLink
FileLink(r'vqa_finetuned_model_final.zip')