In [None]:
import torch
from PIL import Image
import requests
from lavis.models import load_model_and_preprocess
# Remote sample image. Kept for reference; the download variant below is disabled.
img_url = 'https://storage.googleapis.com/sfr-vision-language-research/LAVIS/assets/merlion.png'
#raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

# Tunable inputs gathered in one place instead of being buried in the calls below.
# NOTE(review): IMAGE_PATH is an absolute, machine-specific path — point it at your own file.
IMAGE_PATH = "C:\\Users\\dasdi\\Desktop\\5.jpeg"
PREVIEW_SIZE = (596, 437)

# Load the image as RGB and show a resized preview in the notebook output.
raw_image = Image.open(IMAGE_PATH).convert('RGB')
display(raw_image.resize(PREVIEW_SIZE))

In [None]:
# Always a torch.device (previously a torch.device on CUDA but a bare "cpu" string otherwise),
# so downstream code can rely on a single type. Both forms are accepted by `.to()` and
# `map_location`, so existing uses keep working.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
import torch
import os
from torch.cuda.amp import autocast, GradScaler
from accelerate import Accelerator

# Set the allocator option before touching any torch.cuda API in this cell.
# NOTE(review): the setting is presumably only read when the CUDA caching allocator is first
# initialised, so it may have no effect if an earlier cell already used the GPU — confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'

torch.cuda.empty_cache()

# cpu=True asks Accelerate to run on the CPU.
accelerator = Accelerator(cpu=True)

def clear_memory():
    """Release cached CUDA memory and wait for pending GPU work to finish.

    Does nothing when CUDA is unavailable. The unguarded ``torch.cuda.synchronize()``
    call used to raise on CPU-only machines, which this notebook otherwise supports.

    Returns:
        None
    """
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

clear_memory()

# NOTE(review): `scaler` is not referenced again in the visible cells — confirm it is needed.
scaler = GradScaler()

# Load the BLIP-2 (Flan-T5 XL, COCO caption) model and its visual preprocessors on the CPU,
# then hand the model to Accelerate.
try:
    with autocast():
        model, vis_processors, _ = load_model_and_preprocess(
            name="blip2_t5", model_type="caption_coco_flant5xl", is_eval=True, device="cpu"  # Load on CPU first
        )

    # Only enabled when the model object exposes this hook.
    if hasattr(model, 'gradient_checkpointing_enable'):
        model.gradient_checkpointing_enable()
    model = accelerator.prepare(model)

except RuntimeError as e:
    print(f"Error during model loading: {e}")
    clear_memory()
    # Re-raise: swallowing the error would leave `model` / `vis_processors` undefined and
    # make every later cell fail with a confusing NameError instead of the real cause.
    raise

In [None]:
# Show the names of all entries in the loaded model's state dict.
print(model.state_dict().keys())

In [None]:
import pickle
def save_model(model, vis_processors, model_path, components_path):
    """Write a model's weights and its visual preprocessors to disk.

    Args:
        model: Object exposing ``state_dict()``; the returned mapping is saved with ``torch.save``.
        vis_processors: Preprocessing object(s) to pickle alongside the weights.
        model_path: Destination file for the state dict.
        components_path: Destination file for the pickled ``{'vis_processors': ...}`` dict.

    Returns:
        None
    """
    weights = model.state_dict()
    torch.save(weights, model_path)

    with open(components_path, "wb") as handle:
        pickle.dump({'vis_processors': vis_processors}, handle)
# Persist the current model weights and the `vis_processors` object to the two files named below.
save_model(model, vis_processors, "model_state_dict_caption_coco_flant5xl.pth", "components_caption_coco_flant5xl.pkl")


In [None]:
# Read the checkpoint file back (tensors mapped onto `device`) and copy it into `model`.
model.load_state_dict(torch.load("model_state_dict_caption_coco_flant5xl.pth", map_location=device))

In [None]:
import torch
import os
from torch.cuda.amp import autocast, GradScaler
from accelerate import Accelerator
from lavis.models.blip2_models.blip2_t5 import Blip2T5
import pickle

# Export the allocator option first, ahead of any torch.cuda call made by this cell.
# NOTE(review): this is presumably read once when the CUDA caching allocator starts up; if
# an earlier cell has already used the GPU it may be ignored — confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'

torch.cuda.empty_cache()

# cpu=True asks Accelerate to run on the CPU.
accelerator = Accelerator(cpu=True)

def clear_memory():
    """Empty the CUDA cache and synchronize the device, if CUDA is present.

    On a machine without CUDA this is a no-op; calling ``torch.cuda.synchronize()``
    unconditionally would raise there.

    Returns:
        None
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

# Run the cache-clearing helper defined just above.
clear_memory()

# NOTE(review): `scaler` is not referenced again in the visible cells — confirm it is needed.
scaler = GradScaler()

def quantize_model(model):
    """
    Apply dynamic int8 quantization to the ``torch.nn.Linear`` layers of ``model``.

    The model passed in is the one that gets quantized. Previously the argument was
    ignored: a fresh pretrained model was loaded, a checkpoint was read from a hard-coded
    path, and a failed load was swallowed before carrying on.

    Args:
        model: A float ``torch.nn.Module``. It is not modified; a copy is quantized.

    Returns:
        A new module, in eval mode and on the CPU, whose Linear layers have been replaced
        by dynamically quantized (qint8) equivalents.
    """
    import copy  # local import so this cell does not depend on another cell's imports

    # Quantize a CPU copy: PyTorch's dynamic quantization targets CPU backends, and copying
    # keeps the caller's model (and whatever device it lives on) untouched.
    float_model = copy.deepcopy(model).to("cpu")
    float_model.eval()

    # inplace=True avoids a second deep copy inside quantize_dynamic.
    quantized = torch.quantization.quantize_dynamic(
        float_model, {torch.nn.Linear}, dtype=torch.qint8, inplace=True
    )

    # The result intentionally stays on the CPU rather than being moved to a global `device`.
    quantized.eval()
    return quantized
def save_model(model, vis_processors, model_path, components_path):
    """Save ``model.state_dict()`` and a pickled components dict.

    Args:
        model: Object whose ``state_dict()`` is written to ``model_path`` via ``torch.save``.
        vis_processors: Stored under the ``'vis_processors'`` key of the pickled dict.
        model_path: Output path for the weights.
        components_path: Output path for the pickle file.

    Returns:
        None
    """
    # Weights first, then the auxiliary components.
    torch.save(model.state_dict(), model_path)

    bundle = dict(vis_processors=vis_processors)
    with open(components_path, "wb") as out_file:
        pickle.dump(bundle, out_file)
# Run quantize_model, then save its result together with the `vis_processors` object
# under the "_quantized" file names.
quantized_model = quantize_model(model)
save_model(quantized_model, vis_processors, "model_state_dict_caption_coco_flant5xl_quantized.pth", "components_caption_coco_flant5xl_quantized.pkl")


In [None]:
import torch
from torchvision import transforms
from PIL import Image
from lavis.models.blip2_models.blip2_t5 import Blip2T5

def load_model(model_path, components_path, device):
    """Rebuild the BLIP-2 model, load saved weights into it, and read the pickled components.

    Args:
        model_path: File containing a state dict readable by ``torch.load``.
        components_path: Pickle file to read (presumably the one written by ``save_model``).
        device: Target passed to ``map_location`` and ``model.to``.

    Returns:
        Tuple ``(model, components)``: the model in eval mode on ``device`` and the
        unpickled object.

    Raises:
        RuntimeError: Re-raised if constructing or preparing the model fails.
    """
    try:
        with autocast():
            # Only the model is needed here; the preprocessors come from the pickle file.
            model, _, _ = load_model_and_preprocess(
                name="blip2_t5", model_type="caption_coco_flant5xl", is_eval=True, device="cpu"  # Load on CPU first
            )
        model = accelerator.prepare(model)
    except RuntimeError as e:
        print(f"Error during model loading: {e}")
        clear_memory()
        # Re-raise: continuing would hit an UnboundLocalError on `model` below and hide
        # the real failure.
        raise
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    # SECURITY: pickle.load can execute arbitrary code — only open files you created yourself.
    with open(components_path, 'rb') as f:
        components = pickle.load(f)
    return model, components


In [None]:
# `state_dict` was never assigned anywhere in the notebook, so printing it raised NameError.
# Read the checkpoint saved earlier (onto the CPU, inspection only) and list its keys.
state_dict = torch.load("model_state_dict_caption_coco_flant5xl.pth", map_location="cpu")
print(state_dict.keys())


In [None]:
# Rebind `model` to a Blip2T5 built with img_size=224; every other constructor argument
# is left at its default.
model = Blip2T5(img_size=224)  # Ensure the img_size matches what was used during training

In [None]:
import torch.nn as nn
# NOTE(review): this rebinds `model` to a bare nn.Module, discarding whatever model the
# name held before — looks like a leftover experiment; confirm before keeping this cell.
model = nn.Module()

In [None]:
# Read the saved checkpoint (tensors mapped onto `device`) and load it into whatever
# `model` currently refers to.
model.load_state_dict(torch.load("model_state_dict_caption_coco_flant5xl.pth", map_location=device))

In [None]:
# Write the current `model` weights to a separate "base" checkpoint file.
base_model_state_dict_path = "base_model_state_dict.pth"
torch.save(model.state_dict(), base_model_state_dict_path)

In [None]:
from lavis.models.blip2_models import Blip2T5

In [None]:
# NOTE(review): `blip2` is not defined or imported in any visible cell, so this call
# presumably raises NameError on a fresh kernel — confirm the intended constructor.
model = blip2(pretrained=False, image_size=384, vision_width=64, vision_layers=12, vision_patch_size=16,
                  embed_dim=768, depth=12, num_heads=12, caption_max_length=30, text_vocab_size=10000)


In [None]:
from lavis.models.blip2_models.blip2_t5 import Blip2T5

In [None]:
def initialize_quantized_blip2_model():
    """Build a ``Blip2T5`` instance from a fixed configuration and put it in eval mode.

    No weights are loaded from a checkpoint here and no quantization is applied; the
    function only constructs the module with the keyword arguments listed below.

    NOTE(review): confirm these settings (e.g. ``img_size``, ``vit_precision``) match the
    checkpoint that will later be loaded into the returned model.

    Returns:
        The constructed ``Blip2T5`` module in evaluation mode.
    """
    blip2_config = dict(
        vit_model="eva_clip_g",          # vision encoder variant
        img_size=384,                    # input image size
        drop_path_rate=0,                # drop-path rate
        use_grad_checkpoint=False,
        vit_precision="fp16",            # precision setting for the vision encoder
        freeze_vit=True,                 # keep the vision encoder frozen
        num_query_token=32,              # number of query tokens
        t5_model="google/flan-t5-xl",    # language model
        prompt="",                       # empty prompt
        max_txt_len=32,                  # maximum text length
        apply_lemmatizer=False,          # no lemmatizer post-processing
    )
    blip2_model = Blip2T5(**blip2_config)
    blip2_model.eval()
    return blip2_model