In [None]:
# Current limitations:
# 1. Text input is capped at a fixed 77 tokens, a limit imposed by CLIP.

In [2]:
from zipfile import ZipFile
import os
import torch
from PIL import Image
import open_clip
from docx import Document
from io import BytesIO
from llama_index.readers.file.docs import DocxReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pymilvus import MilvusClient, FieldSchema, CollectionSchema, DataType, Collection

In [12]:
# 1. Extract text
def extract_text_with_docxreader(docx_path):
    """Read a .docx file and return its text as LangChain-format documents.

    The file is loaded with ``DocxReader`` and every document it yields is
    converted through ``to_langchain_format()``.

    Args:
        docx_path: Path of the .docx file to read.

    Returns:
        A list holding one converted document per document produced by the
        reader, in the reader's order.
    """
    loaded = DocxReader().load_data(docx_path)
    return [document.to_langchain_format() for document in loaded]

# 2. Extract images
def extract_images_as_pil(docx_path, output_folder="images"):
    """Return the images embedded in a .docx file as PIL image objects.

    A .docx file is a zip archive; this reads every entry stored under
    ``word/media/`` and opens it with ``Image.open`` from an in-memory buffer.
    Entries that are directories, or that ``Image.open`` rejects with an
    ``OSError``, are skipped with a warning so that one unreadable entry does
    not abort the whole extraction.

    Args:
        docx_path: Path of the .docx file to read.
        output_folder: Unused. Kept so existing callers that pass it keep
            working; nothing is written to disk.

    Returns:
        A list of opened PIL images, in archive order. Empty when the archive
        has no readable media entries.
    """
    import warnings  # local import: only needed to report skipped entries

    images = []
    with ZipFile(docx_path, 'r') as docx_zip:
        media_files = [
            name
            for name in docx_zip.namelist()
            # A trailing "/" marks a directory entry, which has no image bytes.
            if name.startswith("word/media/") and not name.endswith("/")
        ]
        for media_file in media_files:
            image_data = docx_zip.read(media_file)
            try:
                image = Image.open(BytesIO(image_data))
            except OSError as exc:
                warnings.warn(f"Skipping unreadable media entry {media_file!r}: {exc}")
                continue
            images.append(image)
    return images

In [13]:
# Input document whose text and embedded images are extracted below.
# NOTE(review): both paths are absolute and machine-specific; consider deriving
# them from a configurable base directory.
docx_path = "/home/longquan/project/learning/LLM/docs/Spring Bulbs.docx"
# NOTE(review): output_dir is not referenced by any other cell visible here.
output_dir = "/home/longquan/project/learning/LLM/images"

In [14]:
# Pull the text documents and the embedded images out of the same .docx file.
docs = extract_text_with_docxreader(docx_path)
images = extract_images_as_pil(docx_path)

In [15]:
# Preview the text of the first extracted document.
docs[0].page_content

"It's Time To Plant Spring Bulbs\n\n\n\nSpectacular spring bulbs will brighten up your garden with their vibrant colour and delightful fragrance. Plant now in autumn to enjoy beautiful blooms once spring arrives. With a huge range of bulbs to choose from, you can create stunning displays in garden beds, borders, or pots.\n\nPlanting\n\nThe best time to plant spring bulbs is in autumn, but specific planting times vary depending on the variety. Check out our top varieties below for detailed tips, or ask a friendly team member instore if you’re unsure. Spring bulbs grow well in both garden beds and pots. When planting in the ground, ensure the soil is well-draining and nutrient rich. Mix in Kings Compost and pumice sand at planting time, or use a specialised mix like Tui Bulb Mix for best results. If your garden beds tend to get soggy during wetter months, plant your bulbs into a slight mound to improve drainage. For pots, choose a container large enough for the bulbs to grow into, and en

In [16]:
# Inspect the metadata attached to the first extracted document.
docs[0].metadata

{'file_name': 'Spring Bulbs.docx'}

In [17]:
# Show the list of PIL images extracted from the document.
images

[<PIL.PngImagePlugin.PngImageFile image mode=RGB size=749x422>]

In [18]:
# Splitter used to cut the extracted documents into overlapping chunks
# (chunk_size=1024, chunk_overlap=100; presumably counted in characters,
# assuming the splitter's default length function).
textSplitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)

In [19]:
# Split the extracted documents into chunks, then display them.
text_chunks = textSplitter.split_documents(docs)
text_chunks

[Document(metadata={'file_name': 'Spring Bulbs.docx'}, page_content="It's Time To Plant Spring Bulbs\n\n\n\nSpectacular spring bulbs will brighten up your garden with their vibrant colour and delightful fragrance. Plant now in autumn to enjoy beautiful blooms once spring arrives. With a huge range of bulbs to choose from, you can create stunning displays in garden beds, borders, or pots.\n\nPlanting"),
 Document(metadata={'file_name': 'Spring Bulbs.docx'}, page_content='Planting\n\nThe best time to plant spring bulbs is in autumn, but specific planting times vary depending on the variety. Check out our top varieties below for detailed tips, or ask a friendly team member instore if you’re unsure. Spring bulbs grow well in both garden beds and pots. When planting in the ground, ensure the soil is well-draining and nutrient rich. Mix in Kings Compost and pumice sand at planting time, or use a specialised mix like Tui Bulb Mix for best results. If your garden beds tend to get soggy during 

In [20]:
# open_clip.list_pretrained()
# Build the 'ViT-B-32' model and load its weights from a local checkpoint file.
# The call returns three values: the model, a middle value that is discarded
# here, and `preprocess`, which later cells apply to PIL images before encoding.
# NOTE(review): the checkpoint path is absolute and machine-specific.
model, _, preprocess = open_clip.create_model_and_transforms(
    'ViT-B-32', 
    pretrained='/home/longquan/model/CLIP-ViT-B-32-laion2B-s34B-b79K/open_clip_pytorch_model.bin', 
    load_weights_only=True
)

In [21]:
# Switch the model to evaluation mode before encoding anything.
model.eval()  # model in train mode by default, impacts some models with BatchNorm or stochastic depth active
# Tokenizer looked up by the same 'ViT-B-32' name that was used to build the model.
tokenizer = open_clip.get_tokenizer('ViT-B-32')

In [22]:
def pil_2_vec(image_pil):
    """Encode one PIL image into an L2-normalised CLIP embedding.

    Uses the notebook-level ``model`` and ``preprocess``. The preprocessed
    image is moved to the device the model lives on, so the function works
    whether the model is on CPU or GPU. Mixed precision is enabled only when
    the model is on a CUDA device.

    Args:
        image_pil: Image accepted by ``preprocess`` (a PIL image in this notebook).

    Returns:
        A float32 tensor with one row (batch dimension of 1) whose last
        dimension has unit L2 norm, located on the model's device.
    """
    device = next(model.parameters()).device
    use_amp = device.type == "cuda"
    image = preprocess(image_pil).unsqueeze(0).to(device)
    # A disabled CPU autocast context is a no-op; it keeps a single code path.
    with torch.no_grad(), torch.autocast("cuda" if use_amp else "cpu", enabled=use_amp):
        image_features = model.encode_image(image)
    # Normalise in float32: autocast can yield half precision, which is lossy
    # for the norm.
    image_features = image_features.float()
    return image_features / image_features.norm(dim=-1, keepdim=True)

In [23]:
# Embed every extracted image and stack the per-image rows along dim 0.
image_tensor = torch.cat(tuple(map(pil_2_vec, images)), dim=0)
image_tensor.shape

torch.Size([1, 512])

In [24]:
# Plain strings of every chunk, in chunk order, ready for the tokenizer.
text = [chunk.page_content for chunk in text_chunks]
text

["It's Time To Plant Spring Bulbs\n\n\n\nSpectacular spring bulbs will brighten up your garden with their vibrant colour and delightful fragrance. Plant now in autumn to enjoy beautiful blooms once spring arrives. With a huge range of bulbs to choose from, you can create stunning displays in garden beds, borders, or pots.\n\nPlanting",
 'Planting\n\nThe best time to plant spring bulbs is in autumn, but specific planting times vary depending on the variety. Check out our top varieties below for detailed tips, or ask a friendly team member instore if you’re unsure. Spring bulbs grow well in both garden beds and pots. When planting in the ground, ensure the soil is well-draining and nutrient rich. Mix in Kings Compost and pumice sand at planting time, or use a specialised mix like Tui Bulb Mix for best results. If your garden beds tend to get soggy during wetter months, plant your bulbs into a slight mound to improve drainage. For pots, choose a container large enough for the bulbs to gro

In [25]:
# Tokenize the chunk strings. The recorded shape is (2, 77): one row per chunk,
# 77 tokens wide, which is the CLIP limit noted at the top of the notebook.
text_token = tokenizer(text)
text_token.shape

torch.Size([2, 77])

In [26]:
def text_2_vec(text):
    """Encode tokenised text into L2-normalised CLIP embeddings.

    Uses the notebook-level ``model``. The tokens are moved to the device the
    model lives on, so the function works whether the model is on CPU or GPU.
    Mixed precision is enabled only when the model is on a CUDA device.

    Args:
        text: Token tensor as produced by the notebook's ``tokenizer``
            (one row per input string), not raw strings.

    Returns:
        A float32 tensor with one row per input row whose last dimension has
        unit L2 norm, located on the model's device.
    """
    device = next(model.parameters()).device
    use_amp = device.type == "cuda"
    tokens = text.to(device)
    # A disabled CPU autocast context is a no-op; it keeps a single code path.
    with torch.no_grad(), torch.autocast("cuda" if use_amp else "cpu", enabled=use_amp):
        text_features = model.encode_text(tokens)
    # Normalise in float32: autocast can yield half precision, which is lossy
    # for the norm.
    text_features = text_features.float()
    return text_features / text_features.norm(dim=-1, keepdim=True)

In [42]:
# Embed the tokenised chunks; the recorded shape is (2, 512), one row per chunk.
text_tensor = text_2_vec(text_token)
text_tensor.shape

torch.Size([2, 512])

In [None]:
# Keyword arguments for the Milvus client: a database file in the working directory.
connection_args = {"uri": "./milvus_demo.db"}

In [None]:
# CLIP embeddings as plain Python lists (512 values per row in the recorded run).
clip_text_vector = text_2_vec(text_token).tolist()  # List[List[float]], one row per text chunk
clip_image_vector = image_tensor.tolist()  # List[List[float]], one row per image

# Flat list of rows: text-chunk vectors first, then image vectors.
vectors = clip_text_vector + clip_image_vector  # List[List[float]]

In [3]:
# Open a Milvus client with the connection arguments (a database file path).
connection_args = {'uri': './milvus_demo.db'}

vector_store = MilvusClient(**connection_args)

In [5]:
# Name of the collection that receives both text and image vectors.
collection_name = "clip_collection"

In [7]:
# Create the collection only when it does not exist yet.
if not vector_store.has_collection(collection_name):
    vector_store.create_collection(
        collection_name=collection_name,
        dimension=512,  # width of the CLIP vectors produced in this notebook
        vector_field_name="vector",
        enable_dynamic_field=True,
        auto_id=True,
    )

In [43]:
# 准备数据
clip_image_vector = image_tensor.squeeze(0).tolist()  # List[float]
data_image = {
    "vector": clip_image_vector,  # 向量字段
    "type": "text",              # 类型字段
    "filename": "cat.jpg",    # 文件名字段(动态字段)
}

vector_store.insert(collection_name=collection_name, data=data_image)

{'insert_count': 1, 'ids': [457250782843502598], 'cost': 0}

In [48]:
# Prepare one row per text chunk and insert them in a single call. Each tensor
# row is converted to a plain float list, and the file name is taken from the
# chunk's own metadata.
clip_text_vector = text_tensor.tolist()  # List[List[float]], one row per chunk
assert len(clip_text_vector) == len(text_chunks), "embeddings and chunks are out of sync"
data_text = [
    {
        "vector": vector,                          # vector field
        "type": "text",                            # modality of this row
        "filename": chunk.metadata["file_name"],   # source document (dynamic field)
    }
    for vector, chunk in zip(clip_text_vector, text_chunks)
]
vector_store.insert(collection_name=collection_name, data=data_text)

In [52]:
# Display the client object.
vector_store

<pymilvus.milvus_client.milvus_client.MilvusClient at 0x7fc829539150>

In [7]:
# Open a sample image from disk.
# NOTE(review): `a` is not used by any other cell visible in this notebook.
a = Image.open("/home/longquan/project/learning/LLM/images/image1.png")

In [18]:
# Inputs for an image/caption similarity check: one preprocessed image (with a
# leading batch dimension) and three tokenised candidate captions.
# NOTE: this rebinds `text`, which earlier held the list of chunk strings.
image = preprocess(Image.open("/home/longquan/project/learning/LLM/images/08fdd9b671b1cf624289cc0fd7837b83.jpeg")).unsqueeze(0)
text = tokenizer(["a sunflower", "a dog", "a flower"])

In [19]:
# Check the shapes of the image batch and the token batch.
image.shape, text.shape

(torch.Size([1, 3, 224, 224]), torch.Size([3, 77]))

In [20]:
# Score the image against each caption.
# NOTE(review): autocast targets CUDA, but no cell visible here moves the model
# or the inputs to a GPU; confirm this is intended.
with torch.no_grad(), torch.autocast("cuda"):
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    # Scale both sets of features to unit length so the dot product below is a
    # cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Similarities are multiplied by 100 before the softmax over the captions.
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)  # one probability per caption; the recorded run gave roughly [[0.991, 0.000, 0.009]]

Label probs: tensor([[9.9093e-01, 1.8791e-07, 9.0733e-03]])


In [23]:
# Check the shapes of the image and text embeddings.
image_features.shape, text_features.shape

(torch.Size([1, 512]), torch.Size([3, 512]))