In [1]:
import pandas as pd 
from PIL import Image 
import matplotlib.pyplot as plt 
import os 
import json 


with open("./data/query.json", "r") as f:
    query = json.load(f)
    df_query = pd.DataFrame(query)
df_train = pd.read_csv("./data/train_annotation.csv", sep="\t")
print(f"Shape of df_query: {df_query.shape}")
display(df_query.head(2))
print(f"Shape of df_train: {df_train.shape}")
display(df_train.head(2))

Shape of df_query: (1497, 3)


Unnamed: 0,question,related_image,answer
0,请对给定的图片进行描述。,vwsscflkvakdictzacfx.jpg,
1,这款运动裤是什么材质做的？,jjxjzgkbrfizjwfngwis.jpg,


Shape of df_train: (12768, 2)


Unnamed: 0,image,text
0,scqxwrymypdzdefummyj.jpg,无拘2019女夏新款衬衫裙夏装格纹收腰气质显瘦蕾丝腰带衬衫连衣裙
1,chvgdtmndrqwfkabrgoh.jpg,2019夏季新款高端气质不对称肩带chic修身显瘦日常V领连衣裙女潮


In [2]:
from transformers import ChineseCLIPProcessor, ChineseCLIPModel
import os
import torch 


device = "cuda" if torch.cuda.is_available() else "cpu"
# 设置镜像端点
os.environ["HF_ENDPOINT"] = "https://huggingface.co"
os.environ["TRANSFORMERS_CACHE"] = "hf-mirror"
clip_model_path = "/root/onethingai-tmp/models--OFA-Sys--chinese-clip-vit-huge-patch14/snapshots/503e16b560aff94c1922f13a86a7693d36957a4f"
model = ChineseCLIPModel.from_pretrained(clip_model_path).to(device)
processor = ChineseCLIPProcessor.from_pretrained(clip_model_path)

2024-08-31 22:54:20.959074: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-31 22:54:20.986268: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-31 22:54:21.014974: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-31 22:54:21.023744: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-31 22:54:21.050380: I tensorflow/core/platform/cpu_feature_guar

In [3]:
from peft import get_peft_model, LoraConfig
import torch 
import transformers


# 只微调qkv
target_modules = []
for i in range(24):
    target_modules.append(f"text_model.encoder.layer.{i}.attention.self.query")
    target_modules.append(f"text_model.encoder.layer.{i}.attention.self.key")
    target_modules.append(f"text_model.encoder.layer.{i}.attention.self.value")
    
for i in range(32):
    target_modules.append(f"vision_model.emcoder.layers.{i}.self_attn.k_proj")
    target_modules.append(f"vision_model.emcoder.layers.{i}.self_attn.v_proj")
    target_modules.append(f"vision_model.emcoder.layers.{i}.self_attn.q_proj")

# LoRA配置
lora_config = LoraConfig(
    r=64,
    lora_alpha=96,
    lora_dropout=0.1,
    bias="none",
    target_modules=target_modules
)

# 将 LoRA 应用于模型
lora_model = get_peft_model(model, lora_config)

In [4]:
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
from tqdm import tqdm 
import torch 
import numpy as np 

df_train = pd.read_csv("./data/train_annotation.csv", sep="\t")
# df_train, df_valid = train_test_split(df_train, test_size=1250, random_state=42)
img_path = "./data/image"
df_train["image_path"] = df_train["image"].apply(lambda x: os.path.join(img_path, x))
# df_valid["image_path"] = df_valid["image"].apply(lambda x: os.path.join(img_path, x))
# print(df_train.shape, df_valid.shape)
print(df_train.shape)
train_dataset = Dataset.from_pandas(df_train)
# valid_dataset = Dataset.from_pandas(df_valid)

(12768, 3)


In [5]:
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader
from peft import PeftModel
import logging

# 配置 logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("train_all.log"),
        logging.StreamHandler()
    ]
)

# 定义优化器
optimizer = torch.optim.AdamW(lora_model.parameters(), lr=4e-5)
# 定义批量
batch_size = 625
# 定义dataloader
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size)

# 训练模型的自定义循环
def train_epoch(model, dataloader, optimizer, device):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []
    progress_bar = tqdm(dataloader, desc="Training", leave=False, dynamic_ncols=True)
    
    for batch in progress_bar:
        # 将数据移动到GPU
        images = []
        for image_path in batch["image_path"]:
            try:
                images.append(Image.open(image_path).transpose(Image.FLIP_LEFT_RIGHT).convert("RGB"))
            except:
                images.append(Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8)).convert("RGB"))
        inputs = processor(text=batch["text"], images=images, return_tensors="pt", padding=True, truncation=True, max_length=52).to(device)
        outputs = model(**inputs)
        logits_per_image = outputs["logits_per_image"]
        logits_per_text = outputs["logits_per_text"]
        labels = torch.arange(logits_per_image.size(0), device=device)
        # 计算损失
        loss_text = F.cross_entropy(logits_per_text, labels)
        loss_image = F.cross_entropy(logits_per_image, labels)
        loss = (loss_text + loss_image) / 2
        # 反向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # 保存预测和标签
        preds = torch.argmax(logits_per_text, dim=1).detach().cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.detach().cpu().numpy())
        progress_bar.set_postfix(loss=loss.item())

    avg_loss = total_loss / len(dataloader)
    acc = accuracy_score(all_labels, all_preds)
    
    return avg_loss, acc

def evaluate_epoch(model, dataloader, device):
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    progress_bar = tqdm(dataloader, desc="Evaluating", leave=False, dynamic_ncols=True)
    
    with torch.no_grad():
        for batch in progress_bar:
            # 将数据移动到GPU
            images = []
            for image_path in batch["image_path"]:
                try:
                    images.append(Image.open(image_path).convert("RGB"))
                except:
                    images.append(Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8)).transpose(Image.FLIP_LEFT_RIGHT).convert("RGB"))
            inputs = processor(text=batch["text"], images=images, return_tensors="pt", padding=True, truncation=True, max_length=52).to(device)
            outputs = model(**inputs)
            logits_per_image = outputs["logits_per_image"]
            logits_per_text = outputs["logits_per_text"]
            labels = torch.arange(logits_per_image.size(0), device=device)
            # 计算损失
            loss_text = F.cross_entropy(logits_per_text, labels)
            loss_image = F.cross_entropy(logits_per_image, labels)
            loss = (loss_text + loss_image) / 2
            total_loss += loss.item()
            # 保存预测和标签
            preds = torch.argmax(logits_per_text, dim=1).detach().cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.detach().cpu().numpy())
            progress_bar.set_postfix(loss=loss.item())
    
        avg_loss = total_loss / len(dataloader)
        acc = accuracy_score(all_labels, all_preds)
    
    return avg_loss, acc

# 设置Epoch
num_epochs = 10
# 设置设备为GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lora_model.to(device)
# logging.info(f"Epoch 0/{num_epochs}")
# valid_loss, valid_acc = evaluate_epoch(lora_model, valid_dataloader, device)
# logging.info(f"Eval Loss: {valid_loss}, Acc: {valid_acc}")
# train_loss, train_acc = evaluate_epoch(lora_model, train_dataloader, device)
# logging.info(f"Eval Loss: {train_loss}, Acc: {train_acc}")
# 训练循环
for epoch in range(num_epochs):
    logging.info(f"Epoch {epoch + 1}/{num_epochs}")
    # 训练一个epoch
    train_loss, train_acc = train_epoch(lora_model, train_dataloader, optimizer, device)
    logging.info(f"Training Loss: {train_loss}, Acc: {train_acc}")
    # # 在验证集上评估
    # valid_loss, valid_acc = evaluate_epoch(lora_model, valid_dataloader, device)
    # logging.info(f"Validation Loss: {valid_loss}, Acc: {valid_acc}")
    lora_model.save_pretrained(f"./adapter/adapter_{epoch+1}")

2024-08-31 22:54:24,856 [INFO] Epoch 1/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-08-31 23:02:25,122 [INFO] Training Loss: 0.8639091522920699, Acc: 0.7596334586466166
2024-08-31 23:02:25,251 [INFO] Epoch 2/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-08-31 23:10:24,095 [INFO] Training Loss: 0.7898721013750348, Acc: 0.7761591478696742
2024-08-31 23:10:24,214 [INFO] Epoch 3/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-08-31 23:18:20,991 [INFO] Training Loss: 0.7430265659377688, Acc: 0.7817982456140351
2024-08-31 23:18:21,105 [INFO] Epoch 4/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-08-31 23:26:18,980 [INFO] Training Loss: 0.7115312275432405, Acc: 0.7945645363408521
2024-08-31 23:26:19,104 [INFO] Epoch 5/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-08-31 23:34:15,632 [INFO] Training Loss: 0.6880423426628113, Acc: 0.7950344611528822
2024-08-31 23:34:15,768 [INFO] Epoch 6/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-08-31 23:42:14,506 [INFO] Training Loss: 0.6610280615942818, Acc: 0.8046679197994987
2024-08-31 23:42:14,628 [INFO] Epoch 7/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-08-31 23:50:11,960 [INFO] Training Loss: 0.6421245989345369, Acc: 0.8049812030075187
2024-08-31 23:50:12,076 [INFO] Epoch 8/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-08-31 23:58:11,826 [INFO] Training Loss: 0.6175007195699782, Acc: 0.8122650375939849
2024-08-31 23:58:11,941 [INFO] Epoch 9/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-09-01 00:06:10,692 [INFO] Training Loss: 0.596953272819519, Acc: 0.8176691729323309
2024-09-01 00:06:10,813 [INFO] Epoch 10/10


Training:   0%|          | 0/21 [00:00<?, ?it/s]

  return self.preprocess(images, **kwargs)
2024-09-01 00:14:10,262 [INFO] Training Loss: 0.5897957909674871, Acc: 0.818609022556391


In [6]:
torch.save(lora_model, "CLIP_LoRA_625_10.pth")